diff options
Diffstat (limited to 'libglusterfs/src/trie.c')
| -rw-r--r-- | libglusterfs/src/trie.c | 366 |
1 files changed, 366 insertions, 0 deletions
diff --git a/libglusterfs/src/trie.c b/libglusterfs/src/trie.c new file mode 100644 index 00000000000..809550b864c --- /dev/null +++ b/libglusterfs/src/trie.c @@ -0,0 +1,366 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <stdio.h> +#include <string.h> + +#include "glusterfs/common-utils.h" +#include "glusterfs/trie.h" + +#define DISTANCE_EDIT 1 +#define DISTANCE_INS 1 +#define DISTANCE_DEL 1 + +struct trienode { + char id; + char eow; + int depth; + void *data; + struct trie *trie; + struct trienode *parent; + struct trienode *subnodes[255]; +}; + +struct trie { + struct trienode root; + int nodecnt; + size_t len; +}; + +trie_t * +trie_new() +{ + trie_t *trie = NULL; + + trie = GF_CALLOC(1, sizeof(*trie), gf_common_mt_trie_trie); + if (!trie) + return NULL; + + trie->root.trie = trie; + + return trie; +} + +static trienode_t * +trie_subnode(trienode_t *node, int id) +{ + trienode_t *subnode = NULL; + + subnode = node->subnodes[id]; + if (!subnode) { + subnode = GF_CALLOC(1, sizeof(*subnode), gf_common_mt_trie_node); + if (!subnode) + return NULL; + + subnode->id = id; + subnode->depth = node->depth + 1; + node->subnodes[id] = subnode; + subnode->parent = node; + subnode->trie = node->trie; + node->trie->nodecnt++; + } + + return subnode; +} + +int +trie_add(trie_t *trie, const char *dword) +{ + trienode_t *node = NULL; + int i = 0; + char id = 0; + trienode_t *subnode = NULL; + + node = &trie->root; + + for (i = 0; i < strlen(dword); i++) { + id = dword[i]; + + subnode = trie_subnode(node, id); + if (!subnode) + return -1; + node = subnode; + } + + node->eow = 1; + + return 0; +} + +static void +trienode_free(trienode_t *node) +{ + trienode_t *trav = NULL; + int i = 0; + + for (i = 0; i < 255; i++) { + trav = node->subnodes[i]; + + if (trav) + trienode_free(trav); + } + + GF_FREE(node->data); + GF_FREE(node); +} + +void +trie_destroy(trie_t *trie) +{ + trienode_free((trienode_t *)trie); +} + +void +trie_destroy_bynode(trienode_t *node) +{ + trie_destroy(node->trie); +} + +static int +trienode_walk(trienode_t *node, int (*fn)(trienode_t *node, void *data), + void *data, int eowonly) +{ + trienode_t *trav = NULL; + int i = 0; + int cret = 0; + int ret = 0; + + if (!eowonly || node->eow) + ret = fn(node, data); + + if (ret) + goto out; + + for (i = 0; i < 255; i++) { + trav = node->subnodes[i]; + if (!trav) + continue; + + cret = trienode_walk(trav, fn, data, eowonly); + if (cret < 0) { + ret = cret; + goto out; + } + ret += cret; + } + +out: + return ret; +} + +static int +trie_walk(trie_t *trie, int (*fn)(trienode_t *node, void *data), void *data, + int eowonly) +{ + return trienode_walk(&trie->root, fn, data, eowonly); +} + +static void +print_node(trienode_t *node, char **buf) +{ + if (!node->parent) + return; + + if (node->parent) { + print_node(node->parent, buf); + *(*buf)++ = node->id; + } +} + +int +trienode_get_word(trienode_t *node, char **bufp) +{ + char *buf = NULL; + + buf = GF_CALLOC(1, node->depth + 1, gf_common_mt_trie_buf); + if (!buf) + return -1; + *bufp = buf; + + print_node(node, &buf); + + return 0; +} + +static int +calc_dist(trienode_t *node, void *data) +{ + const char *word = NULL; + int i = 0; + int *row = NULL; + int *uprow = NULL; + int distu = 0; + int distl = 0; + int distul = 0; + + word = data; + + node->data = GF_CALLOC(node->trie->len, sizeof(int), + gf_common_mt_trie_data); + if (!node->data) + return -1; + row = node->data; + + if (!node->parent) { + for (i = 0; i < node->trie->len; i++) + row[i] = i + 1; + + return 0; + } + + uprow = node->parent->data; + + distu = node->depth; /* up node */ + distul = node->parent->depth; /* up-left node */ + + for (i = 0; i < node->trie->len; i++) { + distl = uprow[i]; /* left node */ + + if (word[i] == node->id) + row[i] = distul; + else + row[i] = min((distul + DISTANCE_EDIT), + min((distu + DISTANCE_DEL), (distl + DISTANCE_INS))); + + distu = row[i]; + distul = distl; + } + + return 0; +} + +int +trienode_get_dist(trienode_t *node) +{ + int *row = NULL; + + row = node->data; + + return row[node->trie->len - 1]; +} + +struct trienodevec_w { + struct trienodevec *vec; + const char *word; +}; + +static void +trienodevec_clear(struct trienodevec *nodevec) +{ + memset(nodevec->nodes, 0, sizeof(*nodevec->nodes) * nodevec->cnt); +} + +static int +collect_closest(trienode_t *node, void *data) +{ + struct trienodevec_w *nodevec_w = NULL; + struct trienodevec *nodevec = NULL; + int dist = 0; + int i = 0; + + nodevec_w = data; + nodevec = nodevec_w->vec; + + if (calc_dist(node, (void *)nodevec_w->word)) + return -1; + + if (!node->eow || !nodevec->cnt) + return 0; + + dist = trienode_get_dist(node); + + /* + * I thought that when descending further after some dictionary word dw, + * if we see that child's distance is bigger than it was for dw, then we + * can prune this branch, as it can contain only worse nodes. + * + * This conjecture fails, see eg: + * + * d("AB", "B") = 1; + * d("AB", "BA") = 2; + * d("AB", "BAB") = 1; + * + * -- if both "B" and "BAB" are in dict., then pruning at "BA" * would + * miss "BAB". + * + * (example courtesy of Richard Bann <richardbann at gmail.com>) + + if (node->parent->eow && dist > trienode_get_dist (node->parent)) + return 1; + + */ + + if (nodevec->nodes[0] && dist < trienode_get_dist(nodevec->nodes[0])) { + /* improving over the findings so far */ + trienodevec_clear(nodevec); + nodevec->nodes[0] = node; + } else if (!nodevec->nodes[0] || + dist == trienode_get_dist(nodevec->nodes[0])) { + /* as good as the best so far, add if there is free space */ + for (i = 0; i < nodevec->cnt; i++) { + if (!nodevec->nodes[i]) { + nodevec->nodes[i] = node; + break; + } + } + } + + return 0; +} + +int +trie_measure(trie_t *trie, const char *word, trienode_t **nodes, int nodecnt) +{ + struct trienodevec nodevec = { + 0, + }; + + nodevec.nodes = nodes; + nodevec.cnt = nodecnt; + + return trie_measure_vec(trie, word, &nodevec); +} + +int +trie_measure_vec(trie_t *trie, const char *word, struct trienodevec *nodevec) +{ + struct trienodevec_w nodevec_w = { + 0, + }; + int ret = 0; + + trie->len = strlen(word); + + trienodevec_clear(nodevec); + nodevec_w.vec = nodevec; + nodevec_w.word = word; + + ret = trie_walk(trie, collect_closest, &nodevec_w, 0); + if (ret > 0) + ret = 0; + + return ret; +} + +static int +trienode_reset(trienode_t *node, void *data) +{ + GF_FREE(node->data); + + return 0; +} + +void +trie_reset_search(trie_t *trie) +{ + trie->len = 0; + + trie_walk(trie, trienode_reset, NULL, 0); +} |
