diff options
Diffstat (limited to 'libglusterfs/src/trie.c')
-rw-r--r-- | libglusterfs/src/trie.c | 483 |
1 files changed, 232 insertions, 251 deletions
diff --git a/libglusterfs/src/trie.c b/libglusterfs/src/trie.c index f96bbebf6d3..4f01bcfe0da 100644 --- a/libglusterfs/src/trie.c +++ b/libglusterfs/src/trie.c @@ -17,371 +17,352 @@ #include "trie.h" #define DISTANCE_EDIT 1 -#define DISTANCE_INS 1 -#define DISTANCE_DEL 1 - +#define DISTANCE_INS 1 +#define DISTANCE_DEL 1 struct trienode { - char id; - char eow; - int depth; - void *data; - struct trie *trie; - struct trienode *parent; - struct trienode *subnodes[255]; + char id; + char eow; + int depth; + void *data; + struct trie *trie; + struct trienode *parent; + struct trienode *subnodes[255]; }; struct trie { - struct trienode root; - int nodecnt; - size_t len; + struct trienode root; + int nodecnt; + size_t len; }; - trie_t * -trie_new () +trie_new() { - trie_t *trie = NULL; + trie_t *trie = NULL; - trie = GF_CALLOC (1, sizeof (*trie), gf_common_mt_trie_trie); - if (!trie) - return NULL; + trie = GF_CALLOC(1, sizeof(*trie), gf_common_mt_trie_trie); + if (!trie) + return NULL; - trie->root.trie = trie; + trie->root.trie = trie; - return trie; + return trie; } - static trienode_t * -trie_subnode (trienode_t *node, int id) +trie_subnode(trienode_t *node, int id) { - trienode_t *subnode = NULL; - - subnode = node->subnodes[id]; - if (!subnode) { - subnode = GF_CALLOC (1, sizeof (*subnode), - gf_common_mt_trie_node); - if (!subnode) - return NULL; - - subnode->id = id; - subnode->depth = node->depth + 1; - node->subnodes[id] = subnode; - subnode->parent = node; - subnode->trie = node->trie; - node->trie->nodecnt++; - } - - return subnode; + trienode_t *subnode = NULL; + + subnode = node->subnodes[id]; + if (!subnode) { + subnode = GF_CALLOC(1, sizeof(*subnode), gf_common_mt_trie_node); + if (!subnode) + return NULL; + + subnode->id = id; + subnode->depth = node->depth + 1; + node->subnodes[id] = subnode; + subnode->parent = node; + subnode->trie = node->trie; + node->trie->nodecnt++; + } + + return subnode; } - int -trie_add (trie_t *trie, const char *dword) +trie_add(trie_t *trie, const char *dword) { - trienode_t *node = NULL; - int i = 0; - char id = 0; - trienode_t *subnode = NULL; + trienode_t *node = NULL; + int i = 0; + char id = 0; + trienode_t *subnode = NULL; - node = &trie->root; + node = &trie->root; - for (i = 0; i < strlen (dword); i++) { - id = dword[i]; + for (i = 0; i < strlen(dword); i++) { + id = dword[i]; - subnode = trie_subnode (node, id); - if (!subnode) - return -1; - node = subnode; - } + subnode = trie_subnode(node, id); + if (!subnode) + return -1; + node = subnode; + } - node->eow = 1; + node->eow = 1; - return 0; + return 0; } static void -trienode_free (trienode_t *node) +trienode_free(trienode_t *node) { - trienode_t *trav = NULL; - int i = 0; + trienode_t *trav = NULL; + int i = 0; - for (i = 0; i < 255; i++) { - trav = node->subnodes[i]; + for (i = 0; i < 255; i++) { + trav = node->subnodes[i]; - if (trav) - trienode_free (trav); - } + if (trav) + trienode_free(trav); + } - GF_FREE (node->data); - GF_FREE (node); + GF_FREE(node->data); + GF_FREE(node); } - void -trie_destroy (trie_t *trie) +trie_destroy(trie_t *trie) { - trienode_free ((trienode_t *)trie); + trienode_free((trienode_t *)trie); } - void -trie_destroy_bynode (trienode_t *node) +trie_destroy_bynode(trienode_t *node) { - trie_destroy (node->trie); + trie_destroy(node->trie); } - static int -trienode_walk (trienode_t *node, int (*fn)(trienode_t *node, void *data), - void *data, int eowonly) +trienode_walk(trienode_t *node, int (*fn)(trienode_t *node, void *data), + void *data, int eowonly) { - trienode_t *trav = NULL; - int i = 0; - int cret = 0; - int ret = 0; - - if (!eowonly || node->eow) - ret = fn (node, data); - - if (ret) - goto out; - - for (i = 0; i < 255; i++) { - trav = node->subnodes[i]; - if (!trav) - continue; - - cret = trienode_walk (trav, fn, data, eowonly); - if (cret < 0) { - ret = cret; - goto out; - } - ret += cret; + trienode_t *trav = NULL; + int i = 0; + int cret = 0; + int ret = 0; + + if (!eowonly || node->eow) + ret = fn(node, data); + + if (ret) + goto out; + + for (i = 0; i < 255; i++) { + trav = node->subnodes[i]; + if (!trav) + continue; + + cret = trienode_walk(trav, fn, data, eowonly); + if (cret < 0) { + ret = cret; + goto out; } + ret += cret; + } out: - return ret; + return ret; } - static int -trie_walk (trie_t *trie, int (*fn)(trienode_t *node, void *data), - void *data, int eowonly) +trie_walk(trie_t *trie, int (*fn)(trienode_t *node, void *data), void *data, + int eowonly) { - return trienode_walk (&trie->root, fn, data, eowonly); + return trienode_walk(&trie->root, fn, data, eowonly); } - static void -print_node (trienode_t *node, char **buf) +print_node(trienode_t *node, char **buf) { - if (!node->parent) - return; + if (!node->parent) + return; - if (node->parent) { - print_node (node->parent, buf); - *(*buf)++ = node->id; - } + if (node->parent) { + print_node(node->parent, buf); + *(*buf)++ = node->id; + } } - int -trienode_get_word (trienode_t *node, char **bufp) +trienode_get_word(trienode_t *node, char **bufp) { - char *buf = NULL; + char *buf = NULL; - buf = GF_CALLOC (1, node->depth + 1, gf_common_mt_trie_buf); - if (!buf) - return -1; - *bufp = buf; + buf = GF_CALLOC(1, node->depth + 1, gf_common_mt_trie_buf); + if (!buf) + return -1; + *bufp = buf; - print_node (node, &buf); + print_node(node, &buf); - return 0; + return 0; } - static int -calc_dist (trienode_t *node, void *data) +calc_dist(trienode_t *node, void *data) { - const char *word = NULL; - int i = 0; - int *row = NULL; - int *uprow = NULL; - int distu = 0; - int distl = 0; - int distul = 0; - - word = data; - - node->data = GF_CALLOC (node->trie->len, sizeof (int), - gf_common_mt_trie_data); - if (!node->data) - return -1; - row = node->data; - - if (!node->parent) { - for (i = 0; i < node->trie->len; i++) - row[i] = i+1; - - return 0; - } + const char *word = NULL; + int i = 0; + int *row = NULL; + int *uprow = NULL; + int distu = 0; + int distl = 0; + int distul = 0; + + word = data; + + node->data = GF_CALLOC(node->trie->len, sizeof(int), + gf_common_mt_trie_data); + if (!node->data) + return -1; + row = node->data; + + if (!node->parent) { + for (i = 0; i < node->trie->len; i++) + row[i] = i + 1; - uprow = node->parent->data; + return 0; + } - distu = node->depth; /* up node */ - distul = node->parent->depth; /* up-left node */ + uprow = node->parent->data; - for (i = 0; i < node->trie->len; i++) { - distl = uprow[i]; /* left node */ + distu = node->depth; /* up node */ + distul = node->parent->depth; /* up-left node */ - if (word[i] == node->id) - row[i] = distul; - else - row[i] = min ((distul + DISTANCE_EDIT), - min ((distu + DISTANCE_DEL), - (distl + DISTANCE_INS))); + for (i = 0; i < node->trie->len; i++) { + distl = uprow[i]; /* left node */ - distu = row[i]; - distul = distl; - } + if (word[i] == node->id) + row[i] = distul; + else + row[i] = min((distul + DISTANCE_EDIT), + min((distu + DISTANCE_DEL), (distl + DISTANCE_INS))); - return 0; -} + distu = row[i]; + distul = distl; + } + return 0; +} int -trienode_get_dist (trienode_t *node) +trienode_get_dist(trienode_t *node) { - int *row = NULL; + int *row = NULL; - row = node->data; + row = node->data; - return row[node->trie->len - 1]; + return row[node->trie->len - 1]; } - struct trienodevec_w { - struct trienodevec *vec; - const char *word; + struct trienodevec *vec; + const char *word; }; - static void -trienodevec_clear (struct trienodevec *nodevec) +trienodevec_clear(struct trienodevec *nodevec) { - memset(nodevec->nodes, 0, sizeof (*nodevec->nodes) * nodevec->cnt); + memset(nodevec->nodes, 0, sizeof(*nodevec->nodes) * nodevec->cnt); } - static int -collect_closest (trienode_t *node, void *data) +collect_closest(trienode_t *node, void *data) { - struct trienodevec_w *nodevec_w = NULL; - struct trienodevec *nodevec = NULL; - int dist = 0; - int i = 0; - - nodevec_w = data; - nodevec = nodevec_w->vec; - - if (calc_dist (node, (void *)nodevec_w->word)) - return -1; - - if (!node->eow || !nodevec->cnt) - return 0; - - dist = trienode_get_dist (node); - - /* - * I thought that when descending further after some dictionary word dw, - * if we see that child's distance is bigger than it was for dw, then we - * can prune this branch, as it can contain only worse nodes. - * - * This conjecture fails, see eg: - * - * d("AB", "B") = 1; - * d("AB", "BA") = 2; - * d("AB", "BAB") = 1; - * - * -- if both "B" and "BAB" are in dict., then pruning at "BA" * would - * miss "BAB". - * - * (example courtesy of Richard Bann <richardbann at gmail.com>) - - if (node->parent->eow && dist > trienode_get_dist (node->parent)) - return 1; - - */ - - if (nodevec->nodes[0] && - dist < trienode_get_dist (nodevec->nodes[0])) { - /* improving over the findings so far */ - trienodevec_clear (nodevec); - nodevec->nodes[0] = node; - } else if (!nodevec->nodes[0] || - dist == trienode_get_dist (nodevec->nodes[0])) { - /* as good as the best so far, add if there is free space */ - for (i = 0; i < nodevec->cnt; i++) { - if (!nodevec->nodes[i]) { - nodevec->nodes[i] = node; - break; - } - } - } + struct trienodevec_w *nodevec_w = NULL; + struct trienodevec *nodevec = NULL; + int dist = 0; + int i = 0; + + nodevec_w = data; + nodevec = nodevec_w->vec; + if (calc_dist(node, (void *)nodevec_w->word)) + return -1; + + if (!node->eow || !nodevec->cnt) return 0; -} + dist = trienode_get_dist(node); + + /* + * I thought that when descending further after some dictionary word dw, + * if we see that child's distance is bigger than it was for dw, then we + * can prune this branch, as it can contain only worse nodes. + * + * This conjecture fails, see eg: + * + * d("AB", "B") = 1; + * d("AB", "BA") = 2; + * d("AB", "BAB") = 1; + * + * -- if both "B" and "BAB" are in dict., then pruning at "BA" * would + * miss "BAB". + * + * (example courtesy of Richard Bann <richardbann at gmail.com>) + + if (node->parent->eow && dist > trienode_get_dist (node->parent)) + return 1; + + */ + + if (nodevec->nodes[0] && dist < trienode_get_dist(nodevec->nodes[0])) { + /* improving over the findings so far */ + trienodevec_clear(nodevec); + nodevec->nodes[0] = node; + } else if (!nodevec->nodes[0] || + dist == trienode_get_dist(nodevec->nodes[0])) { + /* as good as the best so far, add if there is free space */ + for (i = 0; i < nodevec->cnt; i++) { + if (!nodevec->nodes[i]) { + nodevec->nodes[i] = node; + break; + } + } + } + + return 0; +} int -trie_measure (trie_t *trie, const char *word, trienode_t **nodes, - int nodecnt) +trie_measure(trie_t *trie, const char *word, trienode_t **nodes, int nodecnt) { - struct trienodevec nodevec = {0,}; + struct trienodevec nodevec = { + 0, + }; - nodevec.nodes = nodes; - nodevec.cnt = nodecnt; + nodevec.nodes = nodes; + nodevec.cnt = nodecnt; - return trie_measure_vec (trie, word, &nodevec); + return trie_measure_vec(trie, word, &nodevec); } - int -trie_measure_vec (trie_t *trie, const char *word, struct trienodevec *nodevec) +trie_measure_vec(trie_t *trie, const char *word, struct trienodevec *nodevec) { - struct trienodevec_w nodevec_w = {0,}; - int ret = 0; + struct trienodevec_w nodevec_w = { + 0, + }; + int ret = 0; - trie->len = strlen (word); + trie->len = strlen(word); - trienodevec_clear (nodevec); - nodevec_w.vec = nodevec; - nodevec_w.word = word; + trienodevec_clear(nodevec); + nodevec_w.vec = nodevec; + nodevec_w.word = word; - ret = trie_walk (trie, collect_closest, &nodevec_w, 0); - if (ret > 0) - ret = 0; + ret = trie_walk(trie, collect_closest, &nodevec_w, 0); + if (ret > 0) + ret = 0; - return ret; + return ret; } - static int -trienode_reset (trienode_t *node, void *data) +trienode_reset(trienode_t *node, void *data) { - GF_FREE (node->data); + GF_FREE(node->data); - return 0; + return 0; } - void -trie_reset_search (trie_t *trie) +trie_reset_search(trie_t *trie) { - trie->len = 0; + trie->len = 0; - trie_walk (trie, trienode_reset, NULL, 0); + trie_walk(trie, trienode_reset, NULL, 0); } |