diff options
| author | Csaba Henk <csaba@gluster.com> | 2010-10-26 04:00:29 +0000 | 
|---|---|---|
| committer | Anand V. Avati <avati@dev.gluster.com> | 2010-10-26 23:56:12 -0700 | 
| commit | db94ed06a688fb596aba4deafdf59a5af2fd6bbe (patch) | |
| tree | 84303f0d59270c75ff1e4ad1df1e3466b5cadde8 /libglusterfs/src/trie.c | |
| parent | 9f14b0a0ef26b6d41b61222dcf34fe7cdf46cb46 (diff) | |
libglusterfs, glusterfsd: add shortname resolution + optname hinting support to VOLUME SET
Trie code used for hinting is contributed by Avati.
Signed-off-by: Csaba Henk <csaba@lowlife.hu>
Signed-off-by: Anand V. Avati <avati@dev.gluster.com>
BUG: 1750 (clean up volgen)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=1750
Diffstat (limited to 'libglusterfs/src/trie.c')
| -rw-r--r-- | libglusterfs/src/trie.c | 397 | 
1 files changed, 397 insertions, 0 deletions
diff --git a/libglusterfs/src/trie.c b/libglusterfs/src/trie.c new file mode 100644 index 000000000..2501e71a5 --- /dev/null +++ b/libglusterfs/src/trie.c @@ -0,0 +1,397 @@ +/* +  Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com> +  This file is part of GlusterFS. + +  GlusterFS is free software; you can redistribute it and/or modify +  it under the terms of the GNU Affero General Public License as published +  by the Free Software Foundation; either version 3 of the License, +  or (at your option) any later version. + +  GlusterFS is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +  Affero General Public License for more details. + +  You should have received a copy of the GNU Affero General Public License +  along with this program.  If not, see +  <http://www.gnu.org/licenses/>. +*/ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <ctype.h> + +#include "common-utils.h" +#include "trie-mem-types.h" +#include "trie.h" + +#define DISTANCE_EDIT 1 +#define DISTANCE_INS  1 +#define DISTANCE_DEL  1 + + +struct trienode { +        char             id; +        char             eow; +        int              depth; +        void            *data; +        struct trie     *trie; +        struct trienode *parent; +        struct trienode *subnodes[255]; +}; + +struct trie { +        struct trienode   root; +        int               nodecnt; +        size_t            len; +}; + + +trie_t * +trie_new () +{ +        trie_t *trie = NULL; + +        trie = GF_CALLOC (1, sizeof (*trie),  gf_trie_mt_trie); +        if (!trie) +                return NULL; + +        trie->root.trie = trie; + +        return trie; +} + + +static trienode_t * +trie_subnode (trienode_t *node, int id) +{ +        trienode_t *subnode = NULL; + +        subnode = node->subnodes[id]; +        if (!subnode) { +                subnode = GF_CALLOC (1, sizeof (*subnode), gf_trie_mt_node); +                if (!subnode) +                        return NULL; + +                subnode->id        = id; +                subnode->depth     = node->depth + 1; +                node->subnodes[id] = subnode; +                subnode->parent    = node; +                subnode->trie      = node->trie; +                node->trie->nodecnt++; +        } + +        return subnode; +} + + +int +trie_add (trie_t *trie, const char *dword) +{ +        trienode_t *node = NULL; +        int              i = 0; +        char             id = 0; +        trienode_t *subnode = NULL; + +        node = &trie->root; + +        for (i = 0; i < strlen (dword); i++) { +                id = dword[i]; + +                subnode = trie_subnode (node, id); +                if (!subnode) +                        return -1; +                node = subnode; +        } + +        node->eow = 1; + +        return 0; +} + +static void +trienode_free (trienode_t *node) +{ +        trienode_t *trav = NULL; +        int              i = 0; + +        for (i = 0; i < 255; i++) { +                trav = node->subnodes[i]; + +                if (trav) +                        trienode_free (trav); +        } + +        if (node->data) +                GF_FREE (node->data); +        GF_FREE (node); +} + + +void +trie_destroy (trie_t *trie) +{ +        trienode_free ((trienode_t *)trie); +} + + +void +trie_destroy_bynode (trienode_t *node) +{ +        trie_destroy (node->trie); +} + + +static int +trienode_walk (trienode_t *node, int (*fn)(trienode_t *node, void *data), +               void *data, int eowonly) +{ +        trienode_t *trav = NULL; +        int              i = 0; +        int              cret = 0; +        int              ret = 0; + +        if (!eowonly || node->eow) +                ret = fn (node, data); + +        if (ret) +                goto out; + +        for (i = 0; i < 255; i++) { +                trav = node->subnodes[i]; +                if (!trav) +                        continue; + +                cret = trienode_walk (trav, fn, data, eowonly); +                if (cret < 0) { +                        ret = cret; +                        goto out; +                } +                ret += cret; +        } + +out: +        return ret; +} + + +static int +trie_walk (trie_t *trie, int (*fn)(trienode_t *node, void *data), +           void *data, int eowonly) +{ +        return trienode_walk (&trie->root, fn, data, eowonly); +} + + +static void +print_node (trienode_t *node, char **buf) +{ +        if (!node->parent) +                return; + +        if (node->parent) { +                print_node (node->parent, buf); +                *(*buf)++ = node->id; +        } +} + + +int +trienode_get_word (trienode_t *node, char **bufp) +{ +        char *buf = NULL; + +        buf = GF_CALLOC (1, node->depth + 1, gf_trie_mt_buf); +        if (!buf) +                return -1; +        *bufp = buf; + +        print_node (node, &buf); + +        return 0; +} + + +static int +calc_dist (trienode_t *node, void *data) +{ +        const char *word = NULL; +        int         i = 0; +        int        *row = NULL; +        int        *uprow = NULL; +        int         distu = 0; +        int         distl = 0; +        int         distul = 0; + +        word = data; + +        node->data = GF_CALLOC (node->trie->len, sizeof (int), gf_trie_mt_data); +        if (!node->data) +                return -1; +        row = node->data; + +        if (!node->parent) { +                for (i = 0; i < node->trie->len; i++) +                        row[i] = i+1; + +                return 0; +        } + +        uprow = node->parent->data; + +        distu = node->depth;          /* up node */ +        distul = node->parent->depth; /* up-left node */ + +        for (i = 0; i < node->trie->len; i++) { +                distl = uprow[i];     /* left node */ + +                if (word[i] == node->id) +                        row[i] = distul; +                else +                        row[i] = min ((distul + DISTANCE_EDIT), +                                      min ((distu + DISTANCE_DEL), +                                           (distl + DISTANCE_INS))); + +                distu  = row[i]; +                distul = distl; +        } + +        return 0; +} + + +int +trienode_get_dist (trienode_t *node) +{ +        int *row = NULL; + +        row = node->data; + +        return row[node->trie->len - 1]; +} + + +struct trienodevec_w { +        struct trienodevec *vec; +        const char *word; +}; + + +static void +trienodevec_clear (struct trienodevec *nodevec) +{ +        memset(nodevec->nodes, 0, sizeof (*nodevec->nodes) * nodevec->cnt); +} + + +static int +collect_closest (trienode_t *node, void *data) +{ +        struct trienodevec_w *nodevec_w = NULL; +        struct trienodevec *nodevec = NULL; +        int dist = 0; +        int i = 0; + +        nodevec_w = data; +        nodevec = nodevec_w->vec; + +        if (calc_dist (node, (void *)nodevec_w->word)) +                return -1; + +        if (!node->eow || !nodevec->cnt) +                return 0; + +        dist = trienode_get_dist (node); + +        /* +         * I thought that when descending further after some dictionary word dw, +         * if we see that child's distance is bigger than it was for dw, then we +         * can prune this branch, as it can contain only worse nodes. +         * +         * This conjecture fails, see eg: +         * +         * d("AB", "B") = 1; +         * d("AB", "BA") = 2; +         * d("AB", "BAB") = 1; +         * +         * -- if both "B" and "BAB" are in dict., then pruning at "BA" * would +         * miss "BAB". +         * +         * (example courtesy of Richard Bann <richardbann at gmail.com>) + +        if (node->parent->eow && dist > trienode_get_dist (node->parent)) +                return 1; + +         */ + +        if (nodevec->nodes[0] && +            dist < trienode_get_dist (nodevec->nodes[0])) { +                /* improving over the findings so far */ +                trienodevec_clear (nodevec); +                nodevec->nodes[0] = node; +        } else if (!nodevec->nodes[0] || +                   dist == trienode_get_dist (nodevec->nodes[0])) { +                /* as good as the best so far, add if there is free space */ +                for (i = 0; i < nodevec->cnt; i++) { +                        if (!nodevec->nodes[i]) { +                                nodevec->nodes[i] = node; +                                break; +                        } +                } +        } + +        return 0; +} + + +int +trie_measure (trie_t *trie, const char *word, trienode_t **nodes, +              int nodecnt) +{ +        struct trienodevec nodevec = {0,}; + +        nodevec.nodes = nodes; +        nodevec.cnt = nodecnt; + +        return trie_measure_vec (trie, word, &nodevec); +} + + +int +trie_measure_vec (trie_t *trie, const char *word, struct trienodevec *nodevec) +{ +        struct trienodevec_w nodevec_w = {0,}; +        int ret = 0; + +        trie->len = strlen (word); + +        trienodevec_clear (nodevec); +        nodevec_w.vec = nodevec; +        nodevec_w.word = word; + +        ret = trie_walk (trie, collect_closest, &nodevec_w, 0); +        if (ret > 0) +                ret = 0; + +        return ret; +} + + +static int +trienode_reset (trienode_t *node, void *data) +{ +        if (node->data) +                GF_FREE (node->data); + +        return 0; +} + + +void +trie_reset_search (trie_t *trie) +{ +        trie->len = 0; + +        trie_walk (trie, trienode_reset, NULL, 0); +}  | 
