diff options
Diffstat (limited to 'libglusterfs/src')
| -rw-r--r-- | libglusterfs/src/Makefile.am | 4 | ||||
| -rw-r--r-- | libglusterfs/src/trie-mem-types.h | 34 | ||||
| -rw-r--r-- | libglusterfs/src/trie.c | 397 | ||||
| -rw-r--r-- | libglusterfs/src/trie.h | 60 | 
4 files changed, 493 insertions, 2 deletions
diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am index 71a088d98eb..619ccf158a1 100644 --- a/libglusterfs/src/Makefile.am +++ b/libglusterfs/src/Makefile.am @@ -6,9 +6,9 @@ libglusterfs_la_LIBADD = @LEXLIB@  lib_LTLIBRARIES = libglusterfs.la -libglusterfs_la_SOURCES = dict.c graph.lex.c y.tab.c xlator.c logging.c  hashfn.c defaults.c common-utils.c timer.c inode.c call-stub.c compat.c fd.c compat-errno.c event.c mem-pool.c gf-dirent.c syscall.c iobuf.c globals.c statedump.c stack.c checksum.c $(CONTRIBDIR)/md5/md5.c $(CONTRIBDIR)/rbtree/rb.c rbthash.c latency.c graph.c $(CONTRIBDIR)/uuid/clear.c $(CONTRIBDIR)/uuid/copy.c $(CONTRIBDIR)/uuid/gen_uuid.c $(CONTRIBDIR)/uuid/pack.c $(CONTRIBDIR)/uuid/tst_uuid.c $(CONTRIBDIR)/uuid/parse.c $(CONTRIBDIR)/uuid/unparse.c $(CONTRIBDIR)/uuid/uuid_time.c $(CONTRIBDIR)/uuid/compare.c $(CONTRIBDIR)/uuid/isnull.c $(CONTRIBDIR)/uuid/unpack.c syncop.c graph-print.c +libglusterfs_la_SOURCES = dict.c graph.lex.c y.tab.c xlator.c logging.c  hashfn.c defaults.c common-utils.c timer.c inode.c call-stub.c compat.c fd.c compat-errno.c event.c mem-pool.c gf-dirent.c syscall.c iobuf.c globals.c statedump.c stack.c checksum.c $(CONTRIBDIR)/md5/md5.c $(CONTRIBDIR)/rbtree/rb.c rbthash.c latency.c graph.c $(CONTRIBDIR)/uuid/clear.c $(CONTRIBDIR)/uuid/copy.c $(CONTRIBDIR)/uuid/gen_uuid.c $(CONTRIBDIR)/uuid/pack.c $(CONTRIBDIR)/uuid/tst_uuid.c $(CONTRIBDIR)/uuid/parse.c $(CONTRIBDIR)/uuid/unparse.c $(CONTRIBDIR)/uuid/uuid_time.c $(CONTRIBDIR)/uuid/compare.c $(CONTRIBDIR)/uuid/isnull.c $(CONTRIBDIR)/uuid/unpack.c syncop.c graph-print.c trie.c -noinst_HEADERS = common-utils.h defaults.h dict.h glusterfs.h hashfn.h logging.h  xlator.h  stack.h timer.h list.h inode.h call-stub.h compat.h fd.h revision.h compat-errno.h event.h mem-pool.h byte-order.h gf-dirent.h locking.h syscall.h iobuf.h globals.h statedump.h checksum.h $(CONTRIBDIR)/md5/md5.h $(CONTRIBDIR)/rbtree/rb.h rbthash.h iatt.h latency.h mem-types.h $(CONTRIBDIR)/uuid/uuidd.h $(CONTRIBDIR)/uuid/uuid.h $(CONTRIBDIR)/uuid/uuidP.h $(CONTRIBDIR)/uuid/uuid_types.h syncop.h graph-utils.h graph-mem-types.h +noinst_HEADERS = common-utils.h defaults.h dict.h glusterfs.h hashfn.h logging.h  xlator.h  stack.h timer.h list.h inode.h call-stub.h compat.h fd.h revision.h compat-errno.h event.h mem-pool.h byte-order.h gf-dirent.h locking.h syscall.h iobuf.h globals.h statedump.h checksum.h $(CONTRIBDIR)/md5/md5.h $(CONTRIBDIR)/rbtree/rb.h rbthash.h iatt.h latency.h mem-types.h $(CONTRIBDIR)/uuid/uuidd.h $(CONTRIBDIR)/uuid/uuid.h $(CONTRIBDIR)/uuid/uuidP.h $(CONTRIBDIR)/uuid/uuid_types.h syncop.h graph-utils.h graph-mem-types.h trie.h trie-mem-types.h  EXTRA_DIST = graph.l graph.y diff --git a/libglusterfs/src/trie-mem-types.h b/libglusterfs/src/trie-mem-types.h new file mode 100644 index 00000000000..2b21f67e1a8 --- /dev/null +++ b/libglusterfs/src/trie-mem-types.h @@ -0,0 +1,34 @@ +/* +   Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com> +   This file is part of GlusterFS. + +   GlusterFS is free software; you can redistribute it and/or modify +   it under the terms of the GNU Affero General Public License as published +   by the Free Software Foundation; either version 3 of the License, +   or (at your option) any later version. + +   GlusterFS is distributed in the hope that it will be useful, but +   WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   Affero General Public License for more details. + +   You should have received a copy of the GNU Affero General Public License +   along with this program.  If not, see +   <http://www.gnu.org/licenses/>. +*/ + +#ifndef __TRIE_MEM_TYPES_H__ +#define __TRIE_MEM_TYPES_H__ + +#include "mem-types.h" + +#define GF_MEM_TYPE_START (gf_common_mt_end + 1) + +enum gf_trie_mem_types_ { +        gf_trie_mt_trie = GF_MEM_TYPE_START, +        gf_trie_mt_data, +        gf_trie_mt_node, +        gf_trie_mt_buf, +        gf_trie_mt_end, +}; +#endif diff --git a/libglusterfs/src/trie.c b/libglusterfs/src/trie.c new file mode 100644 index 00000000000..2501e71a540 --- /dev/null +++ b/libglusterfs/src/trie.c @@ -0,0 +1,397 @@ +/* +  Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com> +  This file is part of GlusterFS. + +  GlusterFS is free software; you can redistribute it and/or modify +  it under the terms of the GNU Affero General Public License as published +  by the Free Software Foundation; either version 3 of the License, +  or (at your option) any later version. + +  GlusterFS is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +  Affero General Public License for more details. + +  You should have received a copy of the GNU Affero General Public License +  along with this program.  If not, see +  <http://www.gnu.org/licenses/>. +*/ + +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <ctype.h> + +#include "common-utils.h" +#include "trie-mem-types.h" +#include "trie.h" + +#define DISTANCE_EDIT 1 +#define DISTANCE_INS  1 +#define DISTANCE_DEL  1 + + +struct trienode { +        char             id; +        char             eow; +        int              depth; +        void            *data; +        struct trie     *trie; +        struct trienode *parent; +        struct trienode *subnodes[255]; +}; + +struct trie { +        struct trienode   root; +        int               nodecnt; +        size_t            len; +}; + + +trie_t * +trie_new () +{ +        trie_t *trie = NULL; + +        trie = GF_CALLOC (1, sizeof (*trie),  gf_trie_mt_trie); +        if (!trie) +                return NULL; + +        trie->root.trie = trie; + +        return trie; +} + + +static trienode_t * +trie_subnode (trienode_t *node, int id) +{ +        trienode_t *subnode = NULL; + +        subnode = node->subnodes[id]; +        if (!subnode) { +                subnode = GF_CALLOC (1, sizeof (*subnode), gf_trie_mt_node); +                if (!subnode) +                        return NULL; + +                subnode->id        = id; +                subnode->depth     = node->depth + 1; +                node->subnodes[id] = subnode; +                subnode->parent    = node; +                subnode->trie      = node->trie; +                node->trie->nodecnt++; +        } + +        return subnode; +} + + +int +trie_add (trie_t *trie, const char *dword) +{ +        trienode_t *node = NULL; +        int              i = 0; +        char             id = 0; +        trienode_t *subnode = NULL; + +        node = &trie->root; + +        for (i = 0; i < strlen (dword); i++) { +                id = dword[i]; + +                subnode = trie_subnode (node, id); +                if (!subnode) +                        return -1; +                node = subnode; +        } + +        node->eow = 1; + +        return 0; +} + +static void +trienode_free (trienode_t *node) +{ +        trienode_t *trav = NULL; +        int              i = 0; + +        for (i = 0; i < 255; i++) { +                trav = node->subnodes[i]; + +                if (trav) +                        trienode_free (trav); +        } + +        if (node->data) +                GF_FREE (node->data); +        GF_FREE (node); +} + + +void +trie_destroy (trie_t *trie) +{ +        trienode_free ((trienode_t *)trie); +} + + +void +trie_destroy_bynode (trienode_t *node) +{ +        trie_destroy (node->trie); +} + + +static int +trienode_walk (trienode_t *node, int (*fn)(trienode_t *node, void *data), +               void *data, int eowonly) +{ +        trienode_t *trav = NULL; +        int              i = 0; +        int              cret = 0; +        int              ret = 0; + +        if (!eowonly || node->eow) +                ret = fn (node, data); + +        if (ret) +                goto out; + +        for (i = 0; i < 255; i++) { +                trav = node->subnodes[i]; +                if (!trav) +                        continue; + +                cret = trienode_walk (trav, fn, data, eowonly); +                if (cret < 0) { +                        ret = cret; +                        goto out; +                } +                ret += cret; +        } + +out: +        return ret; +} + + +static int +trie_walk (trie_t *trie, int (*fn)(trienode_t *node, void *data), +           void *data, int eowonly) +{ +        return trienode_walk (&trie->root, fn, data, eowonly); +} + + +static void +print_node (trienode_t *node, char **buf) +{ +        if (!node->parent) +                return; + +        if (node->parent) { +                print_node (node->parent, buf); +                *(*buf)++ = node->id; +        } +} + + +int +trienode_get_word (trienode_t *node, char **bufp) +{ +        char *buf = NULL; + +        buf = GF_CALLOC (1, node->depth + 1, gf_trie_mt_buf); +        if (!buf) +                return -1; +        *bufp = buf; + +        print_node (node, &buf); + +        return 0; +} + + +static int +calc_dist (trienode_t *node, void *data) +{ +        const char *word = NULL; +        int         i = 0; +        int        *row = NULL; +        int        *uprow = NULL; +        int         distu = 0; +        int         distl = 0; +        int         distul = 0; + +        word = data; + +        node->data = GF_CALLOC (node->trie->len, sizeof (int), gf_trie_mt_data); +        if (!node->data) +                return -1; +        row = node->data; + +        if (!node->parent) { +                for (i = 0; i < node->trie->len; i++) +                        row[i] = i+1; + +                return 0; +        } + +        uprow = node->parent->data; + +        distu = node->depth;          /* up node */ +        distul = node->parent->depth; /* up-left node */ + +        for (i = 0; i < node->trie->len; i++) { +                distl = uprow[i];     /* left node */ + +                if (word[i] == node->id) +                        row[i] = distul; +                else +                        row[i] = min ((distul + DISTANCE_EDIT), +                                      min ((distu + DISTANCE_DEL), +                                           (distl + DISTANCE_INS))); + +                distu  = row[i]; +                distul = distl; +        } + +        return 0; +} + + +int +trienode_get_dist (trienode_t *node) +{ +        int *row = NULL; + +        row = node->data; + +        return row[node->trie->len - 1]; +} + + +struct trienodevec_w { +        struct trienodevec *vec; +        const char *word; +}; + + +static void +trienodevec_clear (struct trienodevec *nodevec) +{ +        memset(nodevec->nodes, 0, sizeof (*nodevec->nodes) * nodevec->cnt); +} + + +static int +collect_closest (trienode_t *node, void *data) +{ +        struct trienodevec_w *nodevec_w = NULL; +        struct trienodevec *nodevec = NULL; +        int dist = 0; +        int i = 0; + +        nodevec_w = data; +        nodevec = nodevec_w->vec; + +        if (calc_dist (node, (void *)nodevec_w->word)) +                return -1; + +        if (!node->eow || !nodevec->cnt) +                return 0; + +        dist = trienode_get_dist (node); + +        /* +         * I thought that when descending further after some dictionary word dw, +         * if we see that child's distance is bigger than it was for dw, then we +         * can prune this branch, as it can contain only worse nodes. +         * +         * This conjecture fails, see eg: +         * +         * d("AB", "B") = 1; +         * d("AB", "BA") = 2; +         * d("AB", "BAB") = 1; +         * +         * -- if both "B" and "BAB" are in dict., then pruning at "BA" * would +         * miss "BAB". +         * +         * (example courtesy of Richard Bann <richardbann at gmail.com>) + +        if (node->parent->eow && dist > trienode_get_dist (node->parent)) +                return 1; + +         */ + +        if (nodevec->nodes[0] && +            dist < trienode_get_dist (nodevec->nodes[0])) { +                /* improving over the findings so far */ +                trienodevec_clear (nodevec); +                nodevec->nodes[0] = node; +        } else if (!nodevec->nodes[0] || +                   dist == trienode_get_dist (nodevec->nodes[0])) { +                /* as good as the best so far, add if there is free space */ +                for (i = 0; i < nodevec->cnt; i++) { +                        if (!nodevec->nodes[i]) { +                                nodevec->nodes[i] = node; +                                break; +                        } +                } +        } + +        return 0; +} + + +int +trie_measure (trie_t *trie, const char *word, trienode_t **nodes, +              int nodecnt) +{ +        struct trienodevec nodevec = {0,}; + +        nodevec.nodes = nodes; +        nodevec.cnt = nodecnt; + +        return trie_measure_vec (trie, word, &nodevec); +} + + +int +trie_measure_vec (trie_t *trie, const char *word, struct trienodevec *nodevec) +{ +        struct trienodevec_w nodevec_w = {0,}; +        int ret = 0; + +        trie->len = strlen (word); + +        trienodevec_clear (nodevec); +        nodevec_w.vec = nodevec; +        nodevec_w.word = word; + +        ret = trie_walk (trie, collect_closest, &nodevec_w, 0); +        if (ret > 0) +                ret = 0; + +        return ret; +} + + +static int +trienode_reset (trienode_t *node, void *data) +{ +        if (node->data) +                GF_FREE (node->data); + +        return 0; +} + + +void +trie_reset_search (trie_t *trie) +{ +        trie->len = 0; + +        trie_walk (trie, trienode_reset, NULL, 0); +} diff --git a/libglusterfs/src/trie.h b/libglusterfs/src/trie.h new file mode 100644 index 00000000000..a670c2a277c --- /dev/null +++ b/libglusterfs/src/trie.h @@ -0,0 +1,60 @@ +/* +  Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com> +  This file is part of GlusterFS. + +  GlusterFS is free software; you can redistribute it and/or modify +  it under the terms of the GNU Affero General Public License as published +  by the Free Software Foundation; either version 3 of the License, +  or (at your option) any later version. + +  GlusterFS is distributed in the hope that it will be useful, but +  WITHOUT ANY WARRANTY; without even the implied warranty of +  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +  Affero General Public License for more details. + +  You should have received a copy of the GNU Affero General Public License +  along with this program.  If not, see +  <http://www.gnu.org/licenses/>. +*/ + +#ifndef _TRIE_H_ +#define _TRIE_H_ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +struct trienode; +typedef struct trienode trienode_t; + +struct trie; +typedef struct trie trie_t; + +struct trienodevec { +        trienode_t **nodes; +        unsigned cnt; +}; + + +trie_t *trie_new (); + +int trie_add (trie_t *trie, const char *word); + +void trie_destroy (trie_t *trie); + +void trie_destroy_bynode (trienode_t *node); + +int trie_measure (trie_t *trie, const char *word, trienode_t **nodes, +                  int nodecnt); + +int trie_measure_vec (trie_t *trie, const char *word, +                      struct trienodevec *nodevec); + +void trie_reset_search (trie_t *trie); + +int trienode_get_dist (trienode_t *node); + +int trienode_get_word (trienode_t *node, char **buf); + +#endif  | 
