From db94ed06a688fb596aba4deafdf59a5af2fd6bbe Mon Sep 17 00:00:00 2001 From: Csaba Henk Date: Tue, 26 Oct 2010 04:00:29 +0000 Subject: libglusterfs, glusterfsd: add shortname resolution + optname hinting support to VOLUME SET Trie code used for hinting is contributed by Avati. Signed-off-by: Csaba Henk Signed-off-by: Anand V. Avati BUG: 1750 (clean up volgen) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=1750 --- libglusterfs/src/Makefile.am | 4 +- libglusterfs/src/trie-mem-types.h | 34 ++++ libglusterfs/src/trie.c | 397 ++++++++++++++++++++++++++++++++++++++ libglusterfs/src/trie.h | 60 ++++++ 4 files changed, 493 insertions(+), 2 deletions(-) create mode 100644 libglusterfs/src/trie-mem-types.h create mode 100644 libglusterfs/src/trie.c create mode 100644 libglusterfs/src/trie.h (limited to 'libglusterfs/src') diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am index 71a088d98..619ccf158 100644 --- a/libglusterfs/src/Makefile.am +++ b/libglusterfs/src/Makefile.am @@ -6,9 +6,9 @@ libglusterfs_la_LIBADD = @LEXLIB@ lib_LTLIBRARIES = libglusterfs.la -libglusterfs_la_SOURCES = dict.c graph.lex.c y.tab.c xlator.c logging.c hashfn.c defaults.c common-utils.c timer.c inode.c call-stub.c compat.c fd.c compat-errno.c event.c mem-pool.c gf-dirent.c syscall.c iobuf.c globals.c statedump.c stack.c checksum.c $(CONTRIBDIR)/md5/md5.c $(CONTRIBDIR)/rbtree/rb.c rbthash.c latency.c graph.c $(CONTRIBDIR)/uuid/clear.c $(CONTRIBDIR)/uuid/copy.c $(CONTRIBDIR)/uuid/gen_uuid.c $(CONTRIBDIR)/uuid/pack.c $(CONTRIBDIR)/uuid/tst_uuid.c $(CONTRIBDIR)/uuid/parse.c $(CONTRIBDIR)/uuid/unparse.c $(CONTRIBDIR)/uuid/uuid_time.c $(CONTRIBDIR)/uuid/compare.c $(CONTRIBDIR)/uuid/isnull.c $(CONTRIBDIR)/uuid/unpack.c syncop.c graph-print.c +libglusterfs_la_SOURCES = dict.c graph.lex.c y.tab.c xlator.c logging.c hashfn.c defaults.c common-utils.c timer.c inode.c call-stub.c compat.c fd.c compat-errno.c event.c mem-pool.c gf-dirent.c syscall.c iobuf.c globals.c statedump.c stack.c checksum.c $(CONTRIBDIR)/md5/md5.c $(CONTRIBDIR)/rbtree/rb.c rbthash.c latency.c graph.c $(CONTRIBDIR)/uuid/clear.c $(CONTRIBDIR)/uuid/copy.c $(CONTRIBDIR)/uuid/gen_uuid.c $(CONTRIBDIR)/uuid/pack.c $(CONTRIBDIR)/uuid/tst_uuid.c $(CONTRIBDIR)/uuid/parse.c $(CONTRIBDIR)/uuid/unparse.c $(CONTRIBDIR)/uuid/uuid_time.c $(CONTRIBDIR)/uuid/compare.c $(CONTRIBDIR)/uuid/isnull.c $(CONTRIBDIR)/uuid/unpack.c syncop.c graph-print.c trie.c -noinst_HEADERS = common-utils.h defaults.h dict.h glusterfs.h hashfn.h logging.h xlator.h stack.h timer.h list.h inode.h call-stub.h compat.h fd.h revision.h compat-errno.h event.h mem-pool.h byte-order.h gf-dirent.h locking.h syscall.h iobuf.h globals.h statedump.h checksum.h $(CONTRIBDIR)/md5/md5.h $(CONTRIBDIR)/rbtree/rb.h rbthash.h iatt.h latency.h mem-types.h $(CONTRIBDIR)/uuid/uuidd.h $(CONTRIBDIR)/uuid/uuid.h $(CONTRIBDIR)/uuid/uuidP.h $(CONTRIBDIR)/uuid/uuid_types.h syncop.h graph-utils.h graph-mem-types.h +noinst_HEADERS = common-utils.h defaults.h dict.h glusterfs.h hashfn.h logging.h xlator.h stack.h timer.h list.h inode.h call-stub.h compat.h fd.h revision.h compat-errno.h event.h mem-pool.h byte-order.h gf-dirent.h locking.h syscall.h iobuf.h globals.h statedump.h checksum.h $(CONTRIBDIR)/md5/md5.h $(CONTRIBDIR)/rbtree/rb.h rbthash.h iatt.h latency.h mem-types.h $(CONTRIBDIR)/uuid/uuidd.h $(CONTRIBDIR)/uuid/uuid.h $(CONTRIBDIR)/uuid/uuidP.h $(CONTRIBDIR)/uuid/uuid_types.h syncop.h graph-utils.h graph-mem-types.h trie.h trie-mem-types.h EXTRA_DIST = graph.l graph.y diff --git a/libglusterfs/src/trie-mem-types.h b/libglusterfs/src/trie-mem-types.h new file mode 100644 index 000000000..2b21f67e1 --- /dev/null +++ b/libglusterfs/src/trie-mem-types.h @@ -0,0 +1,34 @@ +/* + Copyright (c) 2010 Gluster, Inc. + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see + . +*/ + +#ifndef __TRIE_MEM_TYPES_H__ +#define __TRIE_MEM_TYPES_H__ + +#include "mem-types.h" + +#define GF_MEM_TYPE_START (gf_common_mt_end + 1) + +enum gf_trie_mem_types_ { + gf_trie_mt_trie = GF_MEM_TYPE_START, + gf_trie_mt_data, + gf_trie_mt_node, + gf_trie_mt_buf, + gf_trie_mt_end, +}; +#endif diff --git a/libglusterfs/src/trie.c b/libglusterfs/src/trie.c new file mode 100644 index 000000000..2501e71a5 --- /dev/null +++ b/libglusterfs/src/trie.c @@ -0,0 +1,397 @@ +/* + Copyright (c) 2010 Gluster, Inc. + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see + . +*/ + +#include +#include +#include +#include + +#include "common-utils.h" +#include "trie-mem-types.h" +#include "trie.h" + +#define DISTANCE_EDIT 1 +#define DISTANCE_INS 1 +#define DISTANCE_DEL 1 + + +struct trienode { + char id; + char eow; + int depth; + void *data; + struct trie *trie; + struct trienode *parent; + struct trienode *subnodes[255]; +}; + +struct trie { + struct trienode root; + int nodecnt; + size_t len; +}; + + +trie_t * +trie_new () +{ + trie_t *trie = NULL; + + trie = GF_CALLOC (1, sizeof (*trie), gf_trie_mt_trie); + if (!trie) + return NULL; + + trie->root.trie = trie; + + return trie; +} + + +static trienode_t * +trie_subnode (trienode_t *node, int id) +{ + trienode_t *subnode = NULL; + + subnode = node->subnodes[id]; + if (!subnode) { + subnode = GF_CALLOC (1, sizeof (*subnode), gf_trie_mt_node); + if (!subnode) + return NULL; + + subnode->id = id; + subnode->depth = node->depth + 1; + node->subnodes[id] = subnode; + subnode->parent = node; + subnode->trie = node->trie; + node->trie->nodecnt++; + } + + return subnode; +} + + +int +trie_add (trie_t *trie, const char *dword) +{ + trienode_t *node = NULL; + int i = 0; + char id = 0; + trienode_t *subnode = NULL; + + node = &trie->root; + + for (i = 0; i < strlen (dword); i++) { + id = dword[i]; + + subnode = trie_subnode (node, id); + if (!subnode) + return -1; + node = subnode; + } + + node->eow = 1; + + return 0; +} + +static void +trienode_free (trienode_t *node) +{ + trienode_t *trav = NULL; + int i = 0; + + for (i = 0; i < 255; i++) { + trav = node->subnodes[i]; + + if (trav) + trienode_free (trav); + } + + if (node->data) + GF_FREE (node->data); + GF_FREE (node); +} + + +void +trie_destroy (trie_t *trie) +{ + trienode_free ((trienode_t *)trie); +} + + +void +trie_destroy_bynode (trienode_t *node) +{ + trie_destroy (node->trie); +} + + +static int +trienode_walk (trienode_t *node, int (*fn)(trienode_t *node, void *data), + void *data, int eowonly) +{ + trienode_t *trav = NULL; + int i = 0; + int cret = 0; + int ret = 0; + + if (!eowonly || node->eow) + ret = fn (node, data); + + if (ret) + goto out; + + for (i = 0; i < 255; i++) { + trav = node->subnodes[i]; + if (!trav) + continue; + + cret = trienode_walk (trav, fn, data, eowonly); + if (cret < 0) { + ret = cret; + goto out; + } + ret += cret; + } + +out: + return ret; +} + + +static int +trie_walk (trie_t *trie, int (*fn)(trienode_t *node, void *data), + void *data, int eowonly) +{ + return trienode_walk (&trie->root, fn, data, eowonly); +} + + +static void +print_node (trienode_t *node, char **buf) +{ + if (!node->parent) + return; + + if (node->parent) { + print_node (node->parent, buf); + *(*buf)++ = node->id; + } +} + + +int +trienode_get_word (trienode_t *node, char **bufp) +{ + char *buf = NULL; + + buf = GF_CALLOC (1, node->depth + 1, gf_trie_mt_buf); + if (!buf) + return -1; + *bufp = buf; + + print_node (node, &buf); + + return 0; +} + + +static int +calc_dist (trienode_t *node, void *data) +{ + const char *word = NULL; + int i = 0; + int *row = NULL; + int *uprow = NULL; + int distu = 0; + int distl = 0; + int distul = 0; + + word = data; + + node->data = GF_CALLOC (node->trie->len, sizeof (int), gf_trie_mt_data); + if (!node->data) + return -1; + row = node->data; + + if (!node->parent) { + for (i = 0; i < node->trie->len; i++) + row[i] = i+1; + + return 0; + } + + uprow = node->parent->data; + + distu = node->depth; /* up node */ + distul = node->parent->depth; /* up-left node */ + + for (i = 0; i < node->trie->len; i++) { + distl = uprow[i]; /* left node */ + + if (word[i] == node->id) + row[i] = distul; + else + row[i] = min ((distul + DISTANCE_EDIT), + min ((distu + DISTANCE_DEL), + (distl + DISTANCE_INS))); + + distu = row[i]; + distul = distl; + } + + return 0; +} + + +int +trienode_get_dist (trienode_t *node) +{ + int *row = NULL; + + row = node->data; + + return row[node->trie->len - 1]; +} + + +struct trienodevec_w { + struct trienodevec *vec; + const char *word; +}; + + +static void +trienodevec_clear (struct trienodevec *nodevec) +{ + memset(nodevec->nodes, 0, sizeof (*nodevec->nodes) * nodevec->cnt); +} + + +static int +collect_closest (trienode_t *node, void *data) +{ + struct trienodevec_w *nodevec_w = NULL; + struct trienodevec *nodevec = NULL; + int dist = 0; + int i = 0; + + nodevec_w = data; + nodevec = nodevec_w->vec; + + if (calc_dist (node, (void *)nodevec_w->word)) + return -1; + + if (!node->eow || !nodevec->cnt) + return 0; + + dist = trienode_get_dist (node); + + /* + * I thought that when descending further after some dictionary word dw, + * if we see that child's distance is bigger than it was for dw, then we + * can prune this branch, as it can contain only worse nodes. + * + * This conjecture fails, see eg: + * + * d("AB", "B") = 1; + * d("AB", "BA") = 2; + * d("AB", "BAB") = 1; + * + * -- if both "B" and "BAB" are in dict., then pruning at "BA" * would + * miss "BAB". + * + * (example courtesy of Richard Bann ) + + if (node->parent->eow && dist > trienode_get_dist (node->parent)) + return 1; + + */ + + if (nodevec->nodes[0] && + dist < trienode_get_dist (nodevec->nodes[0])) { + /* improving over the findings so far */ + trienodevec_clear (nodevec); + nodevec->nodes[0] = node; + } else if (!nodevec->nodes[0] || + dist == trienode_get_dist (nodevec->nodes[0])) { + /* as good as the best so far, add if there is free space */ + for (i = 0; i < nodevec->cnt; i++) { + if (!nodevec->nodes[i]) { + nodevec->nodes[i] = node; + break; + } + } + } + + return 0; +} + + +int +trie_measure (trie_t *trie, const char *word, trienode_t **nodes, + int nodecnt) +{ + struct trienodevec nodevec = {0,}; + + nodevec.nodes = nodes; + nodevec.cnt = nodecnt; + + return trie_measure_vec (trie, word, &nodevec); +} + + +int +trie_measure_vec (trie_t *trie, const char *word, struct trienodevec *nodevec) +{ + struct trienodevec_w nodevec_w = {0,}; + int ret = 0; + + trie->len = strlen (word); + + trienodevec_clear (nodevec); + nodevec_w.vec = nodevec; + nodevec_w.word = word; + + ret = trie_walk (trie, collect_closest, &nodevec_w, 0); + if (ret > 0) + ret = 0; + + return ret; +} + + +static int +trienode_reset (trienode_t *node, void *data) +{ + if (node->data) + GF_FREE (node->data); + + return 0; +} + + +void +trie_reset_search (trie_t *trie) +{ + trie->len = 0; + + trie_walk (trie, trienode_reset, NULL, 0); +} diff --git a/libglusterfs/src/trie.h b/libglusterfs/src/trie.h new file mode 100644 index 000000000..a670c2a27 --- /dev/null +++ b/libglusterfs/src/trie.h @@ -0,0 +1,60 @@ +/* + Copyright (c) 2007-2010 Gluster, Inc. + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see + . +*/ + +#ifndef _TRIE_H_ +#define _TRIE_H_ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +struct trienode; +typedef struct trienode trienode_t; + +struct trie; +typedef struct trie trie_t; + +struct trienodevec { + trienode_t **nodes; + unsigned cnt; +}; + + +trie_t *trie_new (); + +int trie_add (trie_t *trie, const char *word); + +void trie_destroy (trie_t *trie); + +void trie_destroy_bynode (trienode_t *node); + +int trie_measure (trie_t *trie, const char *word, trienode_t **nodes, + int nodecnt); + +int trie_measure_vec (trie_t *trie, const char *word, + struct trienodevec *nodevec); + +void trie_reset_search (trie_t *trie); + +int trienode_get_dist (trienode_t *node); + +int trienode_get_word (trienode_t *node, char **buf); + +#endif -- cgit