libglusterfs,mount/fuse: implement gidcache mechanism in fuse-bridge

This change genericizes the cache mechanism implemented in commit 8efd2845 into libglusterfs/src/gidcache.[ch] and adds fuse-bridge as a client. The cache mechanism is fundamentally equivalent, with some minor changes: - Change cache key from uid_t to uint64_t. - Modify the cache add logic to locate and use an entry with a matching ID, should it already exist. This addresses a bug in the existing mechanism where an expired entry supercedes a newly added entry in lookup, causing repeated adds and flushing of a cache bucket. The fuse group cache is disabled by default. It can be enabled via the 'gid-timeout' fuse-bridge translator option and accompanying mount option (i.e., '-o gid-timeout=1' for a 1s entry timeout). BUG: 800892 Change-Id: I0b34a2263ca48dbb154790a4a44fc70b733e9114 Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-on: http://review.gluster.com/3676 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Jeff Darcy <jdarcy@redhat.com> Reviewed-by: Anand Avati <avati@redhat.com>
author: Brian Foster <bfoster@redhat.com> 2012-07-16 13:51:09 -0400
committer: Anand Avati <avati@redhat.com> 2012-07-17 08:11:48 -0700
commit: 59ff893d11844eb52453ce4f7f098df05fcde174 (patch)
tree: 25d332376a461e09770e7dfdd88e7cfc13efea4b
parent: 911603eb0e1c85e79cf261f99f442c833ead8178 (diff)
14 files changed, 353 insertions, 120 deletions
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index 93e840606c9..acbed554d15 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -181,6 +181,9 @@ static struct argp_option gf_options[] = {
         {"attribute-timeout", ARGP_ATTRIBUTE_TIMEOUT_KEY, "SECONDS", 0,
          "Set attribute timeout to SECONDS for inodes in fuse kernel module "
          "[default: 1]"},
+	{"gid-timeout", ARGP_GID_TIMEOUT_KEY, "SECONDS", 0,
+	 "Set auxilary group list timeout to SECONDS for fuse translator "
+	 "[default: 0]"},
         {"client-pid", ARGP_CLIENT_PID_KEY, "PID", OPTION_HIDDEN,
          "client will authenticate itself with process id PID to server"},
         {"user-map-root", ARGP_USER_MAP_ROOT_KEY, "USER", OPTION_HIDDEN,
@@ -381,6 +384,16 @@ create_fuse_mount (glusterfs_ctx_t *ctx)
 		}
 	}
 
+	if (cmd_args->gid_timeout) {
+		ret = dict_set_int32(master->options, "gid-timeout",
+			cmd_args->gid_timeout);
+		if (ret < 0) {
+			gf_log("glusterfsd", GF_LOG_ERROR, "failed to set dict "
+				"value for key gid-timeout");
+			goto err;
+		}
+	}
+
         switch (cmd_args->fuse_direct_io_mode) {
         case GF_OPTION_DISABLE: /* disable */
                 ret = dict_set_static_ptr (master->options, ZR_DIRECT_IO_OPT,
@@ -827,6 +840,13 @@ parse_opts (int key, char *arg, struct argp_state *state)
 	case ARGP_FOPEN_KEEP_CACHE_KEY:
 		cmd_args->fopen_keep_cache = 1;
 		break;
+
+	case ARGP_GID_TIMEOUT_KEY:
+		if (!gf_string2int(arg, &cmd_args->gid_timeout))
+			break;
+
+		argp_failure(state, -1, 0, "unknown group list timeout %s", arg);
+		break;
 	}
 
         return 0;
diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
index 382a8cc71fc..5c00acb5db8 100644
--- a/glusterfsd/src/glusterfsd.h
+++ b/glusterfsd/src/glusterfsd.h
@@ -88,6 +88,7 @@ enum argp_option_keys {
         ARGP_MEM_ACCOUNTING_KEY           = 157,
         ARGP_SELINUX_KEY                  = 158,
 	ARGP_FOPEN_KEEP_CACHE_KEY	  = 159,
+	ARGP_GID_TIMEOUT_KEY		  = 160,
 };
 
 struct _gfd_vol_top_priv_t {
diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am
index 2dab8735c96..bcfe467af62 100644
--- a/libglusterfs/src/Makefile.am
+++ b/libglusterfs/src/Makefile.am
@@ -22,7 +22,8 @@ libglusterfs_la_SOURCES = dict.c xlator.c logging.c \
 	$(CONTRIBDIR)/uuid/parse.c $(CONTRIBDIR)/uuid/unparse.c \
 	$(CONTRIBDIR)/uuid/uuid_time.c $(CONTRIBDIR)/uuid/compare.c \
 	$(CONTRIBDIR)/uuid/isnull.c $(CONTRIBDIR)/uuid/unpack.c syncop.c \
-	graph-print.c trie.c run.c options.c fd-lk.c circ-buff.c event-history.c
+	graph-print.c trie.c run.c options.c fd-lk.c circ-buff.c \
+	event-history.c gidcache.c
 
 nodist_libglusterfs_la_SOURCES = y.tab.c graph.lex.c
 
@@ -36,7 +37,7 @@ noinst_HEADERS = common-utils.h defaults.h dict.h glusterfs.h hashfn.h \
 	rbthash.h iatt.h latency.h mem-types.h $(CONTRIBDIR)/uuid/uuidd.h \
 	$(CONTRIBDIR)/uuid/uuid.h $(CONTRIBDIR)/uuid/uuidP.h \
 	$(CONTRIB_BUILDDIR)/uuid/uuid_types.h syncop.h graph-utils.h trie.h run.h \
-	options.h lkowner.h fd-lk.h circ-buff.h event-history.h
+	options.h lkowner.h fd-lk.h circ-buff.h event-history.h gidcache.h
 
 EXTRA_DIST = graph.l graph.y
 
diff --git a/libglusterfs/src/gidcache.c b/libglusterfs/src/gidcache.c
new file mode 100644
index 00000000000..9e508536d7e
--- /dev/null
+++ b/libglusterfs/src/gidcache.c
@@ -0,0 +1,181 @@
+/*
+  Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+  This file is part of GlusterFS.
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+
+#include "gidcache.h"
+#include "mem-pool.h"
+
+/*
+ * We treat this as a very simple set-associative LRU cache, with entries aged
+ * out after a configurable interval.  Hardly rocket science, but lots of
+ * details to worry about.
+ */
+#define BUCKET_START(p,n)       ((p) + ((n) * AUX_GID_CACHE_ASSOC))
+
+/*
+ * Initialize the cache.
+ */
+int gid_cache_init(gid_cache_t *cache, uint32_t timeout)
+{
+	if (!cache)
+		return -1;
+
+	LOCK_INIT(&cache->gc_lock);
+	cache->gc_max_age = timeout;
+	cache->gc_nbuckets = AUX_GID_CACHE_BUCKETS;
+	memset(cache->gc_cache, 0, sizeof(gid_list_t) * AUX_GID_CACHE_SIZE);
+
+	return 0;
+}
+
+/*
+ * Look up an ID in the cache. If found, return the actual cache entry to avoid
+ * an additional allocation and memory copy. The caller should copy the data and
+ * release (unlock) the cache as soon as possible.
+ */
+const gid_list_t *gid_cache_lookup(gid_cache_t *cache, uint64_t id)
+{
+	int bucket;
+	int i;
+	time_t now;
+	const gid_list_t *agl;
+
+	LOCK(&cache->gc_lock);
+	now = time(NULL);
+	bucket = id % cache->gc_nbuckets;
+	agl = BUCKET_START(cache->gc_cache, bucket);
+	for (i = 0; i < AUX_GID_CACHE_ASSOC; i++, agl++) {
+		if (!agl->gl_list)
+			continue;
+		if (agl->gl_id != id)
+			continue;
+
+		/*
+		 * We don't put new entries in the cache when expiration=0, but
+		 * there might be entries still in there if expiration was
+		 * changed very recently.  Writing the check this way ensures
+		 * that they're not used.
+		 */
+		if (now < agl->gl_deadline) {
+			gf_log("gid-cache", GF_LOG_DEBUG, "id %lu gl found - "
+				"bucket %d, index %d", id, bucket, i);
+			return agl;
+		}
+
+		/*
+		 * We're not going to find any more UID matches, and reaping
+		 * is handled further down to maintain LRU order.
+		 */
+		break;
+	}
+	UNLOCK(&cache->gc_lock);
+	return NULL;
+}
+
+/*
+ * Release an entry found via lookup.
+ */
+void gid_cache_release(gid_cache_t *cache, const gid_list_t *agl)
+{
+	UNLOCK(&cache->gc_lock);
+}
+
+/*
+ * Add a new list entry to the cache. If an entry for this ID already exists,
+ * update it.
+ */
+int gid_cache_add(gid_cache_t *cache, gid_list_t *gl)
+{
+	gid_list_t *agl;
+	int bucket;
+	int i;
+	time_t now;
+
+	if (!gl || !gl->gl_list)
+		return -1;
+
+	if (!cache->gc_max_age)
+		return 0;
+
+	LOCK(&cache->gc_lock);
+	now = time(NULL);
+
+	/*
+	 * Scan for the first free entry or one that matches this id. The id
+	 * check is added to address a bug where the cache might contain an
+	 * expired entry for this id. Since lookup occurs in LRU order and
+	 * does not reclaim entries, it will always return failure on discovery
+	 * of an expired entry. This leads to duplicate entries being added,
+	 * which still do not satisfy lookups until the expired entry (and
+	 * everything before it) is reclaimed.
+	 *
+	 * We address this through reuse of an entry already allocated to this
+	 * id, whether expired or not, since we have obviously already received
+	 * more recent data. The entry is repopulated with the new data and a new
+	 * deadline and is pushed forward to reside as the last populated entry in
+	 * the bucket.
+	 */
+	bucket = gl->gl_id % cache->gc_nbuckets;
+	agl = BUCKET_START(cache->gc_cache, bucket);
+	for (i = 0; i < AUX_GID_CACHE_ASSOC; ++i, ++agl) {
+		if (agl->gl_id == gl->gl_id)
+			break;
+		if (!agl->gl_list)
+			break;
+	}
+
+	/*
+	 * The way we allocate free entries naturally places the newest
+	 * ones at the highest indices, so evicting the lowest makes
+	 * sense, but that also means we can't just replace it with the
+	 * one that caused the eviction.  That would cause us to thrash
+	 * the first entry while others remain idle.  Therefore, we
+	 * need to slide the other entries down and add the new one at
+	 * the end just as if the *last* slot had been free.
+	 *
+	 * Deadline expiration is also handled here, since the oldest
+	 * expired entry will be in the first position.  This does mean
+	 * the bucket can stay full of expired entries if we're idle
+	 * but, if the small amount of extra memory or scan time before
+	 * we decide to evict someone ever become issues, we could
+	 * easily add a reaper thread.
+	 */
+
+	if (i >= AUX_GID_CACHE_ASSOC) {
+		/* cache full, evict the first (LRU) entry */
+		i = 0;
+		agl = BUCKET_START(cache->gc_cache, bucket);
+		GF_FREE(agl->gl_list);
+	} else if (agl->gl_list) {
+		/* evict the old entry we plan to reuse */
+		GF_FREE(agl->gl_list);
+	}
+
+	/*
+	 * If we have evicted an entry, slide the subsequent populated entries
+	 * back and populate the last entry.
+	 */
+	for (; i < AUX_GID_CACHE_ASSOC - 1; i++) {
+		if (!agl[1].gl_list)
+			break;
+		agl[0] = agl[1];
+		agl++;
+	}
+
+	agl->gl_id = gl->gl_id;
+	agl->gl_count = gl->gl_count;
+	agl->gl_list = gl->gl_list;
+	agl->gl_deadline = now + cache->gc_max_age;
+
+	gf_log("gid-cache", GF_LOG_DEBUG, "id %lu gl added - bucket %d, index %d",
+		gl->gl_id, bucket, i);
+	UNLOCK(&cache->gc_lock);
+
+	return 1;
+}
diff --git a/libglusterfs/src/gidcache.h b/libglusterfs/src/gidcache.h
new file mode 100644
index 00000000000..f904f26eb97
--- /dev/null
+++ b/libglusterfs/src/gidcache.h
@@ -0,0 +1,52 @@
+/*
+  Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+  This file is part of GlusterFS.
+
+  This file is licensed to you under your choice of the GNU Lesser
+  General Public License, version 3 or any later version (LGPLv3 or
+  later), or the GNU General Public License, version 2 (GPLv2), in all
+  cases as published by the Free Software Foundation.
+*/
+
+#ifndef __GIDCACHE_H__
+#define __GIDCACHE_H__
+
+#include "glusterfs.h"
+#include "locking.h"
+
+/*
+ * TBD: make the cache size tunable
+ *
+ * The current size represents a pretty trivial amount of memory, and should
+ * provide good hit rates even for quite busy systems.  If we ever want to
+ * support really large cache sizes, we'll need to do dynamic allocation
+ * instead of just defining an array within a private structure. It doesn't make
+ * a whole lot of sense to change the associativity, because it won't improve
+ * hit rates all that much and will increase the maintenance cost as we have
+ * to scan more entries with every lookup/update.
+ */
+
+#define AUX_GID_CACHE_ASSOC     4
+#define AUX_GID_CACHE_BUCKETS   256
+#define AUX_GID_CACHE_SIZE      (AUX_GID_CACHE_ASSOC * AUX_GID_CACHE_BUCKETS)
+
+typedef struct {
+	uint64_t	gl_id;
+	int		gl_count;
+	gid_t		*gl_list;
+	time_t		gl_deadline;
+} gid_list_t;
+
+typedef struct {
+	gf_lock_t	gc_lock;
+	uint32_t	gc_max_age;
+	unsigned int	gc_nbuckets;
+	gid_list_t	gc_cache[AUX_GID_CACHE_SIZE];
+} gid_cache_t;
+
+int gid_cache_init(gid_cache_t *, uint32_t);
+const gid_list_t *gid_cache_lookup(gid_cache_t *, uint64_t);
+void gid_cache_release(gid_cache_t *, const gid_list_t *);
+int gid_cache_add(gid_cache_t *, gid_list_t *);
+
+#endif /* __GIDCACHE_H__ */
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 357284e27b4..7e2c8135600 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -286,6 +286,7 @@ struct _cmd_args {
         int              worm;
         int              mac_compat;
 	int		 fopen_keep_cache;
+	int		 gid_timeout;
 	struct list_head xlator_options;  /* list of xlator_option_t */
 
 	/* fuse options */
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 360e1443cf2..3a620ac7e7f 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -4604,6 +4604,15 @@ init (xlator_t *this_xl)
 	GF_OPTION_INIT("fopen-keep-cache", priv->fopen_keep_cache, bool,
 		cleanup_exit);
 
+	GF_OPTION_INIT("gid-timeout", priv->gid_cache_timeout, int32,
+		cleanup_exit);
+
+	if (gid_cache_init(&priv->gid_cache, priv->gid_cache_timeout) < 0) {
+		gf_log("glusterfs-fuse", GF_LOG_ERROR, "Failed to initialize "
+			"group cache.");
+		goto cleanup_exit;
+	}
+
         cmd_args = &this_xl->ctx->cmd_args;
         fsname = cmd_args->volfile;
         if (!fsname && cmd_args->volfile_server) {
@@ -4768,5 +4777,9 @@ struct volume_options options[] = {
 	  .type = GF_OPTION_TYPE_BOOL,
 	  .default_value = "false"
 	},
+	{ .key = {"gid-timeout"},
+	  .type = GF_OPTION_TYPE_INT,
+	  .default_value = "0"
+	},
         { .key = {NULL} },
 };
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
index dcd9629246c..bc35eb0617d 100644
--- a/xlators/mount/fuse/src/fuse-bridge.h
+++ b/xlators/mount/fuse/src/fuse-bridge.h
@@ -55,6 +55,7 @@
 #include "list.h"
 #include "dict.h"
 #include "syncop.h"
+#include "gidcache.h"
 
 #if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
 #define FUSE_OP_HIGH (FUSE_POLL + 1)
@@ -110,7 +111,9 @@ struct fuse_private {
         gf_boolean_t         selinux;
         gf_boolean_t         read_only;
 	gf_boolean_t	     fopen_keep_cache;
+	int32_t		     gid_cache_timeout;
         fdtable_t           *fdtable;
+	gid_cache_t	     gid_cache;
 
         /* For fuse-reverse-validation */
         int                  revchan_in;
diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c
index bcad97fc742..729c8fb2cd4 100644
--- a/xlators/mount/fuse/src/fuse-helpers.c
+++ b/xlators/mount/fuse/src/fuse-helpers.c
@@ -245,6 +245,46 @@ out:
 #endif /* GF_LINUX_HOST_OS */
 }
 
+/*
+ * Get the groups for the PID associated with this frame. If enabled,
+ * use the gid cache to reduce group list collection.
+ */
+static void get_groups(fuse_private_t *priv, call_frame_t *frame)
+{
+	int i;
+	const gid_list_t *gl;
+	gid_list_t agl;
+
+	if (!priv->gid_cache_timeout) {
+		frame_fill_groups(frame);
+		return;
+	}
+
+	gl = gid_cache_lookup(&priv->gid_cache, frame->root->pid);
+	if (gl) {
+		frame->root->ngrps = gl->gl_count;
+		for (i = 0; i < gl->gl_count; i++)
+			frame->root->groups[i] = gl->gl_list[i];
+		gid_cache_release(&priv->gid_cache, gl);
+		return;
+	}
+
+	frame_fill_groups (frame);
+
+	agl.gl_id = frame->root->pid;
+	agl.gl_count = frame->root->ngrps;
+	agl.gl_list = GF_CALLOC(frame->root->ngrps, sizeof(gid_t),
+			gf_fuse_mt_gids_t);
+	if (!agl.gl_list)
+		return;
+
+	for (i = 0; i < frame->root->ngrps; i++)
+		agl.gl_list[i] = frame->root->groups[i];
+
+	if (gid_cache_add(&priv->gid_cache, &agl) != 1)
+		GF_FREE(agl.gl_list);
+}
+
 call_frame_t *
 get_call_frame_for_req (fuse_state_t *state)
 {
@@ -272,7 +312,7 @@ get_call_frame_for_req (fuse_state_t *state)
                                           state->lk_owner);
         }
 
-        frame_fill_groups (frame);
+	get_groups(priv, frame);
 
         if (priv && priv->client_pid_set)
                 frame->root->pid = priv->client_pid;
diff --git a/xlators/mount/fuse/src/fuse-mem-types.h b/xlators/mount/fuse/src/fuse-mem-types.h
index 9c6a1c67a7a..d1c5e511b0e 100644
--- a/xlators/mount/fuse/src/fuse-mem-types.h
+++ b/xlators/mount/fuse/src/fuse-mem-types.h
@@ -31,6 +31,7 @@ enum gf_fuse_mem_types_ {
         gf_fuse_mt_fuse_state_t,
         gf_fuse_mt_fd_ctx_t,
         gf_fuse_mt_graph_switch_args_t,
+	gf_fuse_mt_gids_t,
         gf_fuse_mt_end
 };
 #endif
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
index b623d3428b7..e585ba3b7fd 100755
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
@@ -132,6 +132,10 @@ start_glusterfs ()
         cmd_line=$(echo "$cmd_line --entry-timeout=$entry_timeout");
     fi
 
+    if [ -n "$gid_timeout" ]; then
+	cmd_line=$(echo "$cmd_line --gid-timeout=$gid_timeout");
+    fi
+
     if [ -n "$fopen_keep_cache" ]; then
 	cmd_line=$(echo "$cmd_line --fopen-keep-cache");
     fi
@@ -325,6 +329,7 @@ main ()
                             "attribute-timeout")
                                 attribute_timeout=$value ;;
                             "entry-timeout")    entry_timeout=$value ;;
+			    "gid-timeout")	gid_timeout=$value ;;
                             *) echo "unknown option $key (ignored)" ;;
                         esac
                 esac
diff --git a/xlators/nfs/server/src/nfs-fops.c b/xlators/nfs/server/src/nfs-fops.c
index 87c511d546f..e2eedf43398 100644
--- a/xlators/nfs/server/src/nfs-fops.c
+++ b/xlators/nfs/server/src/nfs-fops.c
@@ -39,13 +39,6 @@
 #include <libgen.h>
 #include <semaphore.h>
 
-/*
- * We treat this as a very simple set-associative LRU cache, with entries aged
- * out after a configurable interval.  Hardly rocket science, but lots of
- * details to worry about.
- */
-#define BUCKET_START(p,n)       ((p) + ((n) * AUX_GID_CACHE_ASSOC))
-
 void
 nfs_fix_groups (xlator_t *this, call_stack_t *root)
 {
@@ -56,47 +49,23 @@ nfs_fix_groups (xlator_t *this, call_stack_t *root)
         int              ngroups;
         int              i;
         struct nfs_state *priv = this->private;
-        aux_gid_list_t   *agl = NULL;
-        int              bucket = 0;
-        time_t           now = 0;
+        const gid_list_t *agl;
+	gid_list_t gl;
 
         if (!priv->server_aux_gids) {
                 return;
         }
 
-        LOCK(&priv->aux_gid_lock);
-        now = time(NULL);
-        bucket = root->uid % priv->aux_gid_nbuckets;
-        agl = BUCKET_START(priv->aux_gid_cache,bucket);
-        for (i = 0; i < AUX_GID_CACHE_ASSOC; ++i, ++agl) {
-                if (!agl->gid_list) {
-                        continue;
-                }
-                if (agl->uid != root->uid) {
-                        continue;
-                }
-                /*
-                 * We don't put new entries in the cache when expiration=0, but
-                 * there might be entries still in there if expiration was
-                 * changed very recently.  Writing the check this way ensures
-                 * that they're not used.
-                 */
-                if (now < agl->deadline) {
-                        for (ngroups = 0; ngroups < agl->gid_count; ++ngroups) {
-                                root->groups[ngroups] = agl->gid_list[ngroups];
-                        }
-                        UNLOCK(&priv->aux_gid_lock);
-                        root->ngrps = ngroups;
-                        return;
-                }
-                /*
-                 * We're not going to find any more UID matches, and reaping
-                 * is handled further down to maintain LRU order.
-                 */
-                break;
-        }
-        UNLOCK(&priv->aux_gid_lock);
+	agl = gid_cache_lookup(&priv->gid_cache, root->uid);
+	if (agl) {
+		for (ngroups = 0; ngroups < agl->gl_count; ngroups++) 
+			root->groups[ngroups] = agl->gl_list[ngroups];
+		root->ngrps = ngroups;
+		gid_cache_release(&priv->gid_cache, agl);
+		return;
+	}
 
+	/* No cached list found. */
         if (getpwuid_r(root->uid,&mypw,mystrs,sizeof(mystrs),&result) != 0) {
                 gf_log (this->name, GF_LOG_ERROR,
                         "getpwuid_r(%u) failed", root->uid);
@@ -119,51 +88,18 @@ nfs_fix_groups (xlator_t *this, call_stack_t *root)
                 return;
         }
 
-        if (priv->aux_gid_max_age) {
-                LOCK(&priv->aux_gid_lock);
-                /* Bucket should still be valid from before. */
-                agl = BUCKET_START(priv->aux_gid_cache,bucket);
-                for (i = 0; i < AUX_GID_CACHE_ASSOC; ++i, ++agl) {
-                        if (!agl->gid_list) {
-                                break;
-                        }
-                }
-                /*
-                 * The way we allocate free entries naturally places the newest
-                 * ones at the highest indices, so evicting the lowest makes
-                 * sense, but that also means we can't just replace it with the
-                 * one that caused the eviction.  That would cause us to thrash
-                 * the first entry while others remain idle.  Therefore, we
-                 * need to slide the other entries down and add the new one at
-                 * the end just as if the *last* slot had been free.
-                 *
-                 * Deadline expiration is also handled here, since the oldest
-                 * expired entry will be in the first position.  This does mean
-                 * the bucket can stay full of expired entries if we're idle
-                 * but, if the small amount of extra memory or scan time before
-                 * we decide to evict someone ever become issues, we could
-                 * easily add a reaper thread.
-                 */
-                if (i >= AUX_GID_CACHE_ASSOC) {
-                        agl = BUCKET_START(priv->aux_gid_cache,bucket);
-                        GF_FREE(agl->gid_list);
-                        for (i = 1; i < AUX_GID_CACHE_ASSOC; ++i) {
-                                agl[0] = agl[1];
-                                ++agl;
-                        }
-                }
-                agl->gid_list = GF_CALLOC(ngroups,sizeof(gid_t),
-                                          gf_nfs_mt_aux_gids);
-                if (agl->gid_list) {
-                        /* It's not fatal if the alloc failed. */
-                        agl->uid = root->uid;
-                        agl->gid_count = ngroups;
-                        memcpy(agl->gid_list,mygroups,sizeof(gid_t)*ngroups);
-                        agl->deadline = now + priv->aux_gid_max_age;
-                }
-                UNLOCK(&priv->aux_gid_lock);
-        }
-
+	/* Add the group data to the cache. */
+	gl.gl_list = GF_CALLOC(ngroups, sizeof(gid_t), gf_nfs_mt_aux_gids);
+	if (gl.gl_list) {
+		/* It's not fatal if the alloc failed. */
+		gl.gl_id = root->uid;
+		gl.gl_count = ngroups;
+		memcpy(gl.gl_list, mygroups, sizeof(gid_t) * ngroups);
+		if (gid_cache_add(&priv->gid_cache, &gl) != 1)
+			GF_FREE(gl.gl_list);
+	}
+
+	/* Copy data to the frame. */
         for (i = 0; i < ngroups; ++i) {
                 gf_log (this->name, GF_LOG_TRACE,
                         "%s is in group %u", result->pw_name, mygroups[i]);
diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c
index ba63bcd7a8c..0a5a9d1e34a 100644
--- a/xlators/nfs/server/src/nfs.c
+++ b/xlators/nfs/server/src/nfs.c
@@ -736,9 +736,14 @@ nfs_init_state (xlator_t *this)
 
         GF_OPTION_INIT (OPT_SERVER_AUX_GIDS, nfs->server_aux_gids,
                         bool, free_foppool);
-        GF_OPTION_INIT (OPT_SERVER_GID_CACHE_TIMEOUT,nfs->aux_gid_max_age,
+        GF_OPTION_INIT (OPT_SERVER_GID_CACHE_TIMEOUT, nfs->server_aux_gids_max_age,
                         uint32, free_foppool);
 
+	if (gid_cache_init(&nfs->gid_cache, nfs->server_aux_gids_max_age) < 0) {
+		gf_log(GF_NFS, GF_LOG_ERROR, "Failed to initialize group cache.");
+		goto free_foppool;
+	}
+
         if (stat("/sbin/rpc.statd", &stbuf) == -1) {
                 gf_log (GF_NFS, GF_LOG_WARNING, "/sbin/rpc.statd not found. "
                         "Disabling NLM");
@@ -827,9 +832,6 @@ init (xlator_t *this) {
                 goto err;
         }
 
-        LOCK_INIT(&nfs->aux_gid_lock);
-        nfs->aux_gid_nbuckets = AUX_GID_CACHE_BUCKETS;
-
         gf_log (GF_NFS, GF_LOG_INFO, "NFS service started");
 err:
 
diff --git a/xlators/nfs/server/src/nfs.h b/xlators/nfs/server/src/nfs.h
index d2a0c134318..c3deba00a02 100644
--- a/xlators/nfs/server/src/nfs.h
+++ b/xlators/nfs/server/src/nfs.h
@@ -29,6 +29,7 @@
 #include "dict.h"
 #include "xlator.h"
 #include "lkowner.h"
+#include "gidcache.h"
 
 #define GF_NFS                  "nfs"
 
@@ -65,28 +66,6 @@ struct nfs_initer_list {
         rpcsvc_program_t        *program;
 };
 
-/*
- * TBD: make the cache size tunable
- *
- * The current size represents a pretty trivial amount of memory, and should
- * provide good hit rates even for quite busy systems.  If we ever want to
- * support really large cache sizes, we'll need to do dynamic allocation
- * instead of just defining an array within nfs_state.  It doesn't make a
- * whole lot of sense to change the associativity, because it won't improve
- * hit rates all that much and will increase the maintenance cost as we have
- * to scan more entries with every lookup/update.
- */
-#define AUX_GID_CACHE_ASSOC     4
-#define AUX_GID_CACHE_BUCKETS   256
-#define AUX_GID_CACHE_SIZE      (AUX_GID_CACHE_ASSOC * AUX_GID_CACHE_BUCKETS)
-
-typedef struct {
-        uid_t                   uid;
-        int                     gid_count;
-        gid_t                   *gid_list;
-        time_t                  deadline;
-} aux_gid_list_t;
-
 struct nfs_state {
         rpcsvc_t                *rpcsvc;
         struct list_head        versions;
@@ -110,10 +89,8 @@ struct nfs_state {
         int                     mount_udp;
         struct rpc_clnt         *rpc_clnt;
         gf_boolean_t            server_aux_gids;
-        gf_lock_t               aux_gid_lock;
-        uint32_t                aux_gid_max_age;
-        unsigned int            aux_gid_nbuckets;
-        aux_gid_list_t          aux_gid_cache[AUX_GID_CACHE_SIZE];
+	uint32_t		server_aux_gids_max_age;
+	gid_cache_t		gid_cache;
 };
 
 #define gf_nfs_dvm_on(nfsstt)   (((struct nfs_state *)nfsstt)->dynamicvolumes == GF_NFS_DVM_ON)
author	Brian Foster <bfoster@redhat.com>	2012-07-16 13:51:09 -0400
committer	Anand Avati <avati@redhat.com>	2012-07-17 08:11:48 -0700
commit	59ff893d11844eb52453ce4f7f098df05fcde174 (patch)
tree	25d332376a461e09770e7dfdd88e7cfc13efea4b
parent	911603eb0e1c85e79cf261f99f442c833ead8178 (diff)