diff options
Diffstat (limited to 'xlators/nfs')
-rw-r--r-- | xlators/nfs/server/src/nfs-fops.c | 159 | ||||
-rw-r--r-- | xlators/nfs/server/src/nfs-mem-types.h | 1 | ||||
-rw-r--r-- | xlators/nfs/server/src/nfs.c | 30 | ||||
-rw-r--r-- | xlators/nfs/server/src/nfs.h | 29 | ||||
-rw-r--r-- | xlators/nfs/server/src/nlm4.c | 5 |
5 files changed, 216 insertions, 8 deletions
diff --git a/xlators/nfs/server/src/nfs-fops.c b/xlators/nfs/server/src/nfs-fops.c index 6e2b334842b..87c511d546f 100644 --- a/xlators/nfs/server/src/nfs-fops.c +++ b/xlators/nfs/server/src/nfs-fops.c @@ -22,6 +22,9 @@ #include "config.h" #endif +#include <grp.h> +#include <pwd.h> + #include "dict.h" #include "xlator.h" #include "iobuf.h" @@ -32,9 +35,143 @@ #include "inode.h" #include "nfs-common.h" #include "nfs3-helpers.h" +#include "nfs-mem-types.h" #include <libgen.h> #include <semaphore.h> +/* + * We treat this as a very simple set-associative LRU cache, with entries aged + * out after a configurable interval. Hardly rocket science, but lots of + * details to worry about. + */ +#define BUCKET_START(p,n) ((p) + ((n) * AUX_GID_CACHE_ASSOC)) + +void +nfs_fix_groups (xlator_t *this, call_stack_t *root) +{ + struct passwd mypw; + char mystrs[1024]; + struct passwd *result; + gid_t mygroups[GF_MAX_AUX_GROUPS]; + int ngroups; + int i; + struct nfs_state *priv = this->private; + aux_gid_list_t *agl = NULL; + int bucket = 0; + time_t now = 0; + + if (!priv->server_aux_gids) { + return; + } + + LOCK(&priv->aux_gid_lock); + now = time(NULL); + bucket = root->uid % priv->aux_gid_nbuckets; + agl = BUCKET_START(priv->aux_gid_cache,bucket); + for (i = 0; i < AUX_GID_CACHE_ASSOC; ++i, ++agl) { + if (!agl->gid_list) { + continue; + } + if (agl->uid != root->uid) { + continue; + } + /* + * We don't put new entries in the cache when expiration=0, but + * there might be entries still in there if expiration was + * changed very recently. Writing the check this way ensures + * that they're not used. + */ + if (now < agl->deadline) { + for (ngroups = 0; ngroups < agl->gid_count; ++ngroups) { + root->groups[ngroups] = agl->gid_list[ngroups]; + } + UNLOCK(&priv->aux_gid_lock); + root->ngrps = ngroups; + return; + } + /* + * We're not going to find any more UID matches, and reaping + * is handled further down to maintain LRU order. 
+ */ + break; + } + UNLOCK(&priv->aux_gid_lock); + + if (getpwuid_r(root->uid,&mypw,mystrs,sizeof(mystrs),&result) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "getpwuid_r(%u) failed", root->uid); + return; + } + + if (!result) { + gf_log (this->name, GF_LOG_ERROR, + "getpwuid_r(%u) found nothing", root->uid); + return; + } + + gf_log (this->name, GF_LOG_TRACE, "mapped %u => %s", + root->uid, result->pw_name); + + ngroups = GF_MAX_AUX_GROUPS; + if (getgrouplist(result->pw_name,root->gid,mygroups,&ngroups) == -1) { + gf_log (this->name, GF_LOG_ERROR, + "could not map %s to group list", result->pw_name); + return; + } + + if (priv->aux_gid_max_age) { + LOCK(&priv->aux_gid_lock); + /* Bucket should still be valid from before. */ + agl = BUCKET_START(priv->aux_gid_cache,bucket); + for (i = 0; i < AUX_GID_CACHE_ASSOC; ++i, ++agl) { + if (!agl->gid_list) { + break; + } + } + /* + * The way we allocate free entries naturally places the newest + * ones at the highest indices, so evicting the lowest makes + * sense, but that also means we can't just replace it with the + * one that caused the eviction. That would cause us to thrash + * the first entry while others remain idle. Therefore, we + * need to slide the other entries down and add the new one at + * the end just as if the *last* slot had been free. + * + * Deadline expiration is also handled here, since the oldest + * expired entry will be in the first position. This does mean + * the bucket can stay full of expired entries if we're idle + * but, if the small amount of extra memory or scan time before + * we decide to evict someone ever become issues, we could + * easily add a reaper thread. 
+ */ + if (i >= AUX_GID_CACHE_ASSOC) { + agl = BUCKET_START(priv->aux_gid_cache,bucket); + GF_FREE(agl->gid_list); + for (i = 1; i < AUX_GID_CACHE_ASSOC; ++i) { + agl[0] = agl[1]; + ++agl; + } + } + agl->gid_list = GF_CALLOC(ngroups,sizeof(gid_t), + gf_nfs_mt_aux_gids); + if (agl->gid_list) { + /* It's not fatal if the alloc failed. */ + agl->uid = root->uid; + agl->gid_count = ngroups; + memcpy(agl->gid_list,mygroups,sizeof(gid_t)*ngroups); + agl->deadline = now + priv->aux_gid_max_age; + } + UNLOCK(&priv->aux_gid_lock); + } + + for (i = 0; i < ngroups; ++i) { + gf_log (this->name, GF_LOG_TRACE, + "%s is in group %u", result->pw_name, mygroups[i]); + root->groups[i] = mygroups[i]; + } + root->ngrps = ngroups; +} + struct nfs_fop_local * nfs_fop_local_init (xlator_t *nfsx) { @@ -122,18 +259,24 @@ nfs_create_frame (xlator_t *xl, nfs_user_t *nfu) frame->root->uid = nfu->uid; frame->root->gid = nfu->gids[NFS_PRIMGID_IDX]; frame->root->lk_owner = nfu->lk_owner; - if (nfu->ngrps == 1) - goto err; /* Done, we only got primary gid */ - frame->root->ngrps = nfu->ngrps - 1; + if (nfu->ngrps != 1) { + frame->root->ngrps = nfu->ngrps - 1; - gf_log (GF_NFS, GF_LOG_TRACE,"uid: %d, gid %d, gids: %d", - frame->root->uid, frame->root->gid, frame->root->ngrps); - for(y = 0, x = 1; y < frame->root->ngrps; x++,y++) { - gf_log (GF_NFS, GF_LOG_TRACE, "gid: %d", nfu->gids[x]); - frame->root->groups[y] = nfu->gids[x]; + gf_log (GF_NFS, GF_LOG_TRACE,"uid: %d, gid %d, gids: %d", + frame->root->uid, frame->root->gid, frame->root->ngrps); + for(y = 0, x = 1; y < frame->root->ngrps; x++,y++) { + gf_log (GF_NFS, GF_LOG_TRACE, "gid: %d", nfu->gids[x]); + frame->root->groups[y] = nfu->gids[x]; + } } + /* + * It's tempting to do this *instead* of using nfu above, but we need + * to have those values in case nfs_fix_groups doesn't do anything. 
+ */ + nfs_fix_groups(xl,frame->root); + err: return frame; } diff --git a/xlators/nfs/server/src/nfs-mem-types.h b/xlators/nfs/server/src/nfs-mem-types.h index d9edc95b90c..de25b08a826 100644 --- a/xlators/nfs/server/src/nfs-mem-types.h +++ b/xlators/nfs/server/src/nfs-mem-types.h @@ -52,6 +52,7 @@ enum gf_nfs_mem_types_ { gf_nfs_mt_nlm4_fde, gf_nfs_mt_nlm4_nlmclnt, gf_nfs_mt_nlm4_share, + gf_nfs_mt_aux_gids, gf_nfs_mt_end }; #endif diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c index 6ed3614296f..ba63bcd7a8c 100644 --- a/xlators/nfs/server/src/nfs.c +++ b/xlators/nfs/server/src/nfs.c @@ -41,6 +41,10 @@ #include "nfs-mem-types.h" #include "nfs3-helpers.h" #include "nlm4.h" +#include "options.h" + +#define OPT_SERVER_AUX_GIDS "nfs.server-aux-gids" +#define OPT_SERVER_GID_CACHE_TIMEOUT "nfs.server.aux-gid-timeout" /* Every NFS version must call this function with the init function * for its particular version. @@ -730,6 +734,11 @@ nfs_init_state (xlator_t *this) } } + GF_OPTION_INIT (OPT_SERVER_AUX_GIDS, nfs->server_aux_gids, + bool, free_foppool); + GF_OPTION_INIT (OPT_SERVER_GID_CACHE_TIMEOUT,nfs->aux_gid_max_age, + uint32, free_foppool); + if (stat("/sbin/rpc.statd", &stbuf) == -1) { gf_log (GF_NFS, GF_LOG_WARNING, "/sbin/rpc.statd not found. " "Disabling NLM"); @@ -818,6 +827,9 @@ init (xlator_t *this) { goto err; } + LOCK_INIT(&nfs->aux_gid_lock); + nfs->aux_gid_nbuckets = AUX_GID_CACHE_BUCKETS; + gf_log (GF_NFS, GF_LOG_INFO, "NFS service started"); err: @@ -1223,6 +1235,24 @@ struct volume_options options[] = { "Needed by Solaris NFS clients if NLM support is" "needed" }, + { .key = {OPT_SERVER_AUX_GIDS}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Let the server look up which groups a user belongs " + "to, overwriting the list passed from the client. 
" + "This enables support for group lists longer than " + "can be passed through the NFS protocol, but is not " + "secure unless users and groups are well synchronized " + "between clients and servers." + }, + { .key = {OPT_SERVER_GID_CACHE_TIMEOUT}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = 3600, + .default_value = "5", + .description = "Number of seconds to cache auxiliary-GID data, when " + OPT_SERVER_AUX_GIDS " is set." + }, { .key = {NULL} }, }; diff --git a/xlators/nfs/server/src/nfs.h b/xlators/nfs/server/src/nfs.h index 4c6d039f8d2..d2a0c134318 100644 --- a/xlators/nfs/server/src/nfs.h +++ b/xlators/nfs/server/src/nfs.h @@ -65,6 +65,27 @@ struct nfs_initer_list { rpcsvc_program_t *program; }; +/* + * TBD: make the cache size tunable + * + * The current size represents a pretty trivial amount of memory, and should + * provide good hit rates even for quite busy systems. If we ever want to + * support really large cache sizes, we'll need to do dynamic allocation + * instead of just defining an array within nfs_state. It doesn't make a + * whole lot of sense to change the associativity, because it won't improve + * hit rates all that much and will increase the maintenance cost as we have + * to scan more entries with every lookup/update. 
+ */ +#define AUX_GID_CACHE_ASSOC 4 +#define AUX_GID_CACHE_BUCKETS 256 +#define AUX_GID_CACHE_SIZE (AUX_GID_CACHE_ASSOC * AUX_GID_CACHE_BUCKETS) + +typedef struct { + uid_t uid; + int gid_count; + gid_t *gid_list; + time_t deadline; +} aux_gid_list_t; struct nfs_state { rpcsvc_t *rpcsvc; @@ -88,6 +109,11 @@ struct nfs_state { int enable_nlm; int mount_udp; struct rpc_clnt *rpc_clnt; + gf_boolean_t server_aux_gids; + gf_lock_t aux_gid_lock; + uint32_t aux_gid_max_age; + unsigned int aux_gid_nbuckets; + aux_gid_list_t aux_gid_cache[AUX_GID_CACHE_SIZE]; }; #define gf_nfs_dvm_on(nfsstt) (((struct nfs_state *)nfsstt)->dynamicvolumes == GF_NFS_DVM_ON) @@ -126,4 +152,7 @@ nfs_request_primary_user_init (nfs_user_t *nfu, rpcsvc_request_t *req, uid_t uid, gid_t gid); extern int nfs_subvolume_started (struct nfs_state *nfs, xlator_t *xl); + +extern void +nfs_fix_groups (xlator_t *this, call_stack_t *root); #endif diff --git a/xlators/nfs/server/src/nlm4.c b/xlators/nfs/server/src/nlm4.c index 5acc1b80978..98012203bb5 100644 --- a/xlators/nfs/server/src/nlm4.c +++ b/xlators/nfs/server/src/nlm4.c @@ -646,6 +646,11 @@ nlm4_file_open_and_resume(nfs3_call_state_t *cs, nlm4_resume_fn_t resume) frame->root->uid = 0; frame->root->gid = 0; frame->local = cs; + /* + * This is the only place that we call STACK_WIND without nfs_fix_groups, + * because in this particular case the relevant identity is in lk_owner and + * we don't care about the fields that nfs_fix_groups would set up. + */ STACK_WIND_COOKIE (frame, nlm4_file_open_cbk, cs->vol, cs->vol, cs->vol->fops->open, &cs->resolvedloc, O_RDWR, cs->fd, NULL); |