diff options
author | Shehjar Tikoo <shehjart@gluster.com> | 2010-05-18 01:37:42 +0000 |
---|---|---|
committer | Anand V. Avati <avati@dev.gluster.com> | 2010-05-21 00:32:04 -0700 |
commit | c705b679fbe41aa9ac4486ebf46d3b2ae95d1628 (patch) | |
tree | a02d7314bacd0b720fe4b700a000153928d0df42 | |
parent | 3b88b2dfbc1a33a6967cfedf63a615ee1e81e49d (diff) |
nfs: Introduce trusted-write and trusted-sync options
Introduces two new options:
1. nfs3.*.trusted-write: Forces UNSTABLE writes to return a STABLE flag to
NFS clients so that the clients do not send a COMMIT. STABLE writes
are still handled synchronously, and so are COMMITs if any are sent
at all.
2. nfs3.*.trusted-sync: Forces all WRITEs to return a STABLE flag and all
COMMITs to return success without an fsync, to avoid the overhead of STABLE
writes and of COMMITs that follow UNSTABLE writes. This includes the
trusted-write functionality; in addition, STABLE writes are performed
in an UNSTABLE manner.
Both options violate the NFS protocol but allow better write performance
in most configurations. Use with caution.
Signed-off-by: Shehjar Tikoo <shehjart@gluster.com>
Signed-off-by: Anand V. Avati <avati@dev.gluster.com>
BUG: 924 (Slow NFS synchronous writes)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=924
-rw-r--r-- | xlators/nfs/server/src/nfs.c | 23 | ||||
-rw-r--r-- | xlators/nfs/server/src/nfs3.c | 164 | ||||
-rw-r--r-- | xlators/nfs/server/src/nfs3.h | 2 |
3 files changed, 183 insertions, 6 deletions
diff --git a/xlators/nfs/server/src/nfs.c b/xlators/nfs/server/src/nfs.c index 65ff55c76c1..cb5f19ef992 100644 --- a/xlators/nfs/server/src/nfs.c +++ b/xlators/nfs/server/src/nfs.c @@ -601,6 +601,29 @@ struct volume_options options[] = { .description = "Type of access desired for this subvolume: " " read-only, read-write(default)" }, + { .key = {"nfs3.*.trusted-write"}, + .type = GF_OPTION_TYPE_BOOL, + .description = "On an UNSTABLE write from client, return STABLE flag" + " to force client to not send a COMMIT request. In " + "some environments, combined with a replicated " + "GlusterFS setup, this option can improve write " + "performance. This flag allows user to trust Gluster" + " replication logic to sync data to the disks and " + "recover when required. COMMIT requests if received " + "will be handled in a default manner by fsyncing." + " STABLE writes are still handled in a sync manner. " + "Off by default." + + }, + { .key = {"nfs3.*.trusted-sync"}, + .type = GF_OPTION_TYPE_BOOL, + .description = "All writes and COMMIT requests are treated as async." + " This implies that no write requests are guaranteed" + " to be on server disks when the write reply is " + "received at the NFS client. Trusted sync includes " + " trusted-write behaviour. Off by default." + + }, { .key = {"rpc-auth.auth-unix"}, .type = GF_OPTION_TYPE_BOOL, .description = "Disable or enable the AUTH_UNIX authentication type." 
diff --git a/xlators/nfs/server/src/nfs3.c b/xlators/nfs/server/src/nfs3.c index b5db1b96628..385a4a3747b 100644 --- a/xlators/nfs/server/src/nfs3.c +++ b/xlators/nfs/server/src/nfs3.c @@ -141,6 +141,9 @@ } while (0) \ +#define nfs3_export_sync_trusted(nf3stt, xlid) ((nf3stt)->exports[xlid]).trusted_sync +#define nfs3_export_write_trusted(nf3stt, xlid) ((nf3stt)->exports[xlid]).trusted_write + int nfs3_solaris_zerolen_fh (struct nfs3_fh *fh, int fhlen) { @@ -1594,6 +1597,73 @@ nfs3svc_write_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } +/* + * If this logic determines that the write should return a reply to the client + * after this function, the return value is -1 and the writetype is reset to + * the type of write we want to signify to the client. + * + * In case the write should continue to serve the request according to the type + * of stable write, a 0 is returned and writetype is left as it is. + */ +int +nfs3_write_how (int *writetype, int write_trusted, int sync_trusted) +{ + int ret = -1; + + if (*writetype == UNSTABLE) { + /* On an UNSTABLE write, only return STABLE when trusted-write + * is set. TW is also set when trusted-sync is set. + */ + if (write_trusted) + *writetype = FILE_SYNC; + + goto err; + } else if ((*writetype == DATA_SYNC) || (*writetype == FILE_SYNC)) { + + /* On a STABLE write, if sync-trusted is on, only then, return + * without syncing. + */ + if (sync_trusted) + goto err; + } + + ret = 0; +err: + return ret; +} + + +/* + * Before going into the write reply logic, here is a matrix that shows the + * requirements for a write reply as given by RFC1813. + * + * Requested Write Type || Possible Returns + * ============================================== + * FILE_SYNC || FILE_SYNC + * DATA_SYNC || DATA_SYNC or FILE_SYNC + * UNSTABLE || DATA_SYNC or FILE_SYNC or UNSTABLE + * + * Write types other than UNSTABLE are together called STABLE. 
+ * RS - Return Stable + * RU - Return Unstable + * WS - Write Stable + * WU - Write Unstable + * + *+============================================+ + *| Vol Opts -> || trusted-write| trusted-sync | + *| Write Type || | | + *|-------------||--------------|--------------| + *| STABLE || WS | WU | + *| || RS | RS | + *|-------------||--------------|--------------| + *| UNSTABLE || WU | WU | + *| || RS | RS | + *|-------------||--------------|--------------| + *| COMMIT || fsync | getattr | + *+============================================+ + * + * + */ int32_t nfs3svc_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, @@ -1604,6 +1674,8 @@ nfs3svc_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this, nfs_user_t nfu = {0, }; nfs3_call_state_t *cs = NULL; struct nfs3_state *nfs3 = NULL; + int write_trusted = 0; + int sync_trusted = 0; cs = frame->local; nfs3 = rpcsvc_request_program_private (cs->req); @@ -1612,11 +1684,15 @@ nfs3svc_write_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto err; } - /* So that we do send a reply if an unstable write was requested. 
*/ - ret = -1; stat = NFS3_OK; cs->maxcount = op_ret; - if (cs->writetype == UNSTABLE) + + write_trusted = nfs3_export_write_trusted (cs->nfs3state, + cs->resolvefh.xlatorid); + sync_trusted = nfs3_export_sync_trusted (cs->nfs3state, + cs->resolvefh.xlatorid); + ret = nfs3_write_how (&cs->writetype, write_trusted, sync_trusted); + if (ret == -1) goto err; nfs_request_user_init (&nfu, cs->req); @@ -4386,6 +4462,13 @@ nfs3_commit_resume (void *carg) cs = (nfs3_call_state_t *)carg; nfs3_check_fh_resolve_status (cs, stat, nfs3err); + + if (nfs3_export_sync_trusted (cs->nfs3state, cs->resolvefh.xlatorid)) { + ret = -1; + stat = NFS3_OK; + goto nfs3err; + } + nfs_request_user_init (&nfu, cs->req); ret = nfs_fsync (cs->nfsx, cs->vol, &nfu, cs->fd, 0, nfs3svc_commit_cbk, cs); @@ -4396,7 +4479,8 @@ nfs3err: if (ret < 0) { nfs3_log_common_res (rpcsvc_request_xid (cs->req), "COMMIT", stat, -ret); - nfs3_commit_reply (cs->req, stat, 0, NULL, NULL); + nfs3_commit_reply (cs->req, stat, cs->nfs3state->serverstart, + NULL, NULL); nfs3_call_state_wipe (cs); ret = 0; } @@ -4650,6 +4734,7 @@ nfs3_init_subvolume_options (struct nfs3_export *exp, dict_t *options) char *optstr = NULL; char searchkey[1024]; char *name = NULL; + gf_boolean_t boolt = _gf_false; if ((!exp) || (!options)) return -1; @@ -4694,8 +4779,75 @@ nfs3_init_subvolume_options (struct nfs3_export *exp, dict_t *options) } } - gf_log (GF_NFS3, GF_LOG_TRACE, "%s: %s", exp->subvol->name, - (exp->access == GF_NFS3_VOLACCESS_RO)?"read-only":"read-write"); + exp->trusted_sync = 0; + ret = snprintf (searchkey, 1024, "nfs3.%s.trusted-sync", name); + if (ret < 0) { + gf_log (GF_NFS3, GF_LOG_ERROR, "snprintf failed"); + ret = -1; + goto err; + } + + if (dict_get (options, searchkey)) { + ret = dict_get_str (options, searchkey, &optstr); + if (ret < 0) { + gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to read " + " option: %s", searchkey); + ret = -1; + goto err; + } + + ret = gf_string2boolean (optstr, &boolt); + if (ret < 0) { + gf_log 
(GF_NFS3, GF_LOG_ERROR, "Failed to convert str " + "to gf_boolean_t"); + ret = -1; + goto err; + } + + if (boolt == _gf_true) + exp->trusted_sync = 1; + } + + exp->trusted_write = 0; + ret = snprintf (searchkey, 1024, "nfs3.%s.trusted-write", name); + if (ret < 0) { + gf_log (GF_NFS3, GF_LOG_ERROR, "snprintf failed"); + ret = -1; + goto err; + } + + if (dict_get (options, searchkey)) { + ret = dict_get_str (options, searchkey, &optstr); + if (ret < 0) { + gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to read " + " option: %s", searchkey); + ret = -1; + goto err; + } + + ret = gf_string2boolean (optstr, &boolt); + if (ret < 0) { + gf_log (GF_NFS3, GF_LOG_ERROR, "Failed to convert str " + "to gf_boolean_t"); + ret = -1; + goto err; + } + + if (boolt == _gf_true) + exp->trusted_write = 1; + } + + /* If trusted-sync is on, then we also switch on trusted-write because + * tw is included in ts. In write logic, we're then only checking for + * tw. + */ + if (exp->trusted_sync) + exp->trusted_write = 1; + + gf_log (GF_NFS3, GF_LOG_TRACE, "%s: %s, %s, %s", exp->subvol->name, + (exp->access == GF_NFS3_VOLACCESS_RO)?"read-only":"read-write", + (exp->trusted_sync == 0)?"no trusted_sync":"trusted_sync", + (exp->trusted_write == 0)?"no trusted_write":"trusted_write"); ret = 0; err: return ret; diff --git a/xlators/nfs/server/src/nfs3.h b/xlators/nfs/server/src/nfs3.h index bb5fbb75033..ccdad447735 100644 --- a/xlators/nfs/server/src/nfs3.h +++ b/xlators/nfs/server/src/nfs3.h @@ -84,6 +84,8 @@ struct nfs3_fd_entry { struct nfs3_export { xlator_t *subvol; int access; + int trusted_sync; + int trusted_write; }; #define GF_NFS3_DEFAULT_VOLACCESS (GF_NFS3_VOLACCESS_RW) |