diff options
author | Csaba Henk <csaba@redhat.com> | 2018-05-03 10:22:18 +0200 |
---|---|---|
committer | Amar Tumballi <amarts@redhat.com> | 2018-05-04 17:42:12 +0000 |
commit | 2ac79ed8048753dfd2494d3a4d3b0e9411673e3a (patch) | |
tree | 080da10738f9dade1fcb787d43f333c641dffe04 | |
parent | 15866ac9773e89cd9e017e7d3bf8aa01a87edfd8 (diff) |
fuse: add support for kernel writeback cache
- Added kernel-writeback-cache command line and xlator
option for requesting utilisation of the writeback
cache of the kernel in FUSE_INIT (see [1]).
- Added attr-times-granularity command line and xlator
option, via which the granularity of the {a,m,c}time
values in stat (attr) data that we support can be
indicated to the kernel. This is a means to avoid
divergence of the attr times between kernel and userspace
that could occur with writeback-cache, while still
maintaining the maximum time precision the FUSE server is
capable of (see [2]).
- Added handling of the FATTR_CTIME flag in FUSE_SETATTR,
which indicates the presence of ctime in the setattr
payload. Currently we cannot associate arbitrary ctimes
with files on the backend, so we just touch them to update
their ctimes to the current time. Having ctimes in the
setattr payload is also a side effect of the writeback
cache (see [3] and [4]).
[1]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4d99ff8,
"fuse: Turn writeback cache on"
[2]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e27c9d3,
"fuse: fuse: add time_gran to INIT_OUT"
[3]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1e18bda,
"fuse: add .write_inode"
[4]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=ab9e13f,
"fuse: allow ctime flushing to userspace"
Updates: #435
Change-Id: Id174c8e0c815c4456c35f8c53e41a6a507d91855
Signed-off-by: Csaba Henk <csaba@redhat.com>
-rw-r--r-- | doc/glusterfs.8 | 6 | ||||
-rw-r--r-- | doc/mount.glusterfs.8 | 6 | ||||
-rw-r--r-- | glusterfsd/src/glusterfsd.c | 70 | ||||
-rw-r--r-- | glusterfsd/src/glusterfsd.h | 2 | ||||
-rw-r--r-- | libglusterfs/src/glusterfs.h | 4 | ||||
-rw-r--r-- | libglusterfs/src/xlator.h | 1 | ||||
-rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.c | 50 | ||||
-rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.h | 4 | ||||
-rwxr-xr-x | xlators/mount/fuse/utils/mount.glusterfs.in | 14 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix-inode-fd-ops.c | 17 |
10 files changed, 170 insertions, 4 deletions
diff --git a/doc/glusterfs.8 b/doc/glusterfs.8 index 985f30a5865..592dedb6759 100644 --- a/doc/glusterfs.8 +++ b/doc/glusterfs.8 @@ -109,6 +109,9 @@ Mount subdirectory instead of the '/' of volume. .PP .TP +\fB\-\-attr\-times\-granularity=NANOSECONDS\fR +Declare supported granularity of file attribute times (default is 0 which kernel handles as unspecified; valid real values are between 1 and 1000000000). +.TP \fB\-\-attribute\-timeout=SECONDS\fR Set attribute timeout to SECONDS for inodes in fuse kernel module (the default is 1). .TP @@ -130,6 +133,9 @@ Set entry timeout to SECONDS in fuse kernel module (the default is 1). \fB\-\-gid\-timeout=SECONDS\fR Set auxiliary group list timeout to SECONDS for fuse translator (the default is 0). .TP +\fB\-\-kernel-writeback-cache=BOOL\fR +Enable fuse in-kernel writeback cache. +.TP \fB\-\-negative\-timeout=SECONDS\fR Set negative timeout to SECONDS in fuse kernel module (the default is 0). .TP diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8 index 6a51fc9aef0..0cc7a6f8535 100644 --- a/doc/mount.glusterfs.8 +++ b/doc/mount.glusterfs.8 @@ -133,6 +133,12 @@ enable root squashing for the trusted client [default: on] .TP \fBuse\-readdirp=\fRBOOL Use readdirp() mode in fuse kernel module [default: on] +.TP +\fBkernel\-writeback\-cache=\fRBOOL +Enable fuse in-kernel writeback cache [default: off] +.TP +\fBattr\-times\-granularity=\fRNS +Declare supported granularity of file attribute [default: 0] .PP .SH FILES .TP diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c index b5b8e4d30a7..3de12bc125f 100644 --- a/glusterfsd/src/glusterfsd.c +++ b/glusterfsd/src/glusterfsd.c @@ -256,6 +256,11 @@ static struct argp_option gf_options[] = { OPTION_ARG_OPTIONAL, "disable/enable fuse event-history"}, {"reader-thread-count", ARGP_READER_THREAD_COUNT_KEY, "INTEGER", OPTION_ARG_OPTIONAL, "set fuse reader thread count"}, + {"kernel-writeback-cache", ARGP_KERNEL_WRITEBACK_CACHE_KEY, "BOOL", + OPTION_ARG_OPTIONAL, 
"enable fuse in-kernel writeback cache"}, + {"attr-times-granularity", ARGP_ATTR_TIMES_GRANULARITY_KEY, "NS", + OPTION_ARG_OPTIONAL, "declare supported granularity of file attribute" + " times in nanoseconds"}, {0, 0, 0, 0, "Miscellaneous Options:"}, {0, } }; @@ -617,6 +622,44 @@ set_fuse_mount_options (glusterfs_ctx_t *ctx, dict_t *options) goto err; } } + switch (cmd_args->kernel_writeback_cache) { + case GF_OPTION_ENABLE: + ret = dict_set_static_ptr(options, "kernel-writeback-cache", + "on"); + if (ret < 0) { + gf_msg ("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4, + "failed to set dict value for key " + "kernel-writeback-cache"); + goto err; + } + break; + case GF_OPTION_DISABLE: + ret = dict_set_static_ptr(options, "kernel-writeback-cache", + "off"); + if (ret < 0) { + gf_msg ("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4, + "failed to set dict value for key " + "kernel-writeback-cache"); + goto err; + } + break; + case GF_OPTION_DEFERRED: /* default */ + default: + gf_msg_debug ("glusterfsd", 0, "kernel-writeback-cache mode %d", + cmd_args->kernel_writeback_cache); + break; + } + if (cmd_args->attr_times_granularity) { + ret = dict_set_uint32 (options, "attr-times-granularity", + cmd_args->attr_times_granularity); + if (ret < 0) { + gf_msg ("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4, + "failed to set dict value for key " + "attr-times-granularity"); + goto err; + } + } + ret = 0; err: @@ -1385,6 +1428,32 @@ no_oom_api: break; + case ARGP_KERNEL_WRITEBACK_CACHE_KEY: + if (!arg) + arg = "yes"; + + if (gf_string2boolean (arg, &b) == 0) { + cmd_args->kernel_writeback_cache = b; + + break; + } + + argp_failure (state, -1, 0, + "unknown kernel writeback cache setting \"%s\"", arg); + break; + case ARGP_ATTR_TIMES_GRANULARITY_KEY: + if (gf_string2uint32 (arg, &cmd_args->attr_times_granularity)) { + argp_failure (state, -1, 0, + "unknown attribute times granularity option %s", + arg); + } else if (cmd_args->attr_times_granularity > 1000000000) { + 
argp_failure (state, -1, 0, + "Invalid attribute times granularity value %s. " + "Valid range: [\"0, 1000000000\"]", arg); + } + + break; + } return 0; } @@ -1690,6 +1759,7 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx) cmd_args->fuse_attribute_timeout = -1; cmd_args->fuse_entry_timeout = -1; cmd_args->fopen_keep_cache = GF_OPTION_DEFERRED; + cmd_args->kernel_writeback_cache = GF_OPTION_DEFERRED; if (ctx->mem_acct_enable) cmd_args->mem_acct = 1; diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h index 2a03ec09fa2..496a4d95352 100644 --- a/glusterfsd/src/glusterfsd.h +++ b/glusterfsd/src/glusterfsd.h @@ -105,6 +105,8 @@ enum argp_option_keys { ARGP_PRINT_XLATORDIR_KEY = 183, ARGP_PRINT_STATEDUMPDIR_KEY = 184, ARGP_PRINT_LOGDIR_KEY = 185, + ARGP_KERNEL_WRITEBACK_CACHE_KEY = 186, + ARGP_ATTR_TIMES_GRANULARITY_KEY = 187, }; struct _gfd_vol_top_priv { diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 904d4b60d75..90f2762b990 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -532,6 +532,10 @@ struct _cmd_args { char *event_history; int thin_client; uint32_t reader_thread_count; + + /* FUSE writeback cache support */ + int kernel_writeback_cache; + uint32_t attr_times_granularity; }; typedef struct _cmd_args cmd_args_t; diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h index 4f18d1cd2a9..d476cf26442 100644 --- a/libglusterfs/src/xlator.h +++ b/libglusterfs/src/xlator.h @@ -33,6 +33,7 @@ #define GF_SET_ATTR_SIZE 0x8 #define GF_SET_ATTR_ATIME 0x10 #define GF_SET_ATTR_MTIME 0x20 +#define GF_SET_ATTR_CTIME 0x40 #define gf_attr_mode_set(mode) ((mode) & GF_SET_ATTR_MODE) #define gf_attr_uid_set(mode) ((mode) & GF_SET_ATTR_UID) diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index f509d84a15b..322b4deeffa 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -1233,6 +1233,11 @@ 
fattr_to_gf_set_attr (int32_t valid) if (valid & FATTR_MTIME) gf_valid |= GF_SET_ATTR_MTIME; +#if FUSE_KERNEL_MINOR_VERSION >= 23 + if (valid & FATTR_CTIME) + gf_valid |= GF_SET_ATTR_CTIME; +#endif + if (valid & FATTR_SIZE) gf_valid |= GF_SET_ATTR_SIZE; @@ -1271,7 +1276,11 @@ fuse_setattr_resume (fuse_state_t *state) if ((state->valid & (FATTR_MASK)) != FATTR_SIZE) { if (state->fd && !((state->valid & FATTR_ATIME) || - (state->valid & FATTR_MTIME))) { + (state->valid & FATTR_MTIME) +#if FUSE_KERNEL_MINOR_VERSION >= 23 + || (state->valid & FATTR_CTIME) +#endif + )) { /* there is no "futimes" call, so don't send fsetattr if ATIME or MTIME is set @@ -1346,8 +1355,14 @@ fuse_setattr (xlator_t *this, fuse_in_header_t *finh, void *msg, state->attr.ia_size = fsi->size; state->attr.ia_atime = fsi->atime; state->attr.ia_mtime = fsi->mtime; +#if FUSE_KERNEL_MINOR_VERSION >= 23 + state->attr.ia_ctime = fsi->ctime; +#endif state->attr.ia_atime_nsec = fsi->atimensec; state->attr.ia_mtime_nsec = fsi->mtimensec; +#if FUSE_KERNEL_MINOR_VERSION >= 23 + state->attr.ia_ctime_nsec = fsi->ctimensec; +#endif state->attr.ia_prot = ia_prot_from_st_mode (fsi->mode); state->attr.ia_uid = fsi->uid; @@ -4253,14 +4268,23 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg, if (fini->flags & FUSE_ASYNC_DIO) fino.flags |= FUSE_ASYNC_DIO; #endif + + size = sizeof (fino); +#if FUSE_KERNEL_MINOR_VERSION >= 23 /* FUSE 7.23 and newer added attributes to the fuse_init_out struct */ - if (fini->minor > 22) { - size = sizeof (fino); - } else { + if (fini->minor < 23) { /* reduce the size, chop off unused attributes from &fino */ size = FUSE_COMPAT_22_INIT_OUT_SIZE; } + /* Writeback cache support */ + if (fini->minor >= 23) { + if (priv->kernel_writeback_cache) + fino.flags |= FUSE_WRITEBACK_CACHE; + fino.time_gran = priv->attr_times_granularity; + } +#endif + ret = send_fuse_data (this, finh, &fino, size); if (ret == 0) gf_log ("glusterfs-fuse", GF_LOG_INFO, @@ -5770,6 +5794,12 @@ init 
(xlator_t *this_xl) GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit); + /* Writeback cache support */ + GF_OPTION_INIT("kernel-writeback-cache", priv->kernel_writeback_cache, + bool, cleanup_exit); + GF_OPTION_INIT("attr-times-granularity", priv->attr_times_granularity, + int32, cleanup_exit); + /* user has set only background-qlen, not congestion-threshold, use the fuse kernel driver formula to set congestion. ie, 75% */ if (dict_get (this_xl->options, "background-qlen") && @@ -6093,5 +6123,17 @@ struct volume_options options[] = { .max = 64, .description = "Sets fuse reader thread count.", }, + { .key = {"kernel-writeback-cache"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .description = "Enables fuse in-kernel writeback cache.", + }, + { .key = {"attr-times-granularity"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "0", + .min = 0, + .max = 1000000000, + .description = "Supported granularity of file attribute times.", + }, { .key = {NULL} }, }; diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h index 6cf9d2f7cf8..b26b5e21109 100644 --- a/xlators/mount/fuse/src/fuse-bridge.h +++ b/xlators/mount/fuse/src/fuse-bridge.h @@ -147,6 +147,10 @@ struct fuse_private { gf_boolean_t mount_finished; gf_boolean_t handle_graph_switch; pthread_cond_t migrate_cond; + + /* Writeback cache support */ + gf_boolean_t kernel_writeback_cache; + int attr_times_granularity; }; typedef struct fuse_private fuse_private_t; diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in index 6890ff00121..9d9069aa1f7 100755 --- a/xlators/mount/fuse/utils/mount.glusterfs.in +++ b/xlators/mount/fuse/utils/mount.glusterfs.in @@ -269,6 +269,14 @@ start_glusterfs () cmd_line=$(echo "$cmd_line --xlator-option=$xlator_option"); fi + if [ -n "$kernel_writeback_cache" ]; then + cmd_line=$(echo "$cmd_line --kernel-writeback-cache=$kernel_writeback_cache"); + fi + + if [ -n 
"$attr_times_granularity" ]; then + cmd_line=$(echo "$cmd_line --attr-times-granularity=$attr_times_granularity"); + fi + if [ -n "$process_name" ]; then cmd_line=$(echo "$cmd_line --process-name fuse.$process_name"); else @@ -520,6 +528,12 @@ with_options() [ $value = "false" ] ; then no_root_squash=1; fi ;; + "kernel-writeback-cache") + kernel_writeback_cache=$value + ;; + "attr-times-granularity") + attr_times_granularity=$value + ;; "context"|"fscontext"|"defcontext"|"rootcontext") # standard SElinux mount options to pass to the kernel [ -z "$fuse_mountopts" ] || fuse_mountopts="$fuse_mountopts," diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c index 496f6a7905b..4aa70baf3bd 100644 --- a/xlators/storage/posix/src/posix-inode-fd-ops.c +++ b/xlators/storage/posix/src/posix-inode-fd-ops.c @@ -357,6 +357,23 @@ posix_setattr (call_frame_t *frame, xlator_t *this, } } + if (valid & GF_SET_ATTR_CTIME) { + /* + * At the moment we have no means to associate an arbitrary + * ctime with the file, so we ignore the ctime payload + * and update the file ctime to current time (which POSIX + * lets us to do). + */ + op_ret = PATH_SET_TIMESPEC_OR_TIMEVAL (real_path, NULL); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_UTIMES_FAILED, "setattr (utimes) on %s " + "failed", real_path); + goto out; + } + } + if (!valid) { op_ret = sys_lchown (real_path, -1, -1); if (op_ret == -1) { |