summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCsaba Henk <csaba@redhat.com>2018-05-03 10:22:18 +0200
committerAmar Tumballi <amarts@redhat.com>2018-05-04 17:42:12 +0000
commit2ac79ed8048753dfd2494d3a4d3b0e9411673e3a (patch)
tree080da10738f9dade1fcb787d43f333c641dffe04
parent15866ac9773e89cd9e017e7d3bf8aa01a87edfd8 (diff)
fuse: add support for kernel writeback cache
- Added kernel-writeback-cache command line and xlator option for requesting utilisation of the writeback cache of the kernel in FUSE_INIT (see [1]). - Added attr-times-granularity command line and xlator option via which granularity of the {a,m,c}time in stat (attr) data that we support can be indicated to kernel. This is a means to avoid divergence of the attr times between kernel and userspace that could occur with writeback-cache, while still maintaining maximum time precision the FUSE server is capable of (see [2]). - Handling FATTR_CTIME flag in FUSE_SETATTR that indicates presence of ctime in setattr payload. Currently we cannot associate arbitrary ctimes to files on backend, so we just touch them to update their ctimes to current time. Having ctimes in setattr payload is also a side effect of writeback cache (see [3] and [4]). [1]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=4d99ff8, "fuse: Turn writeback cache on" [2]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=e27c9d3, "fuse: fuse: add time_gran to INIT_OUT" [3]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=1e18bda, "fuse: add .write_inode" [4]: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=ab9e13f, "fuse: allow ctime flushing to userspace" Updates: #435 Change-Id: Id174c8e0c815c4456c35f8c53e41a6a507d91855 Signed-off-by: Csaba Henk <csaba@redhat.com>
-rw-r--r--doc/glusterfs.86
-rw-r--r--doc/mount.glusterfs.86
-rw-r--r--glusterfsd/src/glusterfsd.c70
-rw-r--r--glusterfsd/src/glusterfsd.h2
-rw-r--r--libglusterfs/src/glusterfs.h4
-rw-r--r--libglusterfs/src/xlator.h1
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c50
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h4
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in14
-rw-r--r--xlators/storage/posix/src/posix-inode-fd-ops.c17
10 files changed, 170 insertions, 4 deletions
diff --git a/doc/glusterfs.8 b/doc/glusterfs.8
index 985f30a5865..592dedb6759 100644
--- a/doc/glusterfs.8
+++ b/doc/glusterfs.8
@@ -109,6 +109,9 @@ Mount subdirectory instead of the '/' of volume.
.PP
.TP
+\fB\-\-attr\-times\-granularity=NANOSECONDS\fR
+Declare supported granularity of file attribute times in nanoseconds (default is 0, which the kernel handles as unspecified; valid real values are between 1 and 1000000000).
+.TP
\fB\-\-attribute\-timeout=SECONDS\fR
Set attribute timeout to SECONDS for inodes in fuse kernel module (the default is 1).
.TP
@@ -130,6 +133,9 @@ Set entry timeout to SECONDS in fuse kernel module (the default is 1).
\fB\-\-gid\-timeout=SECONDS\fR
Set auxiliary group list timeout to SECONDS for fuse translator (the default is 0).
.TP
+\fB\-\-kernel\-writeback\-cache=BOOL\fR
+Enable fuse in-kernel writeback cache.
+.TP
\fB\-\-negative\-timeout=SECONDS\fR
Set negative timeout to SECONDS in fuse kernel module (the default is 0).
.TP
diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8
index 6a51fc9aef0..0cc7a6f8535 100644
--- a/doc/mount.glusterfs.8
+++ b/doc/mount.glusterfs.8
@@ -133,6 +133,12 @@ enable root squashing for the trusted client [default: on]
.TP
\fBuse\-readdirp=\fRBOOL
Use readdirp() mode in fuse kernel module [default: on]
+.TP
+\fBkernel\-writeback\-cache=\fRBOOL
+Enable fuse in-kernel writeback cache [default: off]
+.TP
+\fBattr\-times\-granularity=\fRNS
+Declare supported granularity of file attribute times in nanoseconds [default: 0]
.PP
.SH FILES
.TP
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index b5b8e4d30a7..3de12bc125f 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -256,6 +256,11 @@ static struct argp_option gf_options[] = {
OPTION_ARG_OPTIONAL, "disable/enable fuse event-history"},
{"reader-thread-count", ARGP_READER_THREAD_COUNT_KEY, "INTEGER",
OPTION_ARG_OPTIONAL, "set fuse reader thread count"},
+ {"kernel-writeback-cache", ARGP_KERNEL_WRITEBACK_CACHE_KEY, "BOOL",
+ OPTION_ARG_OPTIONAL, "enable fuse in-kernel writeback cache"},
+ {"attr-times-granularity", ARGP_ATTR_TIMES_GRANULARITY_KEY, "NS",
+ OPTION_ARG_OPTIONAL, "declare supported granularity of file attribute"
+ " times in nanoseconds"},
{0, 0, 0, 0, "Miscellaneous Options:"},
{0, }
};
@@ -617,6 +622,44 @@ set_fuse_mount_options (glusterfs_ctx_t *ctx, dict_t *options)
goto err;
}
}
+ switch (cmd_args->kernel_writeback_cache) {
+ case GF_OPTION_ENABLE:
+ ret = dict_set_static_ptr(options, "kernel-writeback-cache",
+ "on");
+ if (ret < 0) {
+ gf_msg ("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
+ "failed to set dict value for key "
+ "kernel-writeback-cache");
+ goto err;
+ }
+ break;
+ case GF_OPTION_DISABLE:
+ ret = dict_set_static_ptr(options, "kernel-writeback-cache",
+ "off");
+ if (ret < 0) {
+ gf_msg ("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
+ "failed to set dict value for key "
+ "kernel-writeback-cache");
+ goto err;
+ }
+ break;
+ case GF_OPTION_DEFERRED: /* default */
+ default:
+ gf_msg_debug ("glusterfsd", 0, "kernel-writeback-cache mode %d",
+ cmd_args->kernel_writeback_cache);
+ break;
+ }
+ if (cmd_args->attr_times_granularity) {
+ ret = dict_set_uint32 (options, "attr-times-granularity",
+ cmd_args->attr_times_granularity);
+ if (ret < 0) {
+ gf_msg ("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
+ "failed to set dict value for key "
+ "attr-times-granularity");
+ goto err;
+ }
+ }
+
ret = 0;
err:
@@ -1385,6 +1428,32 @@ no_oom_api:
break;
+ case ARGP_KERNEL_WRITEBACK_CACHE_KEY:
+ if (!arg)
+ arg = "yes";
+
+ if (gf_string2boolean (arg, &b) == 0) {
+ cmd_args->kernel_writeback_cache = b;
+
+ break;
+ }
+
+ argp_failure (state, -1, 0,
+ "unknown kernel writeback cache setting \"%s\"", arg);
+ break;
+ case ARGP_ATTR_TIMES_GRANULARITY_KEY:
+ if (gf_string2uint32 (arg, &cmd_args->attr_times_granularity)) {
+ argp_failure (state, -1, 0,
+ "unknown attribute times granularity option %s",
+ arg);
+ } else if (cmd_args->attr_times_granularity > 1000000000) {
+ argp_failure (state, -1, 0,
+ "Invalid attribute times granularity value %s. "
+ "Valid range: [\"0, 1000000000\"]", arg);
+ }
+
+ break;
+
}
return 0;
}
@@ -1690,6 +1759,7 @@ glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
cmd_args->fuse_attribute_timeout = -1;
cmd_args->fuse_entry_timeout = -1;
cmd_args->fopen_keep_cache = GF_OPTION_DEFERRED;
+ cmd_args->kernel_writeback_cache = GF_OPTION_DEFERRED;
if (ctx->mem_acct_enable)
cmd_args->mem_acct = 1;
diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
index 2a03ec09fa2..496a4d95352 100644
--- a/glusterfsd/src/glusterfsd.h
+++ b/glusterfsd/src/glusterfsd.h
@@ -105,6 +105,8 @@ enum argp_option_keys {
ARGP_PRINT_XLATORDIR_KEY = 183,
ARGP_PRINT_STATEDUMPDIR_KEY = 184,
ARGP_PRINT_LOGDIR_KEY = 185,
+ ARGP_KERNEL_WRITEBACK_CACHE_KEY = 186,
+ ARGP_ATTR_TIMES_GRANULARITY_KEY = 187,
};
struct _gfd_vol_top_priv {
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 904d4b60d75..90f2762b990 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -532,6 +532,10 @@ struct _cmd_args {
char *event_history;
int thin_client;
uint32_t reader_thread_count;
+
+ /* FUSE writeback cache support */
+ int kernel_writeback_cache;
+ uint32_t attr_times_granularity;
};
typedef struct _cmd_args cmd_args_t;
diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h
index 4f18d1cd2a9..d476cf26442 100644
--- a/libglusterfs/src/xlator.h
+++ b/libglusterfs/src/xlator.h
@@ -33,6 +33,7 @@
#define GF_SET_ATTR_SIZE 0x8
#define GF_SET_ATTR_ATIME 0x10
#define GF_SET_ATTR_MTIME 0x20
+#define GF_SET_ATTR_CTIME 0x40
#define gf_attr_mode_set(mode) ((mode) & GF_SET_ATTR_MODE)
#define gf_attr_uid_set(mode) ((mode) & GF_SET_ATTR_UID)
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index f509d84a15b..322b4deeffa 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -1233,6 +1233,11 @@ fattr_to_gf_set_attr (int32_t valid)
if (valid & FATTR_MTIME)
gf_valid |= GF_SET_ATTR_MTIME;
+#if FUSE_KERNEL_MINOR_VERSION >= 23
+ if (valid & FATTR_CTIME)
+ gf_valid |= GF_SET_ATTR_CTIME;
+#endif
+
if (valid & FATTR_SIZE)
gf_valid |= GF_SET_ATTR_SIZE;
@@ -1271,7 +1276,11 @@ fuse_setattr_resume (fuse_state_t *state)
if ((state->valid & (FATTR_MASK)) != FATTR_SIZE) {
if (state->fd &&
!((state->valid & FATTR_ATIME) ||
- (state->valid & FATTR_MTIME))) {
+ (state->valid & FATTR_MTIME)
+#if FUSE_KERNEL_MINOR_VERSION >= 23
+ || (state->valid & FATTR_CTIME)
+#endif
+ )) {
/*
there is no "futimes" call, so don't send
fsetattr if ATIME or MTIME is set
@@ -1346,8 +1355,14 @@ fuse_setattr (xlator_t *this, fuse_in_header_t *finh, void *msg,
state->attr.ia_size = fsi->size;
state->attr.ia_atime = fsi->atime;
state->attr.ia_mtime = fsi->mtime;
+#if FUSE_KERNEL_MINOR_VERSION >= 23
+ state->attr.ia_ctime = fsi->ctime;
+#endif
state->attr.ia_atime_nsec = fsi->atimensec;
state->attr.ia_mtime_nsec = fsi->mtimensec;
+#if FUSE_KERNEL_MINOR_VERSION >= 23
+ state->attr.ia_ctime_nsec = fsi->ctimensec;
+#endif
state->attr.ia_prot = ia_prot_from_st_mode (fsi->mode);
state->attr.ia_uid = fsi->uid;
@@ -4253,14 +4268,23 @@ fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg,
if (fini->flags & FUSE_ASYNC_DIO)
fino.flags |= FUSE_ASYNC_DIO;
#endif
+
+ size = sizeof (fino);
+#if FUSE_KERNEL_MINOR_VERSION >= 23
/* FUSE 7.23 and newer added attributes to the fuse_init_out struct */
- if (fini->minor > 22) {
- size = sizeof (fino);
- } else {
+ if (fini->minor < 23) {
/* reduce the size, chop off unused attributes from &fino */
size = FUSE_COMPAT_22_INIT_OUT_SIZE;
}
+ /* Writeback cache support */
+ if (fini->minor >= 23) {
+ if (priv->kernel_writeback_cache)
+ fino.flags |= FUSE_WRITEBACK_CACHE;
+ fino.time_gran = priv->attr_times_granularity;
+ }
+#endif
+
ret = send_fuse_data (this, finh, &fino, size);
if (ret == 0)
gf_log ("glusterfs-fuse", GF_LOG_INFO,
@@ -5770,6 +5794,12 @@ init (xlator_t *this_xl)
GF_OPTION_INIT("thin-client", priv->thin_client, bool,
cleanup_exit);
+ /* Writeback cache support */
+ GF_OPTION_INIT("kernel-writeback-cache", priv->kernel_writeback_cache,
+ bool, cleanup_exit);
+ GF_OPTION_INIT("attr-times-granularity", priv->attr_times_granularity,
+ int32, cleanup_exit);
+
/* user has set only background-qlen, not congestion-threshold,
use the fuse kernel driver formula to set congestion. ie, 75% */
if (dict_get (this_xl->options, "background-qlen") &&
@@ -6093,5 +6123,17 @@ struct volume_options options[] = {
.max = 64,
.description = "Sets fuse reader thread count.",
},
+ { .key = {"kernel-writeback-cache"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .description = "Enables fuse in-kernel writeback cache.",
+ },
+ { .key = {"attr-times-granularity"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "0",
+ .min = 0,
+ .max = 1000000000,
+ .description = "Supported granularity of file attribute times.",
+ },
{ .key = {NULL} },
};
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
index 6cf9d2f7cf8..b26b5e21109 100644
--- a/xlators/mount/fuse/src/fuse-bridge.h
+++ b/xlators/mount/fuse/src/fuse-bridge.h
@@ -147,6 +147,10 @@ struct fuse_private {
gf_boolean_t mount_finished;
gf_boolean_t handle_graph_switch;
pthread_cond_t migrate_cond;
+
+ /* Writeback cache support */
+ gf_boolean_t kernel_writeback_cache;
+ int attr_times_granularity;
};
typedef struct fuse_private fuse_private_t;
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
index 6890ff00121..9d9069aa1f7 100755
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
@@ -269,6 +269,14 @@ start_glusterfs ()
cmd_line=$(echo "$cmd_line --xlator-option=$xlator_option");
fi
+ if [ -n "$kernel_writeback_cache" ]; then
+ cmd_line=$(echo "$cmd_line --kernel-writeback-cache=$kernel_writeback_cache");
+ fi
+
+ if [ -n "$attr_times_granularity" ]; then
+ cmd_line=$(echo "$cmd_line --attr-times-granularity=$attr_times_granularity");
+ fi
+
if [ -n "$process_name" ]; then
cmd_line=$(echo "$cmd_line --process-name fuse.$process_name");
else
@@ -520,6 +528,12 @@ with_options()
[ $value = "false" ] ; then
no_root_squash=1;
fi ;;
+ "kernel-writeback-cache")
+ kernel_writeback_cache=$value
+ ;;
+ "attr-times-granularity")
+ attr_times_granularity=$value
+ ;;
"context"|"fscontext"|"defcontext"|"rootcontext")
# standard SElinux mount options to pass to the kernel
[ -z "$fuse_mountopts" ] || fuse_mountopts="$fuse_mountopts,"
diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
index 496f6a7905b..4aa70baf3bd 100644
--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
+++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
@@ -357,6 +357,23 @@ posix_setattr (call_frame_t *frame, xlator_t *this,
}
}
+ if (valid & GF_SET_ATTR_CTIME) {
+ /*
+ * At the moment we have no means to associate an arbitrary
+ * ctime with the file, so we ignore the ctime payload
+ * and update the file ctime to current time (which POSIX
+ * lets us do).
+ */
+ op_ret = PATH_SET_TIMESPEC_OR_TIMEVAL (real_path, NULL);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg (this->name, GF_LOG_ERROR, errno,
+ P_MSG_UTIMES_FAILED, "setattr (utimes) on %s "
+ "failed", real_path);
+ goto out;
+ }
+ }
+
if (!valid) {
op_ret = sys_lchown (real_path, -1, -1);
if (op_ret == -1) {