summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--doc/developer-guide/README.md5
-rw-r--r--doc/developer-guide/fuse-interrupt.md130
-rw-r--r--glusterfsd/src/glusterfsd.c44
-rw-r--r--glusterfsd/src/glusterfsd.h1
-rw-r--r--libglusterfs/src/glusterfs.h2
-rw-r--r--tests/features/interrupt.t67
-rw-r--r--tests/features/open_and_sleep.c27
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c60
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h4
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in7
10 files changed, 345 insertions, 2 deletions
diff --git a/doc/developer-guide/README.md b/doc/developer-guide/README.md
index 4c6346e83d4..6c00a4a221b 100644
--- a/doc/developer-guide/README.md
+++ b/doc/developer-guide/README.md
@@ -59,6 +59,11 @@ Translators
- [Storage/posix Translator](./posix.md)
- [Compression translator](./network_compression.md)
+Fuse
+----
+
+- [Interrupt handling](./fuse-interrupt.md)
+
Testing/Debugging
-----------------
diff --git a/doc/developer-guide/fuse-interrupt.md b/doc/developer-guide/fuse-interrupt.md
new file mode 100644
index 00000000000..f92b5532eaf
--- /dev/null
+++ b/doc/developer-guide/fuse-interrupt.md
@@ -0,0 +1,130 @@
+# Fuse interrupt handling
+
+## Conventions followed
+
+- *FUSE* refers to the "wire protocol" between kernel and userspace and
+ related specifications.
+- *fuse* refers to the kernel subsystem and also to the GlusterFs translator.
+
+## FUSE interrupt handling spec
+
+The [Linux kernel FUSE documentation](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/filesystems/fuse.txt?h=v4.18#n148)
+desrcibes how interrupt handling happens in fuse.
+
+## Interrupt handling in the fuse translator
+
+### Declarations
+
+This document describes the internal API in the fuse translator with which
+interrupt can be handled.
+
+The API being internal (to be used only in fuse-bridge.c; the functions are
+not exported to a header file).
+
+```
+enum fuse_interrupt_state {
+ INTERRUPT_NONE,
+ INTERRUPT_SQUELCHED,
+ INTERRUPT_HANDLED,
+};
+typedef enum fuse_interrupt_state fuse_interrupt_state_t;
+struct fuse_interrupt_record;
+typedef struct fuse_interrupt_record fuse_interrupt_record_t;
+typedef void (*fuse_interrupt_handler_t)(xlator_t *this,
+ fuse_interrupt_record_t *);
+struct fuse_interrupt_record {
+ fuse_in_header_t fuse_in_header;
+ void *data;
+ /*
+ ...
+ */
+};
+
+fuse_interrupt_record_t *
+fuse_interrupt_record_new(fuse_in_header_t *finh,
+ fuse_interrupt_handler_t handler);
+
+void
+fuse_interrupt_record_insert(xlator_t *this, fuse_interrupt_record_t *fir);
+
+gf_boolean_t
+fuse_interrupt_finish_fop(call_frame_t *frame, xlator_t *this,
+ gf_boolean_t sync, void **datap);
+
+void
+fuse_interrupt_finish_interrupt(xlator_t *this, fuse_interrupt_record_t *fir,
+ fuse_interrupt_state_t intstat,
+ gf_boolean_t sync, void **datap);
+```
+
+The code demonstrates the usage of the API through `fuse_flush()`. (It's a
+dummy implementation only for demonstration purposes.) Flush is chosen
+because a `FLUSH` interrupt is easy to trigger (see
+*tests/features/interrupt.t*). Interrupt handling for flush is switched on
+by `--fuse-flush-handle-interrupt` (a hidden glusterfs command line flag).
+The flush interrupt handling code is guarded by the
+`flush_handle_interrupt` Boolean member of `fuse_private_t`.
+
+### Usage
+
+A given FUSE fop can be enabled to handle interrupts via the following
+steps:
+
+- Define a handler function (of type `fuse_interrupt_handler_t`).
+ It should implement the interrupt handling logic and in the end
+ call (directly or as async callback) `fuse_interrupt_finish_interrupt()`.
+ The `intstat` argument to `fuse_interrupt_finish_interrupt` should be
+ either `INTERRUPT_SQUELCHED` or `INTERRUPT_HANDLED`.
+ - `INTERRUPT_SQUELCHED` means that we choose not to handle the interrupt
+ and the fop is going on uninterrupted.
+ - `INTERRUPT_HANDLED` means that the interrupt was actually handled. In
+ this case the fop will be answered from interrupt context with errno
+ `EINTR` (that is, the fop should not send a response to the kernel).
+
+ We return to the `sync` and `datap` arguments later.
+- In the `fuse_<FOP>` function create an interrupt record using
+ `fuse_interrupt_record_new()`, passing the incoming `fuse_in_header` and
+ the above handler function to it.
+ - Arbitrary further data can be referred to via the `data` member of the
+ interrupt record that is to be passed on from fop context to
+ interrupt context.
+- When it's set up, pass the interrupt record to
+ `fuse_interrupt_record_insert()`.
+- In `fuse_<FOP>_cbk` call `fuse_interrupt_finish_fop()`.
+ - `fuse_interrupt_finish_fop()` returns a Boolean according to whether the
+ interrupt was handled. If it was, then the fuse request is already
+ answered and the stack gets destroyed in `fuse_interrupt_finish_fop` so
+ `fuse_<FOP>_cbk` can just return (zero). Otherwise follow the standard
+ cbk logic (answer the fuse request and destroy the stack -- these are
+ typically accomplished by `fuse_err_cbk()`).
+- The last two argument of `fuse_interrupt_finish_fop()` and
+ `fuse_interrupt_finish_interrupt()` are `gf_boolean_t sync` and
+ `void **datap`.
+ - `sync` represents the strategy for freeing the interrupt record. The
+ interrupt handler and the fop handler are in race to get at the interrupt
+ record first (interrupt handler for purposes of doing the interrupt
+ handling, fop handler for purposes of deactivating the interrupt record
+ upon completion of the fop handling).
+ - If `sync` is true, then the fop handler will wait for the interrupt
+ handler to finish and it takes care of freeing.
+ - If `sync` is false, the loser of the above race will perform freeing.
+
+ Freeing is done within the respective interrupt finish routines, except
+ for the `data` field of the interrupt record; with respect to that, see
+ the discussion of the `datap` parameter below. The strategy has to be
+ consensual, that is, `fuse_interrupt_finish_fop()` and
+ `fuse_interrupt_finish_interrupt()` must pass the same value for `sync`.
+ If dismantling the resources associated with the interrupt record is
+ simple, `sync = _gf_false` is the suggested choice; `sync = _gf_true` can
+ be useful in the opposite case, when dismantling those resources would
+ be inconvenient to implement in two places or to enact in non-fop context.
+ - If `datap` is `NULL`, the `data` member of the interrupt record will be
+ freed within the interrupt finish routine. If it points to a valid
+ `void *` pointer, and if caller is doing the cleanup (see `sync` above),
+ then that pointer will be directed to the `data` member of the interrupt
+ record and it's up to the caller what it's doing with it.
+ - If `sync` is true, interrupt handler can use `datap = NULL`, and
+ fop handler will have `datap` set.
+ - If `sync` is false, and handlers pass a pointer to a pointer for
+ `datap`, they should check if the pointed pointer is NULL before
+ attempting to deal with the data.
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index dfef7922245..f62fbf5e7c4 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -264,6 +264,9 @@ static struct argp_option gf_options[] = {
OPTION_ARG_OPTIONAL,
"declare supported granularity of file attribute"
" times in nanoseconds"},
+ {"fuse-flush-handle-interrupt", ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY,
+ "BOOL", OPTION_ARG_OPTIONAL | OPTION_HIDDEN,
+ "handle interrupt in fuse FLUSH handler"},
{0, 0, 0, 0, "Miscellaneous Options:"},
{
0,
@@ -643,6 +646,31 @@ set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options)
goto err;
}
}
+ switch (cmd_args->fuse_flush_handle_interrupt) {
+ case GF_OPTION_ENABLE:
+ ret = dict_set_static_ptr(options, "flush-handle-interrupt", "on");
+ if (ret < 0) {
+ gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
+ "failed to set dict value for key "
+ "flush-handle-interrupt");
+ goto err;
+ }
+ break;
+ case GF_OPTION_DISABLE:
+ ret = dict_set_static_ptr(options, "flush-handle-interrupt", "off");
+ if (ret < 0) {
+ gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4,
+ "failed to set dict value for key "
+ "flush-handle-interrupt");
+ goto err;
+ }
+ break;
+ case GF_OPTION_DEFERRED: /* default */
+ default:
+ gf_msg_debug("glusterfsd", 0, "fuse-flush-handle-interrupt mode %d",
+ cmd_args->fuse_flush_handle_interrupt);
+ break;
+ }
ret = 0;
err:
@@ -1421,6 +1449,21 @@ parse_opts(int key, char *arg, struct argp_state *state)
}
break;
+
+ case ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY:
+ if (!arg)
+ arg = "yes";
+
+ if (gf_string2boolean(arg, &b) == 0) {
+ cmd_args->fuse_flush_handle_interrupt = b;
+
+ break;
+ }
+
+ argp_failure(state, -1, 0,
+ "unknown fuse flush handle interrupt setting \"%s\"",
+ arg);
+ break;
}
return 0;
}
@@ -1728,6 +1771,7 @@ glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
cmd_args->fuse_entry_timeout = -1;
cmd_args->fopen_keep_cache = GF_OPTION_DEFERRED;
cmd_args->kernel_writeback_cache = GF_OPTION_DEFERRED;
+ cmd_args->fuse_flush_handle_interrupt = GF_OPTION_DEFERRED;
if (ctx->mem_acct_enable)
cmd_args->mem_acct = 1;
diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
index 28d46fc68b9..0042054e138 100644
--- a/glusterfsd/src/glusterfsd.h
+++ b/glusterfsd/src/glusterfsd.h
@@ -108,6 +108,7 @@ enum argp_option_keys {
ARGP_KERNEL_WRITEBACK_CACHE_KEY = 186,
ARGP_ATTR_TIMES_GRANULARITY_KEY = 187,
ARGP_PRINT_LIBEXECDIR_KEY = 188,
+ ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189,
};
struct _gfd_vol_top_priv {
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 90bef74c4c3..93a01cab985 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -562,6 +562,8 @@ struct _cmd_args {
/* FUSE writeback cache support */
int kernel_writeback_cache;
uint32_t attr_times_granularity;
+
+ int fuse_flush_handle_interrupt;
};
typedef struct _cmd_args cmd_args_t;
diff --git a/tests/features/interrupt.t b/tests/features/interrupt.t
new file mode 100644
index 00000000000..476d8755d2b
--- /dev/null
+++ b/tests/features/interrupt.t
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+##Copy this file to tests/bugs before running run.sh (cp extras/test/bug-920583.t tests/bugs/)
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+TESTS_EXPECTED_IN_LOOP=4
+
+cleanup;
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/open_and_sleep.c
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+function log-file-name()
+{
+ logfilename=$M0".log"
+ echo ${logfilename:1} | tr / -
+}
+
+log_file=$logdir"/"`log-file-name`
+
+function test_interrupt {
+ local handlebool="$1"
+ local logpattern="$2"
+
+ TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --fuse-flush-handle-interrupt=$handlebool --log-level=DEBUG $M0
+
+ # If the test helper fails (which is considered a setup error, not failure of the test
+ # case itself), kill will be invoked without argument, and that will be the actual
+ # error which is caught.
+ TEST "./$(dirname $0)/open_and_sleep $M0/testfile | { sleep 0.1; xargs -n1 kill -INT; }"
+
+ TEST "grep -E '$logpattern' $log_file"
+ # Basic sanity check, making sure filesystem has not crashed.
+ TEST test -f $M0/testfile
+}
+
+# Theoretically FLUSH might finish before INTERRUPT is handled,
+# in which case we'd get the "no handler found" message (but it's unlikely).
+test_interrupt yes 'FLUSH.*interrupt handler triggered|INTERRUPT.*no handler found'
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+test_interrupt no 'INTERRUPT.*no handler found'
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup_tester $(dirname $0)/open_and_sleep;
+cleanup;
diff --git a/tests/features/open_and_sleep.c b/tests/features/open_and_sleep.c
new file mode 100644
index 00000000000..7d0e22a2503
--- /dev/null
+++ b/tests/features/open_and_sleep.c
@@ -0,0 +1,27 @@
+#include <unistd.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+int
+main(int argc, char **argv)
+{
+ pid_t pid;
+ int fd;
+
+ if (argc >= 2) {
+ fd = open(argv[1], O_RDWR | O_CREAT, 0644);
+ if (fd == -1) {
+ fprintf(stderr, "cannot open/create %s\n", argv[1]);
+ return 1;
+ }
+ }
+
+ pid = getpid();
+ printf("%d\n", pid);
+ fflush(stdout);
+
+ for (;;)
+ sleep(1);
+
+ return 0;
+}
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index e4e894e6c9b..48b8d9df480 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -1829,6 +1829,21 @@ fuse_err_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
}
static int
+fuse_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ fuse_private_t *priv = this->private;
+
+ if (priv->flush_handle_interrupt) {
+ if (fuse_interrupt_finish_fop(frame, this, _gf_false, NULL)) {
+ return 0;
+ }
+ }
+
+ return fuse_err_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+}
+
+static int
fuse_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
@@ -3044,7 +3059,19 @@ fuse_lseek(xlator_t *this, fuse_in_header_t *finh, void *msg,
void
fuse_flush_resume(fuse_state_t *state)
{
- FUSE_FOP(state, fuse_err_cbk, GF_FOP_FLUSH, flush, state->fd, state->xdata);
+ FUSE_FOP(state, fuse_flush_cbk, GF_FOP_FLUSH, flush, state->fd,
+ state->xdata);
+}
+
+static void
+fuse_flush_interrupt_handler(xlator_t *this, fuse_interrupt_record_t *fir)
+{
+ gf_log("glusterfs-fuse", GF_LOG_DEBUG,
+ "FLUSH unique %" PRIu64 ": interrupt handler triggered",
+ fir->fuse_in_header.unique);
+
+ fuse_interrupt_finish_interrupt(this, fir, INTERRUPT_HANDLED, _gf_false,
+ NULL);
}
static void
@@ -3052,6 +3079,7 @@ fuse_flush(xlator_t *this, fuse_in_header_t *finh, void *msg,
struct iobuf *iobuf)
{
struct fuse_flush_in *ffi = msg;
+ fuse_private_t *priv = NULL;
fuse_state_t *state = NULL;
fd_t *fd = NULL;
@@ -3060,6 +3088,26 @@ fuse_flush(xlator_t *this, fuse_in_header_t *finh, void *msg,
fd = FH_TO_FD(ffi->fh);
state->fd = fd;
+ priv = this->private;
+ if (priv->flush_handle_interrupt) {
+ fuse_interrupt_record_t *fir = NULL;
+
+ fir = fuse_interrupt_record_new(finh, fuse_flush_interrupt_handler);
+ if (!fir) {
+ send_fuse_err(this, finh, ENOMEM);
+
+ gf_log("glusterfs-fuse", GF_LOG_ERROR,
+ "FLUSH unique %" PRIu64
+ ":"
+ " interrupt record allocation failed",
+ finh->unique);
+ free_fuse_state(state);
+
+ return;
+ }
+ fuse_interrupt_record_insert(this, fir);
+ }
+
fuse_resolve_fd_init(state, &state->resolve, fd);
state->lk_owner = ffi->lock_owner;
@@ -6301,6 +6349,9 @@ init(xlator_t *this_xl)
GF_OPTION_INIT("attr-times-granularity", priv->attr_times_granularity,
int32, cleanup_exit);
+ GF_OPTION_INIT("flush-handle-interrupt", priv->flush_handle_interrupt, bool,
+ cleanup_exit);
+
/* user has set only background-qlen, not congestion-threshold,
use the fuse kernel driver formula to set congestion. ie, 75% */
if (dict_get(this_xl->options, "background-qlen") &&
@@ -6608,5 +6659,12 @@ struct volume_options options[] = {
.max = 1000000000,
.description = "Supported granularity of file attribute times.",
},
+ {
+ .key = {"flush-handle-interrupt"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .description =
+ "Handle iterrupts in FLUSH handler (for testing purposes).",
+ },
{.key = {NULL}},
};
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
index b391af76bac..b997d9e0cbb 100644
--- a/xlators/mount/fuse/src/fuse-bridge.h
+++ b/xlators/mount/fuse/src/fuse-bridge.h
@@ -161,6 +161,8 @@ struct fuse_private {
/* Interrupt subscription */
struct list_head interrupt_list;
pthread_mutex_t interrupt_mutex;
+
+ gf_boolean_t flush_handle_interrupt;
};
typedef struct fuse_private fuse_private_t;
@@ -194,7 +196,7 @@ typedef struct fuse_interrupt_record fuse_interrupt_record_t;
typedef void (*fuse_interrupt_handler_t)(xlator_t *this,
fuse_interrupt_record_t *);
struct fuse_interrupt_record {
- struct fuse_in_header fuse_in_header;
+ fuse_in_header_t fuse_in_header;
void *data;
gf_boolean_t hit;
fuse_interrupt_state_t interrupt_state;
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
index 40172fc421d..4a95cd80b87 100755
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
@@ -281,6 +281,10 @@ start_glusterfs ()
cmd_line=$(echo "$cmd_line --dump-fuse=$dump_fuse");
fi
+ if [ -n "$fuse_flush_handle_interrupt" ]; then
+ cmd_line=$(echo "$cmd_line --fuse-flush-handle-interrupt=$fuse_flush_handle_interrupt");
+ fi
+
if [ -n "$process_name" ]; then
cmd_line=$(echo "$cmd_line --process-name fuse.$process_name");
else
@@ -541,6 +545,9 @@ with_options()
"dump-fuse")
dump_fuse=$value
;;
+ "fuse-flush-handle-interrupt")
+ fuse_flush_handle_interrupt=$value
+ ;;
"context"|"fscontext"|"defcontext"|"rootcontext")
# standard SElinux mount options to pass to the kernel
[ -z "$fuse_mountopts" ] || fuse_mountopts="$fuse_mountopts,"