summary refs log tree commit diff stats
path: root/xlators/mount/fuse/src/fuse-bridge.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/mount/fuse/src/fuse-bridge.c')
-rw-r--r-- xlators/mount/fuse/src/fuse-bridge.c 423
1 files changed, 315 insertions, 108 deletions
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 11b38f66028..0e22fe411ee 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -8,6 +8,8 @@
cases as published by the Free Software Foundation.
*/
+#include <config.h>
+
#include <sys/wait.h>
#include "fuse-bridge.h"
#include <glusterfs/glusterfs.h>
@@ -33,7 +35,7 @@ static int gf_fuse_xattr_enotsup_log;
void
fini(xlator_t *this_xl);
-static void
+static int32_t
fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino);
/*
@@ -177,7 +179,7 @@ fusedump_gettime(struct fusedump_timespec *fts)
0,
};
- clock_gettime(CLOCK_REALTIME, &ts);
+ timespec_now_realtime(&ts);
fts->sec = ts.tv_sec;
fts->nsec = ts.tv_nsec;
@@ -205,7 +207,7 @@ fusedump_setup_meta(struct iovec *iovs, char *dir,
static int
check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count,
- ssize_t res)
+ ssize_t res, errnomask_t errnomask)
{
char w = 'W';
struct iovec diov[4] = {
@@ -223,9 +225,92 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count,
struct fuse_out_header *fouh = NULL;
if (res == -1) {
- gf_log_callingfn("glusterfs-fuse", GF_LOG_ERROR,
- "writing to fuse device failed: %s", strerror(errno));
+ const char *errdesc = NULL;
+ gf_loglevel_t loglevel = GF_LOG_ERROR;
+ gf_boolean_t errno_degraded = _gf_false;
+ gf_boolean_t errno_promoted = _gf_false;
+
+#define ACCOUNT_ERRNO(eno) \
+ do { \
+ if (errno_degraded) { \
+ pthread_mutex_lock(&priv->fusedev_errno_cnt_mutex); \
+ { \
+ if (!++priv->fusedev_errno_cnt[FUSEDEV_##eno]) \
+ errno_promoted = _gf_true; \
+ } \
+ pthread_mutex_unlock(&priv->fusedev_errno_cnt_mutex); \
+ } \
+ } while (0)
+
+ /* If caller masked the errno, then it
+ * does not indicate an error at the application
+ * level, so we degrade the log severity to DEBUG.
+ */
+ if (errnomask && errno < ERRNOMASK_MAX &&
+ GET_ERRNO_MASK(errnomask, errno)) {
+ loglevel = GF_LOG_DEBUG;
+ errno_degraded = _gf_true;
+ }
+
+ switch (errno) {
+ /* The listed errnos are FUSE status indicators,
+ * not legit values according to POSIX (see write(3p)),
+ * so resolving them according to the standard
+ * POSIX interpretation would be misleading.
+ */
+ case ENOENT:
+ errdesc = "ENOENT";
+ ACCOUNT_ERRNO(ENOENT);
+ break;
+ case ENOTDIR:
+ errdesc = "ENOTDIR";
+ ACCOUNT_ERRNO(ENOTDIR);
+ break;
+ case ENODEV:
+ errdesc = "ENODEV";
+ ACCOUNT_ERRNO(ENODEV);
+ break;
+ case EPERM:
+ errdesc = "EPERM";
+ ACCOUNT_ERRNO(EPERM);
+ break;
+ case ENOMEM:
+ errdesc = "ENOMEM";
+ ACCOUNT_ERRNO(ENOMEM);
+ break;
+ case ENOTCONN:
+ errdesc = "ENOTCONN";
+ ACCOUNT_ERRNO(ENOTCONN);
+ break;
+ case ECONNREFUSED:
+ errdesc = "ECONNREFUSED";
+ ACCOUNT_ERRNO(ECONNREFUSED);
+ break;
+ case EOVERFLOW:
+ errdesc = "EOVERFLOW";
+ ACCOUNT_ERRNO(EOVERFLOW);
+ break;
+ case EBUSY:
+ errdesc = "EBUSY";
+ ACCOUNT_ERRNO(EBUSY);
+ break;
+ case ENOTEMPTY:
+ errdesc = "ENOTEMPTY";
+ ACCOUNT_ERRNO(ENOTEMPTY);
+ break;
+ default:
+ errdesc = strerror(errno);
+ }
+
+ gf_log_callingfn("glusterfs-fuse", loglevel,
+ "writing to fuse device failed: %s", errdesc);
+ if (errno_promoted)
+ gf_log("glusterfs-fuse", GF_LOG_WARNING,
+ "writing to fuse device yielded %s %d times", errdesc,
+ UINT8_MAX + 1);
return errno;
+
+#undef ACCOUNT_ERRNO
}
fouh = iov_out[0].iov_base;
@@ -289,7 +374,7 @@ send_fuse_iov(xlator_t *this, fuse_in_header_t *finh, struct iovec *iov_out,
gf_log("glusterfs-fuse", GF_LOG_TRACE, "writev() result %d/%d %s", res,
fouh->len, res == -1 ? strerror(errno) : "");
- return check_and_dump_fuse_W(priv, iov_out, count, res);
+ return check_and_dump_fuse_W(priv, iov_out, count, res, NULL);
}
static int
@@ -319,10 +404,10 @@ send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size)
#define send_fuse_obj(this, finh, obj) \
send_fuse_data(this, finh, obj, sizeof(*(obj)))
-static void
+static int32_t
fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
{
-#if FUSE_KERNEL_MINOR_VERSION >= 11
+#if (FUSE_KERNEL_MINOR_VERSION >= 11 && defined(HAVE_FUSE_NOTIFICATIONS))
struct fuse_out_header *fouh = NULL;
struct fuse_notify_inval_entry_out *fnieo = NULL;
fuse_private_t *priv = NULL;
@@ -335,17 +420,22 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
priv = this->private;
if (!priv->reverse_fuse_thread_started)
- return;
+ return -1;
+
+ if (priv->invalidate_limit &&
+ (priv->invalidate_count >= priv->invalidate_limit)) {
+ return -1;
+ }
inode = (inode_t *)(unsigned long)fuse_ino;
if (inode == NULL)
- return;
+ return -1;
list_for_each_entry_safe(dentry, tmp, &inode->dentry_list, inode_list)
{
node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
if (node == NULL)
- break;
+ return -1;
INIT_LIST_HEAD(&node->next);
@@ -355,6 +445,15 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
fouh->unique = 0;
fouh->error = FUSE_NOTIFY_INVAL_ENTRY;
+ if (ENOENT < ERRNOMASK_MAX)
+ MASK_ERRNO(node->errnomask, ENOENT);
+ if (ENOTDIR < ERRNOMASK_MAX)
+ MASK_ERRNO(node->errnomask, ENOTDIR);
+ if (EBUSY < ERRNOMASK_MAX)
+ MASK_ERRNO(node->errnomask, EBUSY);
+ if (ENOTEMPTY < ERRNOMASK_MAX)
+ MASK_ERRNO(node->errnomask, ENOTEMPTY);
+
if (dentry->name) {
nlen = strlen(dentry->name);
fouh->len = sizeof(*fouh) + sizeof(*fnieo) + nlen + 1;
@@ -382,23 +481,24 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
pthread_mutex_lock(&priv->invalidate_mutex);
{
list_add_tail(&node->next, &priv->invalidate_list);
+ priv->invalidate_count++;
pthread_cond_signal(&priv->invalidate_cond);
}
pthread_mutex_unlock(&priv->invalidate_mutex);
}
#endif
- return;
+ return 0;
}
/*
* Send an inval inode notification to fuse. This causes an invalidation of the
* entire page cache mapping on the inode.
*/
-static void
+static int32_t
fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
{
-#if FUSE_KERNEL_MINOR_VERSION >= 11
+#if (FUSE_KERNEL_MINOR_VERSION >= 11 && defined(HAVE_FUSE_NOTIFICATIONS))
struct fuse_out_header *fouh = NULL;
struct fuse_notify_inval_inode_out *fniio = NULL;
fuse_private_t *priv = NULL;
@@ -408,15 +508,20 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
priv = this->private;
if (!priv->reverse_fuse_thread_started)
- return;
+ return -1;
+
+ if (priv->invalidate_limit &&
+ (priv->invalidate_count >= priv->invalidate_limit)) {
+ return -1;
+ }
inode = (inode_t *)(unsigned long)fuse_ino;
if (inode == NULL)
- return;
+ return -1;
node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
if (node == NULL)
- return;
+ return -1;
INIT_LIST_HEAD(&node->next);
@@ -433,6 +538,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
fniio->off = 0;
fniio->len = -1;
+ if (ENOENT < ERRNOMASK_MAX)
+ MASK_ERRNO(node->errnomask, ENOENT);
+
fuse_log_eh(this, "Invalidated inode %" PRIu64 " (gfid: %s)", fuse_ino,
uuid_utoa(inode->gfid));
gf_log("glusterfs-fuse", GF_LOG_TRACE,
@@ -442,6 +550,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
pthread_mutex_lock(&priv->invalidate_mutex);
{
list_add_tail(&node->next, &priv->invalidate_list);
+ priv->invalidate_count++;
pthread_cond_signal(&priv->invalidate_cond);
}
pthread_mutex_unlock(&priv->invalidate_mutex);
@@ -450,7 +559,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
gf_log("glusterfs-fuse", GF_LOG_WARNING,
"fuse_invalidate_inode not implemented on this system");
#endif
- return;
+ return 0;
}
#if FUSE_KERNEL_MINOR_VERSION >= 11
@@ -458,8 +567,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
static int32_t
fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode)
{
- fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode);
- return 0;
+ int32_t ret = 0;
+ ret = fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode);
+ return ret;
}
#endif
@@ -476,6 +586,7 @@ fuse_timed_message_new(void)
/* should be NULL if not set */
dmsg->fuse_message_body = NULL;
INIT_LIST_HEAD(&dmsg->next);
+ memset(dmsg->errnomask, 0, sizeof(dmsg->errnomask));
return dmsg;
}
@@ -674,6 +785,8 @@ fuse_interrupt(xlator_t *this, fuse_in_header_t *finh, void *msg,
dmsg->fuse_out_header.unique = finh->unique;
dmsg->fuse_out_header.len = sizeof(dmsg->fuse_out_header);
dmsg->fuse_out_header.error = -EAGAIN;
+ if (ENOENT < ERRNOMASK_MAX)
+ MASK_ERRNO(dmsg->errnomask, ENOENT);
timespec_now(&dmsg->scheduled_ts);
timespec_adjust_delta(&dmsg->scheduled_ts,
(struct timespec){0, 10000000});
@@ -720,18 +833,22 @@ fuse_interrupt_finish_fop(call_frame_t *frame, xlator_t *this,
{
intstat_orig = fir->interrupt_state;
if (fir->interrupt_state == INTERRUPT_NONE) {
- fir->interrupt_state = INTERRUPT_SQUELCHED;
if (sync) {
- while (fir->interrupt_state == INTERRUPT_NONE) {
+ fir->interrupt_state = INTERRUPT_WAITING_HANDLER;
+ while (fir->interrupt_state != INTERRUPT_SQUELCHED) {
pthread_cond_wait(&fir->handler_cond,
&fir->handler_mutex);
}
- }
+ } else
+ fir->interrupt_state = INTERRUPT_SQUELCHED;
}
}
pthread_mutex_unlock(&fir->handler_mutex);
}
+ GF_ASSERT(intstat_orig == INTERRUPT_NONE ||
+ intstat_orig == INTERRUPT_HANDLED ||
+ intstat_orig == INTERRUPT_SQUELCHED);
gf_log("glusterfs-fuse", GF_LOG_DEBUG, "intstat_orig=%d", intstat_orig);
/*
@@ -781,19 +898,29 @@ fuse_interrupt_finish_interrupt(xlator_t *this, fuse_interrupt_record_t *fir,
};
fuse_interrupt_state_t intstat_orig = INTERRUPT_NONE;
+ GF_ASSERT(intstat == INTERRUPT_HANDLED || intstat == INTERRUPT_SQUELCHED);
+
pthread_mutex_lock(&fir->handler_mutex);
{
intstat_orig = fir->interrupt_state;
- if (fir->interrupt_state == INTERRUPT_NONE) {
- fir->interrupt_state = intstat;
- if (sync) {
+ switch (intstat_orig) {
+ case INTERRUPT_NONE:
+ fir->interrupt_state = intstat;
+ break;
+ case INTERRUPT_WAITING_HANDLER:
+ fir->interrupt_state = INTERRUPT_SQUELCHED;
pthread_cond_signal(&fir->handler_cond);
- }
+ break;
+ default:
+ break;
}
finh = fir->fuse_in_header;
}
pthread_mutex_unlock(&fir->handler_mutex);
+ GF_ASSERT(intstat_orig == INTERRUPT_NONE ||
+ (sync && intstat_orig == INTERRUPT_WAITING_HANDLER) ||
+ (!sync && intstat_orig == INTERRUPT_SQUELCHED));
gf_log("glusterfs-fuse", GF_LOG_DEBUG, "intstat_orig=%d", intstat_orig);
/*
@@ -1634,6 +1761,14 @@ fattr_to_gf_set_attr(int32_t valid)
gf_valid |= GF_SET_ATTR_CTIME;
#endif
+#if FUSE_KERNEL_MINOR_VERSION >= 9
+ if (valid & FATTR_ATIME_NOW)
+ gf_valid |= GF_ATTR_ATIME_NOW;
+
+ if (valid & FATTR_MTIME_NOW)
+ gf_valid |= GF_ATTR_MTIME_NOW;
+#endif
+
if (valid & FATTR_SIZE)
gf_valid |= GF_SET_ATTR_SIZE;
@@ -2098,7 +2233,6 @@ fuse_mknod(xlator_t *this, fuse_in_header_t *finh, void *msg,
fuse_state_t *state = NULL;
#if FUSE_KERNEL_MINOR_VERSION >= 12
fuse_private_t *priv = NULL;
- int32_t ret = -1;
priv = this->private;
if (priv->proto_minor < 12)
@@ -2170,7 +2304,6 @@ fuse_mkdir(xlator_t *this, fuse_in_header_t *finh, void *msg,
char *name = (char *)(fmi + 1);
#if FUSE_KERNEL_MINOR_VERSION >= 12
fuse_private_t *priv = NULL;
- int32_t ret = -1;
#endif
fuse_state_t *state;
@@ -2333,21 +2466,26 @@ fuse_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
{
fuse_state_t *state = NULL;
fuse_in_header_t *finh = NULL;
+ char loc_uuid_str[64] = {0}, loc2_uuid_str[64] = {0};
state = frame->root->state;
finh = state->finh;
- fuse_log_eh(this,
- "op_ret: %d, op_errno: %d, %" PRIu64
- ": %s() "
- "path: %s parent: %s ==> path: %s parent: %s"
- "gfid: %s",
- op_ret, op_errno, frame->root->unique,
- gf_fop_list[frame->root->op], state->loc.path,
- state->loc.parent ? uuid_utoa(state->loc.parent->gfid) : "",
- state->loc2.path,
- state->loc2.parent ? uuid_utoa(state->loc2.parent->gfid) : "",
- state->loc.inode ? uuid_utoa(state->loc.inode->gfid) : "");
+ fuse_log_eh(
+ this,
+ "op_ret: %d, op_errno: %d, %" PRIu64
+ ": %s() "
+ "path: %s parent: %s ==> path: %s parent: %s"
+ "gfid: %s",
+ op_ret, op_errno, frame->root->unique, gf_fop_list[frame->root->op],
+ state->loc.path,
+ (state->loc.parent ? uuid_utoa_r(state->loc.parent->gfid, loc_uuid_str)
+ : ""),
+ state->loc2.path,
+ (state->loc2.parent
+ ? uuid_utoa_r(state->loc2.parent->gfid, loc2_uuid_str)
+ : ""),
+ state->loc.inode ? uuid_utoa(state->loc.inode->gfid) : "");
/* need to check for loc->parent to keep clang-scan happy.
It gets dereferenced below, and is checked for NULL above. */
@@ -2691,7 +2829,6 @@ fuse_create(xlator_t *this, fuse_in_header_t *finh, void *msg,
#if FUSE_KERNEL_MINOR_VERSION >= 12
struct fuse_create_in *fci = msg;
fuse_private_t *priv = NULL;
- int32_t ret = -1;
#else
struct fuse_open_in *fci = msg;
#endif
@@ -3080,15 +3217,18 @@ fuse_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
void
fuse_copy_file_range_resume(fuse_state_t *state)
{
+ char fd_uuid_str[64] = {0}, fd_dst_uuid_str[64] = {0};
+
gf_log("glusterfs-fuse", GF_LOG_TRACE,
"%" PRIu64
": COPY_FILE_RANGE "
"(input fd: %p (gfid: %s), "
"output fd: %p (gfid: %s) size=%zu, "
"offset_in=%" PRIu64 ", offset_out=%" PRIu64 ")",
- state->finh->unique, state->fd, uuid_utoa(state->fd->inode->gfid),
- state->fd_dst, uuid_utoa(state->fd_dst->inode->gfid), state->size,
- state->off_in, state->off_out);
+ state->finh->unique, state->fd,
+ uuid_utoa_r(state->fd->inode->gfid, fd_uuid_str), state->fd_dst,
+ uuid_utoa_r(state->fd_dst->inode->gfid, fd_dst_uuid_str),
+ state->size, state->off_in, state->off_out);
FUSE_FOP(state, fuse_copy_file_range_cbk, GF_FOP_COPY_FILE_RANGE,
copy_file_range, state->fd, state->off_in, state->fd_dst,
@@ -3286,6 +3426,8 @@ fuse_release(xlator_t *this, fuse_in_header_t *finh, void *msg,
gf_log("glusterfs-fuse", GF_LOG_TRACE,
"finh->unique: %" PRIu64 ": RELEASE %p", finh->unique, state->fd);
+ fd_close(state->fd);
+
fuse_fd_ctx_destroy(this, state->fd);
fd_unref(fd);
@@ -4010,7 +4152,9 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg,
gf_log("fuse", GF_LOG_TRACE, "got request to invalidate %" PRIu64,
finh->nodeid);
#if FUSE_KERNEL_MINOR_VERSION >= 11
- fuse_invalidate_entry(this, finh->nodeid);
+ ret = fuse_invalidate_entry(this, finh->nodeid);
+ if (ret)
+ op_errno = EBUSY;
#endif
goto done;
}
@@ -4051,6 +4195,7 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg,
"%" PRIu64 ": SETXATTR value allocation failed",
finh->unique);
op_errno = ENOMEM;
+ GF_FREE(newkey);
goto done;
}
memcpy(dict_value, value, fsi->size);
@@ -4592,12 +4737,10 @@ fuse_setlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
fuse_state_t *state = NULL;
int ret = 0;
- ret = fuse_interrupt_finish_fop(frame, this, _gf_false, (void **)&state);
- if (state) {
- GF_FREE(state->name);
- dict_unref(state->xdata);
- GF_FREE(state);
- }
+ ret = fuse_interrupt_finish_fop(frame, this, _gf_true, (void **)&state);
+ GF_FREE(state->name);
+ dict_unref(state->xdata);
+ GF_FREE(state);
if (ret) {
return 0;
}
@@ -4653,28 +4796,12 @@ fuse_setlk_interrupt_handler_cbk(call_frame_t *frame, void *cookie,
int32_t op_errno, dict_t *dict, dict_t *xdata)
{
fuse_interrupt_state_t intstat = INTERRUPT_NONE;
- fuse_interrupt_record_t *fir;
- fuse_state_t *state = NULL;
- int ret = 0;
-
- ret = dict_get_bin(xdata, "fuse-interrupt-record", (void **)&fir);
- if (ret < 0) {
- gf_log("glusterfs-fuse", GF_LOG_ERROR, "interrupt record not found");
-
- goto out;
- }
+ fuse_interrupt_record_t *fir = cookie;
intstat = op_ret >= 0 ? INTERRUPT_HANDLED : INTERRUPT_SQUELCHED;
- fuse_interrupt_finish_interrupt(this, fir, intstat, _gf_false,
- (void **)&state);
- if (state) {
- GF_FREE(state->name);
- dict_unref(state->xdata);
- GF_FREE(state);
- }
+ fuse_interrupt_finish_interrupt(this, fir, intstat, _gf_true, NULL);
-out:
STACK_DESTROY(frame->root);
return 0;
@@ -4712,9 +4839,10 @@ fuse_setlk_interrupt_handler(xlator_t *this, fuse_interrupt_record_t *fir)
frame->op = GF_FOP_GETXATTR;
state->name = xattr_name;
- STACK_WIND(frame, fuse_setlk_interrupt_handler_cbk, state->active_subvol,
- state->active_subvol->fops->fgetxattr, state->fd, xattr_name,
- state->xdata);
+ STACK_WIND_COOKIE(frame, fuse_setlk_interrupt_handler_cbk, fir,
+ state->active_subvol,
+ state->active_subvol->fops->fgetxattr, state->fd,
+ xattr_name, state->xdata);
return;
@@ -4737,15 +4865,9 @@ fuse_setlk_resume(fuse_state_t *state)
fir = fuse_interrupt_record_new(state->finh, fuse_setlk_interrupt_handler);
state_clone = gf_memdup(state, sizeof(*state));
if (state_clone) {
- /*
- * Calling this allocator with fir casted to (char *) seems like
- * an abuse of this API, but in fact the API is stupid to assume
- * a (char *) argument (in the funcion it's casted to (void *)
- * anyway).
- */
- state_clone->xdata = dict_for_key_value(
- "fuse-interrupt-record", (char *)fir, sizeof(*fir), _gf_true);
+ state_clone->xdata = dict_new();
}
+
if (!fir || !state_clone || !state_clone->xdata) {
if (fir) {
GF_FREE(fir);
@@ -4803,7 +4925,7 @@ fuse_setlk(xlator_t *this, fuse_in_header_t *finh, void *msg,
return;
}
-#if FUSE_KERNEL_MINOR_VERSION >= 11
+#if FUSE_KERNEL_MINOR_VERSION >= 11 && defined(HAVE_FUSE_NOTIFICATIONS)
static void *
notify_kernel_loop(void *data)
{
@@ -4832,6 +4954,7 @@ notify_kernel_loop(void *data)
fuse_invalidate_node_t, next);
list_del_init(&node->next);
+ priv->invalidate_count--;
}
pthread_mutex_unlock(&priv->invalidate_mutex);
@@ -4852,7 +4975,7 @@ notify_kernel_loop(void *data)
iov_out.iov_base = node->inval_buf;
iov_out.iov_len = len;
rv = sys_writev(priv->fd, &iov_out, 1);
- check_and_dump_fuse_W(priv, &iov_out, 1, rv);
+ check_and_dump_fuse_W(priv, &iov_out, 1, rv, node->errnomask);
GF_FREE(node);
@@ -4875,6 +4998,7 @@ notify_kernel_loop(void *data)
list_del_init(&node->next);
GF_FREE(node);
}
+ priv->invalidate_count = 0;
}
pthread_mutex_unlock(&priv->invalidate_mutex);
@@ -4943,7 +5067,7 @@ timed_response_loop(void *data)
iovs[1] = (struct iovec){dmsg->fuse_message_body,
len - sizeof(struct fuse_out_header)};
rv = sys_writev(priv->fd, iovs, 2);
- check_and_dump_fuse_W(priv, iovs, 2, rv);
+ check_and_dump_fuse_W(priv, iovs, 2, rv, dmsg->errnomask);
fuse_timed_message_free(dmsg);
@@ -5048,6 +5172,7 @@ fuse_init(xlator_t *this, fuse_in_header_t *finh, void *msg,
priv->timed_response_fuse_thread_started = _gf_true;
/* Used for 'reverse invalidation of inode' */
+#ifdef HAVE_FUSE_NOTIFICATIONS
if (fini->minor >= 12) {
ret = gf_thread_create(&messenger, NULL, notify_kernel_loop, this,
"fusenoti");
@@ -5059,7 +5184,9 @@ fuse_init(xlator_t *this, fuse_in_header_t *finh, void *msg,
goto out;
}
priv->reverse_fuse_thread_started = _gf_true;
- } else {
+ } else
+#endif
+ {
/*
* FUSE minor < 12 does not implement invalidate notifications.
* This mechanism is required for fopen-keep-cache to operate
@@ -5202,7 +5329,7 @@ fuse_first_lookup(xlator_t *this)
};
xlator_t *xl = NULL;
dict_t *dict = NULL;
- uuid_t gfid;
+ static uuid_t gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
int ret = -1;
struct iatt iatt = {
0,
@@ -5220,8 +5347,6 @@ fuse_first_lookup(xlator_t *this)
xl = priv->active_subvol;
- memset(gfid, 0, 16);
- gfid[15] = 1;
ret = dict_set_gfuuid(dict, "gfid-req", gfid, true);
if (ret) {
gf_log(xl->name, GF_LOG_ERROR, "failed to set 'gfid-req'");
@@ -5488,6 +5613,7 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol,
char create_in_progress = 0;
fuse_fd_ctx_t *basefd_ctx = NULL;
fd_t *oldfd = NULL;
+ dict_t *xdata = NULL;
basefd_ctx = fuse_fd_ctx_get(this, basefd);
GF_VALIDATE_OR_GOTO("glusterfs-fuse", basefd_ctx, out);
@@ -5524,10 +5650,23 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol,
}
if (oldfd->inode->table->xl == old_subvol) {
- if (IA_ISDIR(oldfd->inode->ia_type))
+ if (IA_ISDIR(oldfd->inode->ia_type)) {
ret = syncop_fsyncdir(old_subvol, oldfd, 0, NULL, NULL);
- else
- ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, NULL, NULL);
+ } else {
+ xdata = dict_new();
+ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
+ gf_log("glusterfs-fuse", GF_LOG_WARNING,
+ "last-fsync set failed (%s) on fd (%p)"
+ "(basefd:%p basefd-inode.gfid:%s) "
+ "(old-subvolume:%s-%d new-subvolume:%s-%d)",
+ strerror(ENOMEM), oldfd, basefd,
+ uuid_utoa(basefd->inode->gfid), old_subvol->name,
+ old_subvol->graph->id, new_subvol->name,
+ new_subvol->graph->id);
+ }
+
+ ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, xdata, NULL);
+ }
if (ret < 0) {
gf_log("glusterfs-fuse", GF_LOG_WARNING,
@@ -5582,6 +5721,9 @@ out:
fd_unref(oldfd);
+ if (xdata)
+ dict_unref(xdata);
+
return ret;
}
@@ -5757,7 +5899,9 @@ fuse_graph_sync(xlator_t *this)
new_graph_id = priv->next_graph->id;
priv->next_graph = NULL;
need_first_lookup = 1;
- priv->handle_graph_switch = _gf_true;
+ if (old_subvol) {
+ priv->handle_graph_switch = _gf_true;
+ }
while (!priv->event_recvd) {
ret = pthread_cond_wait(&priv->sync_cond, &priv->sync_mutex);
@@ -5793,13 +5937,6 @@ unlock:
if (winds_on_old_subvol == 0) {
xlator_notify(old_subvol, GF_EVENT_PARENT_DOWN, old_subvol, NULL);
}
- } else {
- pthread_mutex_lock(&priv->sync_mutex);
- {
- priv->handle_graph_switch = _gf_false;
- pthread_cond_broadcast(&priv->migrate_cond);
- }
- pthread_mutex_unlock(&priv->sync_mutex);
}
return 0;
@@ -5976,6 +6113,16 @@ fuse_thread_proc(void *data)
"glusterfs-fuse: read from "
"/dev/fuse returned -1 (%s)",
strerror(errno));
+ if (errno == EPERM) {
+ /*
+ * sleep a while to avoid busy looping
+ * on EPERM condition
+ */
+ nanosleep(
+ &(struct timespec){0,
+ priv->fuse_dev_eperm_ratelimit_ns},
+ NULL);
+ }
}
goto cont_err;
@@ -6140,6 +6287,9 @@ fuse_priv_dump(xlator_t *this)
(int)private->timed_response_fuse_thread_started);
gf_proc_dump_write("reverse_thread_started", "%d",
(int)private->reverse_fuse_thread_started);
+ gf_proc_dump_write("invalidate_limit", "%u", private->invalidate_limit);
+ gf_proc_dump_write("invalidate_queue_length", "%" PRIu64,
+ private->invalidate_count);
gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp);
return 0;
@@ -6173,14 +6323,12 @@ out:
int
dump_history_fuse(circular_buffer_t *cb, void *data)
{
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
- gf_time_fmt(timestr, sizeof timestr, cb->tv.tv_sec, gf_timefmt_F_HMS);
+ gf_time_fmt_tv(timestr, sizeof timestr, &cb->tv, gf_timefmt_F_HMS);
- snprintf(timestr + strlen(timestr), 256 - strlen(timestr),
- ".%" GF_PRI_SUSECONDS, cb->tv.tv_usec);
gf_proc_dump_write("TIME", "%s", timestr);
gf_proc_dump_write("message", "%s\n", (char *)cb->data);
@@ -6263,6 +6411,7 @@ notify(xlator_t *this, int32_t event, void *data, ...)
fuse_private_t *private = NULL;
gf_boolean_t start_thread = _gf_false;
glusterfs_graph_t *graph = NULL;
+ struct pollfd pfd = {0};
private
= this->private;
@@ -6330,6 +6479,32 @@ notify(xlator_t *this, int32_t event, void *data, ...)
/* Authentication failure is an error and glusterfs should stop */
gf_log(this->name, GF_LOG_ERROR,
"Server authenication failed. Shutting down.");
+ pthread_mutex_lock(&private->sync_mutex);
+ {
+ /*Wait for mount to finish*/
+ if (!private->mount_finished) {
+ pfd.fd = private->status_pipe[0];
+ pfd.events = POLLIN | POLLHUP | POLLERR;
+ if (poll(&pfd, 1, -1) < 0) {
+ gf_log(this->name, GF_LOG_ERROR, "poll error %s",
+ strerror(errno));
+ goto auth_fail_unlock;
+ }
+ if (pfd.revents & POLLIN) {
+ if (fuse_get_mount_status(this) != 0) {
+ goto auth_fail_unlock;
+ }
+ private
+ ->mount_finished = _gf_true;
+ } else if (pfd.revents) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "mount pipe closed without status");
+ goto auth_fail_unlock;
+ }
+ }
+ }
+ auth_fail_unlock:
+ pthread_mutex_unlock(&private->sync_mutex);
fini(this);
break;
}
@@ -6533,6 +6708,8 @@ init(xlator_t *this_xl)
INIT_LIST_HEAD(&priv->interrupt_list);
pthread_mutex_init(&priv->interrupt_mutex, NULL);
+ pthread_mutex_init(&priv->fusedev_errno_cnt_mutex, NULL);
+
/* get options from option dictionary */
ret = dict_get_str(options, ZR_MOUNTPOINT_OPT, &value_string);
if (ret == -1 || value_string == NULL) {
@@ -6621,12 +6798,18 @@ init(xlator_t *this_xl)
ret = dict_get_str(options, "dump-fuse", &value_string);
if (ret == 0) {
ret = sys_unlink(value_string);
- if (ret != -1 || errno == ENOENT)
- ret = open(value_string, O_RDWR | O_CREAT | O_EXCL,
- S_IRUSR | S_IWUSR);
+ if (ret == -1 && errno != ENOENT) {
+ gf_log("glusterfs-fuse", GF_LOG_ERROR,
+ "failed to remove old fuse dump file %s: %s", value_string,
+ strerror(errno));
+
+ goto cleanup_exit;
+ }
+ ret = open(value_string, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
if (ret == -1) {
gf_log("glusterfs-fuse", GF_LOG_ERROR,
- "cannot open fuse dump file %s", value_string);
+ "failed to open fuse dump file %s: %s", value_string,
+ strerror(errno));
goto cleanup_exit;
}
@@ -6668,7 +6851,7 @@ init(xlator_t *this_xl)
GF_OPTION_INIT("no-root-squash", priv->no_root_squash, bool, cleanup_exit);
/* change the client_pid to no-root-squash pid only if the
- client is none of defrag process, hadoop access and gsyncd process.
+ client is neither a defrag process nor a gsyncd process.
*/
if (!priv->client_pid_set) {
if (priv->no_root_squash == _gf_true) {
@@ -6679,6 +6862,9 @@ init(xlator_t *this_xl)
GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit);
+ GF_OPTION_INIT("invalidate-limit", priv->invalidate_limit, uint32,
+ cleanup_exit);
+
GF_OPTION_INIT("event-history", priv->event_history, bool, cleanup_exit);
GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit);
@@ -6692,6 +6878,9 @@ init(xlator_t *this_xl)
GF_OPTION_INIT("flush-handle-interrupt", priv->flush_handle_interrupt, bool,
cleanup_exit);
+ GF_OPTION_INIT("fuse-dev-eperm-ratelimit-ns",
+ priv->fuse_dev_eperm_ratelimit_ns, uint32, cleanup_exit);
+
/* user has set only background-qlen, not congestion-threshold,
use the fuse kernel driver formula to set congestion. ie, 75% */
if (dict_get(this_xl->options, "background-qlen") &&
@@ -7009,12 +7198,21 @@ struct volume_options options[] = {
{
.key = {"lru-limit"},
.type = GF_OPTION_TYPE_INT,
- .default_value = "131072",
+ .default_value = "65536",
.min = 0,
.description = "makes glusterfs invalidate kernel inodes after "
"reaching this limit (0 means 'unlimited')",
},
{
+ .key = {"invalidate-limit"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "0",
+ .min = 0,
+ .description = "suspend invalidations as of 'lru-limit' if the number "
+ "of outstanding invalidations reaches this limit "
+ "(0 means 'unlimited')",
+ },
+ {
.key = {"auto-invalidation"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "true",
@@ -7023,6 +7221,15 @@ struct volume_options options[] = {
"if same files/directories are not accessed across "
"two different mounts concurrently",
},
+ {
+ .key = {"fuse-dev-eperm-ratelimit-ns"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "10000000", /* 0.01 sec */
+ .min = 0,
+ .max = 1000000000,
+ .description = "Rate limit reading from fuse device upon EPERM "
+ "failure.",
+ },
{.key = {NULL}},
};