summaryrefslogtreecommitdiffstats
path: root/xlators/mount/fuse/src
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/mount/fuse/src')
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c423
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h58
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c110
3 files changed, 407 insertions, 184 deletions
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 11b38f66028..0e22fe411ee 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -8,6 +8,8 @@
cases as published by the Free Software Foundation.
*/
+#include <config.h>
+
#include <sys/wait.h>
#include "fuse-bridge.h"
#include <glusterfs/glusterfs.h>
@@ -33,7 +35,7 @@ static int gf_fuse_xattr_enotsup_log;
void
fini(xlator_t *this_xl);
-static void
+static int32_t
fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino);
/*
@@ -177,7 +179,7 @@ fusedump_gettime(struct fusedump_timespec *fts)
0,
};
- clock_gettime(CLOCK_REALTIME, &ts);
+ timespec_now_realtime(&ts);
fts->sec = ts.tv_sec;
fts->nsec = ts.tv_nsec;
@@ -205,7 +207,7 @@ fusedump_setup_meta(struct iovec *iovs, char *dir,
static int
check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count,
- ssize_t res)
+ ssize_t res, errnomask_t errnomask)
{
char w = 'W';
struct iovec diov[4] = {
@@ -223,9 +225,92 @@ check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count,
struct fuse_out_header *fouh = NULL;
if (res == -1) {
- gf_log_callingfn("glusterfs-fuse", GF_LOG_ERROR,
- "writing to fuse device failed: %s", strerror(errno));
+ const char *errdesc = NULL;
+ gf_loglevel_t loglevel = GF_LOG_ERROR;
+ gf_boolean_t errno_degraded = _gf_false;
+ gf_boolean_t errno_promoted = _gf_false;
+
+#define ACCOUNT_ERRNO(eno) \
+ do { \
+ if (errno_degraded) { \
+ pthread_mutex_lock(&priv->fusedev_errno_cnt_mutex); \
+ { \
+ if (!++priv->fusedev_errno_cnt[FUSEDEV_##eno]) \
+ errno_promoted = _gf_true; \
+ } \
+ pthread_mutex_unlock(&priv->fusedev_errno_cnt_mutex); \
+ } \
+ } while (0)
+
+ /* If caller masked the errno, then it
+ * does not indicate an error at the application
+ * level, so we degrade the log severity to DEBUG.
+ */
+ if (errnomask && errno < ERRNOMASK_MAX &&
+ GET_ERRNO_MASK(errnomask, errno)) {
+ loglevel = GF_LOG_DEBUG;
+ errno_degraded = _gf_true;
+ }
+
+ switch (errno) {
+ /* The listed errnos are FUSE status indicators,
+ * not legit values according to POSIX (see write(3p)),
+ * so resolving them according to the standard
+ * POSIX interpretation would be misleading.
+ */
+ case ENOENT:
+ errdesc = "ENOENT";
+ ACCOUNT_ERRNO(ENOENT);
+ break;
+ case ENOTDIR:
+ errdesc = "ENOTDIR";
+ ACCOUNT_ERRNO(ENOTDIR);
+ break;
+ case ENODEV:
+ errdesc = "ENODEV";
+ ACCOUNT_ERRNO(ENODEV);
+ break;
+ case EPERM:
+ errdesc = "EPERM";
+ ACCOUNT_ERRNO(EPERM);
+ break;
+ case ENOMEM:
+ errdesc = "ENOMEM";
+ ACCOUNT_ERRNO(ENOMEM);
+ break;
+ case ENOTCONN:
+ errdesc = "ENOTCONN";
+ ACCOUNT_ERRNO(ENOTCONN);
+ break;
+ case ECONNREFUSED:
+ errdesc = "ECONNREFUSED";
+ ACCOUNT_ERRNO(ECONNREFUSED);
+ break;
+ case EOVERFLOW:
+ errdesc = "EOVERFLOW";
+ ACCOUNT_ERRNO(EOVERFLOW);
+ break;
+ case EBUSY:
+ errdesc = "EBUSY";
+ ACCOUNT_ERRNO(EBUSY);
+ break;
+ case ENOTEMPTY:
+ errdesc = "ENOTEMPTY";
+ ACCOUNT_ERRNO(ENOTEMPTY);
+ break;
+ default:
+ errdesc = strerror(errno);
+ }
+
+ gf_log_callingfn("glusterfs-fuse", loglevel,
+ "writing to fuse device failed: %s", errdesc);
+ if (errno_promoted)
+ gf_log("glusterfs-fuse", GF_LOG_WARNING,
+ "writing to fuse device yielded %s %d times", errdesc,
+ UINT8_MAX + 1);
return errno;
+
+#undef ACCOUNT_ERRNO
}
fouh = iov_out[0].iov_base;
@@ -289,7 +374,7 @@ send_fuse_iov(xlator_t *this, fuse_in_header_t *finh, struct iovec *iov_out,
gf_log("glusterfs-fuse", GF_LOG_TRACE, "writev() result %d/%d %s", res,
fouh->len, res == -1 ? strerror(errno) : "");
- return check_and_dump_fuse_W(priv, iov_out, count, res);
+ return check_and_dump_fuse_W(priv, iov_out, count, res, NULL);
}
static int
@@ -319,10 +404,10 @@ send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size)
#define send_fuse_obj(this, finh, obj) \
send_fuse_data(this, finh, obj, sizeof(*(obj)))
-static void
+static int32_t
fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
{
-#if FUSE_KERNEL_MINOR_VERSION >= 11
+#if (FUSE_KERNEL_MINOR_VERSION >= 11 && defined(HAVE_FUSE_NOTIFICATIONS))
struct fuse_out_header *fouh = NULL;
struct fuse_notify_inval_entry_out *fnieo = NULL;
fuse_private_t *priv = NULL;
@@ -335,17 +420,22 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
priv = this->private;
if (!priv->reverse_fuse_thread_started)
- return;
+ return -1;
+
+ if (priv->invalidate_limit &&
+ (priv->invalidate_count >= priv->invalidate_limit)) {
+ return -1;
+ }
inode = (inode_t *)(unsigned long)fuse_ino;
if (inode == NULL)
- return;
+ return -1;
list_for_each_entry_safe(dentry, tmp, &inode->dentry_list, inode_list)
{
node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
if (node == NULL)
- break;
+ return -1;
INIT_LIST_HEAD(&node->next);
@@ -355,6 +445,15 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
fouh->unique = 0;
fouh->error = FUSE_NOTIFY_INVAL_ENTRY;
+ if (ENOENT < ERRNOMASK_MAX)
+ MASK_ERRNO(node->errnomask, ENOENT);
+ if (ENOTDIR < ERRNOMASK_MAX)
+ MASK_ERRNO(node->errnomask, ENOTDIR);
+ if (EBUSY < ERRNOMASK_MAX)
+ MASK_ERRNO(node->errnomask, EBUSY);
+ if (ENOTEMPTY < ERRNOMASK_MAX)
+ MASK_ERRNO(node->errnomask, ENOTEMPTY);
+
if (dentry->name) {
nlen = strlen(dentry->name);
fouh->len = sizeof(*fouh) + sizeof(*fnieo) + nlen + 1;
@@ -382,23 +481,24 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino)
pthread_mutex_lock(&priv->invalidate_mutex);
{
list_add_tail(&node->next, &priv->invalidate_list);
+ priv->invalidate_count++;
pthread_cond_signal(&priv->invalidate_cond);
}
pthread_mutex_unlock(&priv->invalidate_mutex);
}
#endif
- return;
+ return 0;
}
/*
* Send an inval inode notification to fuse. This causes an invalidation of the
* entire page cache mapping on the inode.
*/
-static void
+static int32_t
fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
{
-#if FUSE_KERNEL_MINOR_VERSION >= 11
+#if (FUSE_KERNEL_MINOR_VERSION >= 11 && defined(HAVE_FUSE_NOTIFICATIONS))
struct fuse_out_header *fouh = NULL;
struct fuse_notify_inval_inode_out *fniio = NULL;
fuse_private_t *priv = NULL;
@@ -408,15 +508,20 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
priv = this->private;
if (!priv->reverse_fuse_thread_started)
- return;
+ return -1;
+
+ if (priv->invalidate_limit &&
+ (priv->invalidate_count >= priv->invalidate_limit)) {
+ return -1;
+ }
inode = (inode_t *)(unsigned long)fuse_ino;
if (inode == NULL)
- return;
+ return -1;
node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t);
if (node == NULL)
- return;
+ return -1;
INIT_LIST_HEAD(&node->next);
@@ -433,6 +538,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
fniio->off = 0;
fniio->len = -1;
+ if (ENOENT < ERRNOMASK_MAX)
+ MASK_ERRNO(node->errnomask, ENOENT);
+
fuse_log_eh(this, "Invalidated inode %" PRIu64 " (gfid: %s)", fuse_ino,
uuid_utoa(inode->gfid));
gf_log("glusterfs-fuse", GF_LOG_TRACE,
@@ -442,6 +550,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
pthread_mutex_lock(&priv->invalidate_mutex);
{
list_add_tail(&node->next, &priv->invalidate_list);
+ priv->invalidate_count++;
pthread_cond_signal(&priv->invalidate_cond);
}
pthread_mutex_unlock(&priv->invalidate_mutex);
@@ -450,7 +559,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
gf_log("glusterfs-fuse", GF_LOG_WARNING,
"fuse_invalidate_inode not implemented on this system");
#endif
- return;
+ return 0;
}
#if FUSE_KERNEL_MINOR_VERSION >= 11
@@ -458,8 +567,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino)
static int32_t
fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode)
{
- fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode);
- return 0;
+ int32_t ret = 0;
+ ret = fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode);
+ return ret;
}
#endif
@@ -476,6 +586,7 @@ fuse_timed_message_new(void)
/* should be NULL if not set */
dmsg->fuse_message_body = NULL;
INIT_LIST_HEAD(&dmsg->next);
+ memset(dmsg->errnomask, 0, sizeof(dmsg->errnomask));
return dmsg;
}
@@ -674,6 +785,8 @@ fuse_interrupt(xlator_t *this, fuse_in_header_t *finh, void *msg,
dmsg->fuse_out_header.unique = finh->unique;
dmsg->fuse_out_header.len = sizeof(dmsg->fuse_out_header);
dmsg->fuse_out_header.error = -EAGAIN;
+ if (ENOENT < ERRNOMASK_MAX)
+ MASK_ERRNO(dmsg->errnomask, ENOENT);
timespec_now(&dmsg->scheduled_ts);
timespec_adjust_delta(&dmsg->scheduled_ts,
(struct timespec){0, 10000000});
@@ -720,18 +833,22 @@ fuse_interrupt_finish_fop(call_frame_t *frame, xlator_t *this,
{
intstat_orig = fir->interrupt_state;
if (fir->interrupt_state == INTERRUPT_NONE) {
- fir->interrupt_state = INTERRUPT_SQUELCHED;
if (sync) {
- while (fir->interrupt_state == INTERRUPT_NONE) {
+ fir->interrupt_state = INTERRUPT_WAITING_HANDLER;
+ while (fir->interrupt_state != INTERRUPT_SQUELCHED) {
pthread_cond_wait(&fir->handler_cond,
&fir->handler_mutex);
}
- }
+ } else
+ fir->interrupt_state = INTERRUPT_SQUELCHED;
}
}
pthread_mutex_unlock(&fir->handler_mutex);
}
+ GF_ASSERT(intstat_orig == INTERRUPT_NONE ||
+ intstat_orig == INTERRUPT_HANDLED ||
+ intstat_orig == INTERRUPT_SQUELCHED);
gf_log("glusterfs-fuse", GF_LOG_DEBUG, "intstat_orig=%d", intstat_orig);
/*
@@ -781,19 +898,29 @@ fuse_interrupt_finish_interrupt(xlator_t *this, fuse_interrupt_record_t *fir,
};
fuse_interrupt_state_t intstat_orig = INTERRUPT_NONE;
+ GF_ASSERT(intstat == INTERRUPT_HANDLED || intstat == INTERRUPT_SQUELCHED);
+
pthread_mutex_lock(&fir->handler_mutex);
{
intstat_orig = fir->interrupt_state;
- if (fir->interrupt_state == INTERRUPT_NONE) {
- fir->interrupt_state = intstat;
- if (sync) {
+ switch (intstat_orig) {
+ case INTERRUPT_NONE:
+ fir->interrupt_state = intstat;
+ break;
+ case INTERRUPT_WAITING_HANDLER:
+ fir->interrupt_state = INTERRUPT_SQUELCHED;
pthread_cond_signal(&fir->handler_cond);
- }
+ break;
+ default:
+ break;
}
finh = fir->fuse_in_header;
}
pthread_mutex_unlock(&fir->handler_mutex);
+ GF_ASSERT(intstat_orig == INTERRUPT_NONE ||
+ (sync && intstat_orig == INTERRUPT_WAITING_HANDLER) ||
+ (!sync && intstat_orig == INTERRUPT_SQUELCHED));
gf_log("glusterfs-fuse", GF_LOG_DEBUG, "intstat_orig=%d", intstat_orig);
/*
@@ -1634,6 +1761,14 @@ fattr_to_gf_set_attr(int32_t valid)
gf_valid |= GF_SET_ATTR_CTIME;
#endif
+#if FUSE_KERNEL_MINOR_VERSION >= 9
+ if (valid & FATTR_ATIME_NOW)
+ gf_valid |= GF_ATTR_ATIME_NOW;
+
+ if (valid & FATTR_MTIME_NOW)
+ gf_valid |= GF_ATTR_MTIME_NOW;
+#endif
+
if (valid & FATTR_SIZE)
gf_valid |= GF_SET_ATTR_SIZE;
@@ -2098,7 +2233,6 @@ fuse_mknod(xlator_t *this, fuse_in_header_t *finh, void *msg,
fuse_state_t *state = NULL;
#if FUSE_KERNEL_MINOR_VERSION >= 12
fuse_private_t *priv = NULL;
- int32_t ret = -1;
priv = this->private;
if (priv->proto_minor < 12)
@@ -2170,7 +2304,6 @@ fuse_mkdir(xlator_t *this, fuse_in_header_t *finh, void *msg,
char *name = (char *)(fmi + 1);
#if FUSE_KERNEL_MINOR_VERSION >= 12
fuse_private_t *priv = NULL;
- int32_t ret = -1;
#endif
fuse_state_t *state;
@@ -2333,21 +2466,26 @@ fuse_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
{
fuse_state_t *state = NULL;
fuse_in_header_t *finh = NULL;
+ char loc_uuid_str[64] = {0}, loc2_uuid_str[64] = {0};
state = frame->root->state;
finh = state->finh;
- fuse_log_eh(this,
- "op_ret: %d, op_errno: %d, %" PRIu64
- ": %s() "
- "path: %s parent: %s ==> path: %s parent: %s"
- "gfid: %s",
- op_ret, op_errno, frame->root->unique,
- gf_fop_list[frame->root->op], state->loc.path,
- state->loc.parent ? uuid_utoa(state->loc.parent->gfid) : "",
- state->loc2.path,
- state->loc2.parent ? uuid_utoa(state->loc2.parent->gfid) : "",
- state->loc.inode ? uuid_utoa(state->loc.inode->gfid) : "");
+ fuse_log_eh(
+ this,
+ "op_ret: %d, op_errno: %d, %" PRIu64
+ ": %s() "
+ "path: %s parent: %s ==> path: %s parent: %s"
+ "gfid: %s",
+ op_ret, op_errno, frame->root->unique, gf_fop_list[frame->root->op],
+ state->loc.path,
+ (state->loc.parent ? uuid_utoa_r(state->loc.parent->gfid, loc_uuid_str)
+ : ""),
+ state->loc2.path,
+ (state->loc2.parent
+ ? uuid_utoa_r(state->loc2.parent->gfid, loc2_uuid_str)
+ : ""),
+ state->loc.inode ? uuid_utoa(state->loc.inode->gfid) : "");
/* need to check for loc->parent to keep clang-scan happy.
It gets dereferenced below, and is checked for NULL above. */
@@ -2691,7 +2829,6 @@ fuse_create(xlator_t *this, fuse_in_header_t *finh, void *msg,
#if FUSE_KERNEL_MINOR_VERSION >= 12
struct fuse_create_in *fci = msg;
fuse_private_t *priv = NULL;
- int32_t ret = -1;
#else
struct fuse_open_in *fci = msg;
#endif
@@ -3080,15 +3217,18 @@ fuse_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
void
fuse_copy_file_range_resume(fuse_state_t *state)
{
+ char fd_uuid_str[64] = {0}, fd_dst_uuid_str[64] = {0};
+
gf_log("glusterfs-fuse", GF_LOG_TRACE,
"%" PRIu64
": COPY_FILE_RANGE "
"(input fd: %p (gfid: %s), "
"output fd: %p (gfid: %s) size=%zu, "
"offset_in=%" PRIu64 ", offset_out=%" PRIu64 ")",
- state->finh->unique, state->fd, uuid_utoa(state->fd->inode->gfid),
- state->fd_dst, uuid_utoa(state->fd_dst->inode->gfid), state->size,
- state->off_in, state->off_out);
+ state->finh->unique, state->fd,
+ uuid_utoa_r(state->fd->inode->gfid, fd_uuid_str), state->fd_dst,
+ uuid_utoa_r(state->fd_dst->inode->gfid, fd_dst_uuid_str),
+ state->size, state->off_in, state->off_out);
FUSE_FOP(state, fuse_copy_file_range_cbk, GF_FOP_COPY_FILE_RANGE,
copy_file_range, state->fd, state->off_in, state->fd_dst,
@@ -3286,6 +3426,8 @@ fuse_release(xlator_t *this, fuse_in_header_t *finh, void *msg,
gf_log("glusterfs-fuse", GF_LOG_TRACE,
"finh->unique: %" PRIu64 ": RELEASE %p", finh->unique, state->fd);
+ fd_close(state->fd);
+
fuse_fd_ctx_destroy(this, state->fd);
fd_unref(fd);
@@ -4010,7 +4152,9 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg,
gf_log("fuse", GF_LOG_TRACE, "got request to invalidate %" PRIu64,
finh->nodeid);
#if FUSE_KERNEL_MINOR_VERSION >= 11
- fuse_invalidate_entry(this, finh->nodeid);
+ ret = fuse_invalidate_entry(this, finh->nodeid);
+ if (ret)
+ op_errno = EBUSY;
#endif
goto done;
}
@@ -4051,6 +4195,7 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg,
"%" PRIu64 ": SETXATTR value allocation failed",
finh->unique);
op_errno = ENOMEM;
+ GF_FREE(newkey);
goto done;
}
memcpy(dict_value, value, fsi->size);
@@ -4592,12 +4737,10 @@ fuse_setlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
fuse_state_t *state = NULL;
int ret = 0;
- ret = fuse_interrupt_finish_fop(frame, this, _gf_false, (void **)&state);
- if (state) {
- GF_FREE(state->name);
- dict_unref(state->xdata);
- GF_FREE(state);
- }
+ ret = fuse_interrupt_finish_fop(frame, this, _gf_true, (void **)&state);
+ GF_FREE(state->name);
+ dict_unref(state->xdata);
+ GF_FREE(state);
if (ret) {
return 0;
}
@@ -4653,28 +4796,12 @@ fuse_setlk_interrupt_handler_cbk(call_frame_t *frame, void *cookie,
int32_t op_errno, dict_t *dict, dict_t *xdata)
{
fuse_interrupt_state_t intstat = INTERRUPT_NONE;
- fuse_interrupt_record_t *fir;
- fuse_state_t *state = NULL;
- int ret = 0;
-
- ret = dict_get_bin(xdata, "fuse-interrupt-record", (void **)&fir);
- if (ret < 0) {
- gf_log("glusterfs-fuse", GF_LOG_ERROR, "interrupt record not found");
-
- goto out;
- }
+ fuse_interrupt_record_t *fir = cookie;
intstat = op_ret >= 0 ? INTERRUPT_HANDLED : INTERRUPT_SQUELCHED;
- fuse_interrupt_finish_interrupt(this, fir, intstat, _gf_false,
- (void **)&state);
- if (state) {
- GF_FREE(state->name);
- dict_unref(state->xdata);
- GF_FREE(state);
- }
+ fuse_interrupt_finish_interrupt(this, fir, intstat, _gf_true, NULL);
-out:
STACK_DESTROY(frame->root);
return 0;
@@ -4712,9 +4839,10 @@ fuse_setlk_interrupt_handler(xlator_t *this, fuse_interrupt_record_t *fir)
frame->op = GF_FOP_GETXATTR;
state->name = xattr_name;
- STACK_WIND(frame, fuse_setlk_interrupt_handler_cbk, state->active_subvol,
- state->active_subvol->fops->fgetxattr, state->fd, xattr_name,
- state->xdata);
+ STACK_WIND_COOKIE(frame, fuse_setlk_interrupt_handler_cbk, fir,
+ state->active_subvol,
+ state->active_subvol->fops->fgetxattr, state->fd,
+ xattr_name, state->xdata);
return;
@@ -4737,15 +4865,9 @@ fuse_setlk_resume(fuse_state_t *state)
fir = fuse_interrupt_record_new(state->finh, fuse_setlk_interrupt_handler);
state_clone = gf_memdup(state, sizeof(*state));
if (state_clone) {
- /*
- * Calling this allocator with fir casted to (char *) seems like
- * an abuse of this API, but in fact the API is stupid to assume
- * a (char *) argument (in the funcion it's casted to (void *)
- * anyway).
- */
- state_clone->xdata = dict_for_key_value(
- "fuse-interrupt-record", (char *)fir, sizeof(*fir), _gf_true);
+ state_clone->xdata = dict_new();
}
+
if (!fir || !state_clone || !state_clone->xdata) {
if (fir) {
GF_FREE(fir);
@@ -4803,7 +4925,7 @@ fuse_setlk(xlator_t *this, fuse_in_header_t *finh, void *msg,
return;
}
-#if FUSE_KERNEL_MINOR_VERSION >= 11
+#if FUSE_KERNEL_MINOR_VERSION >= 11 && defined(HAVE_FUSE_NOTIFICATIONS)
static void *
notify_kernel_loop(void *data)
{
@@ -4832,6 +4954,7 @@ notify_kernel_loop(void *data)
fuse_invalidate_node_t, next);
list_del_init(&node->next);
+ priv->invalidate_count--;
}
pthread_mutex_unlock(&priv->invalidate_mutex);
@@ -4852,7 +4975,7 @@ notify_kernel_loop(void *data)
iov_out.iov_base = node->inval_buf;
iov_out.iov_len = len;
rv = sys_writev(priv->fd, &iov_out, 1);
- check_and_dump_fuse_W(priv, &iov_out, 1, rv);
+ check_and_dump_fuse_W(priv, &iov_out, 1, rv, node->errnomask);
GF_FREE(node);
@@ -4875,6 +4998,7 @@ notify_kernel_loop(void *data)
list_del_init(&node->next);
GF_FREE(node);
}
+ priv->invalidate_count = 0;
}
pthread_mutex_unlock(&priv->invalidate_mutex);
@@ -4943,7 +5067,7 @@ timed_response_loop(void *data)
iovs[1] = (struct iovec){dmsg->fuse_message_body,
len - sizeof(struct fuse_out_header)};
rv = sys_writev(priv->fd, iovs, 2);
- check_and_dump_fuse_W(priv, iovs, 2, rv);
+ check_and_dump_fuse_W(priv, iovs, 2, rv, dmsg->errnomask);
fuse_timed_message_free(dmsg);
@@ -5048,6 +5172,7 @@ fuse_init(xlator_t *this, fuse_in_header_t *finh, void *msg,
priv->timed_response_fuse_thread_started = _gf_true;
/* Used for 'reverse invalidation of inode' */
+#ifdef HAVE_FUSE_NOTIFICATIONS
if (fini->minor >= 12) {
ret = gf_thread_create(&messenger, NULL, notify_kernel_loop, this,
"fusenoti");
@@ -5059,7 +5184,9 @@ fuse_init(xlator_t *this, fuse_in_header_t *finh, void *msg,
goto out;
}
priv->reverse_fuse_thread_started = _gf_true;
- } else {
+ } else
+#endif
+ {
/*
* FUSE minor < 12 does not implement invalidate notifications.
* This mechanism is required for fopen-keep-cache to operate
@@ -5202,7 +5329,7 @@ fuse_first_lookup(xlator_t *this)
};
xlator_t *xl = NULL;
dict_t *dict = NULL;
- uuid_t gfid;
+ static uuid_t gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
int ret = -1;
struct iatt iatt = {
0,
@@ -5220,8 +5347,6 @@ fuse_first_lookup(xlator_t *this)
xl = priv->active_subvol;
- memset(gfid, 0, 16);
- gfid[15] = 1;
ret = dict_set_gfuuid(dict, "gfid-req", gfid, true);
if (ret) {
gf_log(xl->name, GF_LOG_ERROR, "failed to set 'gfid-req'");
@@ -5488,6 +5613,7 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol,
char create_in_progress = 0;
fuse_fd_ctx_t *basefd_ctx = NULL;
fd_t *oldfd = NULL;
+ dict_t *xdata = NULL;
basefd_ctx = fuse_fd_ctx_get(this, basefd);
GF_VALIDATE_OR_GOTO("glusterfs-fuse", basefd_ctx, out);
@@ -5524,10 +5650,23 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol,
}
if (oldfd->inode->table->xl == old_subvol) {
- if (IA_ISDIR(oldfd->inode->ia_type))
+ if (IA_ISDIR(oldfd->inode->ia_type)) {
ret = syncop_fsyncdir(old_subvol, oldfd, 0, NULL, NULL);
- else
- ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, NULL, NULL);
+ } else {
+ xdata = dict_new();
+ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
+ gf_log("glusterfs-fuse", GF_LOG_WARNING,
+ "last-fsync set failed (%s) on fd (%p)"
+ "(basefd:%p basefd-inode.gfid:%s) "
+ "(old-subvolume:%s-%d new-subvolume:%s-%d)",
+ strerror(ENOMEM), oldfd, basefd,
+ uuid_utoa(basefd->inode->gfid), old_subvol->name,
+ old_subvol->graph->id, new_subvol->name,
+ new_subvol->graph->id);
+ }
+
+ ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, xdata, NULL);
+ }
if (ret < 0) {
gf_log("glusterfs-fuse", GF_LOG_WARNING,
@@ -5582,6 +5721,9 @@ out:
fd_unref(oldfd);
+ if (xdata)
+ dict_unref(xdata);
+
return ret;
}
@@ -5757,7 +5899,9 @@ fuse_graph_sync(xlator_t *this)
new_graph_id = priv->next_graph->id;
priv->next_graph = NULL;
need_first_lookup = 1;
- priv->handle_graph_switch = _gf_true;
+ if (old_subvol) {
+ priv->handle_graph_switch = _gf_true;
+ }
while (!priv->event_recvd) {
ret = pthread_cond_wait(&priv->sync_cond, &priv->sync_mutex);
@@ -5793,13 +5937,6 @@ unlock:
if (winds_on_old_subvol == 0) {
xlator_notify(old_subvol, GF_EVENT_PARENT_DOWN, old_subvol, NULL);
}
- } else {
- pthread_mutex_lock(&priv->sync_mutex);
- {
- priv->handle_graph_switch = _gf_false;
- pthread_cond_broadcast(&priv->migrate_cond);
- }
- pthread_mutex_unlock(&priv->sync_mutex);
}
return 0;
@@ -5976,6 +6113,16 @@ fuse_thread_proc(void *data)
"glusterfs-fuse: read from "
"/dev/fuse returned -1 (%s)",
strerror(errno));
+ if (errno == EPERM) {
+ /*
+ * sleep a while to avoid busy looping
+ * on EPERM condition
+ */
+ nanosleep(
+ &(struct timespec){0,
+ priv->fuse_dev_eperm_ratelimit_ns},
+ NULL);
+ }
}
goto cont_err;
@@ -6140,6 +6287,9 @@ fuse_priv_dump(xlator_t *this)
(int)private->timed_response_fuse_thread_started);
gf_proc_dump_write("reverse_thread_started", "%d",
(int)private->reverse_fuse_thread_started);
+ gf_proc_dump_write("invalidate_limit", "%u", private->invalidate_limit);
+ gf_proc_dump_write("invalidate_queue_length", "%" PRIu64,
+ private->invalidate_count);
gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp);
return 0;
@@ -6173,14 +6323,12 @@ out:
int
dump_history_fuse(circular_buffer_t *cb, void *data)
{
- char timestr[256] = {
+ char timestr[GF_TIMESTR_SIZE] = {
0,
};
- gf_time_fmt(timestr, sizeof timestr, cb->tv.tv_sec, gf_timefmt_F_HMS);
+ gf_time_fmt_tv(timestr, sizeof timestr, &cb->tv, gf_timefmt_F_HMS);
- snprintf(timestr + strlen(timestr), 256 - strlen(timestr),
- ".%" GF_PRI_SUSECONDS, cb->tv.tv_usec);
gf_proc_dump_write("TIME", "%s", timestr);
gf_proc_dump_write("message", "%s\n", (char *)cb->data);
@@ -6263,6 +6411,7 @@ notify(xlator_t *this, int32_t event, void *data, ...)
fuse_private_t *private = NULL;
gf_boolean_t start_thread = _gf_false;
glusterfs_graph_t *graph = NULL;
+ struct pollfd pfd = {0};
private
= this->private;
@@ -6330,6 +6479,32 @@ notify(xlator_t *this, int32_t event, void *data, ...)
/* Authentication failure is an error and glusterfs should stop */
gf_log(this->name, GF_LOG_ERROR,
"Server authenication failed. Shutting down.");
+ pthread_mutex_lock(&private->sync_mutex);
+ {
+ /*Wait for mount to finish*/
+ if (!private->mount_finished) {
+ pfd.fd = private->status_pipe[0];
+ pfd.events = POLLIN | POLLHUP | POLLERR;
+ if (poll(&pfd, 1, -1) < 0) {
+ gf_log(this->name, GF_LOG_ERROR, "poll error %s",
+ strerror(errno));
+ goto auth_fail_unlock;
+ }
+ if (pfd.revents & POLLIN) {
+ if (fuse_get_mount_status(this) != 0) {
+ goto auth_fail_unlock;
+ }
+ private
+ ->mount_finished = _gf_true;
+ } else if (pfd.revents) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "mount pipe closed without status");
+ goto auth_fail_unlock;
+ }
+ }
+ }
+ auth_fail_unlock:
+ pthread_mutex_unlock(&private->sync_mutex);
fini(this);
break;
}
@@ -6533,6 +6708,8 @@ init(xlator_t *this_xl)
INIT_LIST_HEAD(&priv->interrupt_list);
pthread_mutex_init(&priv->interrupt_mutex, NULL);
+ pthread_mutex_init(&priv->fusedev_errno_cnt_mutex, NULL);
+
/* get options from option dictionary */
ret = dict_get_str(options, ZR_MOUNTPOINT_OPT, &value_string);
if (ret == -1 || value_string == NULL) {
@@ -6621,12 +6798,18 @@ init(xlator_t *this_xl)
ret = dict_get_str(options, "dump-fuse", &value_string);
if (ret == 0) {
ret = sys_unlink(value_string);
- if (ret != -1 || errno == ENOENT)
- ret = open(value_string, O_RDWR | O_CREAT | O_EXCL,
- S_IRUSR | S_IWUSR);
+ if (ret == -1 && errno != ENOENT) {
+ gf_log("glusterfs-fuse", GF_LOG_ERROR,
+ "failed to remove old fuse dump file %s: %s", value_string,
+ strerror(errno));
+
+ goto cleanup_exit;
+ }
+ ret = open(value_string, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR);
if (ret == -1) {
gf_log("glusterfs-fuse", GF_LOG_ERROR,
- "cannot open fuse dump file %s", value_string);
+ "failed to open fuse dump file %s: %s", value_string,
+ strerror(errno));
goto cleanup_exit;
}
@@ -6668,7 +6851,7 @@ init(xlator_t *this_xl)
GF_OPTION_INIT("no-root-squash", priv->no_root_squash, bool, cleanup_exit);
/* change the client_pid to no-root-squash pid only if the
- client is none of defrag process, hadoop access and gsyncd process.
+ client is neither a defrag process nor a gsyncd process.
*/
if (!priv->client_pid_set) {
if (priv->no_root_squash == _gf_true) {
@@ -6679,6 +6862,9 @@ init(xlator_t *this_xl)
GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit);
+ GF_OPTION_INIT("invalidate-limit", priv->invalidate_limit, uint32,
+ cleanup_exit);
+
GF_OPTION_INIT("event-history", priv->event_history, bool, cleanup_exit);
GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit);
@@ -6692,6 +6878,9 @@ init(xlator_t *this_xl)
GF_OPTION_INIT("flush-handle-interrupt", priv->flush_handle_interrupt, bool,
cleanup_exit);
+ GF_OPTION_INIT("fuse-dev-eperm-ratelimit-ns",
+ priv->fuse_dev_eperm_ratelimit_ns, uint32, cleanup_exit);
+
/* user has set only background-qlen, not congestion-threshold,
use the fuse kernel driver formula to set congestion. ie, 75% */
if (dict_get(this_xl->options, "background-qlen") &&
@@ -7009,12 +7198,21 @@ struct volume_options options[] = {
{
.key = {"lru-limit"},
.type = GF_OPTION_TYPE_INT,
- .default_value = "131072",
+ .default_value = "65536",
.min = 0,
.description = "makes glusterfs invalidate kernel inodes after "
"reaching this limit (0 means 'unlimited')",
},
{
+ .key = {"invalidate-limit"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "0",
+ .min = 0,
+ .description = "suspend invalidations as of 'lru-limit' if the number "
+ "of outstanding invalidations reaches this limit "
+ "(0 means 'unlimited')",
+ },
+ {
.key = {"auto-invalidation"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "true",
@@ -7023,6 +7221,15 @@ struct volume_options options[] = {
"if same files/directories are not accessed across "
"two different mounts concurrently",
},
+ {
+ .key = {"fuse-dev-eperm-ratelimit-ns"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "10000000", /* 0.01 sec */
+ .min = 0,
+ .max = 1000000000,
+ .description = "Rate limit reading from fuse device upon EPERM "
+ "failure.",
+ },
{.key = {NULL}},
};
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
index 697bd8848e1..4cb94c23cad 100644
--- a/xlators/mount/fuse/src/fuse-bridge.h
+++ b/xlators/mount/fuse/src/fuse-bridge.h
@@ -78,6 +78,20 @@ typedef struct fuse_in_header fuse_in_header_t;
typedef void(fuse_handler_t)(xlator_t *this, fuse_in_header_t *finh, void *msg,
struct iobuf *iobuf);
+enum fusedev_errno {
+ FUSEDEV_ENOENT,
+ FUSEDEV_ENOTDIR,
+ FUSEDEV_ENODEV,
+ FUSEDEV_EPERM,
+ FUSEDEV_ENOMEM,
+ FUSEDEV_ENOTCONN,
+ FUSEDEV_ECONNREFUSED,
+ FUSEDEV_EOVERFLOW,
+ FUSEDEV_EBUSY,
+ FUSEDEV_ENOTEMPTY,
+ FUSEDEV_EMAXPLUS
+};
+
struct fuse_private {
int fd;
uint32_t proto_minor;
@@ -139,7 +153,7 @@ struct fuse_private {
pthread_cond_t invalidate_cond;
pthread_mutex_t invalidate_mutex;
gf_boolean_t reverse_fuse_thread_started;
-
+ uint64_t invalidate_count;
/* For communicating with separate mount thread. */
int status_pipe[2];
@@ -189,19 +203,31 @@ struct fuse_private {
gf_boolean_t flush_handle_interrupt;
gf_boolean_t fuse_auto_inval;
- /* LRU Limit, if not set, default is 128k for now */
+ /* LRU Limit, if not set, default is 64k for now */
uint32_t lru_limit;
+ uint32_t invalidate_limit;
+ uint32_t fuse_dev_eperm_ratelimit_ns;
+
+ /* counters for fusedev errnos */
+ uint8_t fusedev_errno_cnt[FUSEDEV_EMAXPLUS];
+ pthread_mutex_t fusedev_errno_cnt_mutex;
};
typedef struct fuse_private fuse_private_t;
+typedef uint64_t errnomask_t[2];
+#define MASK_ERRNO(mask, n) ((mask)[(n) >> 6] |= ((uint64_t)1 << ((n)&63)))
+#define GET_ERRNO_MASK(mask, n) ((mask)[(n) >> 6] & ((uint64_t)1 << ((n)&63)))
+#define ERRNOMASK_MAX (64 * (sizeof(errnomask_t) / sizeof(uint64_t)))
+
#define INVAL_BUF_SIZE \
(sizeof(struct fuse_out_header) + \
max(sizeof(struct fuse_notify_inval_inode_out), \
sizeof(struct fuse_notify_inval_entry_out) + NAME_MAX + 1))
struct fuse_invalidate_node {
- char inval_buf[INVAL_BUF_SIZE];
+ errnomask_t errnomask;
struct list_head next;
+ char inval_buf[INVAL_BUF_SIZE];
};
typedef struct fuse_invalidate_node fuse_invalidate_node_t;
@@ -209,6 +235,7 @@ struct fuse_timed_message {
struct fuse_out_header fuse_out_header;
void *fuse_message_body;
struct timespec scheduled_ts;
+ errnomask_t errnomask;
struct list_head next;
};
typedef struct fuse_timed_message fuse_timed_message_t;
@@ -217,6 +244,7 @@ enum fuse_interrupt_state {
INTERRUPT_NONE,
INTERRUPT_SQUELCHED,
INTERRUPT_HANDLED,
+ INTERRUPT_WAITING_HANDLER,
};
typedef enum fuse_interrupt_state fuse_interrupt_state_t;
struct fuse_interrupt_record;
@@ -332,30 +360,6 @@ typedef struct fuse_graph_switch_args fuse_graph_switch_args_t;
return; \
} \
state->umask = fci->umask; \
- \
- /* TODO: remove this after 3.4.0 release. keeping it for the \
- sake of backward compatibility with old (3.3.[01]) \
- releases till then. */ \
- ret = dict_set_int16(state->xdata, "umask", fci->umask); \
- if (ret < 0) { \
- gf_log("glusterfs-fuse", GF_LOG_WARNING, \
- "%s Failed adding umask" \
- " to request", \
- op); \
- send_fuse_err(this, finh, ENOMEM); \
- free_fuse_state(state); \
- return; \
- } \
- ret = dict_set_int16(state->xdata, "mode", fci->mode); \
- if (ret < 0) { \
- gf_log("glusterfs-fuse", GF_LOG_WARNING, \
- "%s Failed adding mode " \
- "to request", \
- op); \
- send_fuse_err(this, finh, ENOMEM); \
- free_fuse_state(state); \
- return; \
- } \
} \
} while (0)
diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c
index 5bfc40c9fcb..a2b0ad11fe4 100644
--- a/xlators/mount/fuse/src/fuse-helpers.c
+++ b/xlators/mount/fuse/src/fuse-helpers.c
@@ -139,8 +139,6 @@ get_fuse_state(xlator_t *this, fuse_in_header_t *finh)
return state;
}
-#define FUSE_MAX_AUX_GROUPS \
- 32 /* We can get only up to 32 aux groups from /proc */
void
frame_fill_groups(call_frame_t *frame)
{
@@ -150,8 +148,6 @@ frame_fill_groups(call_frame_t *frame)
char filename[32];
char line[4096];
char *ptr = NULL;
- FILE *fp = NULL;
- int idx = 0;
long int id = 0;
char *saveptr = NULL;
char *endptr = NULL;
@@ -191,45 +187,72 @@ frame_fill_groups(call_frame_t *frame)
call_stack_set_groups(frame->root, ngroups, &mygroups);
} else {
+ FILE *fp = NULL;
+
ret = snprintf(filename, sizeof filename, "/proc/%d/status",
frame->root->pid);
- if (ret >= sizeof filename)
+ if (ret >= sizeof filename) {
+ gf_log(this->name, GF_LOG_ERROR, "procfs path exceeds buffer size");
goto out;
+ }
fp = fopen(filename, "r");
- if (!fp)
+ if (!fp) {
+ gf_log(this->name, GF_LOG_ERROR, "failed to open %s: %s", filename,
+ strerror(errno));
goto out;
+ }
- if (call_stack_alloc_groups(frame->root, ngroups) != 0)
- goto out;
+ for (;;) {
+ gf_boolean_t found_groups = _gf_false;
+ int idx = 0;
- while ((ptr = fgets(line, sizeof line, fp))) {
- if (strncmp(ptr, "Groups:", 7) != 0)
- continue;
+ if (call_stack_alloc_groups(frame->root, ngroups) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to allocate gid buffer");
+ goto out;
+ }
+ while ((ptr = fgets(line, sizeof line, fp))) {
+ if (strncmp(ptr, "Groups:", 7) == 0) {
+ found_groups = _gf_true;
+ break;
+ }
+ }
+ if (!found_groups) {
+ gf_log(this->name, GF_LOG_ERROR, "cannot find gid list in %s",
+ filename);
+ break;
+ }
ptr = line + 8;
for (ptr = strtok_r(ptr, " \t\r\n", &saveptr); ptr;
ptr = strtok_r(NULL, " \t\r\n", &saveptr)) {
errno = 0;
id = strtol(ptr, &endptr, 0);
- if (errno == ERANGE)
- break;
- if (!endptr || *endptr)
+ if (errno == ERANGE || !endptr || *endptr) {
+ gf_log(this->name, GF_LOG_ERROR, "failed to parse %s",
+ filename);
break;
- frame->root->groups[idx++] = id;
- if (idx == FUSE_MAX_AUX_GROUPS)
+ }
+ if (idx < call_stack_groups_capacity(frame->root))
+ frame->root->groups[idx] = id;
+ idx++;
+ if (idx == GF_MAX_AUX_GROUPS)
break;
}
-
- frame->root->ngrps = idx;
- break;
+ if (idx > call_stack_groups_capacity(frame->root)) {
+ ngroups = idx;
+ rewind(fp);
+ } else {
+ frame->root->ngrps = idx;
+ break;
+ }
}
+ out:
+ if (fp)
+ fclose(fp);
}
-
-out:
- if (fp)
- fclose(fp);
#elif defined(GF_SOLARIS_HOST_OS)
char filename[32];
char scratch[128];
@@ -245,7 +268,7 @@ out:
fp = fopen(filename, "r");
if (fp != NULL) {
if (fgets(scratch, sizeof scratch, fp) != NULL) {
- ngrps = MIN(prcred->pr_ngroups, FUSE_MAX_AUX_GROUPS);
+ ngrps = MIN(prcred->pr_ngroups, GF_MAX_AUX_GROUPS);
if (call_stack_alloc_groups(frame->root, ngrps) != 0) {
fclose(fp);
return;
@@ -309,8 +332,10 @@ get_groups(fuse_private_t *priv, call_frame_t *frame)
gl = gid_cache_lookup(&priv->gid_cache, frame->root->pid, frame->root->uid,
frame->root->gid);
if (gl) {
- if (call_stack_alloc_groups(frame->root, gl->gl_count) != 0)
+ if (call_stack_alloc_groups(frame->root, gl->gl_count) != 0) {
+ gid_cache_release(&priv->gid_cache, gl);
return;
+ }
frame->root->ngrps = gl->gl_count;
for (i = 0; i < gl->gl_count; i++)
frame->root->groups[i] = gl->gl_list[i];
@@ -581,30 +606,17 @@ fuse_flip_xattr_ns(fuse_private_t *priv, char *okey, char **nkey)
int ret = 0;
gf_boolean_t need_flip = _gf_false;
- switch (priv->client_pid) {
- case GF_CLIENT_PID_GSYNCD:
- /* valid xattr(s): *xtime, volume-mark* */
- gf_log("glusterfs-fuse", GF_LOG_DEBUG,
- "PID: %d, checking xattr(s): "
- "volume-mark*, *xtime",
- priv->client_pid);
- if ((strcmp(okey, UNPRIV_XA_NS ".glusterfs.volume-mark") == 0) ||
- (fnmatch(UNPRIV_XA_NS ".glusterfs.volume-mark.*", okey,
- FNM_PERIOD) == 0) ||
- (fnmatch(UNPRIV_XA_NS ".glusterfs.*.xtime", okey, FNM_PERIOD) ==
- 0))
- need_flip = _gf_true;
- break;
-
- case GF_CLIENT_PID_HADOOP:
- /* valid xattr(s): pathinfo */
- gf_log("glusterfs-fuse", GF_LOG_DEBUG,
- "PID: %d, checking xattr(s): "
- "pathinfo",
- priv->client_pid);
- if (strcmp(okey, UNPRIV_XA_NS ".glusterfs.pathinfo") == 0)
- need_flip = _gf_true;
- break;
+ if (GF_CLIENT_PID_GSYNCD == priv->client_pid) {
+ /* valid xattr(s): *xtime, volume-mark* */
+ gf_log("glusterfs-fuse", GF_LOG_DEBUG,
+ "PID: %d, checking xattr(s): "
+ "volume-mark*, *xtime",
+ priv->client_pid);
+ if ((strcmp(okey, UNPRIV_XA_NS ".glusterfs.volume-mark") == 0) ||
+ (fnmatch(UNPRIV_XA_NS ".glusterfs.volume-mark.*", okey,
+ FNM_PERIOD) == 0) ||
+ (fnmatch(UNPRIV_XA_NS ".glusterfs.*.xtime", okey, FNM_PERIOD) == 0))
+ need_flip = _gf_true;
}
if (need_flip) {