diff options
Diffstat (limited to 'xlators/mount/fuse')
| -rw-r--r-- | xlators/mount/fuse/src/Makefile.am | 29 | ||||
| -rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.c | 9436 | ||||
| -rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.h | 734 | ||||
| -rw-r--r-- | xlators/mount/fuse/src/fuse-helpers.c | 969 | ||||
| -rw-r--r-- | xlators/mount/fuse/src/fuse-mem-types.h | 43 | ||||
| -rw-r--r-- | xlators/mount/fuse/src/fuse-resolve.c | 980 | ||||
| -rw-r--r-- | xlators/mount/fuse/utils/Makefile.am | 9 | ||||
| -rwxr-xr-x | xlators/mount/fuse/utils/mount.glusterfs.in | 935 | ||||
| -rwxr-xr-x | xlators/mount/fuse/utils/mount_glusterfs.in | 617 |
9 files changed, 9088 insertions, 4664 deletions
diff --git a/xlators/mount/fuse/src/Makefile.am b/xlators/mount/fuse/src/Makefile.am index 7bb3931ec51..7018cad37f6 100644 --- a/xlators/mount/fuse/src/Makefile.am +++ b/xlators/mount/fuse/src/Makefile.am @@ -1,26 +1,39 @@ -noinst_HEADERS = $(CONTRIBDIR)/fuse-include/fuse_kernel.h\ - $(CONTRIBDIR)/fuse-include/fuse-mount.h\ +noinst_HEADERS_linux = $(CONTRIBDIR)/fuse-include/fuse_kernel.h\ + $(CONTRIBDIR)/fuse-include/mount_util.h\ + $(CONTRIBDIR)/fuse-lib/mount-gluster-compat.h +noinst_HEADERS_darwin = $(CONTRIBDIR)/fuse-include/fuse_kernel_macfuse.h\ + $(CONTRIBDIR)/macfuse/fuse_param.h\ + $(CONTRIBDIR)/macfuse/fuse_ioctl.h +noinst_HEADERS_common = $(CONTRIBDIR)/fuse-include/fuse-mount.h\ $(CONTRIBDIR)/fuse-include/fuse-misc.h fuse-mem-types.h \ fuse-bridge.h +if GF_DARWIN_HOST_OS + noinst_HEADERS = $(noinst_HEADERS_common) $(noinst_HEADERS_darwin) +else + noinst_HEADERS = $(noinst_HEADERS_common) $(noinst_HEADERS_linux) +endif + xlator_LTLIBRARIES = fuse.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mount if GF_DARWIN_HOST_OS mount_source=$(CONTRIBDIR)/macfuse/mount_darwin.c else - mount_source=$(CONTRIBDIR)/fuse-lib/mount.c + mount_source=$(CONTRIBDIR)/fuse-lib/mount.c $(CONTRIBDIR)/fuse-lib/mount-common.c endif fuse_la_SOURCES = fuse-helpers.c fuse-resolve.c fuse-bridge.c \ $(CONTRIBDIR)/fuse-lib/misc.c $(mount_source) -fuse_la_LDFLAGS = -module -avoidversion -shared -nostartfiles -fuse_la_LIBADD = @GF_FUSE_LDADD@ +fuse_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) +fuse_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(GF_LDADD) @GF_FUSE_LDADD@ -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) -Wall \ - -I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/fuse-include \ - $(GF_CFLAGS) $(GF_FUSE_CFLAGS) +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -I$(CONTRIBDIR)/fuse-include \ + -I$(CONTRIBDIR)/fuse-lib $(GF_FUSE_CFLAGS) 
+AM_CFLAGS = -Wall $(GF_CFLAGS) CLEANFILES = diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index b8f53a1bc3c..0e22fe411ee 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -1,4190 +1,7248 @@ /* - Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ +#include <config.h> + +#include <sys/wait.h> #include "fuse-bridge.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/byte-order.h> +#include <glusterfs/compat-errno.h> +#include <glusterfs/glusterfs-acl.h> +#include <glusterfs/syscall.h> +#include <glusterfs/timespec.h> +#include <glusterfs/async.h> + +#ifdef __NetBSD__ +#undef open /* in perfuse.h, pulled from mount-gluster-compat.h */ +#endif +typedef struct _fuse_async { + struct iobuf *iobuf; + fuse_in_header_t *finh; + void *msg; + gf_async_t async; +} fuse_async_t; -static int gf_fuse_conn_err_log; static int gf_fuse_xattr_enotsup_log; -fuse_fd_ctx_t * -__fuse_fd_ctx_check_n_create (fd_t *fd, xlator_t *this) +void +fini(xlator_t *this_xl); + +static int32_t +fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino); + +/* + * Send an invalidate notification up to fuse to purge the file from local + * page cache. + */ + +static int32_t +fuse_invalidate(xlator_t *this, inode_t *inode) { - uint64_t val = 0; - int32_t ret = 0; - fuse_fd_ctx_t *fd_ctx = NULL; + fuse_private_t *priv = this->private; + uint64_t nodeid; + + /* + * NOTE: We only invalidate at the moment if fopen_keep_cache is + * enabled because otherwise this is a departure from default + * behavior. Specifically, the performance/write-behind xlator + * causes unconditional invalidations on write requests. + */ + if (!priv->fopen_keep_cache) + return 0; - ret = __fd_ctx_get (fd, this, &val); + nodeid = inode_to_fuse_nodeid(inode); + gf_log(this->name, GF_LOG_DEBUG, "Invalidate inode id %" GF_PRI_INODE ".", + nodeid); + fuse_log_eh(this, "Sending invalidate inode id: %" GF_PRI_INODE " gfid: %s", + nodeid, uuid_utoa(inode->gfid)); + fuse_invalidate_inode(this, nodeid); - fd_ctx = (fuse_fd_ctx_t *)(unsigned long) val; + return 0; +} - if (fd_ctx == NULL) { - fd_ctx = GF_CALLOC (1, sizeof (*fd_ctx), - gf_fuse_mt_fd_ctx_t); +static int32_t +fuse_forget_cbk(xlator_t *this, inode_t *inode) +{ + // Nothing to free in inode ctx, hence return. 
+ return 0; +} - ret = __fd_ctx_set (fd, this, - (uint64_t)(unsigned long)fd_ctx); - if (ret < 0) { - gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "fd-ctx-set failed"); - GF_FREE (fd_ctx); - fd_ctx = NULL; - } +fuse_fd_ctx_t * +__fuse_fd_ctx_check_n_create(xlator_t *this, fd_t *fd) +{ + uint64_t val = 0; + int32_t ret = 0; + fuse_fd_ctx_t *fd_ctx = NULL; + + ret = __fd_ctx_get(fd, this, &val); + + fd_ctx = (fuse_fd_ctx_t *)(unsigned long)val; + + if (fd_ctx == NULL) { + fd_ctx = GF_CALLOC(1, sizeof(*fd_ctx), gf_fuse_mt_fd_ctx_t); + if (!fd_ctx) { + goto out; + } + ret = __fd_ctx_set(fd, this, (uint64_t)(unsigned long)fd_ctx); + if (ret < 0) { + gf_log("glusterfs-fuse", GF_LOG_DEBUG, "fd-ctx-set failed"); + GF_FREE(fd_ctx); + fd_ctx = NULL; } + } +out: + return fd_ctx; +} + +fuse_fd_ctx_t * +fuse_fd_ctx_check_n_create(xlator_t *this, fd_t *fd) +{ + fuse_fd_ctx_t *fd_ctx = NULL; - return fd_ctx; + if ((fd == NULL) || (this == NULL)) { + goto out; + } + + LOCK(&fd->lock); + { + fd_ctx = __fuse_fd_ctx_check_n_create(this, fd); + } + UNLOCK(&fd->lock); + +out: + return fd_ctx; +} + +static void +fuse_fd_ctx_destroy(xlator_t *this, fd_t *fd) +{ + fd_t *activefd = NULL; + uint64_t val = 0; + int ret = 0; + fuse_fd_ctx_t *fdctx = NULL; + + ret = fd_ctx_del(fd, this, &val); + if (!ret) { + fdctx = (fuse_fd_ctx_t *)(unsigned long)val; + if (fdctx) { + activefd = fdctx->activefd; + if (activefd) { + fd_unref(activefd); + } + + GF_FREE(fdctx); + } + } } fuse_fd_ctx_t * -fuse_fd_ctx_check_n_create (fd_t *fd, xlator_t *this) +fuse_fd_ctx_get(xlator_t *this, fd_t *fd) { - fuse_fd_ctx_t *fd_ctx = NULL; + fuse_fd_ctx_t *fdctx = NULL; + uint64_t value = 0; + int ret = 0; - if ((fd == NULL) || (this == NULL)) { - goto out; - } + ret = fd_ctx_get(fd, this, &value); + if (ret < 0) { + goto out; + } - LOCK (&fd->lock); - { - fd_ctx = __fuse_fd_ctx_check_n_create (fd, this); - } - UNLOCK (&fd->lock); + fdctx = (fuse_fd_ctx_t *)(unsigned long)value; out: - return fd_ctx; + return fdctx; } 
+struct fusedump_timespec { + uint32_t len; + uint64_t sec; + uint32_t nsec; +} __attribute__((packed)); + +struct fusedump_signature { + uint32_t len; + char sig[8]; +} __attribute__((packed)); + +static void +fusedump_gettime(struct fusedump_timespec *fts) +{ + struct timespec ts = { + 0, + }; + + timespec_now_realtime(&ts); + + fts->sec = ts.tv_sec; + fts->nsec = ts.tv_nsec; +} + +static void +fusedump_setup_meta(struct iovec *iovs, char *dir, + uint32_t *fusedump_item_count, + struct fusedump_timespec *fts, + struct fusedump_signature *fsig) +{ + char glustersig[8] = {'G', 'L', 'U', 'S', 'T', 'E', 'R', 0xF5}; + + *fusedump_item_count = 3; + fts->len = sizeof(*fts); + fusedump_gettime(fts); + fsig->len = sizeof(*fsig); + memcpy(fsig->sig, glustersig, 8); + + iovs[0] = (struct iovec){dir, sizeof(*dir)}; + iovs[1] = (struct iovec){fusedump_item_count, sizeof(*fusedump_item_count)}; + iovs[2] = (struct iovec){fts, fts->len}; + iovs[3] = (struct iovec){fsig, fsig->len}; +} + +static int +check_and_dump_fuse_W(fuse_private_t *priv, struct iovec *iov_out, int count, + ssize_t res, errnomask_t errnomask) +{ + char w = 'W'; + struct iovec diov[4] = { + { + 0, + }, + }; + uint32_t fusedump_item_count = 3; + struct fusedump_timespec fts = { + 0, + }; + struct fusedump_signature fsig = { + 0, + }; + struct fuse_out_header *fouh = NULL; + + if (res == -1) { + const char *errdesc = NULL; + gf_loglevel_t loglevel = GF_LOG_ERROR; + gf_boolean_t errno_degraded = _gf_false; + gf_boolean_t errno_promoted = _gf_false; + +#define ACCOUNT_ERRNO(eno) \ + do { \ + if (errno_degraded) { \ + pthread_mutex_lock(&priv->fusedev_errno_cnt_mutex); \ + { \ + if (!++priv->fusedev_errno_cnt[FUSEDEV_##eno]) \ + errno_promoted = _gf_true; \ + } \ + pthread_mutex_unlock(&priv->fusedev_errno_cnt_mutex); \ + } \ + } while (0) + + /* If caller masked the errno, then it + * does not indicate an error at the application + * level, so we degrade the log severity to DEBUG. 
+ */ + if (errnomask && errno < ERRNOMASK_MAX && + GET_ERRNO_MASK(errnomask, errno)) { + loglevel = GF_LOG_DEBUG; + errno_degraded = _gf_true; + } + + switch (errno) { + /* The listed errnos are FUSE status indicators, + * not legit values according to POSIX (see write(3p)), + * so resolving them according to the standard + * POSIX interpretation would be misleading. + */ + case ENOENT: + errdesc = "ENOENT"; + ACCOUNT_ERRNO(ENOENT); + break; + case ENOTDIR: + errdesc = "ENOTDIR"; + ACCOUNT_ERRNO(ENOTDIR); + break; + case ENODEV: + errdesc = "ENODEV"; + ACCOUNT_ERRNO(ENODEV); + break; + case EPERM: + errdesc = "EPERM"; + ACCOUNT_ERRNO(EPERM); + break; + case ENOMEM: + errdesc = "ENOMEM"; + ACCOUNT_ERRNO(ENOMEM); + break; + case ENOTCONN: + errdesc = "ENOTCONN"; + ACCOUNT_ERRNO(ENOTCONN); + break; + case ECONNREFUSED: + errdesc = "ECONNREFUSED"; + ACCOUNT_ERRNO(ECONNREFUSED); + break; + case EOVERFLOW: + errdesc = "EOVERFLOW"; + ACCOUNT_ERRNO(EOVERFLOW); + break; + case EBUSY: + errdesc = "EBUSY"; + ACCOUNT_ERRNO(EBUSY); + break; + case ENOTEMPTY: + errdesc = "ENOTEMPTY"; + ACCOUNT_ERRNO(ENOTEMPTY); + break; + default: + errdesc = strerror(errno); + } + + gf_log_callingfn("glusterfs-fuse", loglevel, + "writing to fuse device failed: %s", errdesc); + if (errno_promoted) + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "writing to fuse device yielded %s %d times", errdesc, + UINT8_MAX + 1); + return errno; + +#undef ACCOUNT_ERRNO + } + + fouh = iov_out[0].iov_base; + if (res != fouh->len) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "inconsistent write to fuse device: " + "written %zd, expectd %d", + res, fouh->len); + return EINVAL; + } + + if (priv->fuse_dump_fd == -1) + return 0; + + fusedump_setup_meta(diov, &w, &fusedump_item_count, &fts, &fsig); + + pthread_mutex_lock(&priv->fuse_dump_mutex); + res = sys_writev(priv->fuse_dump_fd, diov, sizeof(diov) / sizeof(diov[0])); + if (res != -1) + res = sys_writev(priv->fuse_dump_fd, iov_out, count); + 
pthread_mutex_unlock(&priv->fuse_dump_mutex); + + if (res == -1) + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "failed to dump fuse message (W): %s", strerror(errno)); + + /* + * Return value reflects check on write to /dev/fuse, + * so ignore issues with dumping. + */ + + return 0; +} /* * iov_out should contain a fuse_out_header at zeroth position. * The error value of this header is sent to kernel. */ static int -send_fuse_iov (xlator_t *this, fuse_in_header_t *finh, struct iovec *iov_out, - int count) +send_fuse_iov(xlator_t *this, fuse_in_header_t *finh, struct iovec *iov_out, + int count) +{ + fuse_private_t *priv = NULL; + struct fuse_out_header *fouh = NULL; + int res, i; + + if (!this || !finh || !iov_out) { + gf_log("send_fuse_iov", GF_LOG_ERROR, "Invalid arguments"); + return EINVAL; + } + priv = this->private; + + fouh = iov_out[0].iov_base; + iov_out[0].iov_len = sizeof(*fouh); + fouh->len = 0; + for (i = 0; i < count; i++) + fouh->len += iov_out[i].iov_len; + fouh->unique = finh->unique; + + res = sys_writev(priv->fd, iov_out, count); + gf_log("glusterfs-fuse", GF_LOG_TRACE, "writev() result %d/%d %s", res, + fouh->len, res == -1 ? 
strerror(errno) : ""); + + return check_and_dump_fuse_W(priv, iov_out, count, res, NULL); +} + +static int +send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size) +{ + struct fuse_out_header fouh = { + 0, + }; + struct iovec iov_out[2]; + int ret = 0; + + fouh.error = 0; + iov_out[0].iov_base = &fouh; + iov_out[1].iov_base = data; + iov_out[1].iov_len = size; + + ret = send_fuse_iov(this, finh, iov_out, 2); + if (ret != 0) + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "send_fuse_iov() " + "failed: %s", + strerror(ret)); + + return ret; +} + +#define send_fuse_obj(this, finh, obj) \ + send_fuse_data(this, finh, obj, sizeof(*(obj))) + +static int32_t +fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) { - fuse_private_t *priv = NULL; - struct fuse_out_header *fouh = NULL; - int res, i; +#if (FUSE_KERNEL_MINOR_VERSION >= 11 && defined(HAVE_FUSE_NOTIFICATIONS)) + struct fuse_out_header *fouh = NULL; + struct fuse_notify_inval_entry_out *fnieo = NULL; + fuse_private_t *priv = NULL; + dentry_t *dentry = NULL; + dentry_t *tmp = NULL; + inode_t *inode = NULL; + size_t nlen = 0; + fuse_invalidate_node_t *node = NULL; + char gfid_str[UUID_CANONICAL_FORM_LEN + 1]; + + priv = this->private; + if (!priv->reverse_fuse_thread_started) + return -1; + + if (priv->invalidate_limit && + (priv->invalidate_count >= priv->invalidate_limit)) { + return -1; + } + + inode = (inode_t *)(unsigned long)fuse_ino; + if (inode == NULL) + return -1; + + list_for_each_entry_safe(dentry, tmp, &inode->dentry_list, inode_list) + { + node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t); + if (node == NULL) + return -1; + + INIT_LIST_HEAD(&node->next); + + fouh = (struct fuse_out_header *)node->inval_buf; + fnieo = (struct fuse_notify_inval_entry_out *)(fouh + 1); + + fouh->unique = 0; + fouh->error = FUSE_NOTIFY_INVAL_ENTRY; - if (!this || !finh || !iov_out) { - gf_log ("send_fuse_iov", GF_LOG_ERROR,"Invalid arguments"); - return -1; + if (ENOENT < 
ERRNOMASK_MAX) + MASK_ERRNO(node->errnomask, ENOENT); + if (ENOTDIR < ERRNOMASK_MAX) + MASK_ERRNO(node->errnomask, ENOTDIR); + if (EBUSY < ERRNOMASK_MAX) + MASK_ERRNO(node->errnomask, EBUSY); + if (ENOTEMPTY < ERRNOMASK_MAX) + MASK_ERRNO(node->errnomask, ENOTEMPTY); + + if (dentry->name) { + nlen = strlen(dentry->name); + fouh->len = sizeof(*fouh) + sizeof(*fnieo) + nlen + 1; + fnieo->parent = inode_to_fuse_nodeid(dentry->parent); + + fnieo->namelen = nlen; + strcpy((node->inval_buf + sizeof(*fouh) + sizeof(*fnieo)), + dentry->name); + } + + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "INVALIDATE entry: %" PRIu64 "/%s (gfid:%s)", fnieo->parent, + dentry->name, uuid_utoa(inode->gfid)); + + if (dentry->parent) { + fuse_log_eh(this, "Invalidated entry %s (parent: %s) gfid:%s", + dentry->name, uuid_utoa(dentry->parent->gfid), + uuid_utoa_r(inode->gfid, gfid_str)); + } else { + fuse_log_eh(this, + "Invalidated entry %s(nodeid: %" PRIu64 ") gfid:%s", + dentry->name, fnieo->parent, uuid_utoa(inode->gfid)); } - priv = this->private; - - fouh = iov_out[0].iov_base; - iov_out[0].iov_len = sizeof (*fouh); - fouh->len = 0; - for (i = 0; i < count; i++) - fouh->len += iov_out[i].iov_len; - fouh->unique = finh->unique; - - res = writev (priv->fd, iov_out, count); - - if (res == -1) - return errno; - if (res != fouh->len) - return EINVAL; - - if (priv->fuse_dump_fd != -1) { - char w = 'W'; - - pthread_mutex_lock (&priv->fuse_dump_mutex); - res = write (priv->fuse_dump_fd, &w, 1); - if (res != -1) - res = writev (priv->fuse_dump_fd, iov_out, count); - pthread_mutex_unlock (&priv->fuse_dump_mutex); - - if (res == -1) - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "failed to dump fuse message (W): %s", - strerror (errno)); + + pthread_mutex_lock(&priv->invalidate_mutex); + { + list_add_tail(&node->next, &priv->invalidate_list); + priv->invalidate_count++; + pthread_cond_signal(&priv->invalidate_cond); } + pthread_mutex_unlock(&priv->invalidate_mutex); + } - return 0; +#endif + return 0; } 
-static int -send_fuse_data (xlator_t *this, fuse_in_header_t *finh, void *data, size_t size) +/* + * Send an inval inode notification to fuse. This causes an invalidation of the + * entire page cache mapping on the inode. + */ +static int32_t +fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) { - struct fuse_out_header fouh = {0, }; - struct iovec iov_out[2]; +#if (FUSE_KERNEL_MINOR_VERSION >= 11 && defined(HAVE_FUSE_NOTIFICATIONS)) + struct fuse_out_header *fouh = NULL; + struct fuse_notify_inval_inode_out *fniio = NULL; + fuse_private_t *priv = NULL; + fuse_invalidate_node_t *node = NULL; + inode_t *inode = NULL; - fouh.error = 0; - iov_out[0].iov_base = &fouh; - iov_out[1].iov_base = data; - iov_out[1].iov_len = size; + priv = this->private; - return send_fuse_iov (this, finh, iov_out, 2); + if (!priv->reverse_fuse_thread_started) + return -1; + + if (priv->invalidate_limit && + (priv->invalidate_count >= priv->invalidate_limit)) { + return -1; + } + + inode = (inode_t *)(unsigned long)fuse_ino; + if (inode == NULL) + return -1; + + node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t); + if (node == NULL) + return -1; + + INIT_LIST_HEAD(&node->next); + + fouh = (struct fuse_out_header *)node->inval_buf; + fniio = (struct fuse_notify_inval_inode_out *)(fouh + 1); + + fouh->unique = 0; + fouh->error = FUSE_NOTIFY_INVAL_INODE; + fouh->len = sizeof(struct fuse_out_header) + + sizeof(struct fuse_notify_inval_inode_out); + + /* inval the entire mapping until we learn how to be more granular */ + fniio->ino = fuse_ino; + fniio->off = 0; + fniio->len = -1; + + if (ENOENT < ERRNOMASK_MAX) + MASK_ERRNO(node->errnomask, ENOENT); + + fuse_log_eh(this, "Invalidated inode %" PRIu64 " (gfid: %s)", fuse_ino, + uuid_utoa(inode->gfid)); + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "INVALIDATE inode: %" PRIu64 "(gfid:%s)", fuse_ino, + uuid_utoa(inode->gfid)); + + pthread_mutex_lock(&priv->invalidate_mutex); + { + list_add_tail(&node->next, 
&priv->invalidate_list); + priv->invalidate_count++; + pthread_cond_signal(&priv->invalidate_cond); + } + pthread_mutex_unlock(&priv->invalidate_mutex); + +#else + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "fuse_invalidate_inode not implemented on this system"); +#endif + return 0; } -#define send_fuse_obj(this, finh, obj) \ - send_fuse_data (this, finh, obj, sizeof (*(obj))) +#if FUSE_KERNEL_MINOR_VERSION >= 11 +/* Need this function for the signature (inode_t *, instead of uint64_t) */ +static int32_t +fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode) +{ + int32_t ret = 0; + ret = fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode); + return ret; +} +#endif +static fuse_timed_message_t * +fuse_timed_message_new(void) +{ + fuse_timed_message_t *dmsg = NULL; + + dmsg = GF_MALLOC(sizeof(*dmsg), gf_fuse_mt_timed_message_t); + if (!dmsg) { + return NULL; + } + + /* should be NULL if not set */ + dmsg->fuse_message_body = NULL; + INIT_LIST_HEAD(&dmsg->next); + memset(dmsg->errnomask, 0, sizeof(dmsg->errnomask)); + + return dmsg; +} static void -fuse_invalidate (xlator_t *this, uint64_t fuse_ino) +fuse_timed_message_free(fuse_timed_message_t *dmsg) { - struct fuse_out_header *fouh = NULL; - struct fuse_notify_inval_entry_out *fnieo = NULL; - fuse_private_t *priv = NULL; - dentry_t *dentry = NULL; - inode_t *inode = NULL; - size_t nlen = 0; - int rv = 0; + GF_FREE(dmsg->fuse_message_body); + GF_FREE(dmsg); +} - char inval_buf[INVAL_BUF_SIZE] = {0,}; +static void +send_fuse_timed(xlator_t *this, fuse_timed_message_t *dmsg) +{ + fuse_private_t *priv = NULL; - fouh = (struct fuse_out_header *)inval_buf; - fnieo = (struct fuse_notify_inval_entry_out *)(fouh + 1); + priv = this->private; - priv = this->private; - if (priv->revchan_out == -1) - return; + if (!priv->timed_response_fuse_thread_started) { + return; + } + + pthread_mutex_lock(&priv->timed_mutex); + { + list_add_tail(&dmsg->next, &priv->timed_list); + pthread_cond_signal(&priv->timed_cond); + } + 
pthread_mutex_unlock(&priv->timed_mutex); +} - fouh->unique = 0; - fouh->error = FUSE_NOTIFY_INVAL_ENTRY; +fuse_interrupt_record_t * +fuse_interrupt_record_new(fuse_in_header_t *finh, + fuse_interrupt_handler_t handler) +{ + fuse_interrupt_record_t *fir = NULL; - inode = fuse_ino_to_inode (fuse_ino, this); + fir = GF_MALLOC(sizeof(*fir), gf_fuse_mt_interrupt_record_t); + if (!fir) { + return NULL; + } - list_for_each_entry (dentry, &inode->dentry_list, inode_list) { - nlen = strlen (dentry->name); - fouh->len = sizeof (*fouh) + sizeof (*fnieo) + nlen + 1; - fnieo->parent = inode_to_fuse_nodeid (dentry->parent); + fir->hit = _gf_false; + fir->interrupt_state = INTERRUPT_NONE; + fir->data = NULL; - fnieo->namelen = nlen; - strcpy (inval_buf + sizeof (*fouh) + sizeof (*fnieo), dentry->name); + fir->interrupt_handler = handler; + memcpy(&fir->fuse_in_header, finh, sizeof(*finh)); + pthread_cond_init(&fir->handler_cond, NULL); + pthread_mutex_init(&fir->handler_mutex, NULL); + INIT_LIST_HEAD(&fir->next); - rv = write (priv->revchan_out, inval_buf, fouh->len); - if (rv != fouh->len) { - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "kernel notification daemon defunct"); + return fir; +} - close (priv->fd); - break; - } +static void +fuse_interrupt_record_free(fuse_interrupt_record_t *fir, void **datap) +{ + /* + * If caller wishes, we give back the private data to let them deal with it + * however they want; otherwise we take care of freeing it. 
+ */ + if (datap) { + *datap = fir->data; + } else { + GF_FREE(fir->data); + } + + GF_FREE(fir); +} - gf_log ("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE entry: " - "%"PRIu64"/%s", fnieo->parent, dentry->name); - } +void +fuse_interrupt_record_insert(xlator_t *this, fuse_interrupt_record_t *fir) +{ + fuse_private_t *priv = NULL; + + priv = this->private; + pthread_mutex_lock(&priv->interrupt_mutex); + { + list_add_tail(&fir->next, &priv->interrupt_list); + } + pthread_mutex_unlock(&priv->interrupt_mutex); } -int -send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error) +static fuse_interrupt_record_t * +fuse_interrupt_record_fetch(xlator_t *this, uint64_t unique, gf_boolean_t reap) { - struct fuse_out_header fouh = {0, }; - struct iovec iov_out; + fuse_interrupt_record_t *fir = NULL; + gf_boolean_t found = _gf_false; + fuse_private_t *priv = NULL; + + priv = this->private; + pthread_mutex_lock(&priv->interrupt_mutex); + { + list_for_each_entry(fir, &priv->interrupt_list, next) + { + if (fir->fuse_in_header.unique == unique) { + /* + * If we are to reap, we do it regardless the + * hit flag; otherwise we take the record only + * hasn't yet flagged hit. + */ + if (reap || !fir->hit) { + found = _gf_true; + } + /* + * If we are not reaping (coming from handler + * context), we set the hit flag. 
+ */ + if (!reap) { + fir->hit = _gf_true; + } + break; + } + } + if (found && reap) { + list_del(&fir->next); + } + } + pthread_mutex_unlock(&priv->interrupt_mutex); - fouh.error = -error; - iov_out.iov_base = &fouh; + if (found) { + return fir; + } + return NULL; +} - return send_fuse_iov (this, finh, &iov_out, 1); +static fuse_interrupt_record_t * +fuse_interrupt_record_get(xlator_t *this, uint64_t unique) +{ + return fuse_interrupt_record_fetch(this, unique, _gf_false); } -static int -fuse_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *buf) -{ - fuse_state_t *state = NULL; - fuse_in_header_t *finh = NULL; - struct fuse_entry_out feo = {0, }; - fuse_private_t *priv = NULL; - inode_t *linked_inode = NULL; - - priv = this->private; - state = frame->root->state; - finh = state->finh; - - if (!op_ret && __is_root_gfid (state->loc.inode->gfid)) { - buf->ia_ino = 1; +static fuse_interrupt_record_t * +fuse_interrupt_record_reap(xlator_t *this, uint64_t unique) +{ + return fuse_interrupt_record_fetch(this, unique, _gf_true); +} + +static void +fuse_interrupt(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +{ + struct fuse_interrupt_in *fii = msg; + fuse_interrupt_record_t *fir = NULL; + + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "unique %" PRIu64 " INTERRUPT for %" PRIu64, finh->unique, + fii->unique); + + fir = fuse_interrupt_record_get(this, fii->unique); + if (fir) { + gf_log("glusterfs-fuse", GF_LOG_DEBUG, + "unique %" PRIu64 " INTERRUPT for %" PRIu64 + ": handler triggered", + finh->unique, fii->unique); + + fir->interrupt_handler(this, fir); + } else { + fuse_timed_message_t *dmsg = NULL; + + /* + * No record found for this interrupt request. + * + * It's either because the handler for the interrupted message + * does not want to handle interrupt, or this interrupt + * message beat the interrupted which hasn't yet added a record + * to the interrupt queue. 
Either case we reply with error + * EAGAIN with some (0.01 sec) delay. That will have this + * interrupt request resent, unless the interrupted message + * has been already answered. + * + * So effectively we are looping in between kernel and + * userspace, which will be exited either when the interrupted + * message handler has added an interrupt record, or has + * replied to kernel. See + * + * https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/ + * linux.git/tree/Documentation/filesystems/fuse.txt?h=v4.18#n148 + */ + + gf_log("glusterfs-fuse", GF_LOG_DEBUG, + "unique %" PRIu64 " INTERRUPT for %" PRIu64 ": no handler found", + finh->unique, fii->unique); + + dmsg = fuse_timed_message_new(); + if (!dmsg) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "unique %" PRIu64 " INTERRUPT for %" PRIu64 + ":" + " failed to allocate timed message", + finh->unique, fii->unique); + + goto out; } - if (op_ret == 0) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": %s() %s => %"PRId64, - frame->root->unique, gf_fop_list[frame->root->op], - state->loc.path, buf->ia_ino); + dmsg->fuse_out_header.unique = finh->unique; + dmsg->fuse_out_header.len = sizeof(dmsg->fuse_out_header); + dmsg->fuse_out_header.error = -EAGAIN; + if (ENOENT < ERRNOMASK_MAX) + MASK_ERRNO(dmsg->errnomask, ENOENT); + timespec_now(&dmsg->scheduled_ts); + timespec_adjust_delta(&dmsg->scheduled_ts, + (struct timespec){0, 10000000}); - buf->ia_blksize = this->ctx->page_size; - gf_fuse_stat2attr (buf, &feo.attr); + send_fuse_timed(this, dmsg); + } - if (!buf->ia_ino) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": %s() %s returning inode 0", - frame->root->unique, - gf_fop_list[frame->root->op], state->loc.path); - } +out: + GF_FREE(finh); +} - linked_inode = inode_link (inode, state->loc.parent, - state->loc.name, buf); +/* + * Function to be called in fop cbk context (if the fop engages + * with interrupt handling). 
+ */ +gf_boolean_t +fuse_interrupt_finish_fop(call_frame_t *frame, xlator_t *this, + gf_boolean_t sync, void **datap) +{ + fuse_interrupt_record_t *fir = NULL; + fuse_state_t *state = frame->root->state; + fuse_in_header_t *finh = state->finh; + gf_boolean_t hit = _gf_false; + gf_boolean_t handled = _gf_false; + fuse_interrupt_state_t intstat_orig = INTERRUPT_NONE; + + fir = fuse_interrupt_record_reap(this, finh->unique); + if (!fir) { + /* + * No interrupt record was inserted (however, caller would usually know + * about that and there is no point then in calling this function). + */ + return _gf_false; + } + + /* + * The interrupt handler (if finds the record) modifies fir->hit; however, + * that could have occurred only before fuse_interrupt_record_reap(), so + * we are safe here with a lock-free access. + */ + hit = fir->hit; + if (hit) { + pthread_mutex_lock(&fir->handler_mutex); + { + intstat_orig = fir->interrupt_state; + if (fir->interrupt_state == INTERRUPT_NONE) { + if (sync) { + fir->interrupt_state = INTERRUPT_WAITING_HANDLER; + while (fir->interrupt_state != INTERRUPT_SQUELCHED) { + pthread_cond_wait(&fir->handler_cond, + &fir->handler_mutex); + } + } else + fir->interrupt_state = INTERRUPT_SQUELCHED; + } + } + pthread_mutex_unlock(&fir->handler_mutex); + } + + GF_ASSERT(intstat_orig == INTERRUPT_NONE || + intstat_orig == INTERRUPT_HANDLED || + intstat_orig == INTERRUPT_SQUELCHED); + gf_log("glusterfs-fuse", GF_LOG_DEBUG, "intstat_orig=%d", intstat_orig); + + /* + * From this on fir can only be referred under the conditions that imply + * we are to free it (otherwise interrupt handler might have already freed + * it). 
+ */ + + if (/* there was no interrupt */ + !hit || + /* lost the race against interrupt handler */ + intstat_orig != INTERRUPT_NONE || + /* we took cleaning up on us */ + sync) { + /* cleaning up */ + fuse_interrupt_record_free(fir, datap); + } else if (datap) { + *datap = NULL; + } + + handled = (intstat_orig == INTERRUPT_HANDLED); + if (handled) { + /* + * Fuse request was answered already from interrupt context, we can do + * away with the stack. + */ + free_fuse_state(state); + STACK_DESTROY(frame->root); + } + + /* + * Let caller know if they have to answer the fuse request. + */ + return handled; +} - if (linked_inode != inode) { - } +/* + * Function to be called in interrupt handler context. + */ +void +fuse_interrupt_finish_interrupt(xlator_t *this, fuse_interrupt_record_t *fir, + fuse_interrupt_state_t intstat, + gf_boolean_t sync, void **datap) +{ + fuse_in_header_t finh = { + 0, + }; + fuse_interrupt_state_t intstat_orig = INTERRUPT_NONE; + + GF_ASSERT(intstat == INTERRUPT_HANDLED || intstat == INTERRUPT_SQUELCHED); + + pthread_mutex_lock(&fir->handler_mutex); + { + intstat_orig = fir->interrupt_state; + switch (intstat_orig) { + case INTERRUPT_NONE: + fir->interrupt_state = intstat; + break; + case INTERRUPT_WAITING_HANDLER: + fir->interrupt_state = INTERRUPT_SQUELCHED; + pthread_cond_signal(&fir->handler_cond); + break; + default: + break; + } + finh = fir->fuse_in_header; + } + pthread_mutex_unlock(&fir->handler_mutex); + + GF_ASSERT(intstat_orig == INTERRUPT_NONE || + (sync && intstat_orig == INTERRUPT_WAITING_HANDLER) || + (!sync && intstat_orig == INTERRUPT_SQUELCHED)); + gf_log("glusterfs-fuse", GF_LOG_DEBUG, "intstat_orig=%d", intstat_orig); + + /* + * From this on fir can only be referred under the conditions that imply + * we are to free it (otherwise fop handler might have already freed it). 
+ */ + + if (/* we won the race, response is up to us */ + intstat_orig == INTERRUPT_NONE && + /* interrupt handling was successful, let the kernel know */ + intstat == INTERRUPT_HANDLED) { + send_fuse_err(this, &finh, EINTR); + } + + if (/* lost the race ... */ + intstat_orig != INTERRUPT_NONE && + /* + * ... and there is no contract with fop handler that it does the + * cleanup ... + */ + !sync) { + /* ... so we do! */ + fuse_interrupt_record_free(fir, datap); + } else if (datap) { + *datap = NULL; + } +} - inode_lookup (linked_inode); +int +send_fuse_err(xlator_t *this, fuse_in_header_t *finh, int error) +{ + struct fuse_out_header fouh = { + 0, + }; + struct iovec iov_out; + inode_t *inode = NULL; + + fouh.error = -error; + iov_out.iov_base = &fouh; + + inode = fuse_ino_to_inode(finh->nodeid, this); + + // filter out ENOENT + if (error != ENOENT) { + if (inode) { + fuse_log_eh(this, + "Sending %s for operation %d on " + "inode %s", + strerror(error), finh->opcode, uuid_utoa(inode->gfid)); + } else { + fuse_log_eh(this, + "Sending %s for operation %d on " + "inode %" GF_PRI_INODE, + strerror(error), finh->opcode, finh->nodeid); + } + } - feo.nodeid = inode_to_fuse_nodeid (linked_inode); + if (inode) + inode_unref(inode); - inode_unref (linked_inode); + return send_fuse_iov(this, finh, &iov_out, 1); +} - feo.entry_valid = - calc_timeout_sec (priv->entry_timeout); - feo.entry_valid_nsec = - calc_timeout_nsec (priv->entry_timeout); - feo.attr_valid = - calc_timeout_sec (priv->attribute_timeout); - feo.attr_valid_nsec = - calc_timeout_nsec (priv->attribute_timeout); +static int +fuse_entry_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, dict_t *xdata) +{ + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; + struct fuse_entry_out feo = { + 0, + }; + fuse_private_t *priv = NULL; + inode_t *linked_inode = NULL; + uint64_t ctx_value = LOOKUP_NOT_NEEDED; + + priv = this->private; + 
state = frame->root->state; + finh = state->finh; + + if (op_ret == 0) { + if (__is_root_gfid(state->loc.inode->gfid)) + buf->ia_ino = 1; + if (gf_uuid_is_null(buf->ia_gfid)) { + /* With a NULL gfid inode linking is + not possible. Let's not pretend this + call was a "success". + */ + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "Received NULL gfid for %s. Forcing EIO", state->loc.path); + op_ret = -1; + op_errno = EIO; + } + } + + /* log into the event-history after the null uuid check is done, since + * the op_ret and op_errno are being changed if the gfid is NULL. + */ + fuse_log_eh( + this, + "op_ret: %d op_errno: %d " + "%" PRIu64 ": %s() %s => %s", + op_ret, op_errno, frame->root->unique, gf_fop_list[frame->root->op], + state->loc.path, + (op_ret == 0) ? uuid_utoa(buf->ia_gfid) : uuid_utoa(state->loc.gfid)); + + if (op_ret == 0) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": %s() %s => %" PRIu64, frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path, buf->ia_ino); + + buf->ia_blksize = this->ctx->page_size; + gf_fuse_stat2attr(buf, &feo.attr, priv->enable_ino32); + + if (!buf->ia_ino) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": %s() %s returning inode 0", + frame->root->unique, gf_fop_list[frame->root->op], + state->loc.path); + } + + linked_inode = inode_link(inode, state->loc.parent, state->loc.name, + buf); + + if (linked_inode == inode) { + inode_ctx_set(linked_inode, this, &ctx_value); + } + + inode_lookup(linked_inode); + + feo.nodeid = inode_to_fuse_nodeid(linked_inode); + + inode_unref(linked_inode); + + feo.entry_valid = calc_timeout_sec(priv->entry_timeout); + feo.entry_valid_nsec = calc_timeout_nsec(priv->entry_timeout); + feo.attr_valid = calc_timeout_sec(priv->attribute_timeout); + feo.attr_valid_nsec = calc_timeout_nsec(priv->attribute_timeout); #if FUSE_KERNEL_MINOR_VERSION >= 9 - priv->proto_minor >= 9 ? 
- send_fuse_obj (this, finh, &feo) : - send_fuse_data (this, finh, &feo, - FUSE_COMPAT_ENTRY_OUT_SIZE); + priv->proto_minor >= 9 + ? send_fuse_obj(this, finh, &feo) + : send_fuse_data(this, finh, &feo, FUSE_COMPAT_ENTRY_OUT_SIZE); #else - send_fuse_obj (this, finh, &feo); + send_fuse_obj(this, finh, &feo); #endif + } else { + gf_log("glusterfs-fuse", + (op_errno == ENOENT ? GF_LOG_TRACE : GF_LOG_WARNING), + "%" PRIu64 ": %s() %s => -1 (%s)", frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path, + strerror(op_errno)); + + if ((op_errno == ENOENT) && (priv->negative_timeout != 0)) { + feo.entry_valid = calc_timeout_sec(priv->negative_timeout); + feo.entry_valid_nsec = calc_timeout_nsec(priv->negative_timeout); + send_fuse_obj(this, finh, &feo); } else { - gf_log ("glusterfs-fuse", - (op_errno == ENOENT ? GF_LOG_TRACE : GF_LOG_WARNING), - "%"PRIu64": %s() %s => -1 (%s)", frame->root->unique, - gf_fop_list[frame->root->op], state->loc.path, - strerror (op_errno)); - send_fuse_err (this, state->finh, op_errno); + send_fuse_err(this, state->finh, op_errno); } + } - free_fuse_state (state); - STACK_DESTROY (frame->root); - return 0; + free_fuse_state(state); + STACK_DESTROY(frame->root); + return 0; } - static int -fuse_newentry_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) +fuse_newentry_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - fuse_entry_cbk (frame, cookie, this, op_ret, op_errno, inode, buf); - return 0; -} + /* facilitate retry of link from VFS */ + if (op_errno == ENOENT) + op_errno = ESTALE; + fuse_entry_cbk(frame, cookie, this, op_ret, op_errno, inode, buf, xdata); + return 0; +} static int -fuse_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - 
int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *stat, dict_t *dict, - struct iatt *postparent) -{ - fuse_state_t *state = NULL; - call_frame_t *prev = NULL; - inode_table_t *itable = NULL; - - state = frame->root->state; - prev = cookie; - - if (op_ret == -1 && state->is_revalidate == 1) { - itable = state->loc.inode->table; - inode_unref (state->loc.inode); - state->loc.inode = inode_new (itable); - state->is_revalidate = 2; - if (uuid_is_null (state->gfid)) - uuid_generate (state->gfid); - fuse_gfid_set (state); - - STACK_WIND (frame, fuse_lookup_cbk, - prev->this, prev->this->fops->lookup, - &state->loc, state->dict); - return 0; - } +fuse_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stat, dict_t *dict, struct iatt *postparent) +{ + fuse_state_t *state = NULL; + call_frame_t *prev = NULL; + inode_table_t *itable = NULL; - fuse_entry_cbk (frame, cookie, this, op_ret, op_errno, inode, stat); + state = frame->root->state; + prev = cookie; + + if (op_ret == -1 && state->is_revalidate == 1) { + itable = state->itable; + /* + * A stale mapping might exist for a dentry/inode that has been + * removed from another client. 
+ */ + if (op_errno == ENOENT) + inode_unlink(state->loc.inode, state->loc.parent, state->loc.name); + inode_unref(state->loc.inode); + state->loc.inode = inode_new(itable); + state->is_revalidate = 2; + if (gf_uuid_is_null(state->gfid)) + gf_uuid_generate(state->gfid); + fuse_gfid_set(state); + + STACK_WIND(frame, fuse_lookup_cbk, prev->this, prev->this->fops->lookup, + &state->loc, state->xdata); return 0; + } + + fuse_entry_cbk(frame, cookie, this, op_ret, op_errno, inode, stat, dict); + return 0; } void -fuse_lookup_resume (fuse_state_t *state) -{ - if (!state->loc.parent && !state->loc.inode) { - gf_log ("fuse", GF_LOG_ERROR, "failed to resolve path %s", - state->loc.path); - send_fuse_err (state->this, state->finh, ENOENT); - free_fuse_state (state); - return; - } +fuse_fop_resume(fuse_state_t *state) +{ + fuse_resume_fn_t fn = NULL; + + /* + * Fail fd resolution failures right away. + */ + if (state->resolve.fd && state->resolve.op_ret < 0) { + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + free_fuse_state(state); + return; + } - if (state->loc.inode) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": LOOKUP %s(%s)", state->finh->unique, - state->loc.path, uuid_utoa (state->loc.inode->gfid)); - state->is_revalidate = 1; - } else { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": LOOKUP %s", state->finh->unique, - state->loc.path); - state->loc.inode = inode_new (state->loc.parent->table); - } + fn = state->resume_fn; + fn(state); +} - FUSE_FOP (state, fuse_lookup_cbk, GF_FOP_LOOKUP, - lookup, &state->loc, state->dict); +void +fuse_lookup_resume(fuse_state_t *state) +{ + if (!state->loc.parent && !state->loc.inode) { + gf_log("fuse", GF_LOG_ERROR, "failed to resolve path %s", + state->loc.path); + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + free_fuse_state(state); + return; + } + + /* parent was resolved, entry could not, may be a missing gfid? 
+ * Hence try to do a regular lookup + */ + if ((state->resolve.op_ret == -1) && (state->resolve.op_errno == ENODATA)) { + state->resolve.op_ret = 0; + } + + if (state->loc.inode) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": LOOKUP %s(%s)", + state->finh->unique, state->loc.path, + uuid_utoa(state->loc.inode->gfid)); + state->is_revalidate = 1; + } else { + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": LOOKUP %s", + state->finh->unique, state->loc.path); + state->loc.inode = inode_new(state->loc.parent->table); + if (gf_uuid_is_null(state->gfid)) + gf_uuid_generate(state->gfid); + fuse_gfid_set(state); + } + + FUSE_FOP(state, fuse_lookup_cbk, GF_FOP_LOOKUP, lookup, &state->loc, + state->xdata); } static void -fuse_lookup (xlator_t *this, fuse_in_header_t *finh, void *msg) -{ - char *name = msg; - fuse_state_t *state = NULL; - int32_t ret = -1; - - GET_STATE (this, finh, state); - - ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name); - if (!state->loc.parent || (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": LOOKUP %"PRIu64"/%s (fuse_loc_fill() failed)", - finh->unique, finh->nodeid, name); - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); - return; - } +fuse_lookup(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +{ + char *name = msg; + fuse_state_t *state = NULL; - if (state->loc.inode) { - uuid_copy (state->resolve.gfid, state->loc.inode->gfid); - } else { - uuid_generate (state->gfid); - } + GET_STATE(this, finh, state); - uuid_copy (state->resolve.pargfid, state->loc.parent->gfid); - state->resolve.bname = gf_strdup (name); - state->resolve.path = gf_strdup (state->loc.path); + (void)fuse_resolve_entry_init(state, &state->resolve, finh->nodeid, name); - fuse_resolve_and_resume (state, fuse_lookup_resume); + fuse_resolve_and_resume(state, fuse_lookup_resume); + return; } - static void -fuse_forget (xlator_t *this, fuse_in_header_t *finh, void *msg) - 
+do_forget(xlator_t *this, uint64_t unique, uint64_t nodeid, uint64_t nlookup) { - struct fuse_forget_in *ffi = msg; + inode_t *fuse_inode = fuse_ino_to_inode(nodeid, this); - inode_t *fuse_inode; + gf_log("fuse", GF_LOG_TRACE, + "%" PRIu64 ": FORGET %" PRIu64 "/%" PRIu64 " gfid: (%s)", unique, + nodeid, nlookup, uuid_utoa(fuse_inode->gfid)); - if (finh->nodeid == 1) { - GF_FREE (finh); - return; - } + fuse_log_eh(this, "%" PRIu64 ": FORGET %" PRIu64 "/%" PRIu64 " gfid: (%s)", + unique, nodeid, nlookup, uuid_utoa(fuse_inode->gfid)); + + inode_forget_with_unref(fuse_inode, nlookup); +} + +static void +fuse_forget(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": FORGET %"PRIu64"/%"PRIu64, - finh->unique, finh->nodeid, ffi->nlookup); +{ + struct fuse_forget_in *ffi = msg; - fuse_inode = fuse_ino_to_inode (finh->nodeid, this); + if (finh->nodeid == 1) { + GF_FREE(finh); + return; + } - inode_forget (fuse_inode, ffi->nlookup); - inode_unref (fuse_inode); + do_forget(this, finh->unique, finh->nodeid, ffi->nlookup); - GF_FREE (finh); + GF_FREE(finh); } +#if FUSE_KERNEL_MINOR_VERSION >= 16 +static void +fuse_batch_forget(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +{ + struct fuse_batch_forget_in *fbfi = msg; + struct fuse_forget_one *ffo = (struct fuse_forget_one *)(fbfi + 1); + int i; + + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": BATCH_FORGET %" PRIu64 "/%" PRIu32, finh->unique, + finh->nodeid, fbfi->count); + + for (i = 0; i < fbfi->count; i++) { + if (ffo[i].nodeid == 1) + continue; + do_forget(this, finh->unique, ffo[i].nodeid, ffo[i].nlookup); + } + GF_FREE(finh); +} +#endif static int -fuse_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) +fuse_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct 
iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - fuse_state_t *state; - fuse_in_header_t *finh; - fuse_private_t *priv = NULL; - struct fuse_attr_out fao; + fuse_state_t *state; + fuse_in_header_t *finh; + fuse_private_t *priv = NULL; + struct fuse_attr_out fao; - priv = this->private; - state = frame->root->state; - finh = state->finh; + priv = this->private; + state = frame->root->state; + finh = state->finh; - if (op_ret == 0) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": %s() %s => %"PRId64, frame->root->unique, - gf_fop_list[frame->root->op], - state->loc.path ? state->loc.path : "ERR", - prebuf->ia_ino); + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); - postbuf->ia_blksize = this->ctx->page_size; - gf_fuse_stat2attr (postbuf, &fao.attr); + if (op_ret == 0) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": %s() %s => %" PRIu64, frame->root->unique, + gf_fop_list[frame->root->op], + state->loc.path ? state->loc.path : "ERR", prebuf->ia_ino); - fao.attr_valid = calc_timeout_sec (priv->attribute_timeout); - fao.attr_valid_nsec = - calc_timeout_nsec (priv->attribute_timeout); + postbuf->ia_blksize = this->ctx->page_size; + gf_fuse_stat2attr(postbuf, &fao.attr, priv->enable_ino32); + + fao.attr_valid = calc_timeout_sec(priv->attribute_timeout); + fao.attr_valid_nsec = calc_timeout_nsec(priv->attribute_timeout); #if FUSE_KERNEL_MINOR_VERSION >= 9 - priv->proto_minor >= 9 ? - send_fuse_obj (this, finh, &fao) : - send_fuse_data (this, finh, &fao, - FUSE_COMPAT_ATTR_OUT_SIZE); + priv->proto_minor >= 9 + ? send_fuse_obj(this, finh, &fao) + : send_fuse_data(this, finh, &fao, FUSE_COMPAT_ATTR_OUT_SIZE); #else - send_fuse_obj (this, finh, &fao); + send_fuse_obj(this, finh, &fao); #endif - } else { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": %s() %s => -1 (%s)", frame->root->unique, - gf_fop_list[frame->root->op], - state->loc.path ? 
state->loc.path : "ERR", - strerror (op_errno)); + } else { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": %s() %s => -1 (%s)", frame->root->unique, + gf_fop_list[frame->root->op], + state->loc.path ? state->loc.path : "ERR", strerror(op_errno)); - send_fuse_err (this, finh, op_errno); - } + /* facilitate retry from VFS */ + if ((state->fd == NULL) && (op_errno == ENOENT)) + op_errno = ESTALE; - free_fuse_state (state); - STACK_DESTROY (frame->root); + send_fuse_err(this, finh, op_errno); + } - return 0; + free_fuse_state(state); + STACK_DESTROY(frame->root); + + return 0; } +static int +fuse_root_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stat, dict_t *dict, struct iatt *postparent); static int -fuse_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) +fuse_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *buf, dict_t *xdata) { - fuse_state_t *state; - fuse_in_header_t *finh; - fuse_private_t *priv = NULL; - struct fuse_attr_out fao; - - priv = this->private; - state = frame->root->state; - finh = state->finh; - - if (op_ret == 0) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": %s() %s => %"PRId64, frame->root->unique, - gf_fop_list[frame->root->op], - state->loc.path ? 
state->loc.path : "ERR", - buf->ia_ino); - - buf->ia_blksize = this->ctx->page_size; - gf_fuse_stat2attr (buf, &fao.attr); - - fao.attr_valid = calc_timeout_sec (priv->attribute_timeout); - fao.attr_valid_nsec = - calc_timeout_nsec (priv->attribute_timeout); + int32_t ret = 0; + fuse_state_t *state; + fuse_in_header_t *finh; + fuse_private_t *priv = NULL; + struct fuse_attr_out fao; + + priv = this->private; + state = frame->root->state; + finh = state->finh; + + fuse_log_eh(this, + "op_ret: %d, op_errno: %d, %" PRIu64 + ": %s() %s => " + "gfid: %s", + op_ret, op_errno, frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path, + state->loc.inode ? uuid_utoa(state->loc.inode->gfid) : ""); + if (op_ret == 0) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": %s() %s => %" PRIu64, frame->root->unique, + gf_fop_list[frame->root->op], + state->loc.path ? state->loc.path : "ERR", buf->ia_ino); + + buf->ia_blksize = this->ctx->page_size; + gf_fuse_stat2attr(buf, &fao.attr, priv->enable_ino32); + + fao.attr_valid = calc_timeout_sec(priv->attribute_timeout); + fao.attr_valid_nsec = calc_timeout_nsec(priv->attribute_timeout); #if FUSE_KERNEL_MINOR_VERSION >= 9 - priv->proto_minor >= 9 ? - send_fuse_obj (this, finh, &fao) : - send_fuse_data (this, finh, &fao, - FUSE_COMPAT_ATTR_OUT_SIZE); + priv->proto_minor >= 9 + ? send_fuse_obj(this, finh, &fao) + : send_fuse_data(this, finh, &fao, FUSE_COMPAT_ATTR_OUT_SIZE); #else - send_fuse_obj (this, finh, &fao); + send_fuse_obj(this, finh, &fao); #endif - } else { - GF_LOG_OCCASIONALLY ( gf_fuse_conn_err_log, "glusterfs-fuse", - GF_LOG_WARNING, - "%"PRIu64": %s() %s => -1 (%s)", - frame->root->unique, - gf_fop_list[frame->root->op], - state->loc.path ? state->loc.path : "ERR", - strerror (op_errno)); - - send_fuse_err (this, finh, op_errno); + } else { + /* This is moved here from fuse_getattr(). 
It makes sense as + in few cases, like the self-heal processes, some + translators expect a lookup() to come on root inode + (inode number 1). This will make sure we don't fail in any + case, but the positive path will get better performance, + by following common path for all the cases */ + if ((finh->nodeid == 1) && (state->gfid[15] != 1)) { + /* The 'state->gfid[15]' check is added to prevent the + infinite recursions */ + state->gfid[15] = 1; + + ret = fuse_loc_fill(&state->loc, state, finh->nodeid, 0, NULL); + if (ret < 0) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": loc_fill() on / failed", finh->unique); + send_fuse_err(this, finh, ENOENT); + free_fuse_state(state); + return 0; + } + + fuse_gfid_set(state); + + FUSE_FOP(state, fuse_root_lookup_cbk, GF_FOP_LOOKUP, lookup, + &state->loc, state->xdata); + + return 0; } - free_fuse_state (state); - STACK_DESTROY (frame->root); + /* facilitate retry from VFS */ + if ((state->fd == NULL) && (op_errno == ENOENT)) + op_errno = ESTALE; - return 0; -} + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 + ": %s() " + "%s => -1 (%s)", + frame->root->unique, gf_fop_list[frame->root->op], + state->loc.path ? 
state->loc.path : "ERR", strerror(op_errno)); + + send_fuse_err(this, finh, op_errno); + } + free_fuse_state(state); + STACK_DESTROY(frame->root); + + return 0; +} static int -fuse_root_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *stat, dict_t *dict, - struct iatt *postparent) +fuse_root_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stat, dict_t *dict, struct iatt *postparent) { - fuse_attr_cbk (frame, cookie, this, op_ret, op_errno, stat); + fuse_attr_cbk(frame, cookie, this, op_ret, op_errno, stat, dict); - return 0; + return 0; } void -fuse_getattr_resume (fuse_state_t *state) +fuse_getattr_resume(fuse_state_t *state) { - if (!state->fd || IA_ISDIR (state->loc.inode->ia_type)) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": GETATTR %"PRIu64" (%s)", - state->finh->unique, state->finh->nodeid, - state->loc.path); - - FUSE_FOP (state, fuse_attr_cbk, GF_FOP_STAT, - stat, &state->loc); - } else { - - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": FGETATTR %"PRIu64" (%s/%p)", - state->finh->unique, state->finh->nodeid, - state->loc.path, state->fd); - - FUSE_FOP (state, fuse_attr_cbk, GF_FOP_FSTAT, - fstat, state->fd); - } + if (!state->loc.inode && !(state->fd && state->fd->inode)) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "%" PRIu64 ": GETATTR %" PRIu64 " (%s) resolution failed", + state->finh->unique, state->finh->nodeid, + uuid_utoa(state->resolve.gfid)); + + /* facilitate retry from VFS */ + if ((state->fd == NULL) && (state->resolve.op_errno == ENOENT)) + state->resolve.op_errno = ESTALE; + + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + free_fuse_state(state); + return; + } + + if (state->fd == NULL && !IA_ISDIR(state->loc.inode->ia_type)) { + state->fd = fd_lookup(state->loc.inode, state->finh->pid); + + if (state->fd == NULL) + state->fd = 
fd_lookup(state->loc.inode, 0); + } + + if (!state->fd) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": GETATTR %" PRIu64 " (%s)", state->finh->unique, + state->finh->nodeid, state->loc.path); + + FUSE_FOP(state, fuse_attr_cbk, GF_FOP_STAT, stat, &state->loc, + state->xdata); + } else { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": FGETATTR %" PRIu64 " (%s/%p)", state->finh->unique, + state->finh->nodeid, state->loc.path, state->fd); + + FUSE_FOP(state, fuse_attr_cbk, GF_FOP_FSTAT, fstat, state->fd, + state->xdata); + } } static void -fuse_getattr (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_getattr(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - fuse_state_t *state; - fd_t *fd = NULL; - int32_t ret = -1; - - GET_STATE (this, finh, state); - - if (finh->nodeid == 1) { - state->gfid[15] = 1; - - ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL); - if (ret < 0) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": GETATTR on / (fuse_loc_fill() failed)", - finh->unique); - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); - return; - } - - fuse_gfid_set (state); - - FUSE_FOP (state, fuse_root_lookup_cbk, GF_FOP_LOOKUP, - lookup, &state->loc, state->dict); - return; - } +#if FUSE_KERNEL_MINOR_VERSION >= 9 + struct fuse_getattr_in *fgi = msg; + fuse_private_t *priv = NULL; +#endif + fuse_state_t *state; + int ret = -1; - ret = fuse_loc_fill (&state->loc, state, state->finh->nodeid, 0, NULL); + GET_STATE(this, finh, state); +#if FUSE_KERNEL_MINOR_VERSION >= 9 + priv = this->private; + if (priv->proto_minor >= 9 && fgi->getattr_flags & FUSE_GETATTR_FH) + state->fd = fd_ref((fd_t *)(uintptr_t)fgi->fh); +#endif + if (finh->nodeid == 1) { + state->gfid[15] = 1; - if (!state->loc.inode) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": GETATTR %"PRIu64" (%s) (fuse_loc_fill() returned NULL inode)", - state->finh->unique, state->finh->nodeid, state->loc.path); - 
send_fuse_err (state->this, state->finh, ENOENT); - free_fuse_state (state); - return; + ret = fuse_loc_fill(&state->loc, state, finh->nodeid, 0, NULL); + if (ret < 0) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": GETATTR on / (fuse_loc_fill() failed)", + finh->unique); + send_fuse_err(this, finh, ESTALE); + free_fuse_state(state); + return; } - fd = fd_lookup (state->loc.inode, state->finh->pid); - state->fd = fd; - if (!fd || IA_ISDIR (state->loc.inode->ia_type)) { - /* this is the @ret of fuse_loc_fill, checked here - to permit fstat() to happen even when fuse_loc_fill fails - */ - if (ret < 0) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": GETATTR %"PRIu64" (fuse_loc_fill() failed)", - state->finh->unique, state->finh->nodeid); - send_fuse_err (state->this, state->finh, ENOENT); - free_fuse_state (state); - return; - } - - if (state->fd) - fd_unref (state->fd); + fuse_gfid_set(state); - state->fd = NULL; - } + FUSE_FOP(state, fuse_root_lookup_cbk, GF_FOP_LOOKUP, lookup, + &state->loc, state->xdata); + return; + } - uuid_copy (state->resolve.gfid, state->loc.inode->gfid); - if (state->loc.path) - state->resolve.path = gf_strdup (state->loc.path); + if (state->fd) + fuse_resolve_fd_init(state, &state->resolve, state->fd); + else + fuse_resolve_inode_init(state, &state->resolve, state->finh->nodeid); - fuse_resolve_and_resume (state, fuse_getattr_resume); + fuse_resolve_and_resume(state, fuse_getattr_resume); } - static int32_t -fuse_fd_inherit_directio (xlator_t *this, fd_t *fd, struct fuse_open_out *foo) -{ - int32_t ret = 0; - fuse_fd_ctx_t *fdctx = NULL, *tmp_fdctx = NULL; - fd_t *tmp_fd = NULL; - uint64_t val = 0; - - GF_VALIDATE_OR_GOTO_WITH_ERROR ("glusterfs-fuse", this, out, ret, - -EINVAL); - GF_VALIDATE_OR_GOTO_WITH_ERROR ("glusterfs-fuse", fd, out, ret, - -EINVAL); - GF_VALIDATE_OR_GOTO_WITH_ERROR ("glusterfs-fuse", foo, out, ret, - -EINVAL); - - fdctx = fuse_fd_ctx_check_n_create (fd, this); - if (!fdctx) { - ret = -ENOMEM; 
- goto out; - } +fuse_fd_inherit_directio(xlator_t *this, fd_t *fd, struct fuse_open_out *foo) +{ + int32_t ret = 0; + fuse_fd_ctx_t *fdctx = NULL, *tmp_fdctx = NULL; + fd_t *tmp_fd = NULL; - tmp_fd = fd_lookup (fd->inode, 0); - if (tmp_fd) { - ret = fd_ctx_get (tmp_fd, this, &val); - if (!ret) { - tmp_fdctx = (fuse_fd_ctx_t *)(unsigned long)val; - if (tmp_fdctx) { - foo->open_flags &= ~FOPEN_DIRECT_IO; - foo->open_flags |= (tmp_fdctx->open_flags - & FOPEN_DIRECT_IO); - } - } - } + GF_VALIDATE_OR_GOTO_WITH_ERROR("glusterfs-fuse", this, out, ret, -EINVAL); + GF_VALIDATE_OR_GOTO_WITH_ERROR("glusterfs-fuse", fd, out, ret, -EINVAL); + GF_VALIDATE_OR_GOTO_WITH_ERROR("glusterfs-fuse", foo, out, ret, -EINVAL); - fdctx->open_flags |= (foo->open_flags & FOPEN_DIRECT_IO); + fdctx = fuse_fd_ctx_get(this, fd); + if (!fdctx) { + ret = -ENOMEM; + goto out; + } - if (tmp_fd != NULL) { - fd_unref (tmp_fd); + tmp_fd = fd_lookup(fd->inode, 0); + if (tmp_fd) { + tmp_fdctx = fuse_fd_ctx_get(this, tmp_fd); + if (tmp_fdctx) { + foo->open_flags &= ~FOPEN_DIRECT_IO; + foo->open_flags |= (tmp_fdctx->open_flags & FOPEN_DIRECT_IO); } + } - ret = 0; + fdctx->open_flags |= (foo->open_flags & FOPEN_DIRECT_IO); + + if (tmp_fd != NULL) { + fd_unref(tmp_fd); + } + + ret = 0; out: - return ret; + return ret; } +gf_boolean_t +direct_io_mode(dict_t *xdata) +{ + if (xdata && dict_get(xdata, "direct-io-mode")) + return _gf_true; + return _gf_false; +} static int -fuse_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd) -{ - fuse_state_t *state = NULL; - fuse_in_header_t *finh = NULL; - fuse_private_t *priv = NULL; - int32_t ret = 0; - struct fuse_open_out foo = {0, }; - - priv = this->private; - state = frame->root->state; - finh = state->finh; - - if (op_ret >= 0) { - foo.fh = (uintptr_t) fd; - foo.open_flags = 0; - - if (!IA_ISDIR (fd->inode->ia_type)) { - if (((priv->direct_io_mode == 2) - && ((state->flags & O_ACCMODE) != O_RDONLY)) - || 
(priv->direct_io_mode == 1)) - foo.open_flags |= FOPEN_DIRECT_IO; +fuse_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, fd_t *fd, dict_t *xdata) +{ + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; + fuse_private_t *priv = NULL; + int32_t ret = 0; + struct fuse_open_out foo = { + 0, + }; + + priv = this->private; + state = frame->root->state; + finh = state->finh; + + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); + + if (op_ret >= 0) { + foo.fh = (uintptr_t)fd; + foo.open_flags = 0; + + if (!IA_ISDIR(fd->inode->ia_type)) { + if (((priv->direct_io_mode == 2) && + ((state->flags & O_ACCMODE) != O_RDONLY)) || + (priv->direct_io_mode == 1) || (direct_io_mode(xdata))) + foo.open_flags |= FOPEN_DIRECT_IO; #ifdef GF_DARWIN_HOST_OS - /* In Linux: by default, buffer cache - * is purged upon open, setting - * FOPEN_KEEP_CACHE implies no-purge - * - * In MacFUSE: by default, buffer cache - * is left intact upon open, setting - * FOPEN_PURGE_UBC implies purge - * - * [[Interesting...]] - */ - foo.open_flags |= FOPEN_PURGE_UBC; + /* In Linux: by default, buffer cache + * is purged upon open, setting + * FOPEN_KEEP_CACHE implies no-purge + * + * In MacFUSE: by default, buffer cache + * is left intact upon open, setting + * FOPEN_PURGE_UBC implies purge + * + * [[Interesting...]] + */ + if (!priv->fopen_keep_cache) + foo.open_flags |= FOPEN_PURGE_UBC; +#else + /* + * If fopen-keep-cache is enabled, we set the associated + * flag here such that files are not invalidated on open. + * File invalidations occur either in fuse or explicitly + * when the cache is set invalid on the inode. 
+ */ + if (priv->fopen_keep_cache) + foo.open_flags |= FOPEN_KEEP_CACHE; #endif - } + } - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": %s() %s => %p", frame->root->unique, - gf_fop_list[frame->root->op], state->loc.path, fd); - - ret = fuse_fd_inherit_directio (this, fd, &foo); - if (ret < 0) { - op_errno = -ret; - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "cannot inherit direct-io values from fds " - "already opened"); - goto err; - } + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": %s() %s => %p", + frame->root->unique, gf_fop_list[frame->root->op], + state->loc.path, fd); - fd_ref (fd); + ret = fuse_fd_inherit_directio(this, fd, &foo); + if (ret < 0) { + op_errno = -ret; + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "cannot inherit direct-io values for fd " + "(ptr:%p inode-gfid:%s) from fds already " + "opened", + fd, uuid_utoa(fd->inode->gfid)); + goto err; + } - if (send_fuse_obj (this, finh, &foo) == ENOENT) { - gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "open(%s) got EINTR", state->loc.path); - fd_unref (fd); - goto out; - } + if (send_fuse_obj(this, finh, &foo) == ENOENT) { + gf_log("glusterfs-fuse", GF_LOG_DEBUG, "open(%s) got EINTR", + state->loc.path); + gf_fd_put(priv->fdtable, state->fd_no); + goto out; + } - fd_bind (fd); - } else { - err: - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": %s() %s => -1 (%s)", frame->root->unique, - gf_fop_list[frame->root->op], state->loc.path, - strerror (op_errno)); + fd_bind(fd); + } else { + err: + /* OPEN(DIR) being an operation on inode should never fail with + * ENOENT. If gfid is not present, the appropriate error is + * ESTALE. 
+ */ + if (op_errno == ENOENT) + op_errno = ESTALE; - send_fuse_err (this, finh, op_errno); - } + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": %s() %s => -1 (%s)", frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path, + strerror(op_errno)); + + send_fuse_err(this, finh, op_errno); + gf_fd_put(priv->fdtable, state->fd_no); + } out: - free_fuse_state (state); - STACK_DESTROY (frame->root); - return 0; + free_fuse_state(state); + STACK_DESTROY(frame->root); + return 0; } - static void -fuse_do_truncate (fuse_state_t *state, size_t size) +fuse_do_truncate(fuse_state_t *state) { - if (state->fd) { - FUSE_FOP (state, fuse_truncate_cbk, GF_FOP_FTRUNCATE, - ftruncate, state->fd, size); - } else { - FUSE_FOP (state, fuse_truncate_cbk, GF_FOP_TRUNCATE, - truncate, &state->loc, size); - } - - return; + if (state->fd) { + FUSE_FOP(state, fuse_truncate_cbk, GF_FOP_FTRUNCATE, ftruncate, + state->fd, state->off, state->xdata); + } else { + FUSE_FOP(state, fuse_truncate_cbk, GF_FOP_TRUNCATE, truncate, + &state->loc, state->off, state->xdata); + } + + return; } - static int -fuse_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *statpre, struct iatt *statpost) +fuse_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) { - fuse_state_t *state; - fuse_in_header_t *finh; - fuse_private_t *priv = NULL; - struct fuse_attr_out fao; - - int op_done = 0; - - priv = this->private; - state = frame->root->state; - finh = state->finh; - - if (op_ret == 0) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": %s() %s => %"PRId64, frame->root->unique, - gf_fop_list[frame->root->op], - state->loc.path ? 
state->loc.path : "ERR", - statpost->ia_ino); - - statpost->ia_blksize = this->ctx->page_size; - gf_fuse_stat2attr (statpost, &fao.attr); - - fao.attr_valid = calc_timeout_sec (priv->attribute_timeout); - fao.attr_valid_nsec = - calc_timeout_nsec (priv->attribute_timeout); - - if (state->truncate_needed) { - fuse_do_truncate (state, state->size); - } else { + fuse_state_t *state; + fuse_in_header_t *finh; + fuse_private_t *priv = NULL; + struct fuse_attr_out fao; + + int op_done = 0; + + priv = this->private; + state = frame->root->state; + finh = state->finh; + + fuse_log_eh(this, + "op_ret: %d, op_errno: %d, %" PRIu64 + ", %s() %s => " + "gfid: %s", + op_ret, op_errno, frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path, + state->loc.inode ? uuid_utoa(state->loc.inode->gfid) : ""); + + if (op_ret == 0) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": %s() %s => %" PRIu64, frame->root->unique, + gf_fop_list[frame->root->op], + state->loc.path ? state->loc.path : "ERR", statpost->ia_ino); + + statpost->ia_blksize = this->ctx->page_size; + gf_fuse_stat2attr(statpost, &fao.attr, priv->enable_ino32); + + fao.attr_valid = calc_timeout_sec(priv->attribute_timeout); + fao.attr_valid_nsec = calc_timeout_nsec(priv->attribute_timeout); + + if (state->truncate_needed) { + fuse_do_truncate(state); + } else { #if FUSE_KERNEL_MINOR_VERSION >= 9 - priv->proto_minor >= 9 ? - send_fuse_obj (this, finh, &fao) : - send_fuse_data (this, finh, &fao, - FUSE_COMPAT_ATTR_OUT_SIZE); + priv->proto_minor >= 9 + ? send_fuse_obj(this, finh, &fao) + : send_fuse_data(this, finh, &fao, FUSE_COMPAT_ATTR_OUT_SIZE); #else - send_fuse_obj (this, finh, &fao); + send_fuse_obj(this, finh, &fao); #endif - op_done = 1; - } - } else { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": %s() %s => -1 (%s)", frame->root->unique, - gf_fop_list[frame->root->op], - state->loc.path ? 
state->loc.path : "ERR", - strerror (op_errno)); - - send_fuse_err (this, finh, op_errno); - op_done = 1; + op_done = 1; } + } else { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": %s() %s => -1 (%s)", frame->root->unique, + gf_fop_list[frame->root->op], + state->loc.path ? state->loc.path : "ERR", strerror(op_errno)); - if (op_done) { - free_fuse_state (state); - } + /* facilitate retry from VFS */ + if ((state->fd == NULL) && (op_errno == ENOENT)) + op_errno = ESTALE; - STACK_DESTROY (frame->root); + send_fuse_err(this, finh, op_errno); + op_done = 1; + } - return 0; -} + if (op_done) { + free_fuse_state(state); + } + STACK_DESTROY(frame->root); + + return 0; +} static int32_t -fattr_to_gf_set_attr (int32_t valid) +fattr_to_gf_set_attr(int32_t valid) { - int32_t gf_valid = 0; + int32_t gf_valid = 0; - if (valid & FATTR_MODE) - gf_valid |= GF_SET_ATTR_MODE; + if (valid & FATTR_MODE) + gf_valid |= GF_SET_ATTR_MODE; - if (valid & FATTR_UID) - gf_valid |= GF_SET_ATTR_UID; + if (valid & FATTR_UID) + gf_valid |= GF_SET_ATTR_UID; - if (valid & FATTR_GID) - gf_valid |= GF_SET_ATTR_GID; + if (valid & FATTR_GID) + gf_valid |= GF_SET_ATTR_GID; - if (valid & FATTR_ATIME) - gf_valid |= GF_SET_ATTR_ATIME; + if (valid & FATTR_ATIME) + gf_valid |= GF_SET_ATTR_ATIME; - if (valid & FATTR_MTIME) - gf_valid |= GF_SET_ATTR_MTIME; + if (valid & FATTR_MTIME) + gf_valid |= GF_SET_ATTR_MTIME; - if (valid & FATTR_SIZE) - gf_valid |= GF_SET_ATTR_SIZE; +#if FUSE_KERNEL_MINOR_VERSION >= 23 + if (valid & FATTR_CTIME) + gf_valid |= GF_SET_ATTR_CTIME; +#endif - return gf_valid; -} +#if FUSE_KERNEL_MINOR_VERSION >= 9 + if (valid & FATTR_ATIME_NOW) + gf_valid |= GF_ATTR_ATIME_NOW; + if (valid & FATTR_MTIME_NOW) + gf_valid |= GF_ATTR_MTIME_NOW; +#endif -#define FATTR_MASK (FATTR_SIZE \ - | FATTR_UID | FATTR_GID \ - | FATTR_ATIME | FATTR_MTIME \ - | FATTR_MODE) + if (valid & FATTR_SIZE) + gf_valid |= GF_SET_ATTR_SIZE; + + return gf_valid; +} + +#define FATTR_MASK \ + (FATTR_SIZE | 
FATTR_UID | FATTR_GID | FATTR_ATIME | FATTR_MTIME | \ + FATTR_MODE) void -fuse_setattr_resume (fuse_state_t *state) -{ - if ((state->valid & (FATTR_MASK)) != FATTR_SIZE) { - if (state->fd && - !((state->valid & FATTR_ATIME) || - (state->valid & FATTR_MTIME))) { - /* - there is no "futimes" call, so don't send - fsetattr if ATIME or MTIME is set - */ - - FUSE_FOP (state, fuse_setattr_cbk, GF_FOP_FSETATTR, - fsetattr, state->fd, &state->attr, - fattr_to_gf_set_attr (state->valid)); - } else { - FUSE_FOP (state, fuse_setattr_cbk, GF_FOP_SETATTR, - setattr, &state->loc, &state->attr, - fattr_to_gf_set_attr (state->valid)); - } +fuse_setattr_resume(fuse_state_t *state) +{ + if (!state->fd && !state->loc.inode) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "%" PRIu64 ": SETATTR %" PRIu64 " (%s) resolution failed", + state->finh->unique, state->finh->nodeid, + uuid_utoa(state->resolve.gfid)); + + /* facilitate retry from VFS */ + if ((state->fd == NULL) && (state->resolve.op_errno == ENOENT)) + state->resolve.op_errno = ESTALE; + + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + free_fuse_state(state); + return; + } + + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": SETATTR (%" PRIu64 ")%s", state->finh->unique, + state->finh->nodeid, state->loc.path); + +#ifdef GF_TEST_FFOP + /* this is for calls like 'fchmod()' */ + if (!state->fd) + state->fd = fd_lookup(state->loc.inode, state->finh->pid); +#endif /* GF_TEST_FFOP */ + + if ((state->valid & (FATTR_MASK)) != FATTR_SIZE) { + if (state->fd && + !((state->valid & FATTR_ATIME) || (state->valid & FATTR_MTIME) +#if FUSE_KERNEL_MINOR_VERSION >= 23 + || (state->valid & FATTR_CTIME) +#endif + )) { + /* + there is no "futimes" call, so don't send + fsetattr if ATIME or MTIME is set + */ + + FUSE_FOP(state, fuse_setattr_cbk, GF_FOP_FSETATTR, fsetattr, + state->fd, &state->attr, + fattr_to_gf_set_attr(state->valid), state->xdata); } else { - fuse_do_truncate (state, state->size); + FUSE_FOP(state, 
fuse_setattr_cbk, GF_FOP_SETATTR, setattr, + &state->loc, &state->attr, + fattr_to_gf_set_attr(state->valid), state->xdata); } - + } else { + fuse_do_truncate(state); + } } static void -fuse_setattr (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_setattr(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - struct fuse_setattr_in *fsi = msg; + struct fuse_setattr_in *fsi = msg; - fuse_private_t *priv = NULL; - fuse_state_t *state = NULL; - int32_t ret = -1; +#if FUSE_KERNEL_MINOR_VERSION >= 9 + fuse_private_t *priv = NULL; +#endif + fuse_state_t *state = NULL; + + GET_STATE(this, finh, state); + + if (fsi->valid & FATTR_FH && !(fsi->valid & (FATTR_ATIME | FATTR_MTIME))) { + /* We need no loc if kernel sent us an fd and + * we are not fiddling with times */ + state->fd = FH_TO_FD(fsi->fh); + fuse_resolve_fd_init(state, &state->resolve, state->fd); + } else { + fuse_resolve_inode_init(state, &state->resolve, finh->nodeid); + } + + /* + * This is just stub code demonstrating how to retrieve + * lock_owner in setattr, according to the FUSE proto. + * We do not make use of ATM. Its purpose is supporting + * mandatory locking, but getting that right is further + * down the road. Cf. 
+ * + * http://thread.gmane.org/gmane.comp.file-systems.fuse.devel/ + * 4962/focus=4982 + * + * http://git.kernel.org/?p=linux/kernel/git/torvalds/ + * linux-2.6.git;a=commit;h=v2.6.23-5896-gf333211 + */ +#if FUSE_KERNEL_MINOR_VERSION >= 9 + priv = this->private; + if (priv->proto_minor >= 9 && fsi->valid & FATTR_LOCKOWNER) + state->lk_owner = fsi->lock_owner; +#endif - GET_STATE (this, finh, state); + state->valid = fsi->valid; - if (fsi->valid & FATTR_FH && - !(fsi->valid & (FATTR_ATIME|FATTR_MTIME))) - /* We need no loc if kernel sent us an fd and - * we are not fiddling with times */ - ret = 1; - else - ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, - NULL); + if ((fsi->valid & (FATTR_MASK)) != FATTR_SIZE) { + if (fsi->valid & FATTR_SIZE) { + state->off = fsi->size; + state->truncate_needed = _gf_true; + } - /* - * This is just stub code demonstrating how to retrieve - * lock_owner in setattr, according to the FUSE proto. - * We do not make use of ATM. Its purpose is supporting - * mandatory locking, but getting that right is further - * down the road. Cf. 
- * - * http://thread.gmane.org/gmane.comp.file-systems.fuse.devel/ - * 4962/focus=4982 - * - * http://git.kernel.org/?p=linux/kernel/git/torvalds/ - * linux-2.6.git;a=commit;h=v2.6.23-5896-gf333211 - */ - priv = this->private; -#if FUSE_KERNEL_MINOR_VERSION >= 9 - if (priv->proto_minor >= 9 && fsi->valid & FATTR_LOCKOWNER) - state->lk_owner = fsi->lock_owner; + state->attr.ia_size = fsi->size; + state->attr.ia_atime = fsi->atime; + state->attr.ia_mtime = fsi->mtime; +#if FUSE_KERNEL_MINOR_VERSION >= 23 + state->attr.ia_ctime = fsi->ctime; +#endif + state->attr.ia_atime_nsec = fsi->atimensec; + state->attr.ia_mtime_nsec = fsi->mtimensec; +#if FUSE_KERNEL_MINOR_VERSION >= 23 + state->attr.ia_ctime_nsec = fsi->ctimensec; #endif - if ((state->loc.inode == NULL && ret == 0) || - (ret < 0)) { + state->attr.ia_prot = ia_prot_from_st_mode(fsi->mode); + state->attr.ia_uid = fsi->uid; + state->attr.ia_gid = fsi->gid; + } else { + state->off = fsi->size; + } - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": SETATTR %s (fuse_loc_fill() failed)", - finh->unique, state->loc.path); + fuse_resolve_and_resume(state, fuse_setattr_resume); +} - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); +static int +fuse_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; - return; - } + GF_ASSERT(frame); + GF_ASSERT(frame->root); - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": SETATTR (%"PRIu64")%s", finh->unique, - finh->nodeid, state->loc.path); + state = frame->root->state; + finh = state->finh; - state->valid = fsi->valid; + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); - if (fsi->valid & FATTR_FH) { - state->fd = FH_TO_FD (fsi->fh); - } + if (op_ret == 0) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": %s() %s => 0", + frame->root->unique, gf_fop_list[frame->root->op], + state->loc.path ? 
state->loc.path : "ERR"); - if ((fsi->valid & (FATTR_MASK)) != FATTR_SIZE) { - if (fsi->valid & FATTR_SIZE) { - state->size = fsi->size; - state->truncate_needed = _gf_true; - } + send_fuse_err(this, finh, 0); + } else { + gf_log("glusterfs-fuse", + (ENODATA == op_errno) ? GF_LOG_DEBUG : GF_LOG_WARNING, + "%" PRIu64 ": %s() of %s on %s => -1 (%s)", frame->root->unique, + gf_fop_list[frame->root->op], state->name ? state->name : "", + state->loc.path ? state->loc.path : "ERR", strerror(op_errno)); - state->attr.ia_size = fsi->size; - state->attr.ia_atime = fsi->atime; - state->attr.ia_mtime = fsi->mtime; - state->attr.ia_atime_nsec = fsi->atimensec; - state->attr.ia_mtime_nsec = fsi->mtimensec; + /* facilitate retry from VFS */ + if ((state->fd == NULL) && (op_errno == ENOENT)) + op_errno = ESTALE; - state->attr.ia_prot = ia_prot_from_st_mode (fsi->mode); - state->attr.ia_uid = fsi->uid; - state->attr.ia_gid = fsi->gid; - } else { - state->size = fsi->size; - } + send_fuse_err(this, finh, op_errno); + } - if (!state->fd) { - uuid_copy (state->resolve.gfid, state->loc.inode->gfid); - state->resolve.path = gf_strdup (state->loc.path); - } + free_fuse_state(state); + STACK_DESTROY(frame->root); - fuse_resolve_and_resume (state, fuse_setattr_resume); + return 0; } - static int -fuse_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +fuse_err_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { - fuse_state_t *state = frame->root->state; - fuse_in_header_t *finh = state->finh; + fuse_state_t *state = frame->root->state; + fuse_in_header_t *finh = state->finh; - if (op_ret == 0) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": %s() %s => 0", frame->root->unique, - gf_fop_list[frame->root->op], - state->loc.path ? 
state->loc.path : "ERR"); + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); - send_fuse_err (this, finh, 0); - } else { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": %s() %s => -1 (%s)", - frame->root->unique, - gf_fop_list[frame->root->op], - state->loc.path ? state->loc.path : "ERR", - strerror (op_errno)); - - send_fuse_err (this, finh, op_errno); + if (op_ret == 0) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": %s() %s => 0", + frame->root->unique, gf_fop_list[frame->root->op], + state->loc.path ? state->loc.path : "ERR"); + + send_fuse_err(this, finh, 0); + } else { + if (GF_IGNORE_IF_GSYNCD_SAFE_ERROR(frame, op_errno)) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": %s() %s => -1 (%s)", frame->root->unique, + gf_fop_list[frame->root->op], + state->loc.path ? state->loc.path : "ERR", + strerror(op_errno)); } - free_fuse_state (state); - STACK_DESTROY (frame->root); + /* facilitate retry from VFS */ + if ((state->fd == NULL) && (op_errno == ENOENT)) + op_errno = ESTALE; - return 0; -} + send_fuse_err(this, finh, op_errno); + } + free_fuse_state(state); + STACK_DESTROY(frame->root); -static int -fuse_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) -{ - return fuse_err_cbk (frame, cookie, this, op_ret, op_errno); + return 0; } - static int -fuse_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +fuse_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - if (op_ret == -1 && op_errno == ENOTSUP) - GF_LOG_OCCASIONALLY (gf_fuse_xattr_enotsup_log, - "glusterfs-fuse", GF_LOG_CRITICAL, - "extended attribute not supported " - "by the backend storage"); + fuse_private_t *priv = this->private; - return fuse_err_cbk (frame, cookie, this, op_ret, op_errno); -} + if (priv->flush_handle_interrupt) { + if 
(fuse_interrupt_finish_fop(frame, this, _gf_false, NULL)) { + return 0; + } + } + return fuse_err_cbk(frame, cookie, this, op_ret, op_errno, xdata); +} static int -fuse_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent) +fuse_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - fuse_state_t *state = NULL; - fuse_in_header_t *finh = NULL; - - state = frame->root->state; - finh = state->finh; - - if (op_ret == 0) - inode_unlink (state->loc.inode, state->loc.parent, - state->loc.name); - - if (op_ret == 0) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": %s() %s => 0", frame->root->unique, - gf_fop_list[frame->root->op], state->loc.path); - - send_fuse_err (this, finh, 0); - } else { - gf_log ("glusterfs-fuse", - op_errno == ENOTEMPTY ? GF_LOG_DEBUG : GF_LOG_WARNING, - "%"PRIu64": %s() %s => -1 (%s)", frame->root->unique, - gf_fop_list[frame->root->op], state->loc.path, - strerror (op_errno)); + return fuse_err_cbk(frame, cookie, this, op_ret, op_errno, xdata); +} - send_fuse_err (this, finh, op_errno); - } +static int +fuse_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + if (op_ret == -1 && op_errno == ENOTSUP) + GF_LOG_OCCASIONALLY(gf_fuse_xattr_enotsup_log, "glusterfs-fuse", + GF_LOG_CRITICAL, + "extended attribute not supported " + "by the backend storage"); - free_fuse_state (state); - STACK_DESTROY (frame->root); + return fuse_err_cbk(frame, cookie, this, op_ret, op_errno, xdata); +} - return 0; +static int +fuse_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; + + state = frame->root->state; + finh = 
state->finh; + + fuse_log_eh(this, + "op_ret: %d, op_errno: %d, %" PRIu64 + ": %s() %s => " + "gfid: %s", + op_ret, op_errno, frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path, + state->loc.inode ? uuid_utoa(state->loc.inode->gfid) : ""); + + if (op_ret == 0) { + inode_unlink(state->loc.inode, state->loc.parent, state->loc.name); + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": %s() %s => 0", + frame->root->unique, gf_fop_list[frame->root->op], + state->loc.path); + + send_fuse_err(this, finh, 0); + } else { + if (GF_IGNORE_IF_GSYNCD_SAFE_ERROR(frame, op_errno)) { + gf_log("glusterfs-fuse", + op_errno == ENOTEMPTY ? GF_LOG_DEBUG : GF_LOG_WARNING, + "%" PRIu64 ": %s() %s => -1 (%s)", frame->root->unique, + gf_fop_list[frame->root->op], state->loc.path, + strerror(op_errno)); + } + send_fuse_err(this, finh, op_errno); + } + + free_fuse_state(state); + STACK_DESTROY(frame->root); + + return 0; } void -fuse_access_resume (fuse_state_t *state) +fuse_access_resume(fuse_state_t *state) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64" ACCESS %s/%"PRIu64" mask=%d", - state->finh->unique, state->loc.path, - state->finh->nodeid, state->mask); + if (!state->loc.inode) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "%" PRIu64 ": ACCESS %" PRIu64 " (%s) resolution failed", + state->finh->unique, state->finh->nodeid, + uuid_utoa(state->resolve.gfid)); + + /* facilitate retry from VFS */ + if (state->resolve.op_errno == ENOENT) + state->resolve.op_errno = ESTALE; + + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + free_fuse_state(state); + return; + } - FUSE_FOP (state, fuse_err_cbk, GF_FOP_ACCESS, access, - &state->loc, state->mask); + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 " ACCESS %s/%" PRIu64 " mask=%d", state->finh->unique, + state->loc.path, state->finh->nodeid, state->mask); + FUSE_FOP(state, fuse_err_cbk, GF_FOP_ACCESS, access, &state->loc, + state->mask, state->xdata); } static void -fuse_access (xlator_t 
*this, fuse_in_header_t *finh, void *msg) +fuse_access(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - struct fuse_access_in *fai = msg; + struct fuse_access_in *fai = msg; + fuse_state_t *state = NULL; - fuse_state_t *state = NULL; - int32_t ret = -1; + GET_STATE(this, finh, state); - GET_STATE (this, finh, state); - - ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL); - if ((state->loc.inode == NULL) || - (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": ACCESS %"PRIu64" (%s) (fuse_loc_fill() failed)", - finh->unique, finh->nodeid, state->loc.path); - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); - return; - } + fuse_resolve_inode_init(state, &state->resolve, finh->nodeid); - state->mask = fai->mask; + state->mask = fai->mask; - uuid_copy (state->resolve.gfid, state->loc.inode->gfid); - state->resolve.path = gf_strdup (state->loc.path); + fuse_resolve_and_resume(state, fuse_access_resume); - fuse_resolve_and_resume (state, fuse_access_resume); - return; + return; } - static int -fuse_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, const char *linkname, - struct iatt *buf) +fuse_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, const char *linkname, + struct iatt *buf, dict_t *xdata) { - fuse_state_t *state = NULL; - fuse_in_header_t *finh = NULL; - - state = frame->root->state; - finh = state->finh; - - if (op_ret > 0) { - ((char *)linkname)[op_ret] = '\0'; - - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": %s => %s", frame->root->unique, - state->loc.path, linkname); - - send_fuse_data (this, finh, (void *)linkname, op_ret + 1); - } else { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": %s => -1 (%s)", frame->root->unique, - state->loc.path, strerror (op_errno)); - - send_fuse_err (this, finh, op_errno); - } - - free_fuse_state (state); - STACK_DESTROY 
(frame->root); - - return 0; + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; + + state = frame->root->state; + finh = state->finh; + + fuse_log_eh(this, + "op_ret: %d, op_errno: %d %" PRIu64 + ": %s() => %s" + " linkname: %s, gfid: %s", + op_ret, op_errno, frame->root->unique, + gf_fop_list[frame->root->op], state->loc.gfid, linkname, + uuid_utoa(state->loc.gfid)); + + if (op_ret > 0) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": %s => %s (size:%d)", frame->root->unique, + state->loc.path, linkname, op_ret); + send_fuse_data(this, finh, (void *)linkname, op_ret); + } else { + /* facilitate retry from VFS */ + if (op_errno == ENOENT) + op_errno = ESTALE; + + gf_log("glusterfs-fuse", GF_LOG_WARNING, "%" PRIu64 ": %s => -1 (%s)", + frame->root->unique, state->loc.path, strerror(op_errno)); + + send_fuse_err(this, finh, op_errno); + } + + free_fuse_state(state); + STACK_DESTROY(frame->root); + + return 0; } void -fuse_readlink_resume (fuse_state_t *state) +fuse_readlink_resume(fuse_state_t *state) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64" READLINK %s/%s", state->finh->unique, - state->loc.path, uuid_utoa (state->loc.inode->gfid)); + if (!state->loc.inode) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "READLINK %" PRIu64 " (%s) resolution failed", + state->finh->unique, uuid_utoa(state->resolve.gfid)); + + /* facilitate retry from VFS */ + if (state->resolve.op_errno == ENOENT) + state->resolve.op_errno = ESTALE; + + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + free_fuse_state(state); + return; + } - FUSE_FOP (state, fuse_readlink_cbk, GF_FOP_READLINK, - readlink, &state->loc, 4096); + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 " READLINK %s/%s", + state->finh->unique, state->loc.path, + uuid_utoa(state->loc.inode->gfid)); + FUSE_FOP(state, fuse_readlink_cbk, GF_FOP_READLINK, readlink, &state->loc, + 4096, state->xdata); } static void -fuse_readlink (xlator_t *this, fuse_in_header_t *finh, void 
*msg) -{ - fuse_state_t *state = NULL; - int32_t ret = -1; - - GET_STATE (this, finh, state); - ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL); - if ((state->loc.inode == NULL) || - (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64" READLINK %s (fuse_loc_fill() returned NULL inode)", - finh->unique, state->loc.path); - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); - return; - } +fuse_readlink(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +{ + fuse_state_t *state = NULL; - uuid_copy (state->resolve.gfid, state->loc.inode->gfid); - state->resolve.path = gf_strdup (state->loc.path); + GET_STATE(this, finh, state); - fuse_resolve_and_resume (state, fuse_readlink_resume); + fuse_resolve_inode_init(state, &state->resolve, finh->nodeid); - return; + fuse_resolve_and_resume(state, fuse_readlink_resume); + + return; } void -fuse_mknod_resume (fuse_state_t *state) -{ - if (!state->loc.parent) { - gf_log ("fuse", GF_LOG_ERROR, "failed to resolve path %s", - state->loc.path); - send_fuse_err (state->this, state->finh, ENOENT); - free_fuse_state (state); - return; - } +fuse_mknod_resume(fuse_state_t *state) +{ + if (!state->loc.parent) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "MKNOD %" PRIu64 "/%s (%s/%s) resolution failed", + state->finh->nodeid, state->resolve.bname, + uuid_utoa(state->resolve.gfid), state->resolve.bname); + + /* facilitate retry from VFS */ + if (state->resolve.op_errno == ENOENT) + state->resolve.op_errno = ESTALE; + + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + free_fuse_state(state); + return; + } - if (state->loc.inode) { - gf_log (state->this->name, GF_LOG_DEBUG, "inode already present"); - inode_unref (state->loc.inode); - } + if (state->resolve.op_errno == ENOENT) { + state->resolve.op_ret = 0; + state->resolve.op_errno = 0; + } - state->loc.inode = inode_new (state->loc.parent->table); + if (state->loc.inode) { + gf_log(state->this->name, 
GF_LOG_DEBUG, "inode already present"); + inode_unref(state->loc.inode); + state->loc.inode = NULL; + } - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": MKNOD %s", state->finh->unique, - state->loc.path); + state->loc.inode = inode_new(state->loc.parent->table); - FUSE_FOP (state, fuse_newentry_cbk, GF_FOP_MKNOD, - mknod, &state->loc, state->mode, state->rdev, state->dict); + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": MKNOD %s", + state->finh->unique, state->loc.path); + FUSE_FOP(state, fuse_newentry_cbk, GF_FOP_MKNOD, mknod, &state->loc, + state->mode, state->rdev, state->umask, state->xdata); } static void -fuse_mknod (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_mknod(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - struct fuse_mknod_in *fmi = msg; - char *name = (char *)(fmi + 1); - - fuse_state_t *state = NULL; - fuse_private_t *priv = NULL; - int32_t ret = -1; + struct fuse_mknod_in *fmi = msg; + char *name = (char *)(fmi + 1); - priv = this->private; + fuse_state_t *state = NULL; #if FUSE_KERNEL_MINOR_VERSION >= 12 - if (priv->proto_minor < 12) - name = (char *)msg + FUSE_COMPAT_MKNOD_IN_SIZE; + fuse_private_t *priv = NULL; + + priv = this->private; + if (priv->proto_minor < 12) + name = (char *)msg + FUSE_COMPAT_MKNOD_IN_SIZE; #endif - GET_STATE (this, finh, state); + GET_STATE(this, finh, state); - uuid_generate (state->gfid); + gf_uuid_generate(state->gfid); - ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name); - if (!state->loc.parent || (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64" MKNOD %s (fuse_loc_fill() failed)", - finh->unique, state->loc.path); - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); - return; - } + fuse_resolve_entry_init(state, &state->resolve, finh->nodeid, name); - state->mode = fmi->mode; - state->rdev = fmi->rdev; - - priv = this->private; -#if FUSE_KERNEL_MINOR_VERSION >=12 - if (priv->proto_minor >= 12) - state->mode &= 
~fmi->umask; - if (priv->proto_minor >= 12 && priv->acl) { - state->dict = dict_new (); - if (!state->dict) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "MKNOD Failed to allocate a param dictionary"); - send_fuse_err (this, finh, ENOMEM); - free_fuse_state (state); - return; - } - ret = dict_set_int16 (state->dict, "umask", fmi->umask); - if (ret < 0) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "MKNOD Failed adding umask to request"); - dict_destroy (state->dict); - send_fuse_err (this, finh, ENOMEM); - free_fuse_state (state); - return; - } - ret = dict_set_int16 (state->dict, "mode", fmi->mode); - if (ret < 0) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "MKNOD Failed adding mode to request"); - dict_destroy (state->dict); - send_fuse_err (this, finh, ENOMEM); - free_fuse_state (state); - return; - } - } -#endif + state->mode = fmi->mode; + state->rdev = fmi->rdev; - uuid_copy (state->resolve.pargfid, state->loc.parent->gfid); - state->resolve.bname = gf_strdup (name); - state->resolve.path = gf_strdup (state->loc.path); +#if FUSE_KERNEL_MINOR_VERSION >= 12 + priv = this->private; + FUSE_ENTRY_CREATE(this, priv, finh, state, fmi, "MKNOD"); +#endif - fuse_resolve_and_resume (state, fuse_mknod_resume); + fuse_resolve_and_resume(state, fuse_mknod_resume); - return; + return; } void -fuse_mkdir_resume (fuse_state_t *state) -{ - if (!state->loc.parent) { - gf_log ("fuse", GF_LOG_ERROR, "failed to resolve path %s", - state->loc.path); - send_fuse_err (state->this, state->finh, ENOENT); - free_fuse_state (state); - return; - } +fuse_mkdir_resume(fuse_state_t *state) +{ + if (!state->loc.parent) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "MKDIR %" PRIu64 " (%s/%s) resolution failed", + state->finh->nodeid, uuid_utoa(state->resolve.gfid), + state->resolve.bname); + + /* facilitate retry from VFS */ + if (state->resolve.op_errno == ENOENT) + state->resolve.op_errno = ESTALE; + + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + 
free_fuse_state(state); + return; + } - if (state->loc.inode) { - gf_log (state->this->name, GF_LOG_DEBUG, "inode already present"); - inode_unref (state->loc.inode); - } + if (state->resolve.op_errno == ENOENT) { + state->resolve.op_ret = 0; + state->resolve.op_errno = 0; + } + + if (state->loc.inode) { + gf_log(state->this->name, GF_LOG_DEBUG, "inode already present"); + inode_unref(state->loc.inode); + state->loc.inode = NULL; + } - state->loc.inode = inode_new (state->loc.parent->table); + state->loc.inode = inode_new(state->loc.parent->table); - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": MKDIR %s", state->finh->unique, - state->loc.path); + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": MKDIR %s", + state->finh->unique, state->loc.path); - FUSE_FOP (state, fuse_newentry_cbk, GF_FOP_MKDIR, - mkdir, &state->loc, state->mode, state->dict); + FUSE_FOP(state, fuse_newentry_cbk, GF_FOP_MKDIR, mkdir, &state->loc, + state->mode, state->umask, state->xdata); } static void -fuse_mkdir (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_mkdir(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - struct fuse_mkdir_in *fmi = msg; - char *name = (char *)(fmi + 1); - fuse_private_t *priv = NULL; + struct fuse_mkdir_in *fmi = msg; + char *name = (char *)(fmi + 1); +#if FUSE_KERNEL_MINOR_VERSION >= 12 + fuse_private_t *priv = NULL; +#endif - fuse_state_t *state; - int32_t ret = -1; + fuse_state_t *state; - GET_STATE (this, finh, state); + GET_STATE(this, finh, state); - uuid_generate (state->gfid); + gf_uuid_generate(state->gfid); - ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name); - if (!state->loc.parent || (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64" MKDIR %s (fuse_loc_fill() failed)", - finh->unique, state->loc.path); - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); - return; - } + fuse_resolve_entry_init(state, &state->resolve, finh->nodeid, name); - state->mode = 
fmi->mode; - - priv = this->private; -#if FUSE_KERNEL_MINOR_VERSION >=12 - if (priv->proto_minor >= 12) - state->mode &= ~fmi->umask; - if (priv->proto_minor >= 12 && priv->acl) { - state->dict = dict_new (); - if (!state->dict) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "MKDIR Failed to allocate a param dictionary"); - send_fuse_err (this, finh, ENOMEM); - free_fuse_state (state); - return; - } - ret = dict_set_int16 (state->dict, "umask", fmi->umask); - if (ret < 0) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "MKDIR Failed adding umask to request"); - dict_destroy (state->dict); - send_fuse_err (this, finh, ENOMEM); - free_fuse_state (state); - return; - } - ret = dict_set_int16 (state->dict, "mode", fmi->mode); - if (ret < 0) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "MKDIR Failed adding mode to request"); - dict_destroy (state->dict); - send_fuse_err (this, finh, ENOMEM); - free_fuse_state (state); - return; - } - } -#endif + state->mode = fmi->mode; - uuid_copy (state->resolve.pargfid, state->loc.parent->gfid); - state->resolve.bname = gf_strdup (name); - state->resolve.path = gf_strdup (state->loc.path); +#if FUSE_KERNEL_MINOR_VERSION >= 12 + priv = this->private; + FUSE_ENTRY_CREATE(this, priv, finh, state, fmi, "MKDIR"); +#endif - fuse_resolve_and_resume (state, fuse_mkdir_resume); + fuse_resolve_and_resume(state, fuse_mkdir_resume); - return; + return; } void -fuse_unlink_resume (fuse_state_t *state) +fuse_unlink_resume(fuse_state_t *state) { - if (!state->loc.inode) { - gf_log ("fuse", GF_LOG_WARNING, "path resolving failed"); - send_fuse_err (state->this, state->finh, ENOENT); - free_fuse_state (state); - return; - } - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": UNLINK %s", state->finh->unique, - state->loc.path); + if (!state->loc.parent || !state->loc.inode) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "UNLINK %" PRIu64 " (%s/%s) resolution failed", + state->finh->nodeid, uuid_utoa(state->resolve.gfid), + state->resolve.bname); 
+ send_fuse_err(state->this, state->finh, state->resolve.op_errno); + free_fuse_state(state); + return; + } - FUSE_FOP (state, fuse_unlink_cbk, GF_FOP_UNLINK, - unlink, &state->loc); + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": UNLINK %s", + state->finh->unique, state->loc.path); + FUSE_FOP(state, fuse_unlink_cbk, GF_FOP_UNLINK, unlink, &state->loc, 0, + state->xdata); } static void -fuse_unlink (xlator_t *this, fuse_in_header_t *finh, void *msg) -{ - char *name = msg; - - fuse_state_t *state = NULL; - int32_t ret = -1; - - GET_STATE (this, finh, state); - ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name); - if (!state->loc.parent || (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": UNLINK %s (fuse_loc_fill() returned NULL inode)", - finh->unique, state->loc.path); - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); - return; - } +fuse_unlink(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +{ + char *name = msg; + fuse_state_t *state = NULL; - uuid_copy (state->resolve.pargfid, state->loc.parent->gfid); - state->resolve.bname = gf_strdup (name); - state->resolve.path = gf_strdup (state->loc.path); + GET_STATE(this, finh, state); - fuse_resolve_and_resume (state, fuse_unlink_resume); + fuse_resolve_entry_init(state, &state->resolve, finh->nodeid, name); - return; + fuse_resolve_and_resume(state, fuse_unlink_resume); + + return; } void -fuse_rmdir_resume (fuse_state_t *state) +fuse_rmdir_resume(fuse_state_t *state) { - if (!state->loc.inode) { - gf_log ("fuse", GF_LOG_WARNING, "path resolving failed"); - send_fuse_err (state->this, state->finh, ENOENT); - free_fuse_state (state); - return; - } + if (!state->loc.parent || !state->loc.inode) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "RMDIR %" PRIu64 " (%s/%s) resolution failed", + state->finh->nodeid, uuid_utoa(state->resolve.gfid), + state->resolve.bname); + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + 
free_fuse_state(state); + return; + } - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": RMDIR %s", state->finh->unique, - state->loc.path); + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": RMDIR %s", + state->finh->unique, state->loc.path); - FUSE_FOP (state, fuse_unlink_cbk, GF_FOP_RMDIR, - rmdir, &state->loc, 0); + FUSE_FOP(state, fuse_unlink_cbk, GF_FOP_RMDIR, rmdir, &state->loc, 0, + state->xdata); } static void -fuse_rmdir (xlator_t *this, fuse_in_header_t *finh, void *msg) -{ - char *name = msg; - - fuse_state_t *state = NULL; - int32_t ret = -1; - - GET_STATE (this, finh, state); - ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name); - if (!state->loc.parent || (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": RMDIR %s (fuse_loc_fill() failed)", - finh->unique, state->loc.path); - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); - return; - } +fuse_rmdir(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +{ + char *name = msg; + fuse_state_t *state = NULL; - uuid_copy (state->resolve.pargfid, state->loc.parent->gfid); - state->resolve.bname = gf_strdup (name); - state->resolve.path = gf_strdup (state->loc.path); + GET_STATE(this, finh, state); - fuse_resolve_and_resume (state, fuse_rmdir_resume); - return; + fuse_resolve_entry_init(state, &state->resolve, finh->nodeid, name); + + fuse_resolve_and_resume(state, fuse_rmdir_resume); + + return; } void -fuse_symlink_resume (fuse_state_t *state) -{ - if (!state->loc.parent) { - gf_log ("fuse", GF_LOG_ERROR, "failed to resolve path %s", - state->loc.path); - send_fuse_err (state->this, state->finh, ENOENT); - free_fuse_state (state); - return; - } +fuse_symlink_resume(fuse_state_t *state) +{ + if (!state->loc.parent) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "SYMLINK %" PRIu64 " (%s/%s) -> %s resolution failed", + state->finh->nodeid, uuid_utoa(state->resolve.gfid), + state->resolve.bname, state->name); + + /* facilitate retry 
from VFS */ + if (state->resolve.op_errno == ENOENT) + state->resolve.op_errno = ESTALE; + + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + free_fuse_state(state); + return; + } - if (state->loc.inode) { - gf_log (state->this->name, GF_LOG_DEBUG, "inode already present"); - inode_unref (state->loc.inode); - } + if (state->resolve.op_errno == ENOENT) { + state->resolve.op_ret = 0; + state->resolve.op_errno = 0; + } + + if (state->loc.inode) { + gf_log(state->this->name, GF_LOG_DEBUG, "inode already present"); + inode_unref(state->loc.inode); + state->loc.inode = NULL; + } - state->loc.inode = inode_new (state->loc.parent->table); + state->loc.inode = inode_new(state->loc.parent->table); - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": SYMLINK %s -> %s", state->finh->unique, - state->loc.path, state->name); + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": SYMLINK %s -> %s", + state->finh->unique, state->loc.path, state->name); - FUSE_FOP (state, fuse_newentry_cbk, GF_FOP_SYMLINK, - symlink, state->name, &state->loc, state->dict); + FUSE_FOP(state, fuse_newentry_cbk, GF_FOP_SYMLINK, symlink, state->name, + &state->loc, state->umask, state->xdata); } static void -fuse_symlink (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_symlink(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - char *name = msg; - char *linkname = name + strlen (name) + 1; + char *name = msg; + char *linkname = name + strlen(name) + 1; + fuse_state_t *state = NULL; - fuse_state_t *state = NULL; - int32_t ret = -1; + GET_STATE(this, finh, state); - GET_STATE (this, finh, state); + gf_uuid_generate(state->gfid); - uuid_generate (state->gfid); - - ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name); - if (!state->loc.parent || (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64" SYMLINK %s -> %s (fuse_loc_fill() failed)", - finh->unique, state->loc.path, linkname); - send_fuse_err (this, finh, ENOENT); - 
free_fuse_state (state); - return; - } + fuse_resolve_entry_init(state, &state->resolve, finh->nodeid, name); - state->name = gf_strdup (linkname); + state->name = gf_strdup(linkname); - uuid_copy (state->resolve.pargfid, state->loc.parent->gfid); - state->resolve.bname = gf_strdup (name); - state->resolve.path = gf_strdup (state->loc.path); + fuse_resolve_and_resume(state, fuse_symlink_resume); - fuse_resolve_and_resume (state, fuse_symlink_resume); - return; + return; } - int -fuse_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent) +fuse_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { - fuse_state_t *state = NULL; - fuse_in_header_t *finh = NULL; + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; + char loc_uuid_str[64] = {0}, loc2_uuid_str[64] = {0}; + + state = frame->root->state; + finh = state->finh; + + fuse_log_eh( + this, + "op_ret: %d, op_errno: %d, %" PRIu64 + ": %s() " + "path: %s parent: %s ==> path: %s parent: %s" + "gfid: %s", + op_ret, op_errno, frame->root->unique, gf_fop_list[frame->root->op], + state->loc.path, + (state->loc.parent ? uuid_utoa_r(state->loc.parent->gfid, loc_uuid_str) + : ""), + state->loc2.path, + (state->loc2.parent + ? uuid_utoa_r(state->loc2.parent->gfid, loc2_uuid_str) + : ""), + state->loc.inode ? uuid_utoa(state->loc.inode->gfid) : ""); + + /* need to check for loc->parent to keep clang-scan happy. + It gets dereferenced below, and is checked for NULL above. 
*/ + if ((op_ret == 0) && (state->loc.parent) && (state->loc.inode)) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": %s -> %s => 0 (buf->ia_ino=%" PRIu64 ")", + frame->root->unique, state->loc.path, state->loc2.path, + buf->ia_ino); - state = frame->root->state; - finh = state->finh; + { + /* ugly ugly - to stay blind to situation where + rename happens on a new inode + */ + buf->ia_type = state->loc.inode->ia_type; + } + buf->ia_blksize = this->ctx->page_size; + + inode_rename(state->loc.parent->table, state->loc.parent, + state->loc.name, state->loc2.parent, state->loc2.name, + state->loc.inode, buf); + + send_fuse_err(this, finh, 0); + } else { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": %s -> %s => -1 (%s)", frame->root->unique, + state->loc.path, state->loc2.path, strerror(op_errno)); + send_fuse_err(this, finh, op_errno); + } + + free_fuse_state(state); + STACK_DESTROY(frame->root); + return 0; +} - if (op_ret == 0) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": %s -> %s => 0 (buf->ia_ino=%"PRId64")", - frame->root->unique, state->loc.path, state->loc2.path, - buf->ia_ino); +void +fuse_rename_resume(fuse_state_t *state) +{ + char loc_uuid[64] = { + 0, + }; + char loc2_uuid[64] = { + 0, + }; + + if (!state->loc.parent || !state->loc.inode) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "RENAME %" PRIu64 " %s/%s -> %s/%s src resolution failed", + state->finh->unique, uuid_utoa_r(state->resolve.gfid, loc_uuid), + state->resolve.bname, + uuid_utoa_r(state->resolve2.gfid, loc2_uuid), + state->resolve2.bname); + + /* facilitate retry from VFS */ + if ((!state->loc.inode) && (state->resolve.op_errno == ENOENT)) + state->resolve.op_errno = ESTALE; + + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + free_fuse_state(state); + return; + } + + if (!state->loc2.parent) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "RENAME %" PRIu64 " %s/%s -> %s/%s dst resolution failed", + state->finh->unique, 
uuid_utoa_r(state->resolve.gfid, loc_uuid), + state->resolve.bname, + uuid_utoa_r(state->resolve2.gfid, loc2_uuid), + state->resolve2.bname); + + send_fuse_err(state->this, state->finh, ESTALE); + free_fuse_state(state); + return; + } - { - /* ugly ugly - to stay blind to situation where - rename happens on a new inode - */ - buf->ia_type = state->loc.inode->ia_type; - } - buf->ia_blksize = this->ctx->page_size; + state->resolve.op_ret = 0; + state->resolve2.op_ret = 0; - inode_rename (state->loc.parent->table, - state->loc.parent, state->loc.name, - state->loc2.parent, state->loc2.name, - state->loc.inode, buf); + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": RENAME `%s (%s)' -> `%s (%s)'", state->finh->unique, + state->loc.path, loc_uuid, state->loc2.path, loc2_uuid); - send_fuse_err (this, finh, 0); - } else { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": %s -> %s => -1 (%s)", frame->root->unique, - state->loc.path, state->loc2.path, - strerror (op_errno)); - send_fuse_err (this, finh, op_errno); - } + FUSE_FOP(state, fuse_rename_cbk, GF_FOP_RENAME, rename, &state->loc, + &state->loc2, state->xdata); +} - free_fuse_state (state); - STACK_DESTROY (frame->root); - return 0; +static void +fuse_rename(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +{ + struct fuse_rename_in *fri = msg; + char *oldname = (char *)(fri + 1); + char *newname = oldname + strlen(oldname) + 1; + fuse_state_t *state = NULL; + + GET_STATE(this, finh, state); + + fuse_resolve_entry_init(state, &state->resolve, finh->nodeid, oldname); + + fuse_resolve_entry_init(state, &state->resolve2, fri->newdir, newname); + + fuse_resolve_and_resume(state, fuse_rename_resume); + + return; } void -fuse_rename_resume (fuse_state_t *state) +fuse_link_resume(fuse_state_t *state) { - char loc_uuid[64] = {0,}; - char loc2_uuid[64] = {0,}; + if (!state->loc2.inode || !state->loc.parent) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "fuse_loc_fill() failed %" PRIu64 
": LINK %s %s", + state->finh->unique, state->loc2.path, state->loc.path); - if (!state->loc.inode) { - send_fuse_err (state->this, state->finh, ENOENT); - free_fuse_state (state); - return; - } + /* facilitate retry from VFS */ + if (!state->loc2.inode && (state->resolve.op_errno == ENOENT)) + state->resolve.op_errno = ESTALE; - uuid_utoa_r (state->loc.inode->gfid, loc_uuid); - if (state->loc2.inode) - uuid_utoa_r (state->loc2.inode->gfid, loc2_uuid); - else - strcpy (loc2_uuid, "0"); + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + free_fuse_state(state); + return; + } - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": RENAME `%s (%s)' -> `%s (%s)'", - state->finh->unique, state->loc.path, loc_uuid, - state->loc2.path, loc2_uuid); + state->resolve.op_ret = 0; + state->resolve2.op_ret = 0; - FUSE_FOP (state, fuse_rename_cbk, GF_FOP_RENAME, - rename, &state->loc, &state->loc2); + if (state->loc.inode) { + inode_unref(state->loc.inode); + state->loc.inode = NULL; + } + state->loc.inode = inode_ref(state->loc2.inode); + + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": LINK() %s -> %s", + state->finh->unique, state->loc2.path, state->loc.path); + + FUSE_FOP(state, fuse_newentry_cbk, GF_FOP_LINK, link, &state->loc2, + &state->loc, state->xdata); } static void -fuse_rename (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_link(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - struct fuse_rename_in *fri = msg; - char *oldname = (char *)(fri + 1); - char *newname = oldname + strlen (oldname) + 1; + struct fuse_link_in *fli = msg; + char *name = (char *)(fli + 1); + fuse_state_t *state = NULL; + + GET_STATE(this, finh, state); - fuse_state_t *state = NULL; - int32_t ret = -1; + fuse_resolve_inode_init(state, &state->resolve2, fli->oldnodeid); - GET_STATE (this, finh, state); + fuse_resolve_entry_init(state, &state->resolve, finh->nodeid, name); - ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, 
oldname); - if (!state->loc.parent || (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "for %s %"PRIu64": RENAME `%s' -> `%s' (fuse_loc_fill() failed)", - state->loc.path, finh->unique, state->loc.path, - state->loc2.path); + fuse_resolve_and_resume(state, fuse_link_resume); + + return; +} - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); - return; +static int +fuse_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; + fuse_private_t *priv = NULL; + struct fuse_out_header fouh = { + 0, + }; + struct fuse_entry_out feo = { + 0, + }; + struct fuse_open_out foo = { + 0, + }; + struct iovec iov_out[3]; + inode_t *linked_inode = NULL; + uint64_t ctx_value = LOOKUP_NOT_NEEDED; + + state = frame->root->state; + priv = this->private; + finh = state->finh; + foo.open_flags = 0; + + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); + + if (op_ret >= 0) { + foo.fh = (uintptr_t)fd; + + if (((priv->direct_io_mode == 2) && + ((state->flags & O_ACCMODE) != O_RDONLY)) || + (priv->direct_io_mode == 1) || direct_io_mode(xdata)) + foo.open_flags |= FOPEN_DIRECT_IO; + + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": %s() %s => %p (ino=%" PRIu64 ")", + frame->root->unique, gf_fop_list[frame->root->op], + state->loc.path, fd, buf->ia_ino); + + buf->ia_blksize = this->ctx->page_size; + gf_fuse_stat2attr(buf, &feo.attr, priv->enable_ino32); + + linked_inode = inode_link(inode, state->loc.parent, state->loc.name, + buf); + + if (linked_inode != inode) { + /* + VERY racy code (if used anywhere else) + -- don't do this without understanding + */ + inode_unref(fd->inode); + fd->inode = inode_ref(linked_inode); + } else { + inode_ctx_set(linked_inode, this, &ctx_value); } - ret = fuse_loc_fill (&state->loc2, state, 0, fri->newdir, 
newname); - if (!state->loc2.parent && (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "for %s %"PRIu64": RENAME `%s' -> `%s' (fuse_loc_fill() failed)", - state->loc.path, finh->unique, state->loc.path, - state->loc2.path); + inode_lookup(linked_inode); + + inode_unref(linked_inode); - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); - return; + feo.nodeid = inode_to_fuse_nodeid(linked_inode); + + feo.entry_valid = calc_timeout_sec(priv->entry_timeout); + feo.entry_valid_nsec = calc_timeout_nsec(priv->entry_timeout); + feo.attr_valid = calc_timeout_sec(priv->attribute_timeout); + feo.attr_valid_nsec = calc_timeout_nsec(priv->attribute_timeout); + + fouh.error = 0; + iov_out[0].iov_base = &fouh; + iov_out[1].iov_base = &feo; +#if FUSE_KERNEL_MINOR_VERSION >= 9 + iov_out[1].iov_len = priv->proto_minor >= 9 + ? sizeof(feo) + : FUSE_COMPAT_ENTRY_OUT_SIZE; +#else + iov_out[1].iov_len = sizeof(feo); +#endif + iov_out[2].iov_base = &foo; + iov_out[2].iov_len = sizeof(foo); + + if (send_fuse_iov(this, finh, iov_out, 3) == ENOENT) { + gf_log("glusterfs-fuse", GF_LOG_DEBUG, "create(%s) got EINTR", + state->loc.path); + inode_forget(inode, 1); + gf_fd_put(priv->fdtable, state->fd_no); + goto out; } - uuid_copy (state->resolve.pargfid, state->loc.parent->gfid); - state->resolve.bname = gf_strdup (oldname); - state->resolve.path = gf_strdup (state->loc.path); + fd_bind(fd); + } else { + /* facilitate retry from VFS */ + if (op_errno == ENOENT) + op_errno = ESTALE; - uuid_copy (state->resolve2.pargfid, state->loc2.parent->gfid); - state->resolve2.bname = gf_strdup (newname); - state->resolve2.path = gf_strdup (state->loc2.path); + gf_log("glusterfs-fuse", GF_LOG_WARNING, "%" PRIu64 ": %s => -1 (%s)", + finh->unique, state->loc.path, strerror(op_errno)); - fuse_resolve_and_resume (state, fuse_rename_resume); + send_fuse_err(this, finh, op_errno); + gf_fd_put(priv->fdtable, state->fd_no); + } +out: + free_fuse_state(state); + STACK_DESTROY(frame->root); - 
return; + return 0; } void -fuse_link_resume (fuse_state_t *state) +fuse_create_resume(fuse_state_t *state) { - if (state->loc.inode) { - gf_log (state->this->name, GF_LOG_DEBUG, "inode already present"); - inode_unref (state->loc.inode); - } + fd_t *fd = NULL; + fuse_private_t *priv = NULL; + fuse_fd_ctx_t *fdctx = NULL; + + if (!state->loc.parent) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 " CREATE %s/%s resolution failed", + state->finh->unique, uuid_utoa(state->resolve.gfid), + state->resolve.bname); + + /* facilitate retry from VFS */ + if (state->resolve.op_errno == ENOENT) + state->resolve.op_errno = ESTALE; + + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + free_fuse_state(state); + return; + } + + if (state->resolve.op_errno == ENOENT) { + state->resolve.op_ret = 0; + state->resolve.op_errno = 0; + } + + if (state->loc.inode) { + gf_log(state->this->name, GF_LOG_DEBUG, "inode already present"); + inode_unref(state->loc.inode); + } + + state->loc.inode = inode_new(state->loc.parent->table); + + fd = fd_create(state->loc.inode, state->finh->pid); + if (fd == NULL) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 " CREATE cannot create a new fd", + state->finh->unique); + send_fuse_err(state->this, state->finh, ENOMEM); + free_fuse_state(state); + return; + } + + fdctx = fuse_fd_ctx_check_n_create(state->this, fd); + if (fdctx == NULL) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 " CREATE creation of fdctx failed", + state->finh->unique); + fd_unref(fd); + send_fuse_err(state->this, state->finh, ENOMEM); + free_fuse_state(state); + return; + } - state->loc.inode = inode_ref (state->loc2.inode); + priv = state->this->private; - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": LINK() %s -> %s", - state->finh->unique, state->loc2.path, - state->loc.path); + state->fd_no = gf_fd_unused_get(priv->fdtable, fd); - FUSE_FOP (state, fuse_newentry_cbk, GF_FOP_LINK, - link, &state->loc2, &state->loc); + 
state->fd = fd_ref(fd); + fd->flags = state->flags; + + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": CREATE %s", + state->finh->unique, state->loc.path); + + FUSE_FOP(state, fuse_create_cbk, GF_FOP_CREATE, create, &state->loc, + state->flags, state->mode, state->umask, fd, state->xdata); } static void -fuse_link (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_create(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - struct fuse_link_in *fli = msg; - char *name = (char *)(fli + 1); +#if FUSE_KERNEL_MINOR_VERSION >= 12 + struct fuse_create_in *fci = msg; + fuse_private_t *priv = NULL; +#else + struct fuse_open_in *fci = msg; +#endif + char *name = (char *)(fci + 1); - fuse_state_t *state = NULL; - int32_t ret = -1; + fuse_state_t *state = NULL; - GET_STATE (this, finh, state); +#if FUSE_KERNEL_MINOR_VERSION >= 12 + priv = this->private; + if (priv->proto_minor < 12) + name = (char *)((struct fuse_open_in *)msg + 1); +#endif - ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name); - if (ret == 0) - ret = fuse_loc_fill (&state->loc2, state, fli->oldnodeid, 0, - NULL); + GET_STATE(this, finh, state); - if (!state->loc2.inode || (ret < 0) || !state->loc.parent) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "fuse_loc_fill() failed %"PRIu64": LINK %s %s", - finh->unique, state->loc2.path, state->loc.path); - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); - return; - } + gf_uuid_generate(state->gfid); - uuid_copy (state->resolve.pargfid, state->loc.parent->gfid); - state->resolve.bname = gf_strdup (name); - state->resolve.path = gf_strdup (state->loc.path); + fuse_resolve_entry_init(state, &state->resolve, finh->nodeid, name); - uuid_copy (state->resolve2.gfid, state->loc2.inode->gfid); - state->resolve2.path = gf_strdup (state->loc2.path); + state->mode = fci->mode; + state->flags = fci->flags; - fuse_resolve_and_resume (state, fuse_link_resume); +#if FUSE_KERNEL_MINOR_VERSION >= 12 + priv = 
this->private; + FUSE_ENTRY_CREATE(this, priv, finh, state, fci, "CREATE"); +#endif + fuse_resolve_and_resume(state, fuse_create_resume); - return; + return; } +void +fuse_open_resume(fuse_state_t *state) +{ + fd_t *fd = NULL; + fuse_private_t *priv = NULL; + fuse_fd_ctx_t *fdctx = NULL; -static int -fuse_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - fd_t *fd, inode_t *inode, struct iatt *buf, - struct iatt *preparent, struct iatt *postparent) -{ - fuse_state_t *state = NULL; - fuse_in_header_t *finh = NULL; - fuse_private_t *priv = NULL; - struct fuse_out_header fouh = {0, }; - struct fuse_entry_out feo = {0, }; - struct fuse_open_out foo = {0, }; - struct iovec iov_out[3]; - inode_t *linked_inode = NULL; - - state = frame->root->state; - priv = this->private; - finh = state->finh; - foo.open_flags = 0; + if (!state->loc.inode) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "%" PRIu64 ": OPEN %s resolution failed", state->finh->unique, + uuid_utoa(state->resolve.gfid)); - if (op_ret >= 0) { - foo.fh = (uintptr_t) fd; + /* facilitate retry from VFS */ + if (state->resolve.op_errno == ENOENT) + state->resolve.op_errno = ESTALE; - if (((priv->direct_io_mode == 2) - && ((state->flags & O_ACCMODE) != O_RDONLY)) - || (priv->direct_io_mode == 1)) - foo.open_flags |= FOPEN_DIRECT_IO; + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + free_fuse_state(state); + return; + } - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": %s() %s => %p (ino=%"PRId64")", - frame->root->unique, gf_fop_list[frame->root->op], - state->loc.path, fd, buf->ia_ino); + fd = fd_create(state->loc.inode, state->finh->pid); + if (!fd) { + gf_log("fuse", GF_LOG_ERROR, "fd is NULL"); + send_fuse_err(state->this, state->finh, ENOENT); + free_fuse_state(state); + return; + } + + fdctx = fuse_fd_ctx_check_n_create(state->this, fd); + if (fdctx == NULL) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": OPEN creation of 
fdctx failed", + state->finh->unique); + fd_unref(fd); + send_fuse_err(state->this, state->finh, ENOMEM); + free_fuse_state(state); + return; + } - buf->ia_blksize = this->ctx->page_size; - gf_fuse_stat2attr (buf, &feo.attr); + priv = state->this->private; - linked_inode = inode_link (inode, state->loc.parent, - state->loc.name, buf); + state->fd_no = gf_fd_unused_get(priv->fdtable, fd); + state->fd = fd_ref(fd); + fd->flags = state->flags; - if (linked_inode != inode) { - /* - VERY racy code (if used anywhere else) - -- don't do this without understanding - */ - inode_unref (fd->inode); - fd->inode = inode_ref (linked_inode); - } + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": OPEN %s", + state->finh->unique, state->loc.path); - inode_lookup (linked_inode); + FUSE_FOP(state, fuse_fd_cbk, GF_FOP_OPEN, open, &state->loc, state->flags, + fd, state->xdata); +} - inode_unref (linked_inode); +static void +fuse_open(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +{ + struct fuse_open_in *foi = msg; + fuse_state_t *state = NULL; - fd_ref (fd); + GET_STATE(this, finh, state); - feo.nodeid = inode_to_fuse_nodeid (linked_inode); + fuse_resolve_inode_init(state, &state->resolve, finh->nodeid); - feo.entry_valid = calc_timeout_sec (priv->entry_timeout); - feo.entry_valid_nsec = calc_timeout_nsec (priv->entry_timeout); - feo.attr_valid = calc_timeout_sec (priv->attribute_timeout); - feo.attr_valid_nsec = - calc_timeout_nsec (priv->attribute_timeout); + state->flags = foi->flags; - fouh.error = 0; - iov_out[0].iov_base = &fouh; - iov_out[1].iov_base = &feo; -#if FUSE_KERNEL_MINOR_VERSION >= 9 - iov_out[1].iov_len = priv->proto_minor >= 9 ? 
- sizeof (feo) : - FUSE_COMPAT_ENTRY_OUT_SIZE; -#else - iov_out[1].iov_len = sizeof (feo); -#endif - iov_out[2].iov_base = &foo; - iov_out[2].iov_len = sizeof (foo); - if (send_fuse_iov (this, finh, iov_out, 3) == ENOENT) { - gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "create(%s) got EINTR", state->loc.path); - inode_forget (inode, 1); - fd_unref (fd); - goto out; - } + fuse_resolve_and_resume(state, fuse_open_resume); - fd_bind (fd); - } else { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": %s => -1 (%s)", finh->unique, - state->loc.path, strerror (op_errno)); - send_fuse_err (this, finh, op_errno); - } -out: - free_fuse_state (state); - STACK_DESTROY (frame->root); + return; +} - return 0; +static int +fuse_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iovec *vector, + int32_t count, struct iatt *stbuf, struct iobref *iobref, + dict_t *xdata) +{ + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; + struct fuse_out_header fouh = { + 0, + }; + struct iovec *iov_out = NULL; + + state = frame->root->state; + finh = state->finh; + + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); + + if (op_ret >= 0) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": READ => %d/%" GF_PRI_SIZET ",%" PRId64 "/%" PRIu64, + frame->root->unique, op_ret, state->size, state->off, + stbuf->ia_size); + + iov_out = GF_CALLOC(count + 1, sizeof(*iov_out), gf_fuse_mt_iovec); + if (iov_out) { + fouh.error = 0; + iov_out[0].iov_base = &fouh; + memcpy(iov_out + 1, vector, count * sizeof(*iov_out)); + send_fuse_iov(this, finh, iov_out, count + 1); + GF_FREE(iov_out); + } else + send_fuse_err(this, finh, ENOMEM); + } else { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": READ => %d gfid=%s fd=%p (%s)", + frame->root->unique, op_ret, + (state->fd && state->fd->inode) + ? 
uuid_utoa(state->fd->inode->gfid) + : "nil", + state->fd, strerror(op_errno)); + + send_fuse_err(this, finh, op_errno); + } + + free_fuse_state(state); + STACK_DESTROY(frame->root); + + return 0; } void -fuse_create_resume (fuse_state_t *state) +fuse_readv_resume(fuse_state_t *state) { - fd_t *fd = NULL; - - if (!state->loc.parent) { - gf_log ("fuse", GF_LOG_ERROR, "failed to resolve path %s", - state->loc.path); - send_fuse_err (state->this, state->finh, ENOENT); - free_fuse_state (state); - return; - } - - if (state->loc.inode) { - gf_log (state->this->name, GF_LOG_DEBUG, "inode already present"); - inode_unref (state->loc.inode); - } - - state->loc.inode = inode_new (state->loc.parent->table); - - fd = fd_create (state->loc.inode, state->finh->pid); - state->fd = fd; - fd->flags = state->flags; - - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": CREATE %s", state->finh->unique, - state->loc.path); - - FUSE_FOP (state, fuse_create_cbk, GF_FOP_CREATE, - create, &state->loc, state->flags, state->mode, - fd, state->dict); + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": READ (%p, size=%zu, offset=%" PRIu64 ")", + state->finh->unique, state->fd, state->size, state->off); + FUSE_FOP(state, fuse_readv_cbk, GF_FOP_READ, readv, state->fd, state->size, + state->off, state->io_flags, state->xdata); } static void -fuse_create (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_readv(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { -#if FUSE_KERNEL_MINOR_VERSION >= 12 - struct fuse_create_in *fci = msg; -#else - struct fuse_open_in *fci = msg; -#endif - char *name = (char *)(fci + 1); + struct fuse_read_in *fri = msg; - fuse_private_t *priv = NULL; - fuse_state_t *state = NULL; - int32_t ret = -1; - - priv = this->private; -#if FUSE_KERNEL_MINOR_VERSION >= 12 - if (priv->proto_minor < 12) - name = (char *)((struct fuse_open_in *)msg + 1); +#if FUSE_KERNEL_MINOR_VERSION >= 9 + fuse_private_t *priv = NULL; #endif + fuse_state_t 
*state = NULL; + fd_t *fd = NULL; - GET_STATE (this, finh, state); + GET_STATE(this, finh, state); - uuid_generate (state->gfid); + fd = FH_TO_FD(fri->fh); + state->fd = fd; - ret = fuse_loc_fill (&state->loc, state, 0, finh->nodeid, name); - if (!state->loc.parent || (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64" CREATE %s (fuse_loc_fill() failed)", - finh->unique, state->loc.path); - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); - return; - } + fuse_resolve_fd_init(state, &state->resolve, fd); - state->mode = fci->mode; - state->flags = fci->flags; - - priv = this->private; -#if FUSE_KERNEL_MINOR_VERSION >=12 - if (priv->proto_minor >= 12) - state->mode &= ~fci->umask; - if (priv->proto_minor >= 12 && priv->acl) { - state->dict = dict_new (); - if (!state->dict) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "CREATE Failed to allocate a param dictionary"); - send_fuse_err (this, finh, ENOMEM); - free_fuse_state (state); - return; - } - ret = dict_set_int16 (state->dict, "umask", fci->umask); - if (ret < 0) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "CREATE Failed adding umask to request"); - dict_destroy (state->dict); - send_fuse_err (this, finh, ENOMEM); - free_fuse_state (state); - return; - } - ret = dict_set_int16 (state->dict, "mode", fci->mode); - if (ret < 0) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "CREATE Failed adding mode to request"); - dict_destroy (state->dict); - send_fuse_err (this, finh, ENOMEM); - free_fuse_state (state); - return; - } - } + /* See comment by similar code in fuse_settatr */ +#if FUSE_KERNEL_MINOR_VERSION >= 9 + priv = this->private; + if (priv->proto_minor >= 9 && fri->read_flags & FUSE_READ_LOCKOWNER) + state->lk_owner = fri->lock_owner; #endif - uuid_copy (state->resolve.pargfid, state->loc.parent->gfid); - state->resolve.bname = gf_strdup (name); - state->resolve.path = gf_strdup (state->loc.path); - - fuse_resolve_and_resume (state, fuse_create_resume); + state->size = 
fri->size; + state->off = fri->offset; + /* lets ignore 'fri->read_flags', but just consider 'fri->flags' */ +#if FUSE_KERNEL_MINOR_VERSION >= 9 + state->io_flags = fri->flags; +#endif + fuse_resolve_and_resume(state, fuse_readv_resume); +} - return; +static int +fuse_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *stbuf, + struct iatt *postbuf, dict_t *xdata) +{ + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; + struct fuse_write_out fwo = { + 0, + }; + + state = frame->root->state; + finh = state->finh; + + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); + + if (op_ret >= 0) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": WRITE => %d/%" GF_PRI_SIZET ",%" PRId64 + "/%" PRIu64, + frame->root->unique, op_ret, state->size, state->off, + stbuf->ia_size); + + fwo.size = op_ret; + send_fuse_obj(this, finh, &fwo); + } else { + gf_log( + "glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": WRITE => -1 gfid=%s fd=%p (%s)", frame->root->unique, + (state->fd && state->fd->inode) ? 
uuid_utoa(state->fd->inode->gfid) + : "nil", + state->fd, strerror(op_errno)); + + send_fuse_err(this, finh, op_errno); + } + + free_fuse_state(state); + STACK_DESTROY(frame->root); + + return 0; } void -fuse_open_resume (fuse_state_t *state) +fuse_write_resume(fuse_state_t *state) { - fd_t *fd = NULL; + struct iobref *iobref = NULL; - fd = fd_create (state->loc.inode, state->finh->pid); - if (!fd) { - gf_log ("fuse", GF_LOG_ERROR, - "fd is NULL"); - send_fuse_err (state->this, state->finh, ENOENT); - free_fuse_state (state); - return; - } + iobref = iobref_new(); + if (!iobref) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "%" PRIu64 ": WRITE iobref allocation failed", + state->finh->unique); + send_fuse_err(state->this, state->finh, ENOMEM); + + free_fuse_state(state); + return; + } - state->fd = fd; - fd->flags = state->flags; + iobref_add(iobref, state->iobuf); - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": OPEN %s", state->finh->unique, - state->loc.path); + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": WRITE (%p, size=%" GF_PRI_SIZET ", offset=%" PRId64 + ")", + state->finh->unique, state->fd, state->size, state->off); - FUSE_FOP (state, fuse_fd_cbk, GF_FOP_OPEN, - open, &state->loc, state->flags, fd, 0); + FUSE_FOP(state, fuse_writev_cbk, GF_FOP_WRITE, writev, state->fd, + &state->vector, 1, state->off, state->io_flags, iobref, + state->xdata); + + iobref_unref(iobref); } static void -fuse_open (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_write(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - struct fuse_open_in *foi = msg; + /* WRITE is special, metadata is attached to in_header, + * and msg is the payload as-is. 
+ */ + struct fuse_write_in *fwi = (struct fuse_write_in *)(finh + 1); - fuse_state_t *state = NULL; - int32_t ret = -1; + fuse_state_t *state = NULL; + fd_t *fd = NULL; +#if FUSE_KERNEL_MINOR_VERSION >= 9 + fuse_private_t *priv = NULL; + priv = this->private; +#endif - GET_STATE (this, finh, state); + GET_STATE(this, finh, state); + fd = FH_TO_FD(fwi->fh); + state->fd = fd; + state->size = fwi->size; + state->off = fwi->offset; - ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL); - if ((state->loc.inode == NULL) || - (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": OPEN %s (fuse_loc_fill() failed)", - finh->unique, state->loc.path); + /* lets ignore 'fwi->write_flags', but just consider 'fwi->flags' */ +#if FUSE_KERNEL_MINOR_VERSION >= 9 + state->io_flags = fwi->flags; +#else + state->io_flags = fwi->write_flags; +#endif + /* TODO: may need to handle below flag + (fwi->write_flags & FUSE_WRITE_CACHE); + */ - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); - return; - } + fuse_resolve_fd_init(state, &state->resolve, fd); - state->flags = foi->flags; + /* See comment by similar code in fuse_settatr */ +#if FUSE_KERNEL_MINOR_VERSION >= 9 + priv = this->private; + if (priv->proto_minor >= 9 && fwi->write_flags & FUSE_WRITE_LOCKOWNER) + state->lk_owner = fwi->lock_owner; +#endif - uuid_copy (state->resolve.gfid, state->loc.inode->gfid); - state->resolve.path = gf_strdup (state->loc.path); + state->vector.iov_base = msg; + state->vector.iov_len = fwi->size; + state->iobuf = iobuf; - fuse_resolve_and_resume (state, fuse_open_resume); + fuse_resolve_and_resume(state, fuse_write_resume); - return; + return; } - +#if FUSE_KERNEL_MINOR_VERSION >= 28 static int -fuse_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iovec *vector, int32_t count, - struct iatt *stbuf, struct iobref *iobref) -{ - fuse_state_t *state = NULL; - fuse_in_header_t *finh = NULL; - struct 
fuse_out_header fouh = {0, }; - struct iovec *iov_out = NULL; - - state = frame->root->state; - finh = state->finh; - - if (op_ret >= 0) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": READ => %d/%"GF_PRI_SIZET",%"PRId64"/%"PRId64, - frame->root->unique, - op_ret, state->size, state->off, stbuf->ia_size); - - iov_out = GF_CALLOC (count + 1, sizeof (*iov_out), - gf_fuse_mt_iovec); - if (iov_out) { - fouh.error = 0; - iov_out[0].iov_base = &fouh; - memcpy (iov_out + 1, vector, count * sizeof (*iov_out)); - send_fuse_iov (this, finh, iov_out, count + 1); - GF_FREE (iov_out); - } else - send_fuse_err (this, finh, ENOMEM); - } else { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": READ => %d (%s)", frame->root->unique, - op_ret, strerror (op_errno)); +fuse_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *stbuf, + struct iatt *prebuf_dst, struct iatt *postbuf_dst, + dict_t *xdata) +{ + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; + /* + * Fuse kernel module uses fuse_write_out itself as the + * output collector. In fact, fuse_kernel.h in the upstream + * kernel just defines the input structure fuse_copy_file_range_in + * for the fop. So, just use the fuse_write_out to send the + * response back to the kernel. 
+ */ + struct fuse_write_out fcfro = { + 0, + }; + + char src_gfid[GF_UUID_BUF_SIZE] = {0}; + char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + + state = frame->root->state; + finh = state->finh; + + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); + + if (op_ret >= 0) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": WRITE => %d/%" GF_PRI_SIZET ",%" PRIu64 + " , %" PRIu64 " ,%" PRIu64 ",%" PRIu64, + frame->root->unique, op_ret, state->size, state->off_in, + state->off_out, stbuf->ia_size, postbuf_dst->ia_size); + + fcfro.size = op_ret; + send_fuse_obj(this, finh, &fcfro); + } else { + if (state->fd && state->fd->inode) + uuid_utoa_r(state->fd->inode->gfid, src_gfid); + else + snprintf(src_gfid, sizeof(src_gfid), "nil"); - send_fuse_err (this, finh, op_errno); - } + if (state->fd_dst && state->fd_dst->inode) + uuid_utoa_r(state->fd_dst->inode->gfid, dst_gfid); + else + snprintf(dst_gfid, sizeof(dst_gfid), "nil"); - free_fuse_state (state); - STACK_DESTROY (frame->root); + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 + ": COPY_FILE_RANGE => -1 gfid_in=%s fd_in=%p " + "gfid_out=%s fd_out=%p (%s)", + frame->root->unique, src_gfid, state->fd, dst_gfid, + state->fd_dst, strerror(op_errno)); - return 0; + send_fuse_err(this, finh, op_errno); + } + + free_fuse_state(state); + STACK_DESTROY(frame->root); + + return 0; } void -fuse_readv_resume (fuse_state_t *state) +fuse_copy_file_range_resume(fuse_state_t *state) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": READ (%p, size=%zu, offset=%"PRIu64")", - state->finh->unique, state->fd, state->size, state->off); - - FUSE_FOP (state, fuse_readv_cbk, GF_FOP_READ, - readv, state->fd, state->size, state->off); + char fd_uuid_str[64] = {0}, fd_dst_uuid_str[64] = {0}; + + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 + ": COPY_FILE_RANGE " + "(input fd: %p (gfid: %s), " + "output fd: %p (gfid: %s) size=%zu, " + "offset_in=%" PRIu64 ", offset_out=%" PRIu64 ")", + state->finh->unique, state->fd, + 
uuid_utoa_r(state->fd->inode->gfid, fd_uuid_str), state->fd_dst, + uuid_utoa_r(state->fd_dst->inode->gfid, fd_dst_uuid_str), + state->size, state->off_in, state->off_out); + + FUSE_FOP(state, fuse_copy_file_range_cbk, GF_FOP_COPY_FILE_RANGE, + copy_file_range, state->fd, state->off_in, state->fd_dst, + state->off_out, state->size, state->io_flags, state->xdata); } static void -fuse_readv (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_copy_file_range(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - struct fuse_read_in *fri = msg; - - fuse_private_t *priv = NULL; - fuse_state_t *state = NULL; - fd_t *fd = NULL; - - GET_STATE (this, finh, state); + struct fuse_copy_file_range_in *fcfri = msg; + fuse_state_t *state = NULL; + fd_t *fd_in = NULL; + fd_t *fd_out = NULL; + GET_STATE(this, finh, state); - fd = FH_TO_FD (fri->fh); - state->fd = fd; + fd_in = FH_TO_FD(fcfri->fh_in); + fd_out = FH_TO_FD(fcfri->fh_out); + state->fd = fd_in; + state->fd_dst = fd_out; - /* See comment by similar code in fuse_settatr */ - priv = this->private; -#if FUSE_KERNEL_MINOR_VERSION >= 9 - if (priv->proto_minor >= 9 && fri->read_flags & FUSE_READ_LOCKOWNER) - state->lk_owner = fri->lock_owner; -#endif + fuse_resolve_fd_init(state, &state->resolve, fd_in); + fuse_resolve_fd_init(state, &state->resolve2, fd_out); - state->size = fri->size; - state->off = fri->offset; + state->size = fcfri->len; + state->off_in = fcfri->off_in; + state->off_out = fcfri->off_out; + state->io_flags = fcfri->flags; - fuse_resolve_and_resume (state, fuse_readv_resume); + fuse_resolve_and_resume(state, fuse_copy_file_range_resume); } +#endif /* FUSE_KERNEL_MINOR_VERSION >= 28 */ - +#if FUSE_KERNEL_MINOR_VERSION >= 24 && HAVE_SEEK_HOLE static int -fuse_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *stbuf, struct iatt *postbuf) +fuse_lseek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, 
int32_t op_errno, off_t offset, dict_t *xdata) { - fuse_state_t *state = NULL; - fuse_in_header_t *finh = NULL; - struct fuse_write_out fwo = {0, }; + fuse_state_t *state = frame->root->state; + fuse_in_header_t *finh = state->finh; + struct fuse_lseek_out flo = { + 0, + }; - state = frame->root->state; - finh = state->finh; + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); - if (op_ret >= 0) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": WRITE => %d/%"GF_PRI_SIZET",%"PRId64"/%"PRId64, - frame->root->unique, - op_ret, state->size, state->off, stbuf->ia_size); + if (op_ret >= 0) { + flo.offset = offset; + send_fuse_obj(this, finh, &flo); + } else { + send_fuse_err(this, finh, op_errno); + } - fwo.size = op_ret; - send_fuse_obj (this, finh, &fwo); - } else { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": WRITE => -1 (%s)", frame->root->unique, - strerror (op_errno)); + free_fuse_state(state); + STACK_DESTROY(frame->root); - send_fuse_err (this, finh, op_errno); - } + return 0; +} - free_fuse_state (state); - STACK_DESTROY (frame->root); +static void +fuse_lseek_resume(fuse_state_t *state) +{ + FUSE_FOP(state, fuse_lseek_cbk, GF_FOP_SEEK, seek, state->fd, state->off, + state->whence, state->xdata); +} - return 0; +static void +fuse_lseek(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +{ + struct fuse_lseek_in *ffi = msg; + fuse_state_t *state = NULL; + + GET_STATE(this, finh, state); + state->fd = FH_TO_FD(ffi->fh); + state->off = ffi->offset; + + switch (ffi->whence) { + case SEEK_DATA: + state->whence = GF_SEEK_DATA; + break; + case SEEK_HOLE: + state->whence = GF_SEEK_HOLE; + break; + default: + /* fuse should handle other whence internally */ + send_fuse_err(this, finh, EINVAL); + free_fuse_state(state); + return; + } + + fuse_resolve_fd_init(state, &state->resolve, state->fd); + fuse_resolve_and_resume(state, fuse_lseek_resume); } +#endif /* FUSE_KERNEL_MINOR_VERSION >= 24 && HAVE_SEEK_HOLE */ void 
-fuse_write_resume (fuse_state_t *state) +fuse_flush_resume(fuse_state_t *state) { - struct iobref *iobref = NULL; - struct iobuf *iobuf = NULL; - - if (!state->fd || !state->fd->inode) { - send_fuse_err (state->this, state->finh, EBADFD); - free_fuse_state (state); - return; - } - - iobref = iobref_new (); - if (!iobref) { - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "%"PRIu64": WRITE iobref allocation failed", - state->finh->unique); - send_fuse_err (state->this, state->finh, ENOMEM); - - free_fuse_state (state); - return; - } - - iobuf = ((fuse_private_t *) (state->this->private))->iobuf; - iobref_add (iobref, iobuf); + FUSE_FOP(state, fuse_flush_cbk, GF_FOP_FLUSH, flush, state->fd, + state->xdata); +} - FUSE_FOP (state, fuse_writev_cbk, GF_FOP_WRITE, writev, state->fd, - &state->vector, 1, state->off, iobref); +static void +fuse_flush_interrupt_handler(xlator_t *this, fuse_interrupt_record_t *fir) +{ + gf_log("glusterfs-fuse", GF_LOG_DEBUG, + "FLUSH unique %" PRIu64 ": interrupt handler triggered", + fir->fuse_in_header.unique); - iobref_unref (iobref); + fuse_interrupt_finish_interrupt(this, fir, INTERRUPT_HANDLED, _gf_false, + NULL); } static void -fuse_write (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_flush(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - /* WRITE is special, metadata is attached to in_header, - * and msg is the payload as-is. 
- */ - struct fuse_write_in *fwi = (struct fuse_write_in *) - (finh + 1); + struct fuse_flush_in *ffi = msg; + fuse_private_t *priv = NULL; - fuse_private_t *priv = NULL; - fuse_state_t *state = NULL; - fd_t *fd = NULL; + fuse_state_t *state = NULL; + fd_t *fd = NULL; - priv = this->private; + GET_STATE(this, finh, state); + fd = FH_TO_FD(ffi->fh); + state->fd = fd; - GET_STATE (this, finh, state); - fd = FH_TO_FD (fwi->fh); - state->fd = fd; - state->size = fwi->size; - state->off = fwi->offset; + priv = this->private; + if (priv->flush_handle_interrupt) { + fuse_interrupt_record_t *fir = NULL; - /* See comment by similar code in fuse_settatr */ - priv = this->private; -#if FUSE_KERNEL_MINOR_VERSION >= 9 - if (priv->proto_minor >= 9 && fwi->write_flags & FUSE_WRITE_LOCKOWNER) - state->lk_owner = fwi->lock_owner; -#endif + fir = fuse_interrupt_record_new(finh, fuse_flush_interrupt_handler); + if (!fir) { + send_fuse_err(this, finh, ENOMEM); - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": WRITE (%p, size=%"PRIu32", offset=%"PRId64")", - finh->unique, fd, fwi->size, fwi->offset); + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "FLUSH unique %" PRIu64 + ":" + " interrupt record allocation failed", + finh->unique); + free_fuse_state(state); - state->vector.iov_base = msg; - state->vector.iov_len = fwi->size; + return; + } + fuse_interrupt_record_insert(this, fir); + } - fuse_resolve_and_resume (state, fuse_write_resume); + fuse_resolve_fd_init(state, &state->resolve, fd); - return; + state->lk_owner = ffi->lock_owner; + + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": FLUSH %p", + finh->unique, fd); + + fuse_resolve_and_resume(state, fuse_flush_resume); + + return; } -void -fuse_flush_resume (fuse_state_t *state) +int +fuse_internal_release(xlator_t *this, fd_t *fd) { - FUSE_FOP (state, fuse_err_cbk, GF_FOP_FLUSH, - flush, state->fd); + /* This is important we cleanup our context here to avoid a leak + in case an error occurs and we get cleanup up by + 
call_unwind_error->...->args_wipe instead of the normal path. + */ + fuse_fd_ctx_destroy(this, fd); + + return 0; } static void -fuse_flush (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_release(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - struct fuse_flush_in *ffi = msg; + struct fuse_release_in *fri = msg; + fd_t *fd = NULL; + fuse_state_t *state = NULL; + fuse_private_t *priv = NULL; - fuse_state_t *state = NULL; - fd_t *fd = NULL; + GET_STATE(this, finh, state); + fd = FH_TO_FD(fri->fh); + if (!fd) + goto out; - GET_STATE (this, finh, state); - fd = FH_TO_FD (ffi->fh); - state->fd = fd; + state->fd = fd; - state->lk_owner = ffi->lock_owner; + priv = this->private; - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": FLUSH %p", finh->unique, fd); + fuse_log_eh(this, "RELEASE(): finh->unique: %" PRIu64 ":, fd: %p, gfid: %s", + finh->unique, fd, uuid_utoa(fd->inode->gfid)); - fuse_resolve_and_resume (state, fuse_flush_resume); + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "finh->unique: %" PRIu64 ": RELEASE %p", finh->unique, state->fd); - return; -} + fd_close(state->fd); -static void -fuse_release (xlator_t *this, fuse_in_header_t *finh, void *msg) -{ - struct fuse_release_in *fri = msg; - fd_t *new_fd = NULL; - fd_t *fd = NULL; - uint64_t val = 0; - int ret = 0; - fuse_state_t *state = NULL; - fuse_fd_ctx_t *fdctx = NULL; - - GET_STATE (this, finh, state); - fd = FH_TO_FD (fri->fh); - state->fd = fd; - - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": RELEASE %p", finh->unique, state->fd); - - ret = fd_ctx_del (fd, this, &val); - if (!ret) { - fdctx = (fuse_fd_ctx_t *)(unsigned long)val; - if (fdctx) { - new_fd = fdctx->fd; - if (new_fd) { - fd_unref (new_fd); - } + fuse_fd_ctx_destroy(this, state->fd); + fd_unref(fd); - GF_FREE (fdctx); - } - } - fd_unref (fd); + gf_fdptr_put(priv->fdtable, fd); - send_fuse_err (this, finh, 0); + state->fd = NULL; - free_fuse_state (state); - return; +out: + send_fuse_err(this, 
finh, 0); + + free_fuse_state(state); + return; } void -fuse_fsync_resume (fuse_state_t *state) +fuse_fsync_resume(fuse_state_t *state) { - /* fsync_flags: 1 means "datasync" (no defines for this) */ - FUSE_FOP (state, fuse_fsync_cbk, GF_FOP_FSYNC, - fsync, state->fd, state->flags & 1); + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": FSYNC %p", + state->finh->unique, state->fd); + + /* fsync_flags: 1 means "datasync" (no defines for this) */ + FUSE_FOP(state, fuse_fsync_cbk, GF_FOP_FSYNC, fsync, state->fd, + (state->flags & 1), state->xdata); } static void -fuse_fsync (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_fsync(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - struct fuse_fsync_in *fsi = msg; + struct fuse_fsync_in *fsi = msg; - fuse_state_t *state = NULL; - fd_t *fd = NULL; + fuse_state_t *state = NULL; + fd_t *fd = NULL; - GET_STATE (this, finh, state); - fd = FH_TO_FD (fsi->fh); - state->fd = fd; + GET_STATE(this, finh, state); + fd = FH_TO_FD(fsi->fh); + state->fd = fd; - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": FSYNC %p", finh->unique, fd); + fuse_resolve_fd_init(state, &state->resolve, fd); - state->flags = fsi->fsync_flags; - fuse_resolve_and_resume (state, fuse_fsync_resume); - return; + state->flags = fsi->fsync_flags; + fuse_resolve_and_resume(state, fuse_fsync_resume); + return; } void -fuse_opendir_resume (fuse_state_t *state) +fuse_opendir_resume(fuse_state_t *state) { - fd_t *fd = NULL; + fd_t *fd = NULL; + fuse_private_t *priv = NULL; + fuse_fd_ctx_t *fdctx = NULL; + + priv = state->this->private; - fd = fd_create (state->loc.inode, state->finh->pid); - state->fd = fd; + if (!state->loc.inode) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": OPENDIR (%s) resolution failed", + state->finh->unique, uuid_utoa(state->resolve.gfid)); - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": OPENDIR %s", state->finh->unique, - state->loc.path); + /* facilitate retry from VFS */ + 
if (state->resolve.op_errno == ENOENT) + state->resolve.op_errno = ESTALE; + + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + free_fuse_state(state); + return; + } + + fd = fd_create(state->loc.inode, state->finh->pid); + if (fd == NULL) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": OPENDIR fd creation failed", state->finh->unique); + send_fuse_err(state->this, state->finh, ENOMEM); + free_fuse_state(state); + return; + } + + fdctx = fuse_fd_ctx_check_n_create(state->this, fd); + if (fdctx == NULL) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": OPENDIR creation of fdctx failed", + state->finh->unique); + fd_unref(fd); + send_fuse_err(state->this, state->finh, ENOMEM); + free_fuse_state(state); + return; + } - FUSE_FOP (state, fuse_fd_cbk, GF_FOP_OPENDIR, - opendir, &state->loc, fd); + state->fd = fd_ref(fd); + state->fd_no = gf_fd_unused_get(priv->fdtable, fd); + + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": OPENDIR %s", + state->finh->unique, state->loc.path); + + FUSE_FOP(state, fuse_fd_cbk, GF_FOP_OPENDIR, opendir, &state->loc, fd, + state->xdata); } static void -fuse_opendir (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_opendir(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - /* - struct fuse_open_in *foi = msg; - */ + /* + struct fuse_open_in *foi = msg; + */ - fuse_state_t *state = NULL; - int32_t ret = -1; + fuse_state_t *state = NULL; - GET_STATE (this, finh, state); - ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL); - if ((state->loc.inode == NULL) || (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": OPENDIR %s (fuse_loc_fill() failed)", - finh->unique, state->loc.path); + GET_STATE(this, finh, state); - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); - return; - } + fuse_resolve_inode_init(state, &state->resolve, finh->nodeid); - uuid_copy (state->resolve.gfid, state->loc.inode->gfid); - 
state->resolve.path = gf_strdup (state->loc.path); - - fuse_resolve_and_resume (state, fuse_opendir_resume); + fuse_resolve_and_resume(state, fuse_opendir_resume); } - unsigned char -d_type_from_stat (struct iatt *buf) +d_type_from_stat(struct iatt *buf) { - unsigned char d_type; + unsigned char d_type; - if (IA_ISLNK (buf->ia_type)) { - d_type = DT_LNK; + if (IA_ISLNK(buf->ia_type)) { + d_type = DT_LNK; - } else if (IA_ISDIR (buf->ia_type)) { - d_type = DT_DIR; + } else if (IA_ISDIR(buf->ia_type)) { + d_type = DT_DIR; - } else if (IA_ISFIFO (buf->ia_type)) { - d_type = DT_FIFO; + } else if (IA_ISFIFO(buf->ia_type)) { + d_type = DT_FIFO; - } else if (IA_ISSOCK (buf->ia_type)) { - d_type = DT_SOCK; + } else if (IA_ISSOCK(buf->ia_type)) { + d_type = DT_SOCK; - } else if (IA_ISCHR (buf->ia_type)) { - d_type = DT_CHR; + } else if (IA_ISCHR(buf->ia_type)) { + d_type = DT_CHR; - } else if (IA_ISBLK (buf->ia_type)) { - d_type = DT_BLK; + } else if (IA_ISBLK(buf->ia_type)) { + d_type = DT_BLK; - } else if (IA_ISREG (buf->ia_type)) { - d_type = DT_REG; + } else if (IA_ISREG(buf->ia_type)) { + d_type = DT_REG; - } else { - d_type = DT_UNKNOWN; - } + } else { + d_type = DT_UNKNOWN; + } + + return d_type; +} + +static int +fuse_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; + size_t size = 0; + size_t max_size = 0; + char *buf = NULL; + gf_dirent_t *entry = NULL; + struct fuse_dirent *fde = NULL; + fuse_private_t *priv = NULL; + + state = frame->root->state; + finh = state->finh; + priv = state->this->private; + + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); + + if (op_ret < 0) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": READDIR => -1 (%s)", frame->root->unique, + strerror(op_errno)); + + send_fuse_err(this, finh, op_errno); + goto out; + } + + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 
": READDIR => %d/%" GF_PRI_SIZET ",%" PRId64, + frame->root->unique, op_ret, state->size, state->off); + + list_for_each_entry(entry, &entries->list, list) + { + size_t fde_size = FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + + strlen(entry->d_name)); + max_size += fde_size; + + if (max_size > state->size) { + /* we received too many entries to fit in the reply */ + max_size -= fde_size; + break; + } + } + + if (max_size == 0) { + send_fuse_data(this, finh, 0, 0); + goto out; + } + + buf = GF_CALLOC(1, max_size, gf_fuse_mt_char); + if (!buf) { + gf_log("glusterfs-fuse", GF_LOG_DEBUG, + "%" PRIu64 ": READDIR => -1 (%s)", frame->root->unique, + strerror(ENOMEM)); + send_fuse_err(this, finh, ENOMEM); + goto out; + } + + size = 0; + list_for_each_entry(entry, &entries->list, list) + { + fde = (struct fuse_dirent *)(buf + size); + gf_fuse_fill_dirent(entry, fde, priv->enable_ino32); + size += FUSE_DIRENT_SIZE(fde); + + if (size == max_size) + break; + } + + send_fuse_data(this, finh, buf, size); + + /* TODO: */ + /* gf_link_inodes_from_dirent (this, state->fd->inode, entries); */ + +out: + free_fuse_state(state); + STACK_DESTROY(frame->root); + GF_FREE(buf); + return 0; +} + +void +fuse_readdir_resume(fuse_state_t *state) +{ + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": READDIR (%p, size=%" GF_PRI_SIZET ", offset=%" PRId64 + ")", + state->finh->unique, state->fd, state->size, state->off); - return d_type; + FUSE_FOP(state, fuse_readdir_cbk, GF_FOP_READDIR, readdir, state->fd, + state->size, state->off, state->xdata); } +static void +fuse_readdir(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +{ + struct fuse_read_in *fri = msg; + + fuse_state_t *state = NULL; + fd_t *fd = NULL; + GET_STATE(this, finh, state); + state->size = fri->size; + state->off = fri->offset; + fd = FH_TO_FD(fri->fh); + state->fd = fd; + + fuse_resolve_fd_init(state, &state->resolve, fd); + + fuse_resolve_and_resume(state, fuse_readdir_resume); +} + +#if 
FUSE_KERNEL_MINOR_VERSION >= 20 static int -fuse_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *entries) +fuse_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) { - fuse_state_t *state = NULL; - fuse_in_header_t *finh = NULL; - int size = 0; - char *buf = NULL; - gf_dirent_t *entry = NULL; - struct fuse_dirent *fde = NULL; + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; + size_t max_size = 0; + size_t size = 0; + char *buf = NULL; + gf_dirent_t *entry = NULL; + struct fuse_direntplus *fde = NULL; + struct fuse_entry_out *feo = NULL; + fuse_private_t *priv = NULL; + + state = frame->root->state; + finh = state->finh; + priv = this->private; + + if (op_ret < 0) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": READDIRP => -1 (%s)", frame->root->unique, + strerror(op_errno)); + + send_fuse_err(this, finh, op_errno); + goto out; + } + + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": READDIRP => %d/%" GF_PRI_SIZET ",%" PRId64, + frame->root->unique, op_ret, state->size, state->off); + + list_for_each_entry(entry, &entries->list, list) + { + size_t fdes = FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + + strlen(entry->d_name)); + max_size += fdes; + + if (max_size > state->size) { + /* we received too many entries to fit in the reply */ + max_size -= fdes; + break; + } + } + + if (max_size == 0) { + send_fuse_data(this, finh, 0, 0); + goto out; + } + + buf = GF_CALLOC(1, max_size, gf_fuse_mt_char); + if (!buf) { + gf_log("glusterfs-fuse", GF_LOG_DEBUG, + "%" PRIu64 ": READDIRP => -1 (%s)", frame->root->unique, + strerror(ENOMEM)); + send_fuse_err(this, finh, ENOMEM); + goto out; + } + + size = 0; + list_for_each_entry(entry, &entries->list, list) + { + inode_t *linked_inode; + + fde = (struct fuse_direntplus *)(buf + size); + feo = &fde->entry_out; + + if (priv->enable_ino32) + 
fde->dirent.ino = GF_FUSE_SQUASH_INO(entry->d_ino); + else + fde->dirent.ino = entry->d_ino; - state = frame->root->state; - finh = state->finh; + fde->dirent.off = entry->d_off; + fde->dirent.type = entry->d_type; + fde->dirent.namelen = strlen(entry->d_name); + (void)memcpy(fde->dirent.name, entry->d_name, fde->dirent.namelen); + size += FUSE_DIRENTPLUS_SIZE(fde); - if (op_ret < 0) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": READDIR => -1 (%s)", frame->root->unique, - strerror (op_errno)); + if (!entry->inode) + goto next_entry; - send_fuse_err (this, finh, op_errno); - goto out; - } + entry->d_stat.ia_blksize = this->ctx->page_size; + gf_fuse_stat2attr(&entry->d_stat, &feo->attr, priv->enable_ino32); - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": READDIR => %d/%"GF_PRI_SIZET",%"PRId64, - frame->root->unique, op_ret, state->size, state->off); + linked_inode = inode_link(entry->inode, state->fd->inode, entry->d_name, + &entry->d_stat); + if (!linked_inode) + goto next_entry; - list_for_each_entry (entry, &entries->list, list) { - size += FUSE_DIRENT_ALIGN (FUSE_NAME_OFFSET + - strlen (entry->d_name)); + if (entry->inode != linked_inode) { + memset(&entry->d_stat, 0, sizeof(entry->d_stat)); } - buf = GF_CALLOC (1, size, gf_fuse_mt_char); - if (!buf) { - gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "%"PRIu64": READDIR => -1 (%s)", frame->root->unique, - strerror (ENOMEM)); - send_fuse_err (this, finh, ENOMEM); - goto out; + feo->nodeid = inode_to_fuse_nodeid(linked_inode); + + if (!((strcmp(entry->d_name, ".") == 0) || + (strcmp(entry->d_name, "..") == 0))) { + inode_lookup(linked_inode); } - size = 0; - list_for_each_entry (entry, &entries->list, list) { - fde = (struct fuse_dirent *)(buf + size); - fde->ino = entry->d_ino; - fde->off = entry->d_off; - fde->namelen = strlen (entry->d_name); - strncpy (fde->name, entry->d_name, fde->namelen); - size += FUSE_DIRENT_SIZE (fde); + inode_unref(linked_inode); + + feo->entry_valid = 
calc_timeout_sec(priv->entry_timeout); + feo->entry_valid_nsec = calc_timeout_nsec(priv->entry_timeout); + + if (entry->d_stat.ia_ctime) { + feo->attr_valid = calc_timeout_sec(priv->attribute_timeout); + feo->attr_valid_nsec = calc_timeout_nsec(priv->attribute_timeout); + } else { + feo->attr_valid = feo->attr_valid_nsec = 0; } - send_fuse_data (this, finh, buf, size); + next_entry: + if (size == max_size) + break; + } + send_fuse_data(this, finh, buf, size); out: - free_fuse_state (state); - STACK_DESTROY (frame->root); - if (buf) - GF_FREE (buf); - return 0; - + free_fuse_state(state); + STACK_DESTROY(frame->root); + GF_FREE(buf); + return 0; } void -fuse_readdir_resume (fuse_state_t *state) +fuse_readdirp_resume(fuse_state_t *state) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": READDIR (%p, size=%zu, offset=%"PRId64")", - state->finh->unique, state->fd, state->size, state->off); + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": READDIRP (%p, size=%" GF_PRI_SIZET ", offset=%" PRId64 + ")", + state->finh->unique, state->fd, state->size, state->off); - FUSE_FOP (state, fuse_readdir_cbk, GF_FOP_READDIR, - readdir, state->fd, state->size, state->off); + FUSE_FOP(state, fuse_readdirp_cbk, GF_FOP_READDIRP, readdirp, state->fd, + state->size, state->off, state->xdata); } static void -fuse_readdir (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_readdirp(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - struct fuse_read_in *fri = msg; + struct fuse_read_in *fri = msg; - fuse_state_t *state = NULL; - fd_t *fd = NULL; + fuse_state_t *state = NULL; + fd_t *fd = NULL; - GET_STATE (this, finh, state); - state->size = fri->size; - state->off = fri->offset; - fd = FH_TO_FD (fri->fh); - state->fd = fd; + GET_STATE(this, finh, state); + state->size = fri->size; + state->off = fri->offset; + fd = FH_TO_FD(fri->fh); + state->fd = fd; - fuse_resolve_and_resume (state, fuse_readdir_resume); + fuse_resolve_fd_init(state, 
&state->resolve, fd); + + fuse_resolve_and_resume(state, fuse_readdirp_resume); } +#endif +#if FUSE_KERNEL_MINOR_VERSION >= 19 +#ifdef FALLOC_FL_KEEP_SIZE +static int +fuse_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + return fuse_err_cbk(frame, cookie, this, op_ret, op_errno, xdata); +} static void -fuse_releasedir (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_fallocate_resume(fuse_state_t *state) { - struct fuse_release_in *fri = msg; - fd_t *new_fd = NULL; - uint64_t val = 0; - int ret = 0; - fuse_state_t *state = NULL; - fuse_fd_ctx_t *fdctx = NULL; + gf_log( + "glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": FALLOCATE (%p, flags=%d, size=%zu, offset=%" PRId64 ")", + state->finh->unique, state->fd, state->flags, state->size, state->off); + + if (state->flags & FALLOC_FL_PUNCH_HOLE) + FUSE_FOP(state, fuse_fallocate_cbk, GF_FOP_DISCARD, discard, state->fd, + state->off, state->size, state->xdata); + else + FUSE_FOP(state, fuse_fallocate_cbk, GF_FOP_FALLOCATE, fallocate, + state->fd, (state->flags & FALLOC_FL_KEEP_SIZE), state->off, + state->size, state->xdata); +} - GET_STATE (this, finh, state); - state->fd = FH_TO_FD (fri->fh); +static void +fuse_fallocate(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +{ + struct fuse_fallocate_in *ffi = msg; + fuse_state_t *state = NULL; - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": RELEASEDIR %p", finh->unique, state->fd); + GET_STATE(this, finh, state); + state->off = ffi->offset; + state->size = ffi->length; + state->flags = ffi->mode; + state->fd = FH_TO_FD(ffi->fh); - ret = fd_ctx_del (state->fd, this, &val); + fuse_resolve_fd_init(state, &state->resolve, state->fd); + fuse_resolve_and_resume(state, fuse_fallocate_resume); +} +#endif /* FALLOC_FL_KEEP_SIZE */ +#endif /* FUSE minor version >= 19 */ - if (!ret) { - fdctx = (fuse_fd_ctx_t *)(unsigned 
long)val; - if (fdctx) { - new_fd = fdctx->fd; - if (new_fd) { - fd_unref (new_fd); - } +static void +fuse_releasedir(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +{ + struct fuse_release_in *fri = msg; + fuse_state_t *state = NULL; + fuse_private_t *priv = NULL; - GF_FREE (fdctx); - } - } + GET_STATE(this, finh, state); + state->fd = FH_TO_FD(fri->fh); + if (!state->fd) + goto out; - fd_unref (state->fd); + priv = this->private; - send_fuse_err (this, finh, 0); + fuse_log_eh(this, + "RELEASEDIR (): finh->unique: %" PRIu64 ": fd: %p, gfid: %s", + finh->unique, state->fd, uuid_utoa(state->fd->inode->gfid)); - free_fuse_state (state); + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "finh->unique: %" PRIu64 ": RELEASEDIR %p", finh->unique, state->fd); - return; + fuse_fd_ctx_destroy(this, state->fd); + fd_unref(state->fd); + + gf_fdptr_put(priv->fdtable, state->fd); + + state->fd = NULL; + +out: + send_fuse_err(this, finh, 0); + + free_fuse_state(state); + + return; } void -fuse_fsyncdir_resume (fuse_state_t *state) +fuse_fsyncdir_resume(fuse_state_t *state) { - FUSE_FOP (state, fuse_err_cbk, GF_FOP_FSYNCDIR, - fsyncdir, state->fd, state->flags & 1); - + FUSE_FOP(state, fuse_err_cbk, GF_FOP_FSYNCDIR, fsyncdir, state->fd, + (state->flags & 1), state->xdata); } static void -fuse_fsyncdir (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_fsyncdir(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - struct fuse_fsync_in *fsi = msg; + struct fuse_fsync_in *fsi = msg; - fuse_state_t *state = NULL; - fd_t *fd = NULL; + fuse_state_t *state = NULL; + fd_t *fd = NULL; - fd = FH_TO_FD (fsi->fh); + fd = FH_TO_FD(fsi->fh); - GET_STATE (this, finh, state); - state->fd = fd; + GET_STATE(this, finh, state); + state->fd = fd; - state->flags = fsi->fsync_flags; - fuse_resolve_and_resume (state, fuse_fsyncdir_resume); + fuse_resolve_fd_init(state, &state->resolve, fd); - return; -} + state->flags = fsi->fsync_flags; + 
fuse_resolve_and_resume(state, fuse_fsyncdir_resume); + return; +} static int -fuse_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct statvfs *buf) -{ - fuse_state_t *state = NULL; - fuse_in_header_t *finh = NULL; - fuse_private_t *priv = NULL; - struct fuse_statfs_out fso = {{0, }, }; - - state = frame->root->state; - priv = this->private; - finh = state->finh; - - if (op_ret == 0) { -#ifndef GF_DARWIN_HOST_OS - /* MacFUSE doesn't respect anyof these tweaks */ - buf->f_blocks *= buf->f_frsize; - buf->f_blocks /= this->ctx->page_size; - - buf->f_bavail *= buf->f_frsize; - buf->f_bavail /= this->ctx->page_size; - - buf->f_bfree *= buf->f_frsize; - buf->f_bfree /= this->ctx->page_size; - - buf->f_frsize = buf->f_bsize =this->ctx->page_size; -#endif /* GF_DARWIN_HOST_OS */ - fso.st.bsize = buf->f_bsize; - fso.st.frsize = buf->f_frsize; - fso.st.blocks = buf->f_blocks; - fso.st.bfree = buf->f_bfree; - fso.st.bavail = buf->f_bavail; - fso.st.files = buf->f_files; - fso.st.ffree = buf->f_ffree; - fso.st.namelen = buf->f_namemax; - - priv->proto_minor >= 4 ? 
- send_fuse_obj (this, finh, &fso) : - send_fuse_data (this, finh, &fso, FUSE_COMPAT_STATFS_SIZE); - } else { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": ERR => -1 (%s)", frame->root->unique, - strerror (op_errno)); - send_fuse_err (this, finh, op_errno); - } +fuse_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct statvfs *buf, + dict_t *xdata) +{ + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; + fuse_private_t *priv = NULL; + struct fuse_statfs_out fso = { + { + 0, + }, + }; + + state = frame->root->state; + priv = this->private; + finh = state->finh; + + fuse_log_eh(this, "op_ret: %d, op_errno: %d, %" PRIu64 ": %s()", op_ret, + op_errno, frame->root->unique, gf_fop_list[frame->root->op]); + + if (op_ret == 0) { + fso.st.bsize = buf->f_bsize; + fso.st.frsize = buf->f_frsize; + fso.st.blocks = buf->f_blocks; + fso.st.bfree = buf->f_bfree; + fso.st.bavail = buf->f_bavail; + fso.st.files = buf->f_files; + fso.st.ffree = buf->f_ffree; + fso.st.namelen = buf->f_namemax; + + priv->proto_minor >= 4 + ? 
send_fuse_obj(this, finh, &fso) + : send_fuse_data(this, finh, &fso, FUSE_COMPAT_STATFS_SIZE); + } else { + /* facilitate retry from VFS */ + if (op_errno == ENOENT) + op_errno = ESTALE; + + gf_log("glusterfs-fuse", GF_LOG_WARNING, "%" PRIu64 ": ERR => -1 (%s)", + frame->root->unique, strerror(op_errno)); + + send_fuse_err(this, finh, op_errno); + } + + free_fuse_state(state); + STACK_DESTROY(frame->root); + + return 0; +} - free_fuse_state (state); - STACK_DESTROY (frame->root); +void +fuse_statfs_resume(fuse_state_t *state) +{ + if (!state->loc.inode) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": STATFS (%s) resolution fail", state->finh->unique, + uuid_utoa(state->resolve.gfid)); - return 0; + /* facilitate retry from VFS */ + if (state->resolve.op_errno == ENOENT) + state->resolve.op_errno = ESTALE; + + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + free_fuse_state(state); + return; + } + + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": STATFS", + state->finh->unique); + + FUSE_FOP(state, fuse_statfs_cbk, GF_FOP_STATFS, statfs, &state->loc, + state->xdata); } static void -fuse_statfs (xlator_t *this, fuse_in_header_t *finh, void *msg) -{ - fuse_state_t *state = NULL; - int32_t ret = -1; - - GET_STATE (this, finh, state); - ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL); - if ((state->loc.inode == NULL) || - (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": STATFS (fuse_loc_fill() fail)", - finh->unique); - - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); - return; - } +fuse_statfs(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +{ + fuse_state_t *state = NULL; - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": STATFS", finh->unique); + GET_STATE(this, finh, state); - FUSE_FOP (state, fuse_statfs_cbk, GF_FOP_STATFS, - statfs, &state->loc); + fuse_resolve_inode_init(state, &state->resolve, finh->nodeid); + + fuse_resolve_and_resume(state, 
fuse_statfs_resume); } void -fuse_setxattr_resume (fuse_state_t *state) +fuse_setxattr_resume(fuse_state_t *state) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": SETXATTR %s/%"PRIu64" (%s)", state->finh->unique, - state->loc.path, state->finh->nodeid, state->name); - - FUSE_FOP (state, fuse_setxattr_cbk, GF_FOP_SETXATTR, - setxattr, &state->loc, state->dict, state->flags); + if (!state->loc.inode) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": SETXATTR %s/%" PRIu64 + " (%s) " + "resolution failed", + state->finh->unique, uuid_utoa(state->resolve.gfid), + state->finh->nodeid, state->name); + + /* facilitate retry from VFS */ + if (state->resolve.op_errno == ENOENT) + state->resolve.op_errno = ESTALE; + + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + free_fuse_state(state); + return; + } + +#ifdef GF_TEST_FFOP + state->fd = fd_lookup(state->loc.inode, state->finh->pid); +#endif /* GF_TEST_FFOP */ + + if (state->fd) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": SETXATTR %p/%" PRIu64 " (%s)", state->finh->unique, + state->fd, state->finh->nodeid, state->name); + + FUSE_FOP(state, fuse_setxattr_cbk, GF_FOP_FSETXATTR, fsetxattr, + state->fd, state->xattr, state->flags, state->xdata); + } else { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": SETXATTR %s/%" PRIu64 " (%s)", state->finh->unique, + state->loc.path, state->finh->nodeid, state->name); + + FUSE_FOP(state, fuse_setxattr_cbk, GF_FOP_SETXATTR, setxattr, + &state->loc, state->xattr, state->flags, state->xdata); + } } static void -fuse_setxattr (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - struct fuse_setxattr_in *fsi = msg; - char *name = (char *)(fsi + 1); - char *value = name + strlen (name) + 1; - struct fuse_private *priv = NULL; + struct fuse_setxattr_in *fsi = msg; + char *name = (char *)(fsi + 1); + char *value = name + strlen(name) + 1; + 
struct fuse_private *priv = NULL; + + fuse_state_t *state = NULL; + char *dict_value = NULL; + int32_t ret = -1; + int32_t op_errno = 0; + char *newkey = NULL; - fuse_state_t *state = NULL; - char *dict_value = NULL; - int32_t ret = -1; - char *newkey = NULL; + priv = this->private; - priv = this->private; + GET_STATE(this, finh, state); #ifdef GF_DARWIN_HOST_OS - if (fsi->position) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": SETXATTR %s/%"PRIu64" (%s):" - "refusing positioned setxattr", - finh->unique, state->loc.path, finh->nodeid, name); - send_fuse_err (this, finh, EINVAL); - FREE (finh); - return; - } + if (fsi->position) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": SETXATTR %s/%" PRIu64 + " (%s):" + "refusing positioned setxattr", + finh->unique, state->loc.path, finh->nodeid, name); + op_errno = EINVAL; + goto done; + } #endif - if (!priv->acl) { - if ((strcmp (name, "system.posix_acl_access") == 0) || - (strcmp (name, "system.posix_acl_default") == 0)) { - send_fuse_err (this, finh, EOPNOTSUPP); - GF_FREE (finh); - return; - } - } - -#ifdef DISABLE_SELINUX - if (!strncmp (name, "security.", 9)) { - send_fuse_err (this, finh, EOPNOTSUPP); - GF_FREE (finh); - return; - } + if (fuse_ignore_xattr_set(priv, name)) { + goto done; + } + + if (!priv->acl) { + if ((strcmp(name, POSIX_ACL_ACCESS_XATTR) == 0) || + (strcmp(name, POSIX_ACL_DEFAULT_XATTR) == 0)) { + op_errno = EOPNOTSUPP; + goto done; + } + } + + ret = fuse_check_selinux_cap_xattr(priv, name); + if (ret) { + op_errno = EOPNOTSUPP; + goto done; + } + + /* Check if the command is for changing the log + level of process or specific xlator */ + ret = is_gf_log_command(this, name, value, fsi->size); + if (ret >= 0) { + op_errno = ret; + goto done; + } + + if (!strcmp("inode-invalidate", name)) { + gf_log("fuse", GF_LOG_TRACE, "got request to invalidate %" PRIu64, + finh->nodeid); +#if FUSE_KERNEL_MINOR_VERSION >= 11 + ret = fuse_invalidate_entry(this, finh->nodeid); + if (ret) 
+ op_errno = EBUSY; #endif + goto done; + } - /* Check if the command is for changing the log - level of process or specific xlator */ - ret = is_gf_log_command (this, name, value); - if (ret >= 0) { - send_fuse_err (this, finh, ret); - GF_FREE (finh); - return; - } - - if (!strcmp ("inode-invalidate", name)) { - gf_log ("fuse", GF_LOG_TRACE, - "got request to invalidate %"PRIu64, finh->nodeid); - send_fuse_err (this, finh, 0); - fuse_invalidate (this, finh->nodeid); - GF_FREE (finh); - return; - } - - if (!strcmp (GFID_XATTR_KEY, name)) { - send_fuse_err (this, finh, EPERM); - GF_FREE (finh); - return; - } + if (!strcmp(GFID_XATTR_KEY, name) || !strcmp(GF_XATTR_VOL_ID_KEY, name)) { + op_errno = EPERM; + goto done; + } - GET_STATE (this, finh, state); - state->size = fsi->size; - ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL); - if ((state->loc.inode == NULL) || - (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": SETXATTR %s/%"PRIu64" (%s) (fuse_loc_fill() failed)", - finh->unique, - state->loc.path, finh->nodeid, name); - - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); - return; - } + state->size = fsi->size; - state->dict = get_new_dict (); - if (!state->dict) { - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "%"PRIu64": SETXATTR dict allocation failed", - finh->unique); + fuse_resolve_inode_init(state, &state->resolve, finh->nodeid); - send_fuse_err (this, finh, ENOMEM); - free_fuse_state (state); - return; - } + state->xattr = dict_new(); + if (!state->xattr) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "%" PRIu64 ": SETXATTR dict allocation failed", finh->unique); + op_errno = ENOMEM; + goto done; + } - ret = fuse_flip_xattr_ns (priv, name, &newkey); - if (ret) { - send_fuse_err (this, finh, ENOMEM); - free_fuse_state (state); - return; - } + ret = fuse_flip_xattr_ns(priv, name, &newkey); + if (ret) { + op_errno = ENOMEM; + goto done; + } - if (fsi->size > 0) { - dict_value = memdup (value, fsi->size); - } 
else { - gf_log (THIS->name, GF_LOG_ERROR, "value size zero"); - dict_value = NULL; - } - dict_set (state->dict, newkey, - data_from_dynptr ((void *)dict_value, fsi->size)); - dict_ref (state->dict); + if (fsi->size > 0) { + /* + * Many translators expect setxattr values to be strings, but + * neither dict_get_str nor data_to_str do any checking or + * fixups to make sure that's the case. To avoid nasty + * surprises, allocate an extra byte and add a NUL here. + */ + dict_value = GF_MALLOC(fsi->size + 1, gf_common_mt_char); + if (dict_value == NULL) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "%" PRIu64 ": SETXATTR value allocation failed", + finh->unique); + op_errno = ENOMEM; + GF_FREE(newkey); + goto done; + } + memcpy(dict_value, value, fsi->size); + dict_value[fsi->size] = '\0'; + } + ret = dict_set_dynptr(state->xattr, newkey, dict_value, fsi->size); + if (ret < 0) { + op_errno = -ret; + GF_FREE(dict_value); + GF_FREE(newkey); + goto done; + } + + state->flags = fsi->flags; + state->name = newkey; + + fuse_resolve_and_resume(state, fuse_setxattr_resume); + + return; + +done: + send_fuse_err(this, finh, op_errno); + free_fuse_state(state); +} - state->flags = fsi->flags; - state->name = newkey; +static void +send_fuse_xattr(xlator_t *this, fuse_in_header_t *finh, const char *value, + size_t size, size_t expected) +{ + struct fuse_getxattr_out fgxo; + + /* linux kernel limits the size of xattr value to 64k */ + if (size > GLUSTERFS_XATTR_LEN_MAX) + send_fuse_err(this, finh, E2BIG); + else if (expected) { + /* if callback for getxattr and asks for value */ + if (size > expected) + /* reply would be bigger than + * what was asked by kernel */ + send_fuse_err(this, finh, ERANGE); + else + send_fuse_data(this, finh, (void *)value, size); + } else { + fgxo.size = size; + send_fuse_obj(this, finh, &fgxo); + } +} - uuid_copy (state->resolve.gfid, state->loc.inode->gfid); - state->resolve.path = gf_strdup (state->loc.path); +/* filter out xattrs that need not be 
visible on the + * mount point. this is _specifically_ for geo-rep + * as of now, to prevent Rsync from crying out loud + * when it tries to setxattr() for selinux xattrs + */ +static int +fuse_filter_xattr(char *key) +{ + int need_filter = 0; + struct fuse_private *priv = THIS->private; - fuse_resolve_and_resume (state, fuse_setxattr_resume); + if ((priv->client_pid == GF_CLIENT_PID_GSYNCD) && + fnmatch("*.selinux*", key, FNM_PERIOD) == 0) + need_filter = 1; - return; + return need_filter; } -static void -send_fuse_xattr (xlator_t *this, fuse_in_header_t *finh, const char *value, - size_t size, size_t expected) -{ - struct fuse_getxattr_out fgxo; - - /* linux kernel limits the size of xattr value to 64k */ - if (size > GLUSTERFS_XATTR_LEN_MAX) - send_fuse_err (this, finh, E2BIG); - else if (expected) { - /* if callback for getxattr and asks for value */ - if (size > expected) - /* reply would be bigger than - * what was asked by kernel */ - send_fuse_err (this, finh, ERANGE); - else - send_fuse_data (this, finh, (void *)value, size); +static int +fuse_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) +{ + char *value = ""; + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; + data_t *value_data = NULL; + int ret = -1; + int32_t len = 0; + int32_t len_next = 0; + + state = frame->root->state; + finh = state->finh; + + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); + + if (op_ret >= 0) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": %s() %s => %d", + frame->root->unique, gf_fop_list[frame->root->op], + state->loc.path, op_ret); + + /* if successful */ + if (state->name) { + /* if callback for getxattr */ + value_data = dict_get(dict, state->name); + if (value_data) { + ret = value_data->len; /* Don't return the value for '\0' */ + value = value_data->data; + + send_fuse_xattr(this, finh, value, ret, state->size); + /* if(ret >...)...else if...else */ + } else { + 
send_fuse_err(this, finh, ENODATA); + } /* if(value_data)...else */ } else { - fgxo.size = size; - send_fuse_obj (this, finh, &fgxo); - } -} + /* if callback for listxattr */ + /* we need to invoke fuse_filter_xattr() twice. Once + * while counting size and then while filling buffer + */ + len = dict_keys_join(NULL, 0, dict, fuse_filter_xattr); + if (len < 0) + goto out; -static int -fuse_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) -{ - int need_to_free_dict = 0; - char *value = ""; - fuse_state_t *state = NULL; - fuse_in_header_t *finh = NULL; - data_t *value_data = NULL; - int ret = -1; - int32_t len = 0; - data_pair_t *trav = NULL; - - state = frame->root->state; - finh = state->finh; - - if (op_ret >= 0) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": %s() %s => %d", frame->root->unique, - gf_fop_list[frame->root->op], state->loc.path, op_ret); - - /* if successful */ - if (state->name) { - /* if callback for getxattr */ - value_data = dict_get (dict, state->name); - if (value_data) { - - ret = value_data->len; /* Don't return the value for '\0' */ - value = value_data->data; - - send_fuse_xattr (this, finh, value, ret, state->size); - /* if(ret >...)...else if...else */ - } else { - send_fuse_err (this, finh, ENODATA); - } /* if(value_data)...else */ - } else { - /* if callback for listxattr */ - trav = dict->members_list; - while (trav) { - len += strlen (trav->key) + 1; - trav = trav->next; - } /* while(trav) */ - value = alloca (len + 1); - if (!value) - goto out; - len = 0; - trav = dict->members_list; - while (trav) { - strcpy (value + len, trav->key); - value[len + strlen (trav->key)] = '\0'; - len += strlen (trav->key) + 1; - trav = trav->next; - } /* while(trav) */ - send_fuse_xattr (this, finh, value, len, state->size); - } /* if(state->name)...else */ + value = alloca(len + 1); + if (!value) + goto out; + + len_next = dict_keys_join(value, len, dict, fuse_filter_xattr); + 
if (len_next != len) + gf_log(THIS->name, GF_LOG_ERROR, "sizes not equal %d != %d", + len, len_next); + + send_fuse_xattr(this, finh, value, len, state->size); + } /* if(state->name)...else */ + } else { + /* facilitate retry from VFS */ + if ((state->fd == NULL) && (op_errno == ENOENT)) { + op_errno = ESTALE; + } + + /* if failure - no need to check if listxattr or getxattr */ + if (op_errno != ENODATA && op_errno != ENOATTR) { + if (op_errno == ENOTSUP) { + GF_LOG_OCCASIONALLY(gf_fuse_xattr_enotsup_log, "glusterfs-fuse", + GF_LOG_ERROR, + "extended attribute not " + "supported by the backend " + "storage"); + } else { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": %s(%s) %s => -1 (%s)", frame->root->unique, + gf_fop_list[frame->root->op], state->name, + state->loc.path, strerror(op_errno)); + } } else { - /* if failure - no need to check if listxattr or getxattr */ - if (op_errno != ENODATA) { - if (op_errno == ENOTSUP) { - GF_LOG_OCCASIONALLY (gf_fuse_xattr_enotsup_log, - "glusterfs-fuse", - GF_LOG_ERROR, - "extended attribute not " - "supported by the backend " - "storage"); - } else { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": %s(%s) %s => -1 (%s)", - frame->root->unique, - gf_fop_list[frame->root->op], state->name, - state->loc.path, strerror (op_errno)); - } - } else { - gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "%"PRIu64": %s(%s) %s => -1 (%s)", - frame->root->unique, - gf_fop_list[frame->root->op], state->name, - state->loc.path, strerror (op_errno)); - } /* if(op_errno!= ENODATA)...else */ + gf_log("glusterfs-fuse", GF_LOG_DEBUG, + "%" PRIu64 ": %s(%s) %s => -1 (%s)", frame->root->unique, + gf_fop_list[frame->root->op], state->name, state->loc.path, + strerror(op_errno)); + } /* if(op_errno!= ENODATA)...else */ - send_fuse_err (this, finh, op_errno); - } /* if(op_ret>=0)...else */ + send_fuse_err(this, finh, op_errno); + } /* if(op_ret>=0)...else */ out: - if (need_to_free_dict) - dict_unref (dict); - - free_fuse_state (state); 
- STACK_DESTROY (frame->root); + free_fuse_state(state); + STACK_DESTROY(frame->root); - return 0; + return 0; } void -fuse_getxattr_resume (fuse_state_t *state) +fuse_getxattr_resume(fuse_state_t *state) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": GETXATTR %s/%"PRIu64" (%s)", state->finh->unique, - state->loc.path, state->finh->nodeid, state->name); + char *value = NULL; + + if (!state->loc.inode) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": GETXATTR %s/%" PRIu64 + " (%s) " + "resolution failed", + state->finh->unique, uuid_utoa(state->resolve.gfid), + state->finh->nodeid, state->name); + + /* facilitate retry from VFS */ + if (state->resolve.op_errno == ENOENT) + state->resolve.op_errno = ESTALE; + + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + free_fuse_state(state); + return; + } - FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_GETXATTR, - getxattr, &state->loc, state->name); -} +#ifdef GF_TEST_FFOP + state->fd = fd_lookup(state->loc.inode, state->finh->pid); +#endif /* GF_TEST_FFOP */ -static void -fuse_getxattr (xlator_t *this, fuse_in_header_t *finh, void *msg) -{ - struct fuse_getxattr_in *fgxi = msg; - char *name = (char *)(fgxi + 1); + if (state->name && (strcmp(state->name, VIRTUAL_GFID_XATTR_KEY) == 0)) { + /* send glusterfs gfid in binary form */ - fuse_state_t *state = NULL; - int32_t ret = -1; - struct fuse_private *priv = NULL; - int rv = 0; - char *newkey = NULL; + value = GF_MALLOC(16 + 1, gf_common_mt_char); + if (!value) { + send_fuse_err(state->this, state->finh, ENOMEM); + goto internal_out; + } + memcpy(value, state->loc.inode->gfid, 16); + value[16] = '\0'; - priv = this->private; + send_fuse_xattr(THIS, state->finh, value, 16, state->size); + GF_FREE(value); + internal_out: + free_fuse_state(state); + return; + } -#ifdef GF_DARWIN_HOST_OS - if (fgxi->position) { - /* position can be used only for - * resource fork queries which we - * don't support anyway... 
so handling - * it separately is just sort of a - * matter of aesthetics, not strictly - * necessary. - */ + if (state->name && (strcmp(state->name, VIRTUAL_GFID_XATTR_KEY_STR) == 0)) { + /* transform binary gfid to canonical form */ - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": GETXATTR %s/%"PRIu64" (%s):" - "refusing positioned getxattr", - finh->unique, state->loc.path, finh->nodeid, name); - send_fuse_err (this, finh, EINVAL); - FREE (finh); - return; + value = GF_CALLOC(UUID_CANONICAL_FORM_LEN + 1, sizeof(char), + gf_common_mt_char); + if (!value) { + send_fuse_err(state->this, state->finh, ENOMEM); + goto internal_out1; } -#endif + uuid_utoa_r(state->loc.inode->gfid, value); - if (!priv->acl) { - if ((strcmp (name, "system.posix_acl_access") == 0) || - (strcmp (name, "system.posix_acl_default") == 0)) { - send_fuse_err (this, finh, ENOTSUP); - GF_FREE (finh); - return; - } - } + send_fuse_xattr(THIS, state->finh, value, UUID_CANONICAL_FORM_LEN, + state->size); + GF_FREE(value); + internal_out1: + free_fuse_state(state); + return; + } + + if (state->fd) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": GETXATTR %p/%" PRIu64 " (%s)", state->finh->unique, + state->fd, state->finh->nodeid, state->name); + + FUSE_FOP(state, fuse_xattr_cbk, GF_FOP_FGETXATTR, fgetxattr, state->fd, + state->name, state->xdata); + } else { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": GETXATTR %s/%" PRIu64 " (%s)", state->finh->unique, + state->loc.path, state->finh->nodeid, state->name); + + FUSE_FOP(state, fuse_xattr_cbk, GF_FOP_GETXATTR, getxattr, &state->loc, + state->name, state->xdata); + } +} -#ifdef DISABLE_SELINUX - if (!strncmp (name, "security.", 9)) { - send_fuse_err (this, finh, ENODATA); - GF_FREE (finh); - return; - } -#endif +static void +fuse_getxattr(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +{ + struct fuse_getxattr_in *fgxi = msg; + char *name = (char *)(fgxi + 1); + fuse_state_t *state = NULL; + 
struct fuse_private *priv = NULL; + int rv = 0; + int op_errno = EINVAL; + char *newkey = NULL; + int ret = 0; + + priv = this->private; + GET_STATE(this, finh, state); - GET_STATE (this, finh, state); +#ifdef GF_DARWIN_HOST_OS + if (fgxi->position) { + /* position can be used only for + * resource fork queries which we + * don't support anyway... so handling + * it separately is just sort of a + * matter of aesthetics, not strictly + * necessary. + */ - ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL); - if ((state->loc.inode == NULL) || - (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": GETXATTR %s/%"PRIu64" (%s) (fuse_loc_fill() failed)", - finh->unique, state->loc.path, finh->nodeid, name); + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": GETXATTR %s/%" PRIu64 + " (%s):" + "refusing positioned getxattr", + finh->unique, state->loc.path, finh->nodeid, name); + op_errno = EINVAL; + goto err; + } +#endif - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); - return; + if (!priv->acl) { + if ((strcmp(name, POSIX_ACL_ACCESS_XATTR) == 0) || + (strcmp(name, POSIX_ACL_DEFAULT_XATTR) == 0)) { + op_errno = ENOTSUP; + goto err; } + } - rv = fuse_flip_xattr_ns (priv, name, &newkey); - if (rv) { - send_fuse_err (this, finh, ENOMEM); - free_fuse_state (state); - goto out; - } + ret = fuse_check_selinux_cap_xattr(priv, name); + if (ret) { + op_errno = ENODATA; + goto err; + } - state->size = fgxi->size; - state->name = newkey; + fuse_resolve_inode_init(state, &state->resolve, finh->nodeid); - uuid_copy (state->resolve.gfid, state->loc.inode->gfid); - state->resolve.path = gf_strdup (state->loc.path); + rv = fuse_flip_xattr_ns(priv, name, &newkey); + if (rv) { + op_errno = ENOMEM; + goto err; + } - fuse_resolve_and_resume (state, fuse_getxattr_resume); - out: - return; + state->size = fgxi->size; + state->name = newkey; + + fuse_resolve_and_resume(state, fuse_getxattr_resume); + + return; +err: + send_fuse_err(this, 
finh, op_errno); + free_fuse_state(state); + return; } void -fuse_listxattr_resume (fuse_state_t *state) +fuse_listxattr_resume(fuse_state_t *state) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": LISTXATTR %s/%"PRIu64, state->finh->unique, - state->loc.path, state->finh->nodeid); - - FUSE_FOP (state, fuse_xattr_cbk, GF_FOP_GETXATTR, - getxattr, &state->loc, NULL); + if (!state->loc.inode) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": LISTXATTR %s/%" PRIu64 " resolution failed", + state->finh->unique, uuid_utoa(state->resolve.gfid), + state->finh->nodeid); + + /* facilitate retry from VFS */ + if (state->resolve.op_errno == ENOENT) + state->resolve.op_errno = ESTALE; + + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + free_fuse_state(state); + return; + } + +#ifdef GF_TEST_FFOP + state->fd = fd_lookup(state->loc.inode, state->finh->pid); +#endif /* GF_TEST_FFOP */ + + if (state->fd) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": LISTXATTR %p/%" PRIu64, state->finh->unique, + state->fd, state->finh->nodeid); + + FUSE_FOP(state, fuse_xattr_cbk, GF_FOP_FGETXATTR, fgetxattr, state->fd, + NULL, state->xdata); + } else { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": LISTXATTR %s/%" PRIu64, state->finh->unique, + state->loc.path, state->finh->nodeid); + + FUSE_FOP(state, fuse_xattr_cbk, GF_FOP_GETXATTR, getxattr, &state->loc, + NULL, state->xdata); + } } static void -fuse_listxattr (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_listxattr(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - struct fuse_getxattr_in *fgxi = msg; + struct fuse_getxattr_in *fgxi = msg; + fuse_state_t *state = NULL; - fuse_state_t *state = NULL; - int32_t ret = -1; + GET_STATE(this, finh, state); - GET_STATE (this, finh, state); + fuse_resolve_inode_init(state, &state->resolve, finh->nodeid); - ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL); - if ((state->loc.inode == NULL) || 
- (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": LISTXATTR %s/%"PRIu64" (fuse_loc_fill() failed)", - finh->unique, state->loc.path, finh->nodeid); + state->size = fgxi->size; - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); - return; - } - - state->size = fgxi->size; - uuid_copy (state->resolve.gfid, state->loc.inode->gfid); - state->resolve.path = gf_strdup (state->loc.path); - - fuse_resolve_and_resume (state, fuse_listxattr_resume); + fuse_resolve_and_resume(state, fuse_listxattr_resume); - return; + return; } void -fuse_removexattr_resume (fuse_state_t *state) +fuse_removexattr_resume(fuse_state_t *state) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": REMOVEXATTR %s/%"PRIu64" (%s)", state->finh->unique, - state->loc.path, state->finh->nodeid, state->name); - - FUSE_FOP (state, fuse_err_cbk, GF_FOP_REMOVEXATTR, - removexattr, &state->loc, state->name); + if (!state->loc.inode) { + gf_log("glusterfs-fuse", GF_LOG_DEBUG, + "%" PRIu64 ": REMOVEXATTR %s/%" PRIu64 + " (%s) " + "resolution failed", + state->finh->unique, uuid_utoa(state->resolve.gfid), + state->finh->nodeid, state->name); + + /* facilitate retry from VFS */ + if (state->resolve.op_errno == ENOENT) + state->resolve.op_errno = ESTALE; + + send_fuse_err(state->this, state->finh, state->resolve.op_errno); + free_fuse_state(state); + return; + } + +#ifdef GF_TEST_FFOP + state->fd = fd_lookup(state->loc.inode, state->finh->pid); +#endif /* GF_TEST_FFOP */ + + if (state->fd) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": REMOVEXATTR %p/%" PRIu64 " (%s)", + state->finh->unique, state->fd, state->finh->nodeid, + state->name); + + FUSE_FOP(state, fuse_removexattr_cbk, GF_FOP_FREMOVEXATTR, fremovexattr, + state->fd, state->name, state->xdata); + } else { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": REMOVEXATTR %s/%" PRIu64 " (%s)", + state->finh->unique, state->loc.path, state->finh->nodeid, + state->name); + + FUSE_FOP(state, 
fuse_removexattr_cbk, GF_FOP_REMOVEXATTR, removexattr, + &state->loc, state->name, state->xdata); + } } static void -fuse_removexattr (xlator_t *this, fuse_in_header_t *finh, void *msg) - +fuse_removexattr(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - char *name = msg; + char *name = msg; - fuse_state_t *state = NULL; - fuse_private_t *priv = NULL; - int32_t ret = -1; - char *newkey = NULL; + fuse_state_t *state = NULL; + fuse_private_t *priv = NULL; + int32_t ret = -1; + char *newkey = NULL; - priv = this->private; - - GET_STATE (this, finh, state); - ret = fuse_loc_fill (&state->loc, state, finh->nodeid, 0, NULL); - if ((state->loc.inode == NULL) || - (ret < 0)) { - gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "%"PRIu64": REMOVEXATTR %s/%"PRIu64" (%s) (fuse_loc_fill() failed)", - finh->unique, state->loc.path, finh->nodeid, name); + if (!strcmp(GFID_XATTR_KEY, name) || !strcmp(GF_XATTR_VOL_ID_KEY, name)) { + send_fuse_err(this, finh, EPERM); + GF_FREE(finh); + return; + } - send_fuse_err (this, finh, ENOENT); - free_fuse_state (state); - return; - } + priv = this->private; - ret = fuse_flip_xattr_ns (priv, name, &newkey); - if (ret) { - send_fuse_err (this, finh, ENOMEM); - free_fuse_state (state); - return; - } + GET_STATE(this, finh, state); - state->name = newkey; - uuid_copy (state->resolve.gfid, state->loc.inode->gfid); - state->resolve.path = gf_strdup (state->loc.path); + fuse_resolve_inode_init(state, &state->resolve, finh->nodeid); - fuse_resolve_and_resume (state, fuse_removexattr_resume); + ret = fuse_flip_xattr_ns(priv, name, &newkey); + if (ret) { + send_fuse_err(this, finh, ENOMEM); + free_fuse_state(state); return; -} + } + state->name = newkey; + + fuse_resolve_and_resume(state, fuse_removexattr_resume); + return; +} static int gf_fuse_lk_enosys_log; static int -fuse_getlk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct gf_flock *lock) -{ - fuse_state_t *state = NULL; 
- - state = frame->root->state; - struct fuse_lk_out flo = {{0, }, }; - - if (op_ret == 0) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": ERR => 0", frame->root->unique); - flo.lk.type = lock->l_type; - flo.lk.pid = lock->l_pid; - if (lock->l_type == F_UNLCK) - flo.lk.start = flo.lk.end = 0; - else { - flo.lk.start = lock->l_start; - flo.lk.end = lock->l_len ? - (lock->l_start + lock->l_len - 1) : - OFFSET_MAX; - } - send_fuse_obj (this, state->finh, &flo); +fuse_getlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct gf_flock *lock, + dict_t *xdata) +{ + fuse_state_t *state = NULL; + + state = frame->root->state; + struct fuse_lk_out flo = { + { + 0, + }, + }; + + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); + + if (op_ret == 0) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": ERR => 0", + frame->root->unique); + flo.lk.type = lock->l_type; + flo.lk.pid = lock->l_pid; + if (lock->l_type == F_UNLCK) + flo.lk.start = flo.lk.end = 0; + else { + flo.lk.start = lock->l_start; + flo.lk.end = lock->l_len ? (lock->l_start + lock->l_len - 1) + : OFFSET_MAX; + } + send_fuse_obj(this, state->finh, &flo); + } else { + if (op_errno == ENOSYS) { + gf_fuse_lk_enosys_log++; + if (!(gf_fuse_lk_enosys_log % GF_UNIVERSAL_ANSWER)) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "GETLK not supported. loading " + "'features/posix-locks' on server side " + "will add GETLK support."); + } } else { - if (op_errno == ENOSYS) { - gf_fuse_lk_enosys_log++; - if (!(gf_fuse_lk_enosys_log % GF_UNIVERSAL_ANSWER)) { - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "GETLK not supported. 
loading " - "'features/posix-locks' on server side " - "will add GETLK support."); - } - } else { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": ERR => -1 (%s)", - frame->root->unique, strerror (op_errno)); - } - send_fuse_err (this, state->finh, op_errno); + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": ERR => -1 (%s)", frame->root->unique, + strerror(op_errno)); } + send_fuse_err(this, state->finh, op_errno); + } - free_fuse_state (state); - STACK_DESTROY (frame->root); + free_fuse_state(state); + STACK_DESTROY(frame->root); - return 0; + return 0; } void -fuse_getlk_resume (fuse_state_t *state) +fuse_getlk_resume(fuse_state_t *state) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": GETLK %p", state->finh->unique, state->fd); + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": GETLK %p", + state->finh->unique, state->fd); - FUSE_FOP (state, fuse_getlk_cbk, GF_FOP_LK, - lk, state->fd, F_GETLK, &state->lk_lock); + FUSE_FOP(state, fuse_getlk_cbk, GF_FOP_LK, lk, state->fd, F_GETLK, + &state->lk_lock, state->xdata); } static void -fuse_getlk (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_getlk(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - struct fuse_lk_in *fli = msg; + struct fuse_lk_in *fli = msg; - fuse_state_t *state = NULL; - fd_t *fd = NULL; + fuse_state_t *state = NULL; + fd_t *fd = NULL; - fd = FH_TO_FD (fli->fh); - GET_STATE (this, finh, state); - state->fd = fd; - convert_fuse_file_lock (&fli->lk, &state->lk_lock, - fli->owner); + fd = FH_TO_FD(fli->fh); + GET_STATE(this, finh, state); + state->fd = fd; - state->lk_owner = fli->owner; + fuse_resolve_fd_init(state, &state->resolve, fd); - fuse_resolve_and_resume (state, fuse_getlk_resume); + convert_fuse_file_lock(&fli->lk, &state->lk_lock, fli->owner); - return; -} + state->lk_owner = fli->owner; + fuse_resolve_and_resume(state, fuse_getlk_resume); + + return; +} static int -fuse_setlk_cbk (call_frame_t *frame, void *cookie, 
xlator_t *this, - int32_t op_ret, int32_t op_errno, struct gf_flock *lock) +fuse_setlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct gf_flock *lock, + dict_t *xdata) { - fuse_state_t *state = NULL; + uint32_t op = 0; + fuse_state_t *state = NULL; + int ret = 0; + + ret = fuse_interrupt_finish_fop(frame, this, _gf_true, (void **)&state); + /* state is only filled in through the out-parameter above and may still be NULL here; guard the cleanup the same way the error path of fuse_setlk_interrupt_handler does */ + if (state) { + GF_FREE(state->name); + dict_unref(state->xdata); + GF_FREE(state); + } + if (ret) { + return 0; + } + + state = frame->root->state; + op = state->finh->opcode; + + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); + + if (op_ret == 0) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": ERR => 0", + frame->root->unique); + fd_lk_insert_and_merge(state->fd, + (op == FUSE_SETLK) ? F_SETLK : F_SETLKW, + &state->lk_lock); + + send_fuse_err(this, state->finh, 0); + } else { + if (op_errno == ENOSYS) { + gf_fuse_lk_enosys_log++; + if (!(gf_fuse_lk_enosys_log % GF_UNIVERSAL_ANSWER)) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "SETLK not supported. 
loading " + "'features/posix-locks' on server side " + "will add SETLK support."); + } + } else if (op_errno == EAGAIN) { + gf_log("glusterfs-fuse", GF_LOG_DEBUG, + "Returning EAGAIN Flock: " + "start=%llu, len=%llu, pid=%llu, lk-owner=%s", + (unsigned long long)state->lk_lock.l_start, + (unsigned long long)state->lk_lock.l_len, + (unsigned long long)state->lk_lock.l_pid, + lkowner_utoa(&frame->root->lk_owner)); + } else { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 ": ERR => -1 (%s)", frame->root->unique, + strerror(op_errno)); + } - state = frame->root->state; + send_fuse_err(this, state->finh, op_errno); + } - if (op_ret == 0) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": ERR => 0", frame->root->unique); - send_fuse_err (this, state->finh, 0); - } else { - if (op_errno == ENOSYS) { - gf_fuse_lk_enosys_log++; - if (!(gf_fuse_lk_enosys_log % GF_UNIVERSAL_ANSWER)) { - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "SETLK not supported. loading " - "'features/posix-locks' on server side " - "will add SETLK support."); - } - } else if (op_errno == EAGAIN) { - gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "Returning EAGAIN Flock: " - "start=%llu, len=%llu, pid=%llu, lk-owner=%llu", - (unsigned long long) lock->l_start, - (unsigned long long) lock->l_len, - (unsigned long long) lock->l_pid, - (unsigned long long) frame->root->lk_owner); - - } else { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "%"PRIu64": ERR => -1 (%s)", - frame->root->unique, strerror (op_errno)); - } + free_fuse_state(state); + STACK_DESTROY(frame->root); - send_fuse_err (this, state->finh, op_errno); - } + return 0; +} - free_fuse_state (state); - STACK_DESTROY (frame->root); +static int +fuse_setlk_interrupt_handler_cbk(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *dict, dict_t *xdata) +{ + fuse_interrupt_state_t intstat = INTERRUPT_NONE; + fuse_interrupt_record_t *fir = cookie; - return 0; + intstat = op_ret >= 0 ? 
INTERRUPT_HANDLED : INTERRUPT_SQUELCHED; + + fuse_interrupt_finish_interrupt(this, fir, intstat, _gf_true, NULL); + + STACK_DESTROY(frame->root); + + return 0; } +static void +fuse_setlk_interrupt_handler(xlator_t *this, fuse_interrupt_record_t *fir) +{ + fuse_state_t *state = NULL; + call_frame_t *frame = NULL; + char *xattr_name = NULL; + int ret = 0; + + gf_log("glusterfs-fuse", GF_LOG_DEBUG, + "SETLK%s unique %" PRIu64 ": interrupt handler triggered", + fir->fuse_in_header.opcode == FUSE_SETLK ? "" : "W", + fir->fuse_in_header.unique); + + state = fir->data; + + ret = gf_asprintf( + &xattr_name, GF_XATTR_CLRLK_CMD ".tposix.kblocked.%hd,%jd-%jd", + state->lk_lock.l_whence, state->lk_lock.l_start, state->lk_lock.l_len); + if (ret == -1) { + xattr_name = NULL; + goto err; + } + + frame = get_call_frame_for_req(state); + if (!frame) { + goto err; + } + frame->root->state = state; + frame->root->op = GF_FOP_GETXATTR; + frame->op = GF_FOP_GETXATTR; + state->name = xattr_name; + + STACK_WIND_COOKIE(frame, fuse_setlk_interrupt_handler_cbk, fir, + state->active_subvol, + state->active_subvol->fops->fgetxattr, state->fd, + xattr_name, state->xdata); + + return; + +err: + GF_FREE(xattr_name); + fuse_interrupt_finish_interrupt(this, fir, INTERRUPT_SQUELCHED, _gf_false, + (void **)&state); + if (state) { + dict_unref(state->xdata); + GF_FREE(state); + } +} void -fuse_setlk_resume (fuse_state_t *state) +fuse_setlk_resume(fuse_state_t *state) { - gf_log ("glusterfs-fuse", GF_LOG_TRACE, - "%"PRIu64": SETLK%s %p", state->finh->unique, - state->finh->opcode == FUSE_SETLK ? "" : "W", state->fd); + fuse_interrupt_record_t *fir = NULL; + fuse_state_t *state_clone = NULL; - FUSE_FOP (state, fuse_setlk_cbk, GF_FOP_LK, lk, state->fd, - state->finh->opcode == FUSE_SETLK ? 
F_SETLK : F_SETLKW, - &state->lk_lock); + fir = fuse_interrupt_record_new(state->finh, fuse_setlk_interrupt_handler); + state_clone = gf_memdup(state, sizeof(*state)); + if (state_clone) { + state_clone->xdata = dict_new(); + } + + if (!fir || !state_clone || !state_clone->xdata) { + if (fir) { + GF_FREE(fir); + } + if (state_clone) { + GF_FREE(state_clone); + } + send_fuse_err(state->this, state->finh, ENOMEM); + + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "SETLK%s unique %" PRIu64 + ":" + " interrupt record allocation failed", + state->finh->opcode == FUSE_SETLK ? "" : "W", + state->finh->unique); + free_fuse_state(state); + + return; + } + state_clone->name = NULL; + fir->data = state_clone; + fuse_interrupt_record_insert(state->this, fir); + + gf_log("glusterfs-fuse", GF_LOG_TRACE, "%" PRIu64 ": SETLK%s %p", + state->finh->unique, state->finh->opcode == FUSE_SETLK ? "" : "W", + state->fd); + + FUSE_FOP(state, fuse_setlk_cbk, GF_FOP_LK, lk, state->fd, + state->finh->opcode == FUSE_SETLK ? 
F_SETLK : F_SETLKW, + &state->lk_lock, state->xdata); } static void -fuse_setlk (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_setlk(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - struct fuse_lk_in *fli = msg; + struct fuse_lk_in *fli = msg; - fuse_state_t *state = NULL; - fd_t *fd = NULL; + fuse_state_t *state = NULL; + fd_t *fd = NULL; - fd = FH_TO_FD (fli->fh); - GET_STATE (this, finh, state); - state->finh = finh; - state->fd = fd; - convert_fuse_file_lock (&fli->lk, &state->lk_lock, - fli->owner); + fd = FH_TO_FD(fli->fh); + GET_STATE(this, finh, state); + state->finh = finh; + state->fd = fd; - state->lk_owner = fli->owner; + fuse_resolve_fd_init(state, &state->resolve, fd); - fuse_resolve_and_resume (state, fuse_setlk_resume); + convert_fuse_file_lock(&fli->lk, &state->lk_lock, fli->owner); - return; + state->lk_owner = fli->owner; + + fuse_resolve_and_resume(state, fuse_setlk_resume); + + return; } +#if FUSE_KERNEL_MINOR_VERSION >= 11 && defined(HAVE_FUSE_NOTIFICATIONS) static void * -notify_kernel_loop (void *data) +notify_kernel_loop(void *data) { - xlator_t *this = NULL; - fuse_private_t *priv = NULL; - struct fuse_out_header *fouh = NULL; - int rv = 0; + uint32_t len = 0; + ssize_t rv = 0; + xlator_t *this = NULL; + fuse_private_t *priv = NULL; + fuse_invalidate_node_t *node = NULL; + fuse_invalidate_node_t *tmp = NULL; + struct fuse_out_header *pfoh = NULL; + struct iovec iov_out = { + 0, + }; + + this = data; + priv = this->private; + + for (;;) { + pthread_mutex_lock(&priv->invalidate_mutex); + { + while (list_empty(&priv->invalidate_list)) + pthread_cond_wait(&priv->invalidate_cond, + &priv->invalidate_mutex); - char inval_buf[INVAL_BUF_SIZE] = {0,}; + node = list_entry(priv->invalidate_list.next, + fuse_invalidate_node_t, next); - this = data; - priv = this->private; + list_del_init(&node->next); + priv->invalidate_count--; + } + pthread_mutex_unlock(&priv->invalidate_mutex); - for (;;) { - rv = read 
(priv->revchan_in, inval_buf, sizeof (*fouh)); - if (rv != sizeof (*fouh)) - break; - fouh = (struct fuse_out_header *)inval_buf; - rv = read (priv->revchan_in, inval_buf + sizeof (*fouh), - fouh->len - sizeof (*fouh)); - if (rv != fouh->len - sizeof (*fouh)) - break; - rv = write (priv->fd, inval_buf, fouh->len); - if (rv != fouh->len && !(rv == -1 && errno == ENOENT)) - break; + pfoh = (struct fuse_out_header *)node->inval_buf; + memcpy(&len, &pfoh->len, sizeof(len)); + /* + * a simple + * len = pfoh->len; + * works on x86, but takes a multiple insn cycle hit + * when pfoh->len is not correctly aligned, possibly + * even stalling the insn pipeline. + * Other architectures will not be so forgiving. If + * we're lucky the memcpy will be inlined by the + * compiler, and might be as fast or faster without + * the risk of stalling the insn pipeline. + */ + + iov_out.iov_base = node->inval_buf; + iov_out.iov_len = len; + rv = sys_writev(priv->fd, &iov_out, 1); + check_and_dump_fuse_W(priv, &iov_out, 1, rv, node->errnomask); + + GF_FREE(node); + + if (rv == -1 && errno == EBADF) + break; + + if (rv != len && !(rv == -1 && errno == ENOENT)) { + gf_log("glusterfs-fuse", GF_LOG_INFO, "len: %u, rv: %zd, errno: %d", + len, rv, errno); } + } - close (priv->revchan_in); - close (priv->revchan_out); + gf_log("glusterfs-fuse", GF_LOG_ERROR, "kernel notifier loop terminated"); - gf_log ("glusterfs-fuse", GF_LOG_INFO, - "kernel notifier loop terminated"); + pthread_mutex_lock(&priv->invalidate_mutex); + { + priv->reverse_fuse_thread_started = _gf_false; + list_for_each_entry_safe(node, tmp, &priv->invalidate_list, next) + { + list_del_init(&node->next); + GF_FREE(node); + } + priv->invalidate_count = 0; + } + pthread_mutex_unlock(&priv->invalidate_mutex); - return NULL; + return NULL; } +#endif - -static void -fuse_init (xlator_t *this, fuse_in_header_t *finh, void *msg) +static void * +timed_response_loop(void *data) { - struct fuse_init_in *fini = msg; - struct fuse_init_out 
fino = {0,}; - fuse_private_t *priv = NULL; - int ret = 0; - int pfd[2] = {0,}; - pthread_t messenger; + ssize_t rv = 0; + size_t len = 0; + xlator_t *this = NULL; + fuse_private_t *priv = NULL; + fuse_timed_message_t *dmsg = NULL; + fuse_timed_message_t *tmp = NULL; + struct timespec now = { + 0, + }; + struct timespec delta = { + 0, + }; + struct iovec iovs[2] = { + { + 0, + }, + }; - priv = this->private; + this = data; + priv = this->private; - if (priv->init_recvd) { - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "got INIT after first message"); + for (;;) { + pthread_mutex_lock(&priv->timed_mutex); + { + while (list_empty(&priv->timed_list)) { + pthread_cond_wait(&priv->timed_cond, &priv->timed_mutex); + } + + dmsg = list_entry(priv->timed_list.next, fuse_timed_message_t, + next); + list_for_each_entry(tmp, &priv->timed_list, next) + { + if (timespec_cmp(&tmp->scheduled_ts, &dmsg->scheduled_ts) < 0) { + dmsg = tmp; + } + } - close (priv->fd); - goto out; + list_del_init(&dmsg->next); } + pthread_mutex_unlock(&priv->timed_mutex); - priv->init_recvd = 1; + timespec_now(&now); + if (timespec_cmp(&now, &dmsg->scheduled_ts) < 0) { + timespec_sub(&now, &dmsg->scheduled_ts, &delta); + nanosleep(&delta, NULL); + } - if (fini->major != FUSE_KERNEL_VERSION) { - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "unsupported FUSE protocol version %d.%d", - fini->major, fini->minor); + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "sending timed " + "message of unique %" PRIu64, + dmsg->fuse_out_header.unique); - close (priv->fd); - goto out; + len = dmsg->fuse_out_header.len; + iovs[0] = (struct iovec){&dmsg->fuse_out_header, + sizeof(struct fuse_out_header)}; + iovs[1] = (struct iovec){dmsg->fuse_message_body, + len - sizeof(struct fuse_out_header)}; + rv = sys_writev(priv->fd, iovs, 2); + check_and_dump_fuse_W(priv, iovs, 2, rv, dmsg->errnomask); + + fuse_timed_message_free(dmsg); + + if (rv == -1 && errno == EBADF) { + break; } - priv->proto_minor = fini->minor; - fino.major = 
FUSE_KERNEL_VERSION; - fino.minor = FUSE_KERNEL_MINOR_VERSION; - fino.max_readahead = 1 << 17; - fino.max_write = 1 << 17; - fino.flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS; -#if FUSE_KERNEL_MINOR_VERSION >= 12 - if (fini->minor >= 12) { - /* let fuse leave the umask processing to us, so that it does not - * break extended POSIX ACL defaults on server */ - fino.flags |= FUSE_DONT_MASK; + if (rv != len && !(rv == -1 && errno == ENOENT)) { + gf_log("glusterfs-fuse", GF_LOG_INFO, + "len: %zu, rv: %zd, errno: %d", len, rv, errno); } -#endif -#if FUSE_KERNEL_MINOR_VERSION >= 9 - if (fini->minor >= 6 /* fuse_init_in has flags */ && - fini->flags & FUSE_BIG_WRITES) { - /* no need for direct I/O mode by default if big writes are supported */ - if (priv->direct_io_mode == 2) - priv->direct_io_mode = 0; - fino.flags |= FUSE_BIG_WRITES; + } + + gf_log("glusterfs-fuse", GF_LOG_ERROR, "timed response loop terminated"); + + pthread_mutex_lock(&priv->timed_mutex); + { + priv->timed_response_fuse_thread_started = _gf_false; + list_for_each_entry_safe(dmsg, tmp, &priv->timed_list, next) + { + list_del_init(&dmsg->next); + fuse_timed_message_free(dmsg); } + } + pthread_mutex_unlock(&priv->timed_mutex); - /* Used for 'reverse invalidation of inode' */ - if (fini->minor >= 12) { - if (pipe(pfd) == -1) { - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "cannot create pipe pair (%s)", - strerror(errno)); + return NULL; +} - close (priv->fd); - goto out; - } - priv->revchan_in = pfd[0]; - priv->revchan_out = pfd[1]; - ret = pthread_create (&messenger, NULL, notify_kernel_loop, - this); - if (ret != 0) { - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "failed to start messenger daemon (%s)", - strerror(errno)); - - close (priv->fd); - goto out; - } - priv->reverse_fuse_thread_started = _gf_true; +static void +fuse_init(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +{ + struct fuse_init_in *fini = msg; + struct fuse_init_out fino = { + 0, + }; + fuse_private_t *priv = 
NULL; + size_t size = 0; + int ret = 0; +#if FUSE_KERNEL_MINOR_VERSION >= 9 + pthread_t messenger; +#endif + pthread_t delayer; + + priv = this->private; + + if (priv->init_recvd) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, "got INIT after first message"); + + sys_close(priv->fd); + goto out; + } + + priv->init_recvd = 1; + + if (fini->major != FUSE_KERNEL_VERSION) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "unsupported FUSE protocol version %d.%d", fini->major, + fini->minor); + + sys_close(priv->fd); + goto out; + } + priv->proto_minor = fini->minor; + + fino.major = FUSE_KERNEL_VERSION; + fino.minor = FUSE_KERNEL_MINOR_VERSION; + fino.max_readahead = 1 << 17; + fino.max_write = 1 << 17; + fino.flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS; +#if FUSE_KERNEL_MINOR_VERSION >= 17 + if (fini->minor >= 17) + fino.flags |= FUSE_FLOCK_LOCKS; +#endif +#if FUSE_KERNEL_MINOR_VERSION >= 12 + if (fini->minor >= 12) { + /* let fuse leave the umask processing to us, so that it does not + * break extended POSIX ACL defaults on server */ + fino.flags |= FUSE_DONT_MASK; + } +#endif +#if FUSE_KERNEL_MINOR_VERSION >= 9 + if (fini->minor >= 6 /* fuse_init_in has flags */ && + fini->flags & FUSE_BIG_WRITES) { + /* no need for direct I/O mode by default if big writes are supported */ + if (priv->direct_io_mode == 2) + priv->direct_io_mode = 0; + fino.flags |= FUSE_BIG_WRITES; + } + + /* Start the thread processing timed responses */ + ret = gf_thread_create(&delayer, NULL, timed_response_loop, this, + "fusedlyd"); + if (ret != 0) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "failed to start timed response thread (%s)", strerror(errno)); + + sys_close(priv->fd); + goto out; + } + priv->timed_response_fuse_thread_started = _gf_true; + + /* Used for 'reverse invalidation of inode' */ +#ifdef HAVE_FUSE_NOTIFICATIONS + if (fini->minor >= 12) { + ret = gf_thread_create(&messenger, NULL, notify_kernel_loop, this, + "fusenoti"); + if (ret != 0) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + 
"failed to start messenger daemon (%s)", strerror(errno)); + + sys_close(priv->fd); + goto out; } - if (fini->minor >= 13) { - /* these values seemed to work fine during testing */ - fino.max_background = 64; - fino.congestion_threshold = 48; + priv->reverse_fuse_thread_started = _gf_true; + } else +#endif + { + /* + * FUSE minor < 12 does not implement invalidate notifications. + * This mechanism is required for fopen-keep-cache to operate + * correctly. Disable and warn the user. + */ + if (priv->fopen_keep_cache) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "FUSE version " + "%d.%d does not support inval notifications. " + "fopen-keep-cache disabled.", + fini->major, fini->minor); + priv->fopen_keep_cache = 0; + } + } + + if (fini->minor >= 13) { + fino.max_background = priv->background_qlen; + fino.congestion_threshold = priv->congestion_threshold; + } + if (fini->minor < 9) + *priv->msg0_len_p = sizeof(*finh) + FUSE_COMPAT_WRITE_IN_SIZE; + + if (priv->use_readdirp) { + if (fini->flags & FUSE_DO_READDIRPLUS) + fino.flags |= FUSE_DO_READDIRPLUS; + } +#endif + if (priv->fopen_keep_cache == 2) { + /* If user did not explicitly set --fopen-keep-cache[=off], + then check if kernel support FUSE_AUTO_INVAL_DATA and ... + */ + + priv->fopen_keep_cache = 1; + +#if FUSE_KERNEL_MINOR_VERSION >= 20 + if (fini->flags & FUSE_AUTO_INVAL_DATA) { + /* ... enable fopen_keep_cache mode if supported. + */ + gf_log("glusterfs-fuse", GF_LOG_DEBUG, + "Detected " + "support for FUSE_AUTO_INVAL_DATA. Enabling " + "fopen_keep_cache automatically."); + + if (priv->fuse_auto_inval) + fino.flags |= FUSE_AUTO_INVAL_DATA; + } else +#endif + { + if (priv->fuse_auto_inval) { + gf_log("glusterfs-fuse", GF_LOG_DEBUG, + "No support for FUSE_AUTO_INVAL_DATA. Disabling " + "fopen_keep_cache."); + /* ... else disable. 
*/ + priv->fopen_keep_cache = 0; + } + } + } else if (priv->fopen_keep_cache == 1) { + /* If user explicitly set --fopen-keep-cache[=on], + then enable FUSE_AUTO_INVAL_DATA if possible. + */ +#if FUSE_KERNEL_MINOR_VERSION >= 20 + if (priv->fuse_auto_inval && (fini->flags & FUSE_AUTO_INVAL_DATA)) { + gf_log("glusterfs-fuse", GF_LOG_DEBUG, + "fopen_keep_cache " + "is explicitly set. Enabling FUSE_AUTO_INVAL_DATA"); + fino.flags |= FUSE_AUTO_INVAL_DATA; + } else +#endif + { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "fopen_keep_cache " + "is explicitly set. Support for " + "FUSE_AUTO_INVAL_DATA is missing"); } - if (fini->minor < 9) - *priv->msg0_len_p = sizeof(*finh) + FUSE_COMPAT_WRITE_IN_SIZE; + } + +#if FUSE_KERNEL_MINOR_VERSION >= 22 + if (fini->flags & FUSE_ASYNC_DIO) + fino.flags |= FUSE_ASYNC_DIO; #endif - ret = send_fuse_obj (this, finh, &fino); - if (ret == 0) - gf_log ("glusterfs-fuse", GF_LOG_INFO, - "FUSE inited with protocol versions:" - " glusterfs %d.%d kernel %d.%d", - FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION, - fini->major, fini->minor); - else { - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "FUSE init failed (%s)", strerror (ret)); - close (priv->fd); - } + size = sizeof(fino); +#if FUSE_KERNEL_MINOR_VERSION >= 23 + /* FUSE 7.23 and newer added attributes to the fuse_init_out struct */ + if (fini->minor < 23) { + /* reduce the size, chop off unused attributes from &fino */ + size = FUSE_COMPAT_22_INIT_OUT_SIZE; + } + + /* Writeback cache support */ + if (fini->minor >= 23) { + if (priv->kernel_writeback_cache) + fino.flags |= FUSE_WRITEBACK_CACHE; + fino.time_gran = priv->attr_times_granularity; + } +#endif + + ret = send_fuse_data(this, finh, &fino, size); + if (ret == 0) + gf_log("glusterfs-fuse", GF_LOG_INFO, + "FUSE inited with protocol versions:" + " glusterfs %d.%d kernel %d.%d", + FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION, fini->major, + fini->minor); + else { + gf_log("glusterfs-fuse", GF_LOG_ERROR, "FUSE init failed (%s)", + 
strerror(ret)); - out: - GF_FREE (finh); + sys_close(priv->fd); + } + +out: + GF_FREE(finh); } +static void +fuse_enosys(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +{ + send_fuse_err(this, finh, ENOSYS); + + GF_FREE(finh); +} static void -fuse_enosys (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_destroy(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - send_fuse_err (this, finh, ENOSYS); + send_fuse_err(this, finh, 0); - GF_FREE (finh); + GF_FREE(finh); } +int +fuse_first_lookup(xlator_t *this) +{ + fuse_private_t *priv = NULL; + loc_t loc = { + 0, + }; + xlator_t *xl = NULL; + dict_t *dict = NULL; + static uuid_t gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + int ret = -1; + struct iatt iatt = { + 0, + }; + + priv = this->private; + + loc.path = "/"; + loc.name = ""; + loc.inode = fuse_ino_to_inode(1, this); + gf_uuid_copy(loc.gfid, loc.inode->gfid); + loc.parent = NULL; + + dict = dict_new(); + + xl = priv->active_subvol; + + ret = dict_set_gfuuid(dict, "gfid-req", gfid, true); + if (ret) { + gf_log(xl->name, GF_LOG_ERROR, "failed to set 'gfid-req'"); + goto out; + } + + ret = syncop_lookup(xl, &loc, &iatt, NULL, dict, NULL); + DECODE_SYNCOP_ERR(ret); + if (ret < 0) { + gf_log(this->name, GF_LOG_ERROR, "first lookup on root failed (%s)", + strerror(errno)); + /* NOTE: Treat it as an error case. 
*/ + /* goto out; */ /* commented for preventing coverity warning */ + } + /* Remove comment of above goto statement if you are adding any + more code here, before 'out' label */ -static void -fuse_destroy (xlator_t *this, fuse_in_header_t *finh, void *msg) +out: + dict_unref(dict); + inode_unref(loc.inode); + + return ret; +} + +int +fuse_nameless_lookup(xlator_t *this, xlator_t *xl, uuid_t gfid, loc_t *loc) { - send_fuse_err (this, finh, 0); + int ret = -1; + dict_t *xattr_req = NULL; + struct iatt iatt = { + 0, + }; + inode_t *linked_inode = NULL; + uint64_t ctx_value = LOOKUP_NOT_NEEDED; + + if ((loc == NULL) || (xl == NULL)) { + ret = -EINVAL; + goto out; + } + + if (loc->inode == NULL) { + loc->inode = inode_new(xl->itable); + if (loc->inode == NULL) { + ret = -ENOMEM; + goto out; + } + } + + gf_uuid_copy(loc->gfid, gfid); + + xattr_req = dict_new(); + if (xattr_req == NULL) { + ret = -ENOMEM; + goto out; + } + + ret = syncop_lookup(xl, loc, &iatt, NULL, xattr_req, NULL); + if (ret < 0) + goto out; + + linked_inode = inode_link(loc->inode, NULL, NULL, &iatt); + if (linked_inode == loc->inode) + inode_ctx_set(linked_inode, this, &ctx_value); + + inode_unref(loc->inode); + loc->inode = linked_inode; + + ret = 0; +out: + if (xattr_req != NULL) { + dict_unref(xattr_req); + } - GF_FREE (finh); + return ret; } +int +fuse_migrate_fd_open(xlator_t *this, fd_t *basefd, fd_t *oldfd, + xlator_t *old_subvol, xlator_t *new_subvol) +{ + loc_t loc = { + 0, + }; + fd_t *newfd = NULL, *old_activefd = NULL; + fuse_fd_ctx_t *basefd_ctx = NULL; + fuse_fd_ctx_t *newfd_ctx = NULL; + int ret = 0, flags = 0; + + ret = inode_path(basefd->inode, NULL, (char **)&loc.path); + if (ret < 0) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "cannot construct path of gfid (%s) failed" + "(old-subvolume:%s-%d new-subvolume:%s-%d)", + uuid_utoa(basefd->inode->gfid), old_subvol->name, + old_subvol->graph->id, new_subvol->name, new_subvol->graph->id); + goto out; + } + + gf_uuid_copy(loc.gfid, 
basefd->inode->gfid); + + loc.inode = inode_find(new_subvol->itable, basefd->inode->gfid); + + if (loc.inode == NULL) { + ret = fuse_nameless_lookup(this, new_subvol, basefd->inode->gfid, &loc); + if (ret < 0) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "name-less lookup of gfid (%s) failed (%s)" + "(old-subvolume:%s-%d new-subvolume:%s-%d)", + uuid_utoa(basefd->inode->gfid), strerror(-ret), + old_subvol->name, old_subvol->graph->id, new_subvol->name, + new_subvol->graph->id); + ret = -1; + goto out; + } + } + + basefd_ctx = fuse_fd_ctx_get(this, basefd); + GF_VALIDATE_OR_GOTO("glusterfs-fuse", basefd_ctx, out); + + newfd = fd_create(loc.inode, basefd->pid); + if (newfd == NULL) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "cannot create new fd, hence not migrating basefd " + "(ptr:%p inode-gfid:%s) " + "(old-subvolume:%s-%d new-subvolume:%s-%d)", + basefd, uuid_utoa(loc.inode->gfid), old_subvol->name, + old_subvol->graph->id, new_subvol->name, new_subvol->graph->id); + ret = -1; + goto out; + } + + newfd->flags = basefd->flags; + if (newfd->lk_ctx) + fd_lk_ctx_unref(newfd->lk_ctx); + + newfd->lk_ctx = fd_lk_ctx_ref(oldfd->lk_ctx); + + newfd_ctx = fuse_fd_ctx_check_n_create(this, newfd); + GF_VALIDATE_OR_GOTO("glusterfs-fuse", newfd_ctx, out); + + if (IA_ISDIR(basefd->inode->ia_type)) { + ret = syncop_opendir(new_subvol, &loc, newfd, NULL, NULL); + } else { + flags = basefd->flags & ~(O_CREAT | O_EXCL | O_TRUNC); + ret = syncop_open(new_subvol, &loc, flags, newfd, NULL, NULL); + } + + if (ret < 0) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "open on basefd (ptr:%p inode-gfid:%s) failed (%s)" + "(old-subvolume:%s-%d new-subvolume:%s-%d)", + basefd, uuid_utoa(basefd->inode->gfid), strerror(-ret), + old_subvol->name, old_subvol->graph->id, new_subvol->name, + new_subvol->graph->id); + ret = -1; + goto out; + } + + fd_bind(newfd); + + LOCK(&basefd->lock); + { + if (basefd_ctx->activefd != NULL) { + old_activefd = basefd_ctx->activefd; + } + + 
basefd_ctx->activefd = newfd; + } + UNLOCK(&basefd->lock); + + if (old_activefd != NULL) { + fd_unref(old_activefd); + } + + gf_log("glusterfs-fuse", GF_LOG_INFO, + "migrated basefd (%p) to newfd (%p) (inode-gfid:%s)" + "(old-subvolume:%s-%d new-subvolume:%s-%d)", + basefd, newfd, uuid_utoa(basefd->inode->gfid), old_subvol->name, + old_subvol->graph->id, new_subvol->name, new_subvol->graph->id); + + ret = 0; +out: + loc_wipe(&loc); -struct fuse_first_lookup { - pthread_mutex_t mutex; - pthread_cond_t cond; - char fin; -}; + return ret; +} int -fuse_first_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *buf, dict_t *xattr, - struct iatt *postparent) +fuse_migrate_locks(xlator_t *this, fd_t *basefd, fd_t *oldfd, + xlator_t *old_subvol, xlator_t *new_subvol) { - struct fuse_first_lookup *stub = NULL; + int ret = -1; + dict_t *lockinfo = NULL; + void *ptr = NULL; + fd_t *newfd = NULL; + fuse_fd_ctx_t *basefd_ctx = NULL; + + if (!oldfd->lk_ctx || fd_lk_ctx_empty(oldfd->lk_ctx)) + return 0; + + basefd_ctx = fuse_fd_ctx_get(this, basefd); + GF_VALIDATE_OR_GOTO("glusterfs-fuse", basefd_ctx, out); + + LOCK(&basefd->lock); + { + newfd = fd_ref(basefd_ctx->activefd); + } + UNLOCK(&basefd->lock); + + ret = syncop_fgetxattr(old_subvol, oldfd, &lockinfo, GF_XATTR_LOCKINFO_KEY, + NULL, NULL); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "getting lockinfo failed while migrating locks" + "(oldfd:%p newfd:%p inode-gfid:%s)" + "(old-subvol:%s-%d new-subvol:%s-%d)", + oldfd, newfd, uuid_utoa(newfd->inode->gfid), old_subvol->name, + old_subvol->graph->id, new_subvol->name, new_subvol->graph->id); + ret = -1; + goto out; + } + + ret = dict_get_ptr(lockinfo, GF_XATTR_LOCKINFO_KEY, &ptr); + if (ptr == NULL) { + ret = 0; + gf_log(this->name, GF_LOG_INFO, + "No lockinfo present on any of the bricks " + "(oldfd: %p newfd:%p inode-gfid:%s) " + "(old-subvol:%s-%d new-subvol:%s-%d)", + oldfd, newfd, 
uuid_utoa(newfd->inode->gfid), old_subvol->name, + old_subvol->graph->id, new_subvol->name, new_subvol->graph->id); + + goto out; + } + + ret = syncop_fsetxattr(new_subvol, newfd, lockinfo, 0, NULL, NULL); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "migrating locks failed (oldfd:%p newfd:%p " + "inode-gfid:%s) (old-subvol:%s-%d new-subvol:%s-%d)", + oldfd, newfd, uuid_utoa(newfd->inode->gfid), old_subvol->name, + old_subvol->graph->id, new_subvol->name, new_subvol->graph->id); + ret = -1; + goto out; + } - stub = frame->local; +out: + if (newfd) + fd_unref(newfd); + + if (lockinfo != NULL) { + dict_unref(lockinfo); + } + + return ret; +} - if (op_ret == 0) { - gf_log (this->name, GF_LOG_TRACE, - "first lookup on root succeeded."); +int +fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol, + xlator_t *new_subvol) +{ + int ret = -1; + char create_in_progress = 0; + fuse_fd_ctx_t *basefd_ctx = NULL; + fd_t *oldfd = NULL; + dict_t *xdata = NULL; + + basefd_ctx = fuse_fd_ctx_get(this, basefd); + GF_VALIDATE_OR_GOTO("glusterfs-fuse", basefd_ctx, out); + + LOCK(&basefd->lock); + { + oldfd = basefd_ctx->activefd ? 
basefd_ctx->activefd : basefd; + fd_ref(oldfd); + } + UNLOCK(&basefd->lock); + + LOCK(&oldfd->inode->lock); + { + if (gf_uuid_is_null(oldfd->inode->gfid)) { + create_in_progress = 1; } else { - gf_log (this->name, GF_LOG_DEBUG, - "first lookup on root failed."); + create_in_progress = 0; } + } + UNLOCK(&oldfd->inode->lock); - pthread_mutex_lock (&stub->mutex); - { - stub->fin = 1; - pthread_cond_broadcast (&stub->cond); - } - pthread_mutex_unlock (&stub->mutex); + if (create_in_progress) { + gf_log("glusterfs-fuse", GF_LOG_INFO, + "create call on fd (%p) is in progress " + "(basefd-ptr:%p basefd-inode.gfid:%s), " + "hence deferring migration till application does an " + "fd based operation on this fd" + "(old-subvolume:%s-%d, new-subvolume:%s-%d)", + oldfd, basefd, uuid_utoa(basefd->inode->gfid), old_subvol->name, + old_subvol->graph->id, new_subvol->name, new_subvol->graph->id); - return 0; -} + ret = 0; + goto out; + } + if (oldfd->inode->table->xl == old_subvol) { + if (IA_ISDIR(oldfd->inode->ia_type)) { + ret = syncop_fsyncdir(old_subvol, oldfd, 0, NULL, NULL); + } else { + xdata = dict_new(); + if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "last-fsync set failed (%s) on fd (%p)" + "(basefd:%p basefd-inode.gfid:%s) " + "(old-subvolume:%s-%d new-subvolume:%s-%d)", + strerror(ENOMEM), oldfd, basefd, + uuid_utoa(basefd->inode->gfid), old_subvol->name, + old_subvol->graph->id, new_subvol->name, + new_subvol->graph->id); + } + + ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, xdata, NULL); + } + + if (ret < 0) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "syncop_fsync(dir) failed (%s) on fd (%p)" + "(basefd:%p basefd-inode.gfid:%s) " + "(old-subvolume:%s-%d new-subvolume:%s-%d)", + strerror(-ret), oldfd, basefd, + uuid_utoa(basefd->inode->gfid), old_subvol->name, + old_subvol->graph->id, new_subvol->name, + new_subvol->graph->id); + } + } else { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "basefd (ptr:%p 
inode-gfid:%s) was not " + "migrated during previous graph switch" + "(old-subvolume:%s-%d new-subvolume: %s-%d)", + basefd, basefd->inode->gfid, old_subvol->name, + old_subvol->graph->id, new_subvol->name, new_subvol->graph->id); + } + + ret = fuse_migrate_fd_open(this, basefd, oldfd, old_subvol, new_subvol); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "open corresponding to " + "basefd (ptr:%p inode-gfid:%s) in new graph failed " + "(old-subvolume:%s-%d new-subvolume:%s-%d)", + basefd, uuid_utoa(basefd->inode->gfid), old_subvol->name, + old_subvol->graph->id, new_subvol->name, new_subvol->graph->id); + goto out; + } + + ret = fuse_migrate_locks(this, basefd, oldfd, old_subvol, new_subvol); + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "migrating locks from old-subvolume (%s-%d) to " + "new-subvolume (%s-%d) failed (inode-gfid:%s oldfd:%p " + "basefd:%p)", + old_subvol->name, old_subvol->graph->id, new_subvol->name, + new_subvol->graph->id, uuid_utoa(basefd->inode->gfid), oldfd, + basefd); + } +out: + if (ret < 0) { + gf_log(this->name, GF_LOG_WARNING, + "migration of basefd " + "(ptr:%p inode-gfid:%s) failed" + "(old-subvolume:%s-%d new-subvolume:%s-%d)", + basefd, oldfd ? 
uuid_utoa(oldfd->inode->gfid) : NULL, + old_subvol->name, old_subvol->graph->id, new_subvol->name, + new_subvol->graph->id); + } + + fd_unref(oldfd); + + if (xdata) + dict_unref(xdata); + + return ret; +} int -fuse_first_lookup (xlator_t *this) +fuse_handle_opened_fds(xlator_t *this, xlator_t *old_subvol, + xlator_t *new_subvol) { - fuse_private_t *priv = NULL; - loc_t loc = {0, }; - call_frame_t *frame = NULL; - xlator_t *xl = NULL; - dict_t *dict = NULL; - struct fuse_first_lookup stub; - uuid_t gfid; - int ret; + fuse_private_t *priv = NULL; + fdentry_t *fdentries = NULL; + uint32_t count = 0; + fdtable_t *fdtable = NULL; + int i = 0; + fd_t *fd = NULL; + int32_t ret = 0; + fuse_fd_ctx_t *fdctx = NULL; + + priv = this->private; + + fdtable = priv->fdtable; + + fdentries = gf_fd_fdtable_copy_all_fds(fdtable, &count); + if (fdentries != NULL) { + for (i = 0; i < count; i++) { + fd = fdentries[i].fd; + if (fd == NULL) + continue; + + ret = fuse_migrate_fd(this, fd, old_subvol, new_subvol); + + fdctx = fuse_fd_ctx_get(this, fd); + if (fdctx) { + LOCK(&fd->lock); + { + if (ret < 0) { + fdctx->migration_failed = 1; + } else { + fdctx->migration_failed = 0; + } + } + UNLOCK(&fd->lock); + } + } + + for (i = 0; i < count; i++) { + fd = fdentries[i].fd; + if (fd) + fd_unref(fd); + } - priv = this->private; + GF_FREE(fdentries); + } - loc.path = "/"; - loc.name = ""; - loc.inode = fuse_ino_to_inode (1, this); - uuid_copy (loc.gfid, loc.inode->gfid); - loc.parent = NULL; + return 0; +} - dict = dict_new (); - frame = create_frame (this, this->ctx->pool); - frame->root->type = GF_OP_TYPE_FOP; +static int +fuse_handle_blocked_locks(xlator_t *this, xlator_t *old_subvol, + xlator_t *new_subvol) +{ + return 0; +} - xl = priv->active_subvol; +static int +fuse_graph_switch_task(void *data) +{ + fuse_graph_switch_args_t *args = NULL; - pthread_mutex_init (&stub.mutex, NULL); - pthread_cond_init (&stub.cond, NULL); - stub.fin = 0; + args = data; + if (args == NULL) { + goto out; + } 
- frame->local = &stub; + /* don't change the order of handling open fds and blocked locks, since + * the act of opening files also reacquires granted locks in new graph. + */ + fuse_handle_opened_fds(args->this, args->old_subvol, args->new_subvol); - memset (gfid, 0, 16); - gfid[15] = 1; - ret = dict_set_static_bin (dict, "gfid-req", gfid, 16); - if (ret) - gf_log (xl->name, GF_LOG_ERROR, "failed to set 'gfid-req'"); + fuse_handle_blocked_locks(args->this, args->old_subvol, args->new_subvol); - STACK_WIND (frame, fuse_first_lookup_cbk, xl, xl->fops->lookup, - &loc, dict); - dict_unref (dict); +out: + return 0; +} - pthread_mutex_lock (&stub.mutex); - { - while (!stub.fin) { - pthread_cond_wait (&stub.cond, &stub.mutex); - } - } - pthread_mutex_unlock (&stub.mutex); +fuse_graph_switch_args_t * +fuse_graph_switch_args_alloc(void) +{ + fuse_graph_switch_args_t *args = NULL; - pthread_mutex_destroy (&stub.mutex); - pthread_cond_destroy (&stub.cond); + args = GF_CALLOC(1, sizeof(*args), gf_fuse_mt_graph_switch_args_t); + if (args == NULL) { + goto out; + } - frame->local = NULL; - STACK_DESTROY (frame->root); +out: + return args; +} - return 0; +void +fuse_graph_switch_args_destroy(fuse_graph_switch_args_t *args) +{ + if (args == NULL) { + goto out; + } + + GF_FREE(args); +out: + return; } +int +fuse_handle_graph_switch(xlator_t *this, xlator_t *old_subvol, + xlator_t *new_subvol) +{ + call_frame_t *frame = NULL; + int32_t ret = -1; + fuse_graph_switch_args_t *args = NULL; + + frame = create_frame(this, this->ctx->pool); + if (frame == NULL) { + goto out; + } + + args = fuse_graph_switch_args_alloc(); + if (args == NULL) { + goto out; + } + + args->this = this; + args->old_subvol = old_subvol; + args->new_subvol = new_subvol; + + ret = synctask_new(this->ctx->env, fuse_graph_switch_task, NULL, frame, + args); + if (ret == -1) { + gf_log(this->name, GF_LOG_WARNING, + "starting sync-task to " + "handle graph switch failed"); + goto out; + } + + ret = 0; +out: + if (args 
!= NULL) { + fuse_graph_switch_args_destroy(args); + } + + if (frame != NULL) { + STACK_DESTROY(frame->root); + } + + return ret; +} int -fuse_graph_sync (xlator_t *this) +fuse_graph_sync(xlator_t *this) { - fuse_private_t *priv = NULL; - int need_first_lookup = 0; - int ret = 0; + fuse_private_t *priv = NULL; + int need_first_lookup = 0; + int ret = 0; + int new_graph_id = 0; + xlator_t *old_subvol = NULL, *new_subvol = NULL; + uint64_t winds_on_old_subvol = 0; + + priv = this->private; + + pthread_mutex_lock(&priv->sync_mutex); + { + if (!priv->next_graph) + goto unlock; + + old_subvol = priv->active_subvol; + new_subvol = priv->active_subvol = priv->next_graph->top; + new_graph_id = priv->next_graph->id; + priv->next_graph = NULL; + need_first_lookup = 1; + if (old_subvol) { + priv->handle_graph_switch = _gf_true; + } + + while (!priv->event_recvd) { + ret = pthread_cond_wait(&priv->sync_cond, &priv->sync_mutex); + if (ret != 0) { + gf_log(this->name, GF_LOG_DEBUG, + "timedwait returned non zero value " + "ret: %d errno: %d", + ret, errno); + break; + } + } + } +unlock: + pthread_mutex_unlock(&priv->sync_mutex); - priv = this->private; + if (need_first_lookup) { + gf_log("fuse", GF_LOG_INFO, "switched to graph %d", new_graph_id); + fuse_first_lookup(this); + } - pthread_mutex_lock (&priv->sync_mutex); + if ((old_subvol != NULL) && (new_subvol != NULL)) { + fuse_handle_graph_switch(this, old_subvol, new_subvol); + + pthread_mutex_lock(&priv->sync_mutex); { - if (!priv->next_graph) - goto unlock; - - priv->active_subvol = priv->next_graph->top; - priv->next_graph = NULL; - need_first_lookup = 1; - - while (!priv->event_recvd) { - ret = pthread_cond_wait (&priv->sync_cond, - &priv->sync_mutex); - if (ret != 0) { - gf_log (this->name, GF_LOG_DEBUG, - "timedwait returned non zero value " - "ret: %d errno: %d", ret, errno); - break; - } - } + old_subvol->switched = 1; + winds_on_old_subvol = old_subvol->winds; + priv->handle_graph_switch = _gf_false; + 
pthread_cond_broadcast(&priv->migrate_cond); } -unlock: - pthread_mutex_unlock (&priv->sync_mutex); + pthread_mutex_unlock(&priv->sync_mutex); - if (need_first_lookup) { - fuse_first_lookup (this); + if (winds_on_old_subvol == 0) { + xlator_notify(old_subvol, GF_EVENT_PARENT_DOWN, old_subvol, NULL); } + } - return 0; + return 0; } +int +fuse_get_mount_status(xlator_t *this) +{ + int kid_status = -1; + fuse_private_t *priv = this->private; + + if (sys_read(priv->status_pipe[0], &kid_status, sizeof(kid_status)) < 0) { + gf_log(this->name, GF_LOG_ERROR, "could not get mount status"); + kid_status = -1; + } + gf_log(this->name, GF_LOG_DEBUG, "mount status is %d", kid_status); + + sys_close(priv->status_pipe[0]); + sys_close(priv->status_pipe[1]); + return kid_status; +} -static void * -fuse_thread_proc (void *data) -{ - char *mount_point = NULL; - xlator_t *this = NULL; - fuse_private_t *priv = NULL; - ssize_t res = 0; - struct iobuf *iobuf = NULL; - fuse_in_header_t *finh; - struct iovec iov_in[2]; - void *msg = NULL; - const size_t msg0_size = sizeof (*finh) + 128; - fuse_handler_t **fuse_ops = NULL; - - this = data; - priv = this->private; - fuse_ops = priv->fuse_ops; +static void +fuse_dispatch(xlator_t *xl, gf_async_t *async) +{ + fuse_async_t *fasync; + fuse_private_t *priv; + fuse_in_header_t *finh; + struct iobuf *iobuf; - THIS = this; + priv = xl->private; + fasync = caa_container_of(async, fuse_async_t, async); + finh = fasync->finh; + iobuf = fasync->iobuf; - iov_in[0].iov_len = sizeof (*finh) + sizeof (struct fuse_write_in); - iov_in[1].iov_len = ((struct iobuf_pool *)this->ctx->iobuf_pool) - ->default_page_size; - priv->msg0_len_p = &iov_in[0].iov_len; - - for (;;) { - /* THIS has to be reset here */ - THIS = this; - - if (priv->init_recvd) - fuse_graph_sync (this); - - iobuf = iobuf_get (this->ctx->iobuf_pool); - /* Add extra 128 byte to the first iov so that it can - * accommodate "ordinary" non-write requests. 
It's not - * guaranteed to be big enough, as SETXATTR and namespace - * operations with very long names may grow behind it, - * but it's good enough in most cases (and we can handle - * rest via realloc). - */ - iov_in[0].iov_base = GF_CALLOC (1, msg0_size, - gf_fuse_mt_iov_base); - - if (!iobuf || !iov_in[0].iov_base) { - gf_log (this->name, GF_LOG_ERROR, - "Out of memory"); - if (iobuf) - iobuf_unref (iobuf); - GF_FREE (iov_in[0].iov_base); - sleep (10); - continue; - } + priv->fuse_ops[finh->opcode](xl, finh, fasync->msg, iobuf); - iov_in[1].iov_base = iobuf->ptr; + iobuf_unref(iobuf); +} - res = readv (priv->fd, iov_in, 2); +/* We need 512 extra buffer size for BATCH_FORGET fop. By tests, it is + * found to be reduces 'REALLOC()' in the loop */ +#define FUSE_EXTRA_ALLOC 512 - if (res == -1) { - if (errno == ENODEV || errno == EBADF) { - gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "terminating upon getting %s when " - "reading /dev/fuse", - errno == ENODEV ? "ENODEV" : "EBADF"); +static void * +fuse_thread_proc(void *data) +{ + char *mount_point = NULL; + xlator_t *this = NULL; + fuse_private_t *priv = NULL; + ssize_t res = 0; + struct iobuf *iobuf = NULL; + fuse_in_header_t *finh = NULL; + struct iovec iov_in[2] = { + { + 0, + }, + }; - break; - } - if (errno != EINTR) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "read from /dev/fuse returned -1 (%s)", - strerror (errno)); - } + void *msg = NULL; + size_t msg0_size = sizeof(*finh) + sizeof(struct fuse_write_in); + fuse_async_t *fasync; + struct pollfd pfd[2] = {{ + 0, + }}; + uint32_t psize; + + this = data; + priv = this->private; + + THIS = this; - goto cont_err; + psize = ((struct iobuf_pool *)this->ctx->iobuf_pool)->default_page_size; + priv->msg0_len_p = &msg0_size; + + for (;;) { + /* THIS has to be reset here */ + THIS = this; + + pthread_mutex_lock(&priv->sync_mutex); + { + if (!priv->mount_finished) { + memset(pfd, 0, sizeof(pfd)); + pfd[0].fd = priv->status_pipe[0]; + pfd[0].events = POLLIN | POLLHUP 
| POLLERR; + pfd[1].fd = priv->fd; + pfd[1].events = POLLIN | POLLHUP | POLLERR; + if (poll(pfd, 2, -1) < 0) { + gf_log(this->name, GF_LOG_ERROR, "poll error %s", + strerror(errno)); + pthread_mutex_unlock(&priv->sync_mutex); + break; } - if (res < sizeof (finh)) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "short read on /dev/fuse"); + if (pfd[0].revents & POLLIN) { + if (fuse_get_mount_status(this) != 0) { + pthread_mutex_unlock(&priv->sync_mutex); break; + } + priv->mount_finished = _gf_true; + } else if (pfd[0].revents) { + gf_log(this->name, GF_LOG_ERROR, + "mount pipe closed without status"); + pthread_mutex_unlock(&priv->sync_mutex); + break; } + if (!pfd[1].revents) { + pthread_mutex_unlock(&priv->sync_mutex); + continue; + } + } + } + pthread_mutex_unlock(&priv->sync_mutex); - finh = (fuse_in_header_t *)iov_in[0].iov_base; + /* + * We don't want to block on readv while we're still waiting + * for mount status. That means we only want to get here if + * mount_status is true (meaning that our wait completed + * already) or if we already called poll(2) on priv->fd to + * make sure it's ready. + */ - if (res != finh->len -#ifdef GF_DARWIN_HOST_OS - /* work around fuse4bsd/MacFUSE msg size miscalculation bug, - * that is, payload size is not taken into account for - * buffered writes + if (priv->init_recvd) + fuse_graph_sync(this); + + /* TODO: This place should always get maximum supported buffer + size from 'fuse', which is as of today 128KB. If we bring in + support for higher block sizes support, then we should be + changing this one too */ + iobuf = iobuf_get(this->ctx->iobuf_pool); + + /* Add extra 512 byte to the first iov so that it can + * accommodate "ordinary" non-write requests. It's not + * guaranteed to be big enough, as SETXATTR and namespace + * operations with very long names may grow behind it, + * but it's good enough in most cases (and we can handle + * rest via realloc). 
*/ + iov_in[0].iov_base = GF_MALLOC( + sizeof(fuse_async_t) + msg0_size + FUSE_EXTRA_ALLOC, + gf_fuse_mt_iov_base); + + if (!iobuf || !iov_in[0].iov_base) { + gf_log(this->name, GF_LOG_ERROR, "Out of memory"); + if (iobuf) + iobuf_unref(iobuf); + GF_FREE(iov_in[0].iov_base); + sleep(10); + continue; + } + + iov_in[1].iov_base = iobuf->ptr; + + iov_in[0].iov_len = msg0_size; + iov_in[1].iov_len = psize; + + res = sys_readv(priv->fd, iov_in, 2); + + if (res == -1) { + if (errno == ENODEV || errno == EBADF) { + gf_log("glusterfs-fuse", GF_LOG_DEBUG, + "terminating upon getting %s when " + "reading /dev/fuse", + errno == ENODEV ? "ENODEV" : "EBADF"); + fuse_log_eh(this, + "glusterfs-fuse: terminating" + " upon getting %s when " + "reading /dev/fuse", + errno == ENODEV ? "ENODEV" : "EBADF"); + break; + } + if (errno != EINTR) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "read from /dev/fuse returned -1 (%s)", strerror(errno)); + fuse_log_eh(this, + "glusterfs-fuse: read from " + "/dev/fuse returned -1 (%s)", + strerror(errno)); + if (errno == EPERM) { + /* + * sleep a while to avoid busy looping + * on EPERM condition */ - && !(finh->opcode == FUSE_WRITE && - finh->len == sizeof(*finh) + sizeof(struct fuse_write_in) && - res == finh->len + ((struct fuse_write_in *)(finh + 1))->size) -#endif - ) { - gf_log ("glusterfs-fuse", GF_LOG_WARNING, - "inconsistent read on /dev/fuse"); - break; + nanosleep( + &(struct timespec){0, + priv->fuse_dev_eperm_ratelimit_ns}, + NULL); } + } - priv->iobuf = iobuf; - - if (finh->opcode == FUSE_WRITE) - msg = iov_in[1].iov_base; - else { - if (res > msg0_size) { - iov_in[0].iov_base = - GF_REALLOC (iov_in[0].iov_base, res); - if (iov_in[0].iov_base) - finh = (fuse_in_header_t *) - iov_in[0].iov_base; - else { - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "Out of memory"); - send_fuse_err (this, finh, ENOMEM); - - goto cont_err; - } - } - - if (res > iov_in[0].iov_len) - memcpy (iov_in[0].iov_base + iov_in[0].iov_len, - iov_in[1].iov_base, - 
res - iov_in[0].iov_len); + goto cont_err; + } + if (res < sizeof(*finh)) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, "short read on /dev/fuse"); + fuse_log_eh(this, + "glusterfs-fuse: short read on " + "/dev/fuse"); + break; + } - msg = finh + 1; - } - if (priv->uid_map_root && - finh->uid == priv->uid_map_root) - finh->uid = 0; + finh = (fuse_in_header_t *)iov_in[0].iov_base; + if (res != finh->len #ifdef GF_DARWIN_HOST_OS - if (finh->opcode >= FUSE_OP_HIGH) - /* turn down MacFUSE specific messages */ - fuse_enosys (this, finh, msg); - else + /* work around fuse4bsd/MacFUSE msg size miscalculation bug, + * that is, payload size is not taken into account for + * buffered writes + */ + && !(finh->opcode == FUSE_WRITE && + finh->len == sizeof(*finh) + sizeof(struct fuse_write_in) && + res == finh->len + ((struct fuse_write_in *)(finh + 1))->size) #endif - fuse_ops[finh->opcode] (this, finh, msg); + ) { + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "inconsistent read on /dev/fuse"); + fuse_log_eh(this, + "glusterfs-fuse: inconsistent read " + "on /dev/fuse"); + break; + } - iobuf_unref (iobuf); - continue; + /* + * This can be moved around a bit, but it's important to do it + * *after* the readv. Otherwise, a graph switch could occur + * while we're in readv and we'll process the next request on + * the old graph before we come to the part of the loop above + * readv and check again. That would be wrong. 
+ */ + if (priv->init_recvd) + fuse_graph_sync(this); - cont_err: - iobuf_unref (iobuf); - GF_FREE (iov_in[0].iov_base); - } + if (finh->opcode == FUSE_WRITE) + msg = iov_in[1].iov_base; + else { + if (res > msg0_size + FUSE_EXTRA_ALLOC) { + void *b = GF_REALLOC(iov_in[0].iov_base, + sizeof(fuse_async_t) + res); + if (b) { + iov_in[0].iov_base = b; + finh = (fuse_in_header_t *)iov_in[0].iov_base; + } else { + gf_log("glusterfs-fuse", GF_LOG_ERROR, "Out of memory"); + send_fuse_err(this, finh, ENOMEM); - iobuf_unref (iobuf); - GF_FREE (iov_in[0].iov_base); + goto cont_err; + } + } - if (dict_get (this->options, ZR_MOUNTPOINT_OPT)) - mount_point = data_to_str (dict_get (this->options, - ZR_MOUNTPOINT_OPT)); - if (mount_point) { - gf_log (this->name, GF_LOG_INFO, - "unmounting %s", mount_point); - dict_del (this->options, ZR_MOUNTPOINT_OPT); - } + if (res > iov_in[0].iov_len) { + memcpy(iov_in[0].iov_base + iov_in[0].iov_len, + iov_in[1].iov_base, res - iov_in[0].iov_len); + iov_in[0].iov_len = res; + } - kill (getpid(), SIGTERM); + msg = finh + 1; + } + if (priv->uid_map_root && finh->uid == priv->uid_map_root) + finh->uid = 0; - return NULL; + if (finh->opcode >= FUSE_OP_HIGH) { + /* turn down MacFUSE specific messages */ + fuse_enosys(this, finh, msg, NULL); + iobuf_unref(iobuf); + } else { + fasync = iov_in[0].iov_base + iov_in[0].iov_len; + fasync->finh = finh; + fasync->msg = msg; + fasync->iobuf = iobuf; + gf_async(&fasync->async, this, fuse_dispatch); + } + + continue; + + cont_err: + iobuf_unref(iobuf); + GF_FREE(iov_in[0].iov_base); + iov_in[0].iov_base = NULL; + } + + if (iov_in[0].iov_base) + GF_FREE(iov_in[0].iov_base); + + /* + * We could be in all sorts of states with respect to iobuf and iov_in + * by the time we get here, and it's just not worth untangling them if + * we're about to kill ourselves anyway. 
+ */ + + if (dict_get(this->options, ZR_MOUNTPOINT_OPT)) + mount_point = data_to_str(dict_get(this->options, ZR_MOUNTPOINT_OPT)); + if (mount_point) { + gf_log(this->name, GF_LOG_INFO, "initiating unmount of %s", + mount_point); + } + + /* Kill the whole process, not just this thread. */ + kill(getpid(), SIGTERM); + return NULL; } - int32_t -fuse_itable_dump (xlator_t *this) +fuse_itable_dump(xlator_t *this) { - if (!this) - return -1; + fuse_private_t *priv = NULL; - gf_proc_dump_add_section("xlator.mount.fuse.itable"); - inode_table_dump(this->itable, "xlator.mount.fuse.itable"); + if (!this) + return -1; - return 0; + priv = this->private; + + if (priv && priv->active_subvol) { + gf_proc_dump_add_section("xlator.mount.fuse.itable"); + inode_table_dump(priv->active_subvol->itable, + "xlator.mount.fuse.itable"); + } + return 0; } int32_t -fuse_priv_dump (xlator_t *this) -{ - fuse_private_t *private = NULL; - - if (!this) - return -1; - - private = this->private; - - if (!private) - return -1; - - gf_proc_dump_add_section("xlator.mount.fuse.priv"); - - gf_proc_dump_write("fd", "%d", private->fd); - gf_proc_dump_write("proto_minor", "%u", - private->proto_minor); - gf_proc_dump_write("volfile", "%s", - private->volfile?private->volfile:"None"); - gf_proc_dump_write("volfile_size", "%d", - private->volfile_size); - gf_proc_dump_write("mount_point", "%s", - private->mount_point); - gf_proc_dump_write("iobuf", "%u", - private->iobuf); - gf_proc_dump_write("fuse_thread_started", "%d", - (int)private->fuse_thread_started); - gf_proc_dump_write("direct_io_mode", "%d", - private->direct_io_mode); - gf_proc_dump_write("entry_timeout", "%lf", - private->entry_timeout); - gf_proc_dump_write("attribute_timeout", "%lf", - private->attribute_timeout); - gf_proc_dump_write("init_recvd", "%d", - (int)private->init_recvd); - gf_proc_dump_write("strict_volfile_check", "%d", - (int)private->strict_volfile_check); - gf_proc_dump_write("reverse_thread_started", "%d", - 
(int)private->reverse_fuse_thread_started); +fuse_priv_dump(xlator_t *this) +{ + fuse_private_t *private = NULL; - return 0; + if (!this) + return -1; + + private + = this->private; + + if (!private) + return -1; + + gf_proc_dump_add_section("xlator.mount.fuse.priv"); + + gf_proc_dump_write("fd", "%d", private->fd); + gf_proc_dump_write("proto_minor", "%u", private->proto_minor); + gf_proc_dump_write("volfile", "%s", + private->volfile ? private->volfile : "None"); + gf_proc_dump_write("volfile_size", "%" GF_PRI_SIZET, private->volfile_size); + gf_proc_dump_write("mount_point", "%s", private->mount_point); + gf_proc_dump_write("fuse_thread_started", "%d", + (int)private->fuse_thread_started); + gf_proc_dump_write("direct_io_mode", "%d", private->direct_io_mode); + gf_proc_dump_write("entry_timeout", "%lf", private->entry_timeout); + gf_proc_dump_write("attribute_timeout", "%lf", private->attribute_timeout); + gf_proc_dump_write("init_recvd", "%d", (int)private->init_recvd); + gf_proc_dump_write("strict_volfile_check", "%d", + (int)private->strict_volfile_check); + gf_proc_dump_write("timed_response_thread_started", "%d", + (int)private->timed_response_fuse_thread_started); + gf_proc_dump_write("reverse_thread_started", "%d", + (int)private->reverse_fuse_thread_started); + gf_proc_dump_write("invalidate_limit", "%u", private->invalidate_limit); + gf_proc_dump_write("invalidate_queue_length", "%" PRIu64, + private->invalidate_count); + gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp); + + return 0; } +int +fuse_history_dump(xlator_t *this) +{ + int ret = -1; + char key_prefix[GF_DUMP_MAX_BUF_LEN] = { + 0, + }; + fuse_private_t *priv = this->private; + + GF_VALIDATE_OR_GOTO("fuse", this, out); + + if (!priv->event_history) + goto out; + + GF_VALIDATE_OR_GOTO(this->name, this->history, out); + + gf_proc_dump_build_key(key_prefix, "xlator.mount.fuse", "history"); + gf_proc_dump_add_section("%s", key_prefix); + eh_dump(this->history, NULL, 
dump_history_fuse); + + ret = 0; +out: + return ret; +} int -fuse_graph_setup (xlator_t *this, glusterfs_graph_t *graph) +dump_history_fuse(circular_buffer_t *cb, void *data) { - inode_table_t *itable = NULL; - int ret = 0; - fuse_private_t *priv = NULL; + char timestr[GF_TIMESTR_SIZE] = { + 0, + }; - priv = this->private; + gf_time_fmt_tv(timestr, sizeof timestr, &cb->tv, gf_timefmt_F_HMS); - /* handle the case of more than one CHILD_UP on same graph */ - if (priv->active_subvol == graph->top) - return 0; /* This is a valid case */ + gf_proc_dump_write("TIME", "%s", timestr); - if (graph->used) - return 0; + gf_proc_dump_write("message", "%s\n", (char *)cb->data); - graph->used = 1; + return 0; +} - itable = inode_table_new (0, graph->top); - if (!itable) - return -1; +int +fuse_graph_setup(xlator_t *this, glusterfs_graph_t *graph) +{ + inode_table_t *itable = NULL; + int ret = 0, winds = 0; + fuse_private_t *priv = NULL; + glusterfs_graph_t *prev_graph = NULL; + + priv = this->private; + + pthread_mutex_lock(&priv->sync_mutex); + { + /* 1. handle the case of more than one CHILD_UP on same graph. + * 2. make sure graph is newer than current active_subvol. + */ + if ((priv->active_subvol == graph->top) || graph->used || + ((priv->active_subvol) && + (priv->active_subvol->graph->id > graph->id))) { + goto unlock; + } + +#if FUSE_KERNEL_MINOR_VERSION >= 11 + itable = inode_table_with_invalidator(priv->lru_limit, graph->top, + fuse_inode_invalidate_fn, this); +#else + itable = inode_table_new(0, graph->top); +#endif + if (!itable) { + ret = -1; + goto unlock; + } ((xlator_t *)graph->top)->itable = itable; - pthread_mutex_lock (&priv->sync_mutex); - { - priv->next_graph = graph; - priv->event_recvd = 0; + prev_graph = priv->next_graph; - pthread_cond_signal (&priv->sync_cond); + if ((prev_graph != NULL) && (prev_graph->id > graph->id)) { + /* there was a race and an old graph was initialised + * before new one. 
+ */ + prev_graph = graph; + } else { + priv->next_graph = graph; + priv->event_recvd = 0; } - pthread_mutex_unlock (&priv->sync_mutex); - gf_log ("fuse", GF_LOG_INFO, "switched to graph %d", - ((graph) ? graph->id : 0)); + if (prev_graph != NULL) + winds = ((xlator_t *)prev_graph->top)->winds; - return ret; -} + /* set post initializing next_graph i to preserve + * critical section update and bails on error */ + graph->used = 1; + } + pthread_mutex_unlock(&priv->sync_mutex); + + if ((prev_graph != NULL) && (winds == 0)) { + xlator_notify(prev_graph->top, GF_EVENT_PARENT_DOWN, prev_graph->top, + NULL); + } + return ret; +unlock: + pthread_mutex_unlock(&priv->sync_mutex); + + return ret; +} int -notify (xlator_t *this, int32_t event, void *data, ...) +notify(xlator_t *this, int32_t event, void *data, ...) { - int32_t ret = 0; - fuse_private_t *private = NULL; - glusterfs_graph_t *graph = NULL; + int i = 0; + int32_t ret = 0; + fuse_private_t *private = NULL; + gf_boolean_t start_thread = _gf_false; + glusterfs_graph_t *graph = NULL; + struct pollfd pfd = {0}; - private = this->private; + private + = this->private; - graph = data; + graph = data; - gf_log ("fuse", GF_LOG_DEBUG, "got event %d on graph %d", - event, ((graph) ? graph->id : 0)); + gf_log("fuse", GF_LOG_DEBUG, "got event %d on graph %d", event, + ((graph) ? 
graph->id : 0)); - switch (event) - { + switch (event) { case GF_EVENT_GRAPH_NEW: - break; + break; case GF_EVENT_CHILD_UP: case GF_EVENT_CHILD_DOWN: - case GF_EVENT_CHILD_CONNECTING: - { - if (graph) { - ret = fuse_graph_setup (this, graph); - if (ret) - gf_log (this->name, GF_LOG_WARNING, - "failed to setup the graph"); - } - - if ((event == GF_EVENT_CHILD_UP) - || (event == GF_EVENT_CHILD_DOWN)) { - pthread_mutex_lock (&private->sync_mutex); - { - private->event_recvd = 1; - pthread_cond_broadcast (&private->sync_cond); - } - pthread_mutex_unlock (&private->sync_mutex); + case GF_EVENT_CHILD_CONNECTING: { + if (graph) { + ret = fuse_graph_setup(this, graph); + if (ret) + gf_log(this->name, GF_LOG_WARNING, + "failed to setup the graph"); + } + + if ((event == GF_EVENT_CHILD_UP) || + (event == GF_EVENT_CHILD_DOWN)) { + pthread_mutex_lock(&private->sync_mutex); + { + private + ->event_recvd = 1; + pthread_cond_broadcast(&private->sync_cond); } + pthread_mutex_unlock(&private->sync_mutex); + } + pthread_mutex_lock(&private->sync_mutex); + { if (!private->fuse_thread_started) { - private->fuse_thread_started = 1; - - ret = pthread_create (&private->fuse_thread, NULL, - fuse_thread_proc, this); - if (ret != 0) { - gf_log (this->name, GF_LOG_DEBUG, - "pthread_create() failed (%s)", - strerror (errno)); - break; + private + ->fuse_thread_started = 1; + start_thread = _gf_true; + } + } + pthread_mutex_unlock(&private->sync_mutex); + + if (start_thread) { + private + ->fuse_thread = GF_CALLOC(private->reader_thread_count, + sizeof(pthread_t), + gf_fuse_mt_pthread_t); + for (i = 0; i < private->reader_thread_count; i++) { + ret = gf_thread_create(&private->fuse_thread[i], NULL, + fuse_thread_proc, this, "fuseproc"); + if (ret != 0) { + gf_log(this->name, GF_LOG_DEBUG, + "pthread_create() failed (%s)", strerror(errno)); + break; + } + } + } + + break; + } + + case GF_EVENT_AUTH_FAILED: { + /* Authentication failure is an error and glusterfs should stop */ + 
gf_log(this->name, GF_LOG_ERROR, + "Server authenication failed. Shutting down."); + pthread_mutex_lock(&private->sync_mutex); + { + /*Wait for mount to finish*/ + if (!private->mount_finished) { + pfd.fd = private->status_pipe[0]; + pfd.events = POLLIN | POLLHUP | POLLERR; + if (poll(&pfd, 1, -1) < 0) { + gf_log(this->name, GF_LOG_ERROR, "poll error %s", + strerror(errno)); + goto auth_fail_unlock; + } + if (pfd.revents & POLLIN) { + if (fuse_get_mount_status(this) != 0) { + goto auth_fail_unlock; } + private + ->mount_finished = _gf_true; + } else if (pfd.revents) { + gf_log(this->name, GF_LOG_ERROR, + "mount pipe closed without status"); + goto auth_fail_unlock; + } } - - break; + } + auth_fail_unlock: + pthread_mutex_unlock(&private->sync_mutex); + fini(this); + break; } default: - break; - } + break; + } - return ret; + return ret; } int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int ret = -1; - - if (!this) - return ret; + int ret = -1; - ret = xlator_mem_acct_init (this, gf_fuse_mt_end + 1); + if (!this) + return ret; - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); - return ret; - } + ret = xlator_mem_acct_init(this, gf_fuse_mt_end + 1); + if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "Memory accounting init" + "failed"); return ret; -} + } + return ret; +} static fuse_handler_t *fuse_std_ops[FUSE_OP_HIGH] = { - [FUSE_INIT] = fuse_init, - [FUSE_DESTROY] = fuse_destroy, - [FUSE_LOOKUP] = fuse_lookup, - [FUSE_FORGET] = fuse_forget, - [FUSE_GETATTR] = fuse_getattr, - [FUSE_SETATTR] = fuse_setattr, - [FUSE_OPENDIR] = fuse_opendir, - [FUSE_READDIR] = fuse_readdir, - [FUSE_RELEASEDIR] = fuse_releasedir, - [FUSE_ACCESS] = fuse_access, - [FUSE_READLINK] = fuse_readlink, - [FUSE_MKNOD] = fuse_mknod, - [FUSE_MKDIR] = fuse_mkdir, - [FUSE_UNLINK] = fuse_unlink, - [FUSE_RMDIR] = fuse_rmdir, - [FUSE_SYMLINK] = fuse_symlink, - [FUSE_RENAME] = fuse_rename, - [FUSE_LINK] = fuse_link, - 
[FUSE_CREATE] = fuse_create, - [FUSE_OPEN] = fuse_open, - [FUSE_READ] = fuse_readv, - [FUSE_WRITE] = fuse_write, - [FUSE_FLUSH] = fuse_flush, - [FUSE_RELEASE] = fuse_release, - [FUSE_FSYNC] = fuse_fsync, - [FUSE_FSYNCDIR] = fuse_fsyncdir, - [FUSE_STATFS] = fuse_statfs, - [FUSE_SETXATTR] = fuse_setxattr, - [FUSE_GETXATTR] = fuse_getxattr, - [FUSE_LISTXATTR] = fuse_listxattr, - [FUSE_REMOVEXATTR] = fuse_removexattr, - [FUSE_GETLK] = fuse_getlk, - [FUSE_SETLK] = fuse_setlk, - [FUSE_SETLKW] = fuse_setlk, -}; + [FUSE_LOOKUP] = fuse_lookup, + [FUSE_FORGET] = fuse_forget, + [FUSE_GETATTR] = fuse_getattr, + [FUSE_SETATTR] = fuse_setattr, + [FUSE_READLINK] = fuse_readlink, + [FUSE_SYMLINK] = fuse_symlink, + [FUSE_MKNOD] = fuse_mknod, + [FUSE_MKDIR] = fuse_mkdir, + [FUSE_UNLINK] = fuse_unlink, + [FUSE_RMDIR] = fuse_rmdir, + [FUSE_RENAME] = fuse_rename, + [FUSE_LINK] = fuse_link, + [FUSE_OPEN] = fuse_open, + [FUSE_READ] = fuse_readv, + [FUSE_WRITE] = fuse_write, + [FUSE_STATFS] = fuse_statfs, + [FUSE_RELEASE] = fuse_release, + [FUSE_FSYNC] = fuse_fsync, + [FUSE_SETXATTR] = fuse_setxattr, + [FUSE_GETXATTR] = fuse_getxattr, + [FUSE_LISTXATTR] = fuse_listxattr, + [FUSE_REMOVEXATTR] = fuse_removexattr, + [FUSE_FLUSH] = fuse_flush, + [FUSE_INIT] = fuse_init, + [FUSE_OPENDIR] = fuse_opendir, + [FUSE_READDIR] = fuse_readdir, + [FUSE_RELEASEDIR] = fuse_releasedir, + [FUSE_FSYNCDIR] = fuse_fsyncdir, + [FUSE_GETLK] = fuse_getlk, + [FUSE_SETLK] = fuse_setlk, + [FUSE_SETLKW] = fuse_setlk, + [FUSE_ACCESS] = fuse_access, + [FUSE_CREATE] = fuse_create, + [FUSE_INTERRUPT] = fuse_interrupt, + /* [FUSE_BMAP] */ + [FUSE_DESTROY] = fuse_destroy, +/* [FUSE_IOCTL] */ +/* [FUSE_POLL] */ +/* [FUSE_NOTIFY_REPLY] */ + +#if FUSE_KERNEL_MINOR_VERSION >= 16 + [FUSE_BATCH_FORGET] = fuse_batch_forget, +#endif +#if FUSE_KERNEL_MINOR_VERSION >= 19 +#ifdef FALLOC_FL_KEEP_SIZE + [FUSE_FALLOCATE] = fuse_fallocate, +#endif /* FALLOC_FL_KEEP_SIZE */ +#endif + +#if FUSE_KERNEL_MINOR_VERSION >= 21 + 
[FUSE_READDIRPLUS] = fuse_readdirp, +#endif -static fuse_handler_t *fuse_dump_ops[FUSE_OP_HIGH] = { +#if FUSE_KERNEL_MINOR_VERSION >= 24 && HAVE_SEEK_HOLE + [FUSE_LSEEK] = fuse_lseek, +#endif + +#if FUSE_KERNEL_MINOR_VERSION >= 28 + [FUSE_COPY_FILE_RANGE] = fuse_copy_file_range, +#endif }; +static fuse_handler_t *fuse_dump_ops[FUSE_OP_HIGH]; static void -fuse_dumper (xlator_t *this, fuse_in_header_t *finh, void *msg) +fuse_dumper(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) { - fuse_private_t *priv = NULL; - struct iovec diov[3]; - char r = 'R'; - int ret = 0; - - priv = this->private; - - diov[0].iov_base = &r; - diov[0].iov_len = 1; - diov[1].iov_base = finh; - diov[1].iov_len = sizeof (*finh); - diov[2].iov_base = msg; - diov[2].iov_len = finh->len - sizeof (*finh); - - pthread_mutex_lock (&priv->fuse_dump_mutex); - ret = writev (priv->fuse_dump_fd, diov, 3); - pthread_mutex_unlock (&priv->fuse_dump_mutex); - if (ret == -1) - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "failed to dump fuse message (R): %s", - strerror (errno)); - - return priv->fuse_ops0[finh->opcode] (this, finh, msg); + fuse_private_t *priv = NULL; + struct iovec diov[6] = { + { + 0, + }, + }; + char r = 'R'; + uint32_t fusedump_item_count = 3; + struct fusedump_timespec fts = { + 0, + }; + struct fusedump_signature fsig = { + 0, + }; + + int ret = 0; + + priv = this->private; + + fusedump_setup_meta(diov, &r, &fusedump_item_count, &fts, &fsig); + diov[4] = (struct iovec){finh, sizeof(*finh)}; + if (finh->opcode == FUSE_WRITE) { + /* WRITE has special data alignment, see comment in + fuse_write(). 
*/ + diov[4].iov_len += sizeof(struct fuse_write_in); + } + diov[5] = (struct iovec){msg, finh->len - diov[4].iov_len}; + + pthread_mutex_lock(&priv->fuse_dump_mutex); + ret = sys_writev(priv->fuse_dump_fd, diov, sizeof(diov) / sizeof(diov[0])); + pthread_mutex_unlock(&priv->fuse_dump_mutex); + if (ret == -1) + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "failed to dump fuse message (R): %s", strerror(errno)); + + priv->fuse_ops0[finh->opcode](this, finh, msg, NULL); } - int -init (xlator_t *this_xl) -{ - int ret = 0; - dict_t *options = NULL; - char *value_string = NULL; - cmd_args_t *cmd_args = NULL; - char *fsname = NULL; - fuse_private_t *priv = NULL; - struct stat stbuf = {0,}; - int i = 0; - int xl_name_allocated = 0; - int fsname_allocated = 0; - glusterfs_ctx_t *ctx = NULL; - gf_boolean_t sync_mtab = _gf_false; - char *mnt_args = NULL; - - if (this_xl == NULL) - return -1; - - if (this_xl->options == NULL) - return -1; - - ctx = glusterfs_ctx_get (); - if (!ctx) - return -1; - - options = this_xl->options; - - if (this_xl->name == NULL) { - this_xl->name = gf_strdup ("fuse"); - if (!this_xl->name) { - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "Out of memory"); - - goto cleanup_exit; - } - xl_name_allocated = 1; - } +init(xlator_t *this_xl) +{ + int ret = 0; + dict_t *options = NULL; + char *value_string = NULL; + cmd_args_t *cmd_args = NULL; + char *fsname = NULL; + fuse_private_t *priv = NULL; + struct stat stbuf = { + 0, + }; + int i = 0; + int xl_name_allocated = 0; + int fsname_allocated = 0; + glusterfs_ctx_t *ctx = NULL; + gf_boolean_t sync_to_mount = _gf_false; + gf_boolean_t fopen_keep_cache = _gf_false; + char *mnt_args = NULL; + eh_t *event = NULL; + + if (this_xl == NULL) + return -1; - priv = GF_CALLOC (1, sizeof (*priv), gf_fuse_mt_fuse_private_t); - if (!priv) { - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "Out of memory"); + if (this_xl->options == NULL) + return -1; - goto cleanup_exit; - } - this_xl->private = (void *) priv; - priv->mount_point 
= NULL; - priv->fd = -1; - priv->revchan_in = -1; - priv->revchan_out = -1; - - /* get options from option dictionary */ - ret = dict_get_str (options, ZR_MOUNTPOINT_OPT, &value_string); - if (ret == -1 || value_string == NULL) { - gf_log ("fuse", GF_LOG_ERROR, - "Mandatory option 'mountpoint' is not specified."); - goto cleanup_exit; - } + ctx = this_xl->ctx; + if (!ctx) + return -1; - if (stat (value_string, &stbuf) != 0) { - if (errno == ENOENT) { - gf_log (this_xl->name, GF_LOG_ERROR, - "%s %s does not exist", - ZR_MOUNTPOINT_OPT, value_string); - } else if (errno == ENOTCONN) { - gf_log (this_xl->name, GF_LOG_ERROR, - "Mountpoint %s seems to have a stale " - "mount, run 'umount %s' and try again.", - value_string, value_string); - } else { - gf_log (this_xl->name, GF_LOG_DEBUG, - "%s %s : stat returned %s", - ZR_MOUNTPOINT_OPT, - value_string, strerror (errno)); - } - goto cleanup_exit; + options = this_xl->options; + + if (this_xl->name == NULL) { + this_xl->name = gf_strdup("fuse"); + if (!this_xl->name) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, "Out of memory"); + + goto cleanup_exit; + } + xl_name_allocated = 1; + } + + priv = GF_CALLOC(1, sizeof(*priv), gf_fuse_mt_fuse_private_t); + if (!priv) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, "Out of memory"); + + goto cleanup_exit; + } + this_xl->private = (void *)priv; + priv->mount_point = NULL; + priv->fd = -1; + + INIT_LIST_HEAD(&priv->invalidate_list); + pthread_cond_init(&priv->invalidate_cond, NULL); + pthread_mutex_init(&priv->invalidate_mutex, NULL); + + INIT_LIST_HEAD(&priv->timed_list); + pthread_cond_init(&priv->timed_cond, NULL); + pthread_mutex_init(&priv->timed_mutex, NULL); + + INIT_LIST_HEAD(&priv->interrupt_list); + pthread_mutex_init(&priv->interrupt_mutex, NULL); + + pthread_mutex_init(&priv->fusedev_errno_cnt_mutex, NULL); + + /* get options from option dictionary */ + ret = dict_get_str(options, ZR_MOUNTPOINT_OPT, &value_string); + if (ret == -1 || value_string == NULL) { + gf_log("fuse", 
GF_LOG_ERROR, + "Mandatory option 'mountpoint' is not specified."); + goto cleanup_exit; + } + + if (sys_stat(value_string, &stbuf) != 0) { + if (errno == ENOENT) { + gf_log(this_xl->name, GF_LOG_ERROR, "%s %s does not exist", + ZR_MOUNTPOINT_OPT, value_string); + } else if (errno == ENOTCONN) { + gf_log(this_xl->name, GF_LOG_ERROR, + "Mountpoint %s seems to have a stale " + "mount, run 'umount %s' and try again.", + value_string, value_string); + } else { + gf_log(this_xl->name, GF_LOG_DEBUG, "%s %s : stat returned %s", + ZR_MOUNTPOINT_OPT, value_string, strerror(errno)); } + goto cleanup_exit; + } - if (S_ISDIR (stbuf.st_mode) == 0) { - gf_log (this_xl->name, GF_LOG_ERROR, - "%s %s is not a directory", - ZR_MOUNTPOINT_OPT, value_string); - goto cleanup_exit; - } - priv->mount_point = gf_strdup (value_string); - if (!priv->mount_point) { - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "Out of memory"); + if (S_ISDIR(stbuf.st_mode) == 0) { + gf_log(this_xl->name, GF_LOG_ERROR, "%s %s is not a directory", + ZR_MOUNTPOINT_OPT, value_string); + goto cleanup_exit; + } + priv->mount_point = gf_strdup(value_string); + if (!priv->mount_point) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, "Out of memory"); - goto cleanup_exit; - } + goto cleanup_exit; + } - ret = dict_get_double (options, "attribute-timeout", - &priv->attribute_timeout); - if (ret != 0) - priv->attribute_timeout = 1.0; /* default */ + GF_OPTION_INIT(ZR_ATTR_TIMEOUT_OPT, priv->attribute_timeout, double, + cleanup_exit); - ret = dict_get_double (options, "entry-timeout", - &priv->entry_timeout); - if (ret != 0) - priv->entry_timeout = 1.0; /* default */ + GF_OPTION_INIT("reader-thread-count", priv->reader_thread_count, uint32, + cleanup_exit); - ret = dict_get_int32 (options, "client-pid", - &priv->client_pid); - if (ret == 0) - priv->client_pid_set = _gf_true; + GF_OPTION_INIT("auto-invalidation", priv->fuse_auto_inval, bool, + cleanup_exit); + GF_OPTION_INIT(ZR_ENTRY_TIMEOUT_OPT, priv->entry_timeout, double, + 
cleanup_exit); - ret = dict_get_uint32 (options, "uid-map-root", - &priv->uid_map_root); - if (ret != 0) - priv->uid_map_root = 0; + GF_OPTION_INIT(ZR_NEGATIVE_TIMEOUT_OPT, priv->negative_timeout, double, + cleanup_exit); - priv->direct_io_mode = 2; - ret = dict_get_str (options, ZR_DIRECT_IO_OPT, &value_string); - if (ret == 0) { - ret = gf_string2boolean (value_string, &priv->direct_io_mode); - GF_ASSERT (ret == 0); - } + GF_OPTION_INIT("client-pid", priv->client_pid, int32, cleanup_exit); + /* have to check & register the presence of client-pid manually */ + priv->client_pid_set = !!dict_get(this_xl->options, "client-pid"); - priv->strict_volfile_check = 0; - ret = dict_get_str (options, ZR_STRICT_VOLFILE_CHECK, &value_string); - if (ret == 0) { - ret = gf_string2boolean (value_string, - &priv->strict_volfile_check); - GF_ASSERT (ret == 0); - } + GF_OPTION_INIT("uid-map-root", priv->uid_map_root, uint32, cleanup_exit); - priv->acl = 0; - ret = dict_get_str (options, "acl", &value_string); + priv->direct_io_mode = 2; + ret = dict_get_str(options, ZR_DIRECT_IO_OPT, &value_string); + if (ret == 0) { + gf_boolean_t direct_io_mode_bool; + ret = gf_string2boolean(value_string, &direct_io_mode_bool); if (ret == 0) { - ret = gf_string2boolean (value_string, &priv->acl); - GF_ASSERT (ret == 0); + priv->direct_io_mode = direct_io_mode_bool ? 
1 : 0; } - if (priv->uid_map_root) - priv->acl = 1; + } - priv->read_only = 0; - ret = dict_get_str (options, "read-only", &value_string); - if (ret == 0) { - ret = gf_string2boolean (value_string, &priv->read_only); - GF_ASSERT (ret == 0); - } + GF_OPTION_INIT(ZR_STRICT_VOLFILE_CHECK, priv->strict_volfile_check, bool, + cleanup_exit); - priv->fuse_dump_fd = -1; - ret = dict_get_str (options, "dump-fuse", &value_string); - if (ret == 0) { - ret = unlink (value_string); - if (ret != -1 || errno == ENOENT) - ret = open (value_string, O_RDWR|O_CREAT|O_EXCL, - S_IRUSR|S_IWUSR); - if (ret == -1) { - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "cannot open fuse dump file %s", - value_string); - - goto cleanup_exit; - } - priv->fuse_dump_fd = ret; - } + GF_OPTION_INIT("acl", priv->acl, bool, cleanup_exit); - sync_mtab = _gf_false; - ret = dict_get_str (options, "sync-mtab", &value_string); - if (ret == 0) { - ret = gf_string2boolean (value_string, - &sync_mtab); - GF_ASSERT (ret == 0); - } + if (priv->uid_map_root) + priv->acl = 1; - cmd_args = &this_xl->ctx->cmd_args; - fsname = cmd_args->volfile; - if (!fsname && cmd_args->volfile_server) { - if (cmd_args->volfile_id) { - fsname = GF_MALLOC ( - strlen (cmd_args->volfile_server) + 1 + - strlen (cmd_args->volfile_id) + 1, - gf_fuse_mt_fuse_private_t); - if (!fsname) { - gf_log ("glusterfs-fuse", GF_LOG_ERROR, - "Out of memory"); - goto cleanup_exit; - } - fsname_allocated = 1; - strcpy (fsname, cmd_args->volfile_server); - strcat (fsname, ":"); - strcat (fsname, cmd_args->volfile_id); - } else - fsname = cmd_args->volfile_server; - } - if (!fsname) - fsname = "glusterfs"; + GF_OPTION_INIT("selinux", priv->selinux, bool, cleanup_exit); - gf_asprintf (&mnt_args, "%s%sallow_other,max_read=131072", - priv->read_only ? "ro," : "", - priv->acl ? 
"" : "default_permissions,"); - if (!mnt_args) - goto cleanup_exit; + GF_OPTION_INIT("capability", priv->capability, bool, cleanup_exit); - priv->fd = gf_fuse_mount (priv->mount_point, fsname, mnt_args, - sync_mtab ? &ctx->mtab_pid : NULL); - if (priv->fd == -1) - goto cleanup_exit; + GF_OPTION_INIT("read-only", priv->read_only, bool, cleanup_exit); + + GF_OPTION_INIT("enable-ino32", priv->enable_ino32, bool, cleanup_exit); - pthread_mutex_init (&priv->fuse_dump_mutex, NULL); - pthread_cond_init (&priv->sync_cond, NULL); - pthread_mutex_init (&priv->sync_mutex, NULL); - priv->event_recvd = 0; + GF_OPTION_INIT("use-readdirp", priv->use_readdirp, bool, cleanup_exit); - for (i = 0; i < FUSE_OP_HIGH; i++) { - if (!fuse_std_ops[i]) - fuse_std_ops[i] = fuse_enosys; - if (!fuse_dump_ops[i]) - fuse_dump_ops[i] = fuse_dumper; + priv->fuse_dump_fd = -1; + ret = dict_get_str(options, "dump-fuse", &value_string); + if (ret == 0) { + ret = sys_unlink(value_string); + if (ret == -1 && errno != ENOENT) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "failed to remove old fuse dump file %s: %s", value_string, + strerror(errno)); + + goto cleanup_exit; } - priv->fuse_ops = fuse_std_ops; - if (priv->fuse_dump_fd != -1) { - priv->fuse_ops0 = priv->fuse_ops; - priv->fuse_ops = fuse_dump_ops; + ret = open(value_string, O_RDWR | O_CREAT | O_EXCL, S_IRUSR | S_IWUSR); + if (ret == -1) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "failed to open fuse dump file %s: %s", value_string, + strerror(errno)); + + goto cleanup_exit; + } + priv->fuse_dump_fd = ret; + } + + sync_to_mount = _gf_false; + ret = dict_get_str(options, "sync-to-mount", &value_string); + if (ret == 0) { + ret = gf_string2boolean(value_string, &sync_to_mount); + GF_ASSERT(ret == 0); + } + + priv->fopen_keep_cache = 2; + if (dict_get(options, "fopen-keep-cache")) { + GF_OPTION_INIT("fopen-keep-cache", fopen_keep_cache, bool, + cleanup_exit); + priv->fopen_keep_cache = fopen_keep_cache; + } + + GF_OPTION_INIT("gid-timeout", 
priv->gid_cache_timeout, int32, cleanup_exit); + + GF_OPTION_INIT("fuse-mountopts", priv->fuse_mountopts, str, cleanup_exit); + + if (gid_cache_init(&priv->gid_cache, priv->gid_cache_timeout) < 0) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, + "Failed to initialize " + "group cache."); + goto cleanup_exit; + } + + GF_OPTION_INIT("resolve-gids", priv->resolve_gids, bool, cleanup_exit); + + /* default values seemed to work fine during testing */ + GF_OPTION_INIT("background-qlen", priv->background_qlen, int32, + cleanup_exit); + GF_OPTION_INIT("congestion-threshold", priv->congestion_threshold, int32, + cleanup_exit); + + GF_OPTION_INIT("no-root-squash", priv->no_root_squash, bool, cleanup_exit); + /* change the client_pid to no-root-squash pid only if the + client is neither defrag process or gsyncd process. + */ + if (!priv->client_pid_set) { + if (priv->no_root_squash == _gf_true) { + priv->client_pid_set = _gf_true; + priv->client_pid = GF_CLIENT_PID_NO_ROOT_SQUASH; + } + } + + GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit); + + GF_OPTION_INIT("invalidate-limit", priv->invalidate_limit, uint32, + cleanup_exit); + + GF_OPTION_INIT("event-history", priv->event_history, bool, cleanup_exit); + + GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit); + + /* Writeback cache support */ + GF_OPTION_INIT("kernel-writeback-cache", priv->kernel_writeback_cache, bool, + cleanup_exit); + GF_OPTION_INIT("attr-times-granularity", priv->attr_times_granularity, + int32, cleanup_exit); + + GF_OPTION_INIT("flush-handle-interrupt", priv->flush_handle_interrupt, bool, + cleanup_exit); + + GF_OPTION_INIT("fuse-dev-eperm-ratelimit-ns", + priv->fuse_dev_eperm_ratelimit_ns, uint32, cleanup_exit); + + /* user has set only background-qlen, not congestion-threshold, + use the fuse kernel driver formula to set congestion. 
ie, 75% */ + if (dict_get(this_xl->options, "background-qlen") && + !dict_get(this_xl->options, "congestion-threshold")) { + priv->congestion_threshold = (priv->background_qlen * 3) / 4; + gf_log(this_xl->name, GF_LOG_INFO, + "setting congestion control as 75%% of " + "background-queue length (ie, (.75 * %d) = %d", + priv->background_qlen, priv->congestion_threshold); + } + + /* congestion should not be higher than background queue length */ + if (priv->congestion_threshold > priv->background_qlen) { + gf_log(this_xl->name, GF_LOG_INFO, + "setting congestion control same as " + "background-queue length (%d)", + priv->background_qlen); + priv->congestion_threshold = priv->background_qlen; + } + + cmd_args = &this_xl->ctx->cmd_args; + fsname = cmd_args->volfile; + if (!fsname && cmd_args->volfile_server) { + if (cmd_args->volfile_id) { + int dir_len = 0; + if (cmd_args->subdir_mount) + dir_len = strlen(cmd_args->subdir_mount) + 1; + fsname = GF_MALLOC(strlen(cmd_args->volfile_server) + 1 + + strlen(cmd_args->volfile_id) + 1 + dir_len, + gf_fuse_mt_fuse_private_t); + if (!fsname) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, "Out of memory"); + goto cleanup_exit; + } + fsname_allocated = 1; + strcpy(fsname, cmd_args->volfile_server); + strcat(fsname, ":"); + strcat(fsname, cmd_args->volfile_id); + if (dir_len) + strcat(fsname, cmd_args->subdir_mount); + } else + fsname = cmd_args->volfile_server; + } + if (!fsname) + fsname = "glusterfs"; + + priv->fdtable = gf_fd_fdtable_alloc(); + if (priv->fdtable == NULL) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, "Out of memory"); + goto cleanup_exit; + } + + gf_asprintf(&mnt_args, "%s%s%s%sallow_other,max_read=131072", + priv->acl ? "" : "default_permissions,", + priv->read_only ? "ro," : "", + priv->fuse_mountopts ? priv->fuse_mountopts : "", + priv->fuse_mountopts ? 
"," : ""); + if (!mnt_args) + goto cleanup_exit; + + { + char *mnt_tok = NULL; + token_iter_t tit = { + 0, + }; + gf_boolean_t iter_end = _gf_false; + + for (mnt_tok = token_iter_init(mnt_args, ',', &tit);;) { + iter_end = next_token(&mnt_tok, &tit); + + if (strcmp(mnt_tok, "auto_unmount") == 0) { + priv->auto_unmount = _gf_true; + drop_token(mnt_tok, &tit); + } + + if (iter_end) + break; } - - if (fsname_allocated) - GF_FREE (fsname); - GF_FREE (mnt_args); - return 0; + } + + if (pipe(priv->status_pipe) < 0) { + gf_log(this_xl->name, GF_LOG_ERROR, + "could not create pipe to separate mount process"); + goto cleanup_exit; + } + + priv->fd = gf_fuse_mount(priv->mount_point, fsname, mnt_args, + sync_to_mount ? &ctx->mnt_pid : NULL, + priv->status_pipe[1]); + if (priv->fd == -1) + goto cleanup_exit; + if (priv->auto_unmount) { + ret = gf_fuse_unmount_daemon(priv->mount_point, priv->fd); + if (ret == -1) + goto cleanup_exit; + } + + if (priv->event_history) { + event = eh_new(FUSE_EVENT_HISTORY_SIZE, _gf_false, NULL); + if (!event) { + gf_log(this_xl->name, GF_LOG_ERROR, + "could not create a new event history"); + goto cleanup_exit; + } + + this_xl->history = event; + } + + pthread_mutex_init(&priv->fuse_dump_mutex, NULL); + pthread_cond_init(&priv->sync_cond, NULL); + pthread_cond_init(&priv->migrate_cond, NULL); + pthread_mutex_init(&priv->sync_mutex, NULL); + priv->event_recvd = 0; + + for (i = 0; i < FUSE_OP_HIGH; i++) { + if (!fuse_std_ops[i]) + fuse_std_ops[i] = fuse_enosys; + if (!fuse_dump_ops[i]) + fuse_dump_ops[i] = fuse_dumper; + } + priv->fuse_ops = fuse_std_ops; + if (priv->fuse_dump_fd != -1) { + priv->fuse_ops0 = priv->fuse_ops; + priv->fuse_ops = fuse_dump_ops; + } + + if (fsname_allocated) + GF_FREE(fsname); + GF_FREE(mnt_args); + return 0; cleanup_exit: - if (xl_name_allocated) - GF_FREE (this_xl->name); - if (fsname_allocated) - GF_FREE (fsname); - if (priv) { - GF_FREE (priv->mount_point); - close (priv->fd); - close (priv->fuse_dump_fd); - GF_FREE 
(priv); - } - if (mnt_args) - GF_FREE (mnt_args); - return -1; + if (xl_name_allocated) + GF_FREE(this_xl->name); + if (fsname_allocated) + GF_FREE(fsname); + if (priv) { + GF_FREE(priv->mount_point); + if (priv->fd != -1) + sys_close(priv->fd); + if (priv->fuse_dump_fd != -1) + sys_close(priv->fuse_dump_fd); + GF_FREE(priv); + } + GF_FREE(mnt_args); + return -1; } - void -fini (xlator_t *this_xl) +fini(xlator_t *this_xl) { - fuse_private_t *priv = NULL; - char *mount_point = NULL; + fuse_private_t *priv = NULL; + char *mount_point = NULL; - if (this_xl == NULL) - return; - - if ((priv = this_xl->private) == NULL) - return; + if (this_xl == NULL) + return; - if (dict_get (this_xl->options, ZR_MOUNTPOINT_OPT)) - mount_point = data_to_str (dict_get (this_xl->options, - ZR_MOUNTPOINT_OPT)); - if (mount_point != NULL) { - gf_log (this_xl->name, GF_LOG_INFO, - "Unmounting '%s'.", mount_point); + if ((priv = this_xl->private) == NULL) + return; - dict_del (this_xl->options, ZR_MOUNTPOINT_OPT); - gf_fuse_unmount (mount_point, priv->fd); - close (priv->fuse_dump_fd); - } + pthread_mutex_lock(&priv->sync_mutex); + { + if (!(priv->fini_invoked)) { + priv->fini_invoked = _gf_true; + } else { + pthread_mutex_unlock(&priv->sync_mutex); + return; + } + } + pthread_mutex_unlock(&priv->sync_mutex); + + if (dict_get(this_xl->options, ZR_MOUNTPOINT_OPT)) + mount_point = data_to_str( + dict_get(this_xl->options, ZR_MOUNTPOINT_OPT)); + if (mount_point != NULL) { + if (!priv->auto_unmount) { + gf_log(this_xl->name, GF_LOG_INFO, "Unmounting '%s'.", mount_point); + gf_fuse_unmount(mount_point, priv->fd); + } + + gf_log(this_xl->name, GF_LOG_INFO, "Closing fuse connection to '%s'.", + mount_point); + + sys_close(priv->fuse_dump_fd); + dict_del(this_xl->options, ZR_MOUNTPOINT_OPT); + } + /* Process should terminate once fuse xlator is finished. + * Required for AUTH_FAILED event. 
+ */ + kill(getpid(), SIGTERM); } -struct xlator_fops fops = { -}; - -struct xlator_cbks cbks = { -}; +struct xlator_fops fops; +struct xlator_cbks cbks = {.invalidate = fuse_invalidate, + .forget = fuse_forget_cbk, + .release = fuse_internal_release}; struct xlator_dumpops dumpops = { - .priv = fuse_priv_dump, - .inode = fuse_itable_dump, + .priv = fuse_priv_dump, + .inode = fuse_itable_dump, + .history = fuse_history_dump, }; struct volume_options options[] = { - { .key = {"direct-io-mode"}, - .type = GF_OPTION_TYPE_BOOL - }, - { .key = {ZR_MOUNTPOINT_OPT, "mount-point"}, - .type = GF_OPTION_TYPE_PATH - }, - { .key = {ZR_DUMP_FUSE, "fuse-dumpfile"}, - .type = GF_OPTION_TYPE_PATH - }, - { .key = {ZR_ATTR_TIMEOUT_OPT}, - .type = GF_OPTION_TYPE_DOUBLE - }, - { .key = {ZR_ENTRY_TIMEOUT_OPT}, - .type = GF_OPTION_TYPE_DOUBLE - }, - { .key = {ZR_STRICT_VOLFILE_CHECK}, - .type = GF_OPTION_TYPE_BOOL - }, - { .key = {"client-pid"}, - .type = GF_OPTION_TYPE_INT - }, - { .key = {"uid-map-root"}, - .type = GF_OPTION_TYPE_INT - }, - { .key = {"sync-mtab"}, - .type = GF_OPTION_TYPE_BOOL - }, - { .key = {"read-only"}, - .type = GF_OPTION_TYPE_BOOL - }, - { .key = {NULL} }, + {.key = {"direct-io-mode"}, .type = GF_OPTION_TYPE_BOOL}, + {.key = {ZR_MOUNTPOINT_OPT, "mount-point"}, .type = GF_OPTION_TYPE_PATH}, + {.key = {ZR_DUMP_FUSE, "fuse-dumpfile"}, .type = GF_OPTION_TYPE_PATH}, + {.key = {ZR_ATTR_TIMEOUT_OPT}, + .type = GF_OPTION_TYPE_DOUBLE, + .default_value = "1.0"}, + {.key = {ZR_ENTRY_TIMEOUT_OPT}, + .type = GF_OPTION_TYPE_DOUBLE, + .default_value = "1.0"}, + {.key = {ZR_NEGATIVE_TIMEOUT_OPT}, + .type = GF_OPTION_TYPE_DOUBLE, + .default_value = "0.0"}, + {.key = {ZR_STRICT_VOLFILE_CHECK}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false"}, + {.key = {"client-pid"}, .type = GF_OPTION_TYPE_INT}, + {.key = {"uid-map-root"}, .type = GF_OPTION_TYPE_INT}, + {.key = {"sync-to-mount"}, .type = GF_OPTION_TYPE_BOOL}, + {.key = {"read-only"}, .type = GF_OPTION_TYPE_BOOL}, + 
{.key = {"fopen-keep-cache"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false"}, + {.key = {"gid-timeout"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "300"}, + {.key = {"resolve-gids"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false"}, + {.key = {"acl"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "false"}, + {.key = {"selinux"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "false"}, + {.key = {"enable-ino32"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false"}, + { + .key = {"background-qlen"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "64", + .min = 16, + .max = (64 * GF_UNIT_KB), + }, + { + .key = {"congestion-threshold"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "48", + .min = 12, + .max = (64 * GF_UNIT_KB), + }, + {.key = {"fuse-mountopts"}, .type = GF_OPTION_TYPE_STR}, + {.key = {"use-readdirp"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "yes"}, + { + .key = {"no-root-squash"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .description = + "This is the mount option for disabling the " + "root squash for the client irrespective of whether the " + "root-squash " + "option for the volume is set or not. But this option is honoured " + "only for the trusted clients. 
For non trusted clients this value " + "does not have any affect and the volume option for root-squash is " + "honoured.", + }, + {.key = {"capability"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false"}, + { + .key = {"event-history"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .description = "This option can be used to enable or disable fuse " + "event history.", + }, + { + .key = {"thin-client"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .description = "Enables thin mount and connects via gfproxyd daemon.", + }, + { + .key = {"reader-thread-count"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "1", + .min = 1, + .max = 64, + .description = "Sets fuse reader thread count.", + }, + { + .key = {"kernel-writeback-cache"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .description = "Enables fuse in-kernel writeback cache.", + }, + { + .key = {"attr-times-granularity"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "0", + .min = 0, + .max = 1000000000, + .description = "Supported granularity of file attribute times.", + }, + { + .key = {"flush-handle-interrupt"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .description = + "Handle iterrupts in FLUSH handler (for testing purposes).", + }, + { + .key = {"lru-limit"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "65536", + .min = 0, + .description = "makes glusterfs invalidate kernel inodes after " + "reaching this limit (0 means 'unlimited')", + }, + { + .key = {"invalidate-limit"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "0", + .min = 0, + .description = "suspend invalidations as of 'lru-limit' if the number " + "of outstanding invalidations reaches this limit " + "(0 means 'unlimited')", + }, + { + .key = {"auto-invalidation"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "true", + .description = "controls whether fuse-kernel can auto-invalidate " + "attribute, dentry and page-cache. 
Disable this only " + "if same files/directories are not accessed across " + "two different mounts concurrently", + }, + { + .key = {"fuse-dev-eperm-ratelimit-ns"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "10000000", /* 0.01 sec */ + .min = 0, + .max = 1000000000, + .description = "Rate limit reading from fuse device upon EPERM " + "failure.", + }, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .notify = notify, + .dumpops = &dumpops, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "fuse", + .category = GF_MAINTAINED, }; diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h index ae764a7bccc..4cb94c23cad 100644 --- a/xlators/mount/fuse/src/fuse-bridge.h +++ b/xlators/mount/fuse/src/fuse-bridge.h @@ -1,22 +1,12 @@ /* - Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ - #ifndef _GF_FUSE_BRIDGE_H_ #define _GF_FUSE_BRIDGE_H_ @@ -29,21 +19,14 @@ #include <sys/time.h> #include <fnmatch.h> -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif /* _CONFIG_H */ - -#include "glusterfs.h" -#include "logging.h" -#include "xlator.h" -#include "defaults.h" -#include "common-utils.h" -#include "statedump.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/xlator.h> +#include <glusterfs/defaults.h> +#include <glusterfs/common-utils.h> +#include <glusterfs/statedump.h> #ifdef GF_DARWIN_HOST_OS -/* This is MacFUSE's marker for MacFUSE-specific code */ -#define __FreeBSD__ 10 #include "fuse_kernel_macfuse.h" #else #include "fuse_kernel.h" @@ -52,269 +35,510 @@ #include "fuse-mount.h" #include "fuse-mem-types.h" -#include "list.h" -#include "dict.h" +#include <glusterfs/list.h> +#include <glusterfs/dict.h> +#include <glusterfs/syncop.h> +#include <glusterfs/gidcache.h> + +#if defined(GF_LINUX_HOST_OS) || defined(__FreeBSD__) || defined(__NetBSD__) + +/* + * TODO: + * So, with the addition of copy_file_range support, it might + * require a bump up of fuse kernel minor version (like it was + * done when support for lseek fop was added. But, as of now, + * the copy_file_range support has just landed in upstream + * kernel fuse module. So, until, there is a release of that + * fuse as part of a kernel, the FUSE_KERNEL_MINOR_VERSION + * from fuse_kernel.h in the contrib might not be changed. + * If so, then the highest op available should be based on + * the current minor version (which is 24). So, selectively + * determine. When, the minor version is changed to 28 in + * fuse_kernel.h from contrib (because in upstream linux + * kernel source tree, the kernel minor version which + * contains support for copy_file_range is 28), then remove + * the reference to FUSE_LSEEK below and just determine + * FUSE_OP_HIGH based on copy_file_range. 
+ */ +#if FUSE_KERNEL_MINOR_VERSION >= 28 +#define FUSE_OP_HIGH (FUSE_COPY_FILE_RANGE + 1) +#else +#define FUSE_OP_HIGH (FUSE_LSEEK + 1) +#endif -#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__) -#define FUSE_OP_HIGH (FUSE_POLL + 1) #endif #ifdef GF_DARWIN_HOST_OS #define FUSE_OP_HIGH (FUSE_DESTROY + 1) #endif -#define GLUSTERFS_XATTR_LEN_MAX 65536 +#define GLUSTERFS_XATTR_LEN_MAX 65536 #define MAX_FUSE_PROC_DELAY 1 -#define DISABLE_SELINUX 1 - typedef struct fuse_in_header fuse_in_header_t; -typedef void (fuse_handler_t) (xlator_t *this, fuse_in_header_t *finh, - void *msg); +typedef void(fuse_handler_t)(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf); + +enum fusedev_errno { + FUSEDEV_ENOENT, + FUSEDEV_ENOTDIR, + FUSEDEV_ENODEV, + FUSEDEV_EPERM, + FUSEDEV_ENOMEM, + FUSEDEV_ENOTCONN, + FUSEDEV_ECONNREFUSED, + FUSEDEV_EOVERFLOW, + FUSEDEV_EBUSY, + FUSEDEV_ENOTEMPTY, + FUSEDEV_EMAXPLUS +}; struct fuse_private { - int fd; - uint32_t proto_minor; - char *volfile; - size_t volfile_size; - char *mount_point; - struct iobuf *iobuf; - - pthread_t fuse_thread; - char fuse_thread_started; - - uint32_t direct_io_mode; - size_t *msg0_len_p; - - double entry_timeout; - double attribute_timeout; - - pthread_cond_t sync_cond; - pthread_mutex_t sync_mutex; - char event_recvd; - - char init_recvd; - - gf_boolean_t strict_volfile_check; + int fd; + uint32_t proto_minor; + char *volfile; + size_t volfile_size; + char *mount_point; + struct iobuf *iobuf; + + pthread_t *fuse_thread; + uint32_t reader_thread_count; + char fuse_thread_started; + + uint32_t direct_io_mode; + size_t *msg0_len_p; + + double entry_timeout; + double negative_timeout; + double attribute_timeout; + + pthread_cond_t sync_cond; + pthread_mutex_t sync_mutex; + char event_recvd; + + char init_recvd; + + gf_boolean_t strict_volfile_check; + + fuse_handler_t **fuse_ops; + fuse_handler_t **fuse_ops0; + pthread_mutex_t fuse_dump_mutex; + int fuse_dump_fd; + + glusterfs_graph_t 
*next_graph; + xlator_t *active_subvol; + + pid_t client_pid; + gf_boolean_t client_pid_set; + unsigned uid_map_root; + gf_boolean_t acl; + gf_boolean_t selinux; + gf_boolean_t read_only; + int32_t fopen_keep_cache; + int32_t gid_cache_timeout; + gf_boolean_t enable_ino32; + /* This is the mount option for disabling the root-squash for the + mount irrespective of whether the root-squash option for the + volume is set or not. But this option is honoured only for + thr trusted clients. For non trusted clients this value does + not have any affect and the volume option for root-squash is + honoured. + */ + gf_boolean_t no_root_squash; + fdtable_t *fdtable; + gid_cache_t gid_cache; + char *fuse_mountopts; + + /* For fuse-reverse-validation */ + struct list_head invalidate_list; + pthread_cond_t invalidate_cond; + pthread_mutex_t invalidate_mutex; + gf_boolean_t reverse_fuse_thread_started; + uint64_t invalidate_count; + /* For communicating with separate mount thread. */ + int status_pipe[2]; + + /* for fuse queue length and congestion threshold */ + int background_qlen; + int congestion_threshold; + + /* for using fuse-kernel readdirp*/ + gf_boolean_t use_readdirp; + + /* fini started, helps prevent multiple epoll worker threads + * firing up the fini routine */ + gf_boolean_t fini_invoked; + + /* resolve gid with getgrouplist() instead of /proc/%d/status */ + gf_boolean_t resolve_gids; + + /* Enable or disable capability support */ + gf_boolean_t capability; + + /* Enable or disable event history */ + gf_boolean_t event_history; + + /* whether to run the unmount daemon */ + gf_boolean_t auto_unmount; + + /* Load the thin volfile, and connect to gfproxyd*/ + gf_boolean_t thin_client; + gf_boolean_t mount_finished; + gf_boolean_t handle_graph_switch; + pthread_cond_t migrate_cond; + + /* Writeback cache support */ + gf_boolean_t kernel_writeback_cache; + int attr_times_granularity; + + /* Delayed fuse response */ + struct list_head timed_list; + pthread_cond_t 
timed_cond; + pthread_mutex_t timed_mutex; + gf_boolean_t timed_response_fuse_thread_started; + + /* Interrupt subscription */ + struct list_head interrupt_list; + pthread_mutex_t interrupt_mutex; + + gf_boolean_t flush_handle_interrupt; + gf_boolean_t fuse_auto_inval; + + /* LRU Limit, if not set, default is 64k for now */ + uint32_t lru_limit; + uint32_t invalidate_limit; + uint32_t fuse_dev_eperm_ratelimit_ns; + + /* counters for fusdev errnos */ + uint8_t fusedev_errno_cnt[FUSEDEV_EMAXPLUS]; + pthread_mutex_t fusedev_errno_cnt_mutex; +}; +typedef struct fuse_private fuse_private_t; - fuse_handler_t **fuse_ops; - fuse_handler_t **fuse_ops0; - pthread_mutex_t fuse_dump_mutex; - int fuse_dump_fd; +typedef uint64_t errnomask_t[2]; +#define MASK_ERRNO(mask, n) ((mask)[(n) >> 6] |= ((uint64_t)1 << ((n)&63))) +#define GET_ERRNO_MASK(mask, n) ((mask)[(n) >> 6] & ((uint64_t)1 << ((n)&63))) +#define ERRNOMASK_MAX (64 * (sizeof(errnomask_t) / sizeof(uint64_t))) - glusterfs_graph_t *next_graph; - xlator_t *active_subvol; +#define INVAL_BUF_SIZE \ + (sizeof(struct fuse_out_header) + \ + max(sizeof(struct fuse_notify_inval_inode_out), \ + sizeof(struct fuse_notify_inval_entry_out) + NAME_MAX + 1)) - pid_t client_pid; - gf_boolean_t client_pid_set; - unsigned uid_map_root; - gf_boolean_t acl; - gf_boolean_t read_only; +struct fuse_invalidate_node { + errnomask_t errnomask; + struct list_head next; + char inval_buf[INVAL_BUF_SIZE]; +}; +typedef struct fuse_invalidate_node fuse_invalidate_node_t; + +struct fuse_timed_message { + struct fuse_out_header fuse_out_header; + void *fuse_message_body; + struct timespec scheduled_ts; + errnomask_t errnomask; + struct list_head next; +}; +typedef struct fuse_timed_message fuse_timed_message_t; - /* For fuse-reverse-validation */ - int revchan_in; - int revchan_out; - gf_boolean_t reverse_fuse_thread_started; +enum fuse_interrupt_state { + INTERRUPT_NONE, + INTERRUPT_SQUELCHED, + INTERRUPT_HANDLED, + INTERRUPT_WAITING_HANDLER, +}; 
+typedef enum fuse_interrupt_state fuse_interrupt_state_t; +struct fuse_interrupt_record; +typedef struct fuse_interrupt_record fuse_interrupt_record_t; +typedef void (*fuse_interrupt_handler_t)(xlator_t *this, + fuse_interrupt_record_t *); +struct fuse_interrupt_record { + fuse_in_header_t fuse_in_header; + void *data; + gf_boolean_t hit; + fuse_interrupt_state_t interrupt_state; + fuse_interrupt_handler_t interrupt_handler; + pthread_cond_t handler_cond; + pthread_mutex_t handler_mutex; + struct list_head next; }; -typedef struct fuse_private fuse_private_t; -#define INVAL_BUF_SIZE (sizeof (struct fuse_out_header) + \ - max (sizeof (struct fuse_notify_inval_inode_out), \ - sizeof (struct fuse_notify_inval_entry_out) + \ - NAME_MAX + 1)) +struct fuse_graph_switch_args { + xlator_t *this; + xlator_t *old_subvol; + xlator_t *new_subvol; +}; +typedef struct fuse_graph_switch_args fuse_graph_switch_args_t; +#define FUSE_EVENT_HISTORY_SIZE 1024 #define _FH_TO_FD(fh) ((fd_t *)(uintptr_t)(fh)) -#define FH_TO_FD(fh) ((_FH_TO_FD (fh))?(fd_ref (_FH_TO_FD (fh))):((fd_t *) 0)) - -#define FUSE_FOP(state, ret, op_num, fop, args ...) \ - do { \ - call_frame_t *frame = NULL; \ - xlator_t *xl = NULL; \ - \ - frame = get_call_frame_for_req (state); \ - if (!frame) { \ - /* This is not completely clean, as some \ - * earlier allocations might remain unfreed \ - * if we return at this point, but still \ - * better than trying to go on with a NULL \ - * frame ... 
\ - */ \ - gf_log ("glusterfs-fuse", \ - GF_LOG_ERROR, \ - "FUSE message" \ - " unique %"PRIu64" opcode %d:" \ - " frame allocation failed", \ - state->finh->unique, \ - state->finh->opcode); \ - free_fuse_state (state); \ - /* ideally, need to 'return', but let the */ \ - /* calling function take care of it */ \ - break; \ - } \ - \ - frame->root->state = state; \ - frame->root->op = op_num; \ - frame->op = op_num; \ - \ - xl = fuse_state_subvol (state); \ - if (!xl) { \ - gf_log ("glusterfs-fuse", GF_LOG_ERROR, \ - "xl is NULL"); \ - send_fuse_err (state->this, state->finh, ENOENT); \ - free_fuse_state (state); \ - STACK_DESTROY (frame->root); \ - } else { \ - STACK_WIND (frame, ret, xl, xl->fops->fop, args); \ - } \ - \ - } while (0) - - -#define FUSE_FOP_COOKIE(state, xl, ret, cky, op_num, fop, args ...) \ - do { \ - call_frame_t *frame = NULL; \ - \ - frame = get_call_frame_for_req (state); \ - if (!frame) { \ - gf_log ("glusterfs-fuse", \ - GF_LOG_ERROR, \ - "FUSE message" \ - " unique %"PRIu64" opcode %d:" \ - " frame allocation failed", \ - state->finh->unique, \ - state->finh->opcode); \ - free_fuse_state (state); \ - return 0; \ - } \ - \ - frame->root->state = state; \ - frame->root->op = op_num; \ - frame->op = op_num; \ - STACK_WIND_COOKIE (frame, ret, cky, xl, xl->fops->fop, args); \ - } while (0) - -#define GF_SELECT_LOG_LEVEL(_errno) \ +#define FH_TO_FD(fh) ((_FH_TO_FD(fh)) ? (fd_ref(_FH_TO_FD(fh))) : ((fd_t *)0)) + +/* Use the same logic as the Linux NFS-client */ +#define GF_FUSE_SQUASH_INO(ino) (((uint32_t)ino) ^ (ino >> 32)) + +#define FUSE_FOP(state, ret, op_num, fop, args...) 
\ + do { \ + xlator_t *xl = NULL; \ + call_frame_t *frame = NULL; \ + \ + xl = state->active_subvol; \ + if (!xl) { \ + gf_log_callingfn(state->this->name, GF_LOG_ERROR, \ + "No active subvolume"); \ + send_fuse_err(state->this, state->finh, ENOENT); \ + free_fuse_state(state); \ + break; \ + } \ + \ + frame = get_call_frame_for_req(state); \ + if (!frame) { \ + /* This is not completely clean, as some \ + * earlier allocations might remain unfreed \ + * if we return at this point, but still \ + * better than trying to go on with a NULL \ + * frame ... \ + */ \ + send_fuse_err(state->this, state->finh, ENOMEM); \ + free_fuse_state(state); \ + /* ideally, need to 'return', but let the */ \ + /* calling function take care of it */ \ + break; \ + } \ + \ + frame->root->state = state; \ + frame->root->op = op_num; \ + frame->op = op_num; \ + \ + if (state->this->history) \ + gf_log_eh("%" PRIu64 \ + ", %s, path: (%s), gfid: " \ + "(%s)", \ + frame->root->unique, gf_fop_list[frame->root->op], \ + state->loc.path, \ + (state->fd == NULL) \ + ? uuid_utoa(state->loc.gfid) \ + : uuid_utoa(state->fd->inode->gfid)); \ + STACK_WIND(frame, ret, xl, xl->fops->fop, args); \ + } while (0) + +#define GF_SELECT_LOG_LEVEL(_errno) \ (((_errno == ENOENT) || (_errno == ESTALE))? 
\ GF_LOG_DEBUG) -#define GET_STATE(this, finh, state) \ - do { \ - state = get_fuse_state (this, finh); \ - if (!state) { \ - gf_log ("glusterfs-fuse", \ - GF_LOG_ERROR, \ - "FUSE message unique %"PRIu64" opcode %d:" \ - " state allocation failed", \ - finh->unique, finh->opcode); \ - \ - send_fuse_err (this, finh, ENOMEM); \ - GF_FREE (finh); \ - \ - return; \ - } \ - } while (0) - - +#define GET_STATE(this, finh, state) \ + do { \ + state = get_fuse_state(this, finh); \ + if (!state) { \ + gf_log("glusterfs-fuse", GF_LOG_ERROR, \ + "FUSE message unique %" PRIu64 \ + " opcode %d:" \ + " state allocation failed", \ + finh->unique, finh->opcode); \ + \ + send_fuse_err(this, finh, ENOMEM); \ + GF_FREE(finh); \ + \ + return; \ + } \ + } while (0) + +#define FUSE_ENTRY_CREATE(this, priv, finh, state, fci, op) \ + do { \ + if (priv->proto_minor >= 12) \ + state->mode &= ~fci->umask; \ + if (priv->proto_minor >= 12 && priv->acl) { \ + state->xdata = dict_new(); \ + if (!state->xdata) { \ + gf_log("glusterfs-fuse", GF_LOG_WARNING, \ + "%s failed to allocate " \ + "a param dictionary", \ + op); \ + send_fuse_err(this, finh, ENOMEM); \ + free_fuse_state(state); \ + return; \ + } \ + state->umask = fci->umask; \ + } \ + } while (0) + +#define fuse_log_eh_fop(this, state, frame, op_ret, op_errno) \ + do { \ + fuse_private_t *priv = this->private; \ + if (this->history && priv->event_history) { \ + if (state->fd) \ + gf_log_eh( \ + "op_ret: %d, op_errno: %d, " \ + "%" PRIu64 ", %s () => %p, gfid: %s", \ + op_ret, op_errno, frame->root->unique, \ + gf_fop_list[frame->root->op], state->fd, \ + uuid_utoa(state->fd->inode->gfid)); \ + else \ + gf_log_eh( \ + "op_ret: %d, op_errno: %d, " \ + "%" PRIu64 ", %s () => %s, gfid: %s", \ + op_ret, op_errno, frame->root->unique, \ + gf_fop_list[frame->root->op], state->loc.path, \ + uuid_utoa(state->loc.gfid)); \ + } \ + } while (0) + +#define fuse_log_eh(this, args...) 
\ + do { \ + fuse_private_t *priv = this->private; \ + if (this->history && priv->event_history) \ + gf_log_eh(args); \ + } while (0) + +static inline xlator_t * +fuse_active_subvol(xlator_t *fuse) +{ + fuse_private_t *priv = NULL; + + priv = fuse->private; + + return priv->active_subvol; +} typedef enum { - RESOLVE_MUST = 1, - RESOLVE_NOT, - RESOLVE_MAY, - RESOLVE_DONTCARE, - RESOLVE_EXACT + RESOLVE_MUST = 1, + RESOLVE_NOT, + RESOLVE_MAY, + RESOLVE_DONTCARE, + RESOLVE_EXACT } fuse_resolve_type_t; -struct fuse_resolve_comp { - char *basename; - ino_t ino; - uint64_t gen; - inode_t *inode; -}; - typedef struct { - fuse_resolve_type_t type; - ino_t ino; - uint64_t gen; - ino_t par; - fd_t *fd; - char *path; - char *bname; - u_char gfid[16]; - u_char pargfid[16]; - char *resolved; - int op_ret; - int op_errno; - loc_t resolve_loc; - struct fuse_resolve_comp *components; - int comp_count; + fuse_resolve_type_t type; + fd_t *fd; + char *path; + char *bname; + u_char gfid[16]; + inode_t *hint; + u_char pargfid[16]; + inode_t *parhint; + int op_ret; + int op_errno; + loc_t resolve_loc; } fuse_resolve_t; - typedef struct { - void *pool; - xlator_t *this; - inode_table_t *itable; - loc_t loc; - loc_t loc2; - fuse_in_header_t *finh; - int32_t flags; - off_t off; - size_t size; - unsigned long nlookup; - fd_t *fd; - dict_t *dict; - char *name; - char is_revalidate; - gf_boolean_t truncate_needed; - gf_lock_t lock; - uint64_t lk_owner; - - /* used within resolve_and_resume */ - /* */ - fuse_resolve_t resolve; - fuse_resolve_t resolve2; - - loc_t *loc_now; - fuse_resolve_t *resolve_now; - - void *resume_fn; - - int valid; - int mask; - dev_t rdev; - mode_t mode; - struct iatt attr; - struct gf_flock lk_lock; - struct iovec vector; - - uuid_t gfid; + void *pool; + xlator_t *this; + xlator_t *active_subvol; + inode_table_t *itable; + loc_t loc; + loc_t loc2; + fuse_in_header_t *finh; + int32_t flags; + + off_t off; + /* + * The man page of copy_file_range tells that the offset + 
* arguments are of type loff_t *. Here in fuse state, the values of + * those offsets are saved instead of pointers as the kernel sends + * the values of the offsets from those pointers instead of pointers. + * But the type loff_t is linux specific and is actually a typedef of + * off64_t. Hence using off64_t + */ + off64_t off_in; /* for copy_file_range source fd */ + off64_t off_out; /* for copy_file_range destination fd */ + size_t size; + unsigned long nlookup; + fd_t *fd; + fd_t *fd_dst; /* for copy_file_range destination */ + dict_t *xattr; + dict_t *xdata; + char *name; + char is_revalidate; + gf_boolean_t truncate_needed; + gf_lock_t lock; + uint64_t lk_owner; + + /* used within resolve_and_resume */ + /* */ + fuse_resolve_t resolve; + fuse_resolve_t resolve2; + + loc_t *loc_now; + fuse_resolve_t *resolve_now; + + void *resume_fn; + + int valid; + int mask; + dev_t rdev; + mode_t mode; + mode_t umask; + struct iatt attr; + struct gf_flock lk_lock; + struct iovec vector; + + uuid_t gfid; + uint32_t io_flags; + int32_t fd_no; + + gf_seek_what_t whence; + struct iobuf *iobuf; } fuse_state_t; -typedef struct fuse_fd_ctx { - uint32_t open_flags; - fd_t *fd; +typedef struct { + uint32_t open_flags; + char migration_failed; + fd_t *activefd; } fuse_fd_ctx_t; -typedef void (*fuse_resume_fn_t) (fuse_state_t *state); +typedef void (*fuse_resume_fn_t)(fuse_state_t *state); GF_MUST_CHECK int32_t -fuse_loc_fill (loc_t *loc, fuse_state_t *state, ino_t ino, - ino_t par, const char *name); -call_frame_t *get_call_frame_for_req (fuse_state_t *state); -fuse_state_t *get_fuse_state (xlator_t *this, fuse_in_header_t *finh); -void free_fuse_state (fuse_state_t *state); -void gf_fuse_stat2attr (struct iatt *st, struct fuse_attr *fa); -uint64_t inode_to_fuse_nodeid (inode_t *inode); -xlator_t *fuse_state_subvol (fuse_state_t *state); -xlator_t *fuse_active_subvol (xlator_t *fuse); -inode_t *fuse_ino_to_inode (uint64_t ino, xlator_t *fuse); -int fuse_resolve_and_resume 
(fuse_state_t *state, fuse_resume_fn_t fn); -int send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error); -int fuse_gfid_set (fuse_state_t *state); -int fuse_flip_xattr_ns (struct fuse_private *priv, char *okey, char **nkey); -int fuse_flip_user_to_trusted (char *okey, char **nkey); -int fuse_xattr_alloc_default (char *okey, char **nkey); -fuse_fd_ctx_t * __fuse_fd_ctx_check_n_create (fd_t *fd, xlator_t *this); -fuse_fd_ctx_t * fuse_fd_ctx_check_n_create (fd_t *fd, xlator_t *this); +fuse_loc_fill(loc_t *loc, fuse_state_t *state, ino_t ino, ino_t par, + const char *name); +call_frame_t * +get_call_frame_for_req(fuse_state_t *state); +fuse_state_t * +get_fuse_state(xlator_t *this, fuse_in_header_t *finh); +void +free_fuse_state(fuse_state_t *state); +void +gf_fuse_stat2attr(struct iatt *st, struct fuse_attr *fa, + gf_boolean_t enable_ino32); +void +gf_fuse_fill_dirent(gf_dirent_t *entry, struct fuse_dirent *fde, + gf_boolean_t enable_ino32); +uint64_t +inode_to_fuse_nodeid(inode_t *inode); +xlator_t * +fuse_active_subvol(xlator_t *fuse); +inode_t * +fuse_ino_to_inode(uint64_t ino, xlator_t *fuse); +int +send_fuse_err(xlator_t *this, fuse_in_header_t *finh, int error); +int +fuse_gfid_set(fuse_state_t *state); +int +fuse_flip_xattr_ns(struct fuse_private *priv, char *okey, char **nkey); +fuse_fd_ctx_t * +__fuse_fd_ctx_check_n_create(xlator_t *this, fd_t *fd); +fuse_fd_ctx_t * +fuse_fd_ctx_check_n_create(xlator_t *this, fd_t *fd); + +int +fuse_resolve_and_resume(fuse_state_t *state, fuse_resume_fn_t fn); +int +fuse_resolve_inode_init(fuse_state_t *state, fuse_resolve_t *resolve, + ino_t ino); +int +fuse_resolve_entry_init(fuse_state_t *state, fuse_resolve_t *resolve, ino_t par, + char *name); +int +fuse_resolve_fd_init(fuse_state_t *state, fuse_resolve_t *resolve, fd_t *fd); +int +fuse_ignore_xattr_set(fuse_private_t *priv, char *key); +void +fuse_fop_resume(fuse_state_t *state); +int +dump_history_fuse(circular_buffer_t *cb, void *data); +int 
+fuse_check_selinux_cap_xattr(fuse_private_t *priv, char *name); #endif /* _GF_FUSE_BRIDGE_H_ */ diff --git a/xlators/mount/fuse/src/fuse-helpers.c b/xlators/mount/fuse/src/fuse-helpers.c index 9bf85f979c3..a2b0ad11fe4 100644 --- a/xlators/mount/fuse/src/fuse-helpers.c +++ b/xlators/mount/fuse/src/fuse-helpers.c @@ -1,505 +1,688 @@ /* - Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ +#ifdef __NetBSD__ +#define _KMEMUSER +#endif -#include "fuse-bridge.h" #if defined(GF_SOLARIS_HOST_OS) #include <sys/procfs.h> -#else +#elif defined(__FreeBSD__) +#include <sys/types.h> +#include <libutil.h> +#elif defined(CTL_KERN) #include <sys/sysctl.h> #endif +#include <pwd.h> + +#include "fuse-bridge.h" -xlator_t * -fuse_state_subvol (fuse_state_t *state) +static void +fuse_resolve_wipe(fuse_resolve_t *resolve) { - xlator_t *subvol = NULL; + GF_FREE((void *)resolve->path); - if (!state) - return NULL; + GF_FREE((void *)resolve->bname); - if (state->loc.inode) - subvol = state->loc.inode->table->xl; + if (resolve->fd) + fd_unref(resolve->fd); - if (state->fd) - subvol = state->fd->inode->table->xl; + loc_wipe(&resolve->resolve_loc); - return subvol; -} + if (resolve->hint) { + inode_unref(resolve->hint); + resolve->hint = 0; + } + if (resolve->parhint) { + inode_unref(resolve->parhint); + resolve->parhint = 0; + } +} -xlator_t * -fuse_active_subvol (xlator_t *fuse) +void +free_fuse_state(fuse_state_t *state) { - fuse_private_t *priv = NULL; - - priv = fuse->private; + xlator_t *this = NULL; + fuse_private_t *priv = NULL; + uint64_t winds = 0; + char switched = 0; + + this = state->this; + + priv = this->private; + + loc_wipe(&state->loc); + + loc_wipe(&state->loc2); + + if (state->xdata) { + dict_unref(state->xdata); + state->xdata = (void *)0xaaaaeeee; + } + if (state->xattr) + dict_unref(state->xattr); + + if (state->name) { + GF_FREE(state->name); + state->name = NULL; + } + if (state->fd) { + fd_unref(state->fd); + state->fd = (void *)0xfdfdfdfd; + } + if (state->finh) { + GF_FREE(state->finh); + state->finh = NULL; + } + + fuse_resolve_wipe(&state->resolve); + fuse_resolve_wipe(&state->resolve2); + + pthread_mutex_lock(&priv->sync_mutex); + { + winds = --state->active_subvol->winds; + switched = state->active_subvol->switched; + } + pthread_mutex_unlock(&priv->sync_mutex); + + if ((winds == 0) && (switched)) { + xlator_notify(state->active_subvol, 
GF_EVENT_PARENT_DOWN, + state->active_subvol, NULL); + } - return priv->active_subvol; +#ifdef DEBUG + memset(state, 0x90, sizeof(*state)); +#endif + GF_FREE(state); + state = NULL; } - - -static void -fuse_resolve_wipe (fuse_resolve_t *resolve) +fuse_state_t * +get_fuse_state(xlator_t *this, fuse_in_header_t *finh) { - struct fuse_resolve_comp *comp = NULL; + fuse_state_t *state = NULL; + xlator_t *active_subvol = NULL; + fuse_private_t *priv = NULL; - if (resolve->path) - GF_FREE ((void *)resolve->path); + state = (void *)GF_CALLOC(1, sizeof(*state), gf_fuse_mt_fuse_state_t); + if (!state) + return NULL; - if (resolve->bname) - GF_FREE ((void *)resolve->bname); + state->this = THIS; + priv = this->private; - if (resolve->resolved) - GF_FREE ((void *)resolve->resolved); + pthread_mutex_lock(&priv->sync_mutex); + { + while (priv->handle_graph_switch) + pthread_cond_wait(&priv->migrate_cond, &priv->sync_mutex); + active_subvol = fuse_active_subvol(state->this); + active_subvol->winds++; + } + pthread_mutex_unlock(&priv->sync_mutex); - loc_wipe (&resolve->resolve_loc); + state->active_subvol = active_subvol; + state->itable = active_subvol->itable; - comp = resolve->components; + state->pool = this->ctx->pool; + state->finh = finh; + state->this = this; - if (comp) { - int i = 0; + LOCK_INIT(&state->lock); - for (i = 0; comp[i].basename; i++) { - if (comp[i].inode) { - inode_unref (comp[i].inode); - comp[i].inode = NULL; - } - } - - GF_FREE ((void *)resolve->components); - } + return state; } void -free_fuse_state (fuse_state_t *state) +frame_fill_groups(call_frame_t *frame) { - loc_wipe (&state->loc); - - loc_wipe (&state->loc2); - - if (state->dict) { - dict_unref (state->dict); - state->dict = (void *)0xaaaaeeee; - } - if (state->name) { - GF_FREE (state->name); - state->name = NULL; - } - if (state->fd) { - fd_unref (state->fd); - state->fd = (void *)0xfdfdfdfd; +#if defined(GF_LINUX_HOST_OS) + xlator_t *this = frame->this; + fuse_private_t *priv = this->private; 
+ char filename[32]; + char line[4096]; + char *ptr = NULL; + long int id = 0; + char *saveptr = NULL; + char *endptr = NULL; + int ret = 0; + int ngroups = 0; + gid_t *mygroups = NULL; + + if (priv->resolve_gids) { + struct passwd pwent; + char mystrs[1024]; + struct passwd *result; + + if (getpwuid_r(frame->root->uid, &pwent, mystrs, sizeof(mystrs), + &result) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "getpwuid_r(%u) " + "failed", + frame->root->uid); + return; } - if (state->finh) { - GF_FREE (state->finh); - state->finh = NULL; + if (result == 0) { + gf_log(this->name, GF_LOG_ERROR, + "getpwuid_r(%u): " + "no matching record", + frame->root->uid); + return; } - fuse_resolve_wipe (&state->resolve); - fuse_resolve_wipe (&state->resolve2); - -#ifdef DEBUG - memset (state, 0x90, sizeof (*state)); -#endif - GF_FREE (state); - state = NULL; -} - - -fuse_state_t * -get_fuse_state (xlator_t *this, fuse_in_header_t *finh) -{ - fuse_state_t *state = NULL; - - state = (void *)GF_CALLOC (1, sizeof (*state), - gf_fuse_mt_fuse_state_t); - if (!state) - return NULL; - state->pool = this->ctx->pool; - state->finh = finh; - state->this = this; + ngroups = gf_getgrouplist(result->pw_name, frame->root->gid, &mygroups); + if (ngroups == -1) { + gf_log(this->name, GF_LOG_ERROR, + "could not map %s to " + "group list (ngroups %d, max %d)", + result->pw_name, ngroups, GF_MAX_AUX_GROUPS); + return; + } - LOCK_INIT (&state->lock); + call_stack_set_groups(frame->root, ngroups, &mygroups); + } else { + FILE *fp = NULL; - return state; -} + ret = snprintf(filename, sizeof filename, "/proc/%d/status", + frame->root->pid); + if (ret >= sizeof filename) { + gf_log(this->name, GF_LOG_ERROR, "procfs path exceeds buffer size"); + goto out; + } + fp = fopen(filename, "r"); + if (!fp) { + gf_log(this->name, GF_LOG_ERROR, "failed to open %s: %s", filename, + strerror(errno)); + goto out; + } -void -frame_fill_groups (call_frame_t *frame) -{ -#if defined(GF_LINUX_HOST_OS) - char filename[32]; - 
char line[128]; - char *ptr = NULL; - FILE *fp = NULL; - int idx = 0; - long int id = 0; - char *saveptr = NULL; - char *endptr = NULL; - int ret = 0; - - ret = snprintf (filename, sizeof filename, "/proc/%d/status", frame->root->pid); - if (ret >= sizeof filename) - goto out; + for (;;) { + gf_boolean_t found_groups = _gf_false; + int idx = 0; - fp = fopen (filename, "r"); - if (!fp) + if (call_stack_alloc_groups(frame->root, ngroups) != 0) { + gf_log(this->name, GF_LOG_ERROR, + "failed to allocate gid buffer"); goto out; + } - while ((ptr = fgets (line, sizeof line, fp))) { - if (strncmp (ptr, "Groups:", 7) != 0) - continue; - - ptr = line + 8; - - for (ptr = strtok_r (ptr, " \t\r\n", &saveptr); - ptr; - ptr = strtok_r (NULL, " \t\r\n", &saveptr)) { - errno = 0; - id = strtol (ptr, &endptr, 0); - if (errno == ERANGE) - break; - if (!endptr || *endptr) - break; - frame->root->groups[idx++] = id; - if (idx == GF_REQUEST_MAXGROUPS) - break; + while ((ptr = fgets(line, sizeof line, fp))) { + if (strncmp(ptr, "Groups:", 7) == 0) { + found_groups = _gf_true; + break; } - + } + if (!found_groups) { + gf_log(this->name, GF_LOG_ERROR, "cannot find gid list in %s", + filename); + break; + } + ptr = line + 8; + + for (ptr = strtok_r(ptr, " \t\r\n", &saveptr); ptr; + ptr = strtok_r(NULL, " \t\r\n", &saveptr)) { + errno = 0; + id = strtol(ptr, &endptr, 0); + if (errno == ERANGE || !endptr || *endptr) { + gf_log(this->name, GF_LOG_ERROR, "failed to parse %s", + filename); + break; + } + if (idx < call_stack_groups_capacity(frame->root)) + frame->root->groups[idx] = id; + idx++; + if (idx == GF_MAX_AUX_GROUPS) + break; + } + if (idx > call_stack_groups_capacity(frame->root)) { + ngroups = idx; + rewind(fp); + } else { frame->root->ngrps = idx; break; + } } -out: + out: if (fp) - fclose (fp); + fclose(fp); + } #elif defined(GF_SOLARIS_HOST_OS) - char filename[32]; - char scratch[128]; - prcred_t *prcred = (prcred_t *) scratch; - FILE *fp = NULL; - int ret = 0; - - ret = snprintf 
(filename, sizeof filename, - "/proc/%d/cred", frame->root->pid); - - if (ret < sizeof filename) { - fp = fopen (filename, "r"); - if (fp != NULL) { - if (fgets (scratch, sizeof scratch, fp) != NULL) { - frame->root->ngrps = MIN(prcred->pr_ngroups, - GF_REQUEST_MAXGROUPS); - } - fclose (fp); - } - } + char filename[32]; + char scratch[128]; + prcred_t *prcred = (prcred_t *)scratch; + FILE *fp = NULL; + int ret = 0; + int ngrps; + + ret = snprintf(filename, sizeof filename, "/proc/%d/cred", + frame->root->pid); + + if (ret < sizeof filename) { + fp = fopen(filename, "r"); + if (fp != NULL) { + if (fgets(scratch, sizeof scratch, fp) != NULL) { + ngrps = MIN(prcred->pr_ngroups, GF_MAX_AUX_GROUPS); + if (call_stack_alloc_groups(frame->root, ngrps) != 0) { + fclose(fp); + return; + } + } + fclose(fp); + } + } #elif defined(CTL_KERN) /* DARWIN and *BSD */ - /* - N.B. CTL_KERN is an enum on Linux. (Meaning, if it's not - obvious, that it's not subject to preprocessor directives - like '#if defined'.) - Unlike Linux, on Mac OS and the BSDs it is a #define. We - could test to see that KERN_PROC is defined, but, barring any - evidence to the contrary, I think that's overkill. - We might also test that GF_DARWIN_HOST_OS is defined, why - limit this to just Mac OS. It's equally valid for the BSDs - and we do have people building on NetBSD and FreeBSD. - */ - int name[] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, frame->root->pid }; - size_t namelen = sizeof name / sizeof name[0]; - struct kinfo_proc kp; - size_t kplen = sizeof(kp); - int i, ngroups; - - if (sysctl(name, namelen, &kp, &kplen, NULL, 0) != 0) - return; - ngroups = MIN(kp.kp_eproc.e_ucred.cr_ngroups, GF_REQUEST_MAXGROUPS); - for (i = 0; i < ngroups; i++) - frame->root->groups[i] = kp.kp_eproc.e_ucred.cr_groups[i]; - frame->root->ngrps = ngroups; + /* + N.B. CTL_KERN is an enum on Linux. (Meaning, if it's not + obvious, that it's not subject to preprocessor directives + like '#if defined'.) 
+ Unlike Linux, on Mac OS and the BSDs it is a #define. We + could test to see that KERN_PROC is defined, but, barring any + evidence to the contrary, I think that's overkill. + We might also test that GF_DARWIN_HOST_OS is defined, why + limit this to just Mac OS. It's equally valid for the BSDs + and we do have people building on NetBSD and FreeBSD. + */ + int name[] = {CTL_KERN, KERN_PROC, KERN_PROC_PID, frame->root->pid}; + size_t namelen = sizeof name / sizeof name[0]; + struct kinfo_proc kp; + size_t kplen = sizeof(kp); + int i, ngroups; + + if (sysctl(name, namelen, &kp, &kplen, NULL, 0) != 0) + return; + ngroups = MIN(kp.kp_eproc.e_ucred.cr_ngroups, NGROUPS_MAX); + if (call_stack_alloc_groups(frame->root, ngroups) != 0) + return; + for (i = 0; i < ngroups; i++) + frame->root->groups[i] = kp.kp_eproc.e_ucred.cr_groups[i]; + frame->root->ngrps = ngroups; #else - frame->root->ngrps = 0; + frame->root->ngrps = 0; #endif /* GF_LINUX_HOST_OS */ } +/* + * Get the groups for the PID associated with this frame. If enabled, + * use the gid cache to reduce group list collection. 
+ */ +static void +get_groups(fuse_private_t *priv, call_frame_t *frame) +{ + int i; + const gid_list_t *gl; + gid_list_t agl; + + if (!priv || !priv->gid_cache_timeout) { + frame_fill_groups(frame); + return; + } + + if (-1 == priv->gid_cache_timeout) { + frame->root->ngrps = 0; + return; + } + + gl = gid_cache_lookup(&priv->gid_cache, frame->root->pid, frame->root->uid, + frame->root->gid); + if (gl) { + if (call_stack_alloc_groups(frame->root, gl->gl_count) != 0) { + gid_cache_release(&priv->gid_cache, gl); + return; + } + frame->root->ngrps = gl->gl_count; + for (i = 0; i < gl->gl_count; i++) + frame->root->groups[i] = gl->gl_list[i]; + gid_cache_release(&priv->gid_cache, gl); + return; + } + + frame_fill_groups(frame); + + agl.gl_id = frame->root->pid; + agl.gl_uid = frame->root->uid; + agl.gl_gid = frame->root->gid; + agl.gl_count = frame->root->ngrps; + agl.gl_list = GF_CALLOC(frame->root->ngrps, sizeof(gid_t), + gf_fuse_mt_gids_t); + if (!agl.gl_list) + return; + + for (i = 0; i < frame->root->ngrps; i++) + agl.gl_list[i] = frame->root->groups[i]; + + if (gid_cache_add(&priv->gid_cache, &agl) != 1) + GF_FREE(agl.gl_list); +} call_frame_t * -get_call_frame_for_req (fuse_state_t *state) +get_call_frame_for_req(fuse_state_t *state) { - call_pool_t *pool = NULL; - fuse_in_header_t *finh = NULL; - call_frame_t *frame = NULL; - xlator_t *this = NULL; - fuse_private_t *priv = NULL; - - pool = state->pool; - finh = state->finh; - this = state->this; - priv = this->private; - - frame = create_frame (this, pool); - if (!frame) - return NULL; - - if (finh) { - frame->root->uid = finh->uid; - frame->root->gid = finh->gid; - frame->root->pid = finh->pid; - frame->root->lk_owner = state->lk_owner; - frame->root->unique = finh->unique; - } + call_pool_t *pool = NULL; + fuse_in_header_t *finh = NULL; + call_frame_t *frame = NULL; + xlator_t *this = NULL; + fuse_private_t *priv = NULL; - frame_fill_groups (frame); + pool = state->pool; + finh = state->finh; + this = 
state->this; + priv = this->private; - if (priv && priv->client_pid_set) - frame->root->pid = priv->client_pid; + frame = create_frame(this, pool); + if (!frame) + return NULL; - frame->root->type = GF_OP_TYPE_FOP; + if (finh) { + frame->root->uid = finh->uid; + frame->root->gid = finh->gid; + frame->root->pid = finh->pid; + set_lk_owner_from_uint64(&frame->root->lk_owner, state->lk_owner); + } - return frame; -} + get_groups(priv, frame); + + if (priv && priv->client_pid_set) + frame->root->pid = priv->client_pid; + + frame->root->type = GF_OP_TYPE_FOP; + return frame; +} inode_t * -fuse_ino_to_inode (uint64_t ino, xlator_t *fuse) +fuse_ino_to_inode(uint64_t ino, xlator_t *fuse) { - inode_t *inode = NULL; - xlator_t *active_subvol = NULL; - - if (ino == 1) { - active_subvol = fuse_active_subvol (fuse); - if (active_subvol) - inode = active_subvol->itable->root; - } else { - inode = (inode_t *) (unsigned long) ino; - inode_ref (inode); - } - - return inode; + inode_t *inode = NULL; + xlator_t *active_subvol = NULL; + + if (ino == 1) { + active_subvol = fuse_active_subvol(fuse); + if (active_subvol) + inode = active_subvol->itable->root; + } else { + inode = (inode_t *)(unsigned long)ino; + inode_ref(inode); + } + + return inode; } uint64_t -inode_to_fuse_nodeid (inode_t *inode) +inode_to_fuse_nodeid(inode_t *inode) { - if (!inode || __is_root_gfid (inode->gfid)) - return 1; + if (!inode) + return 0; + if (__is_root_gfid(inode->gfid)) + return 1; - return (unsigned long) inode; + return (unsigned long)inode; } - GF_MUST_CHECK int32_t -fuse_loc_fill (loc_t *loc, fuse_state_t *state, ino_t ino, - ino_t par, const char *name) +fuse_loc_fill(loc_t *loc, fuse_state_t *state, ino_t ino, ino_t par, + const char *name) { - inode_t *inode = NULL; - inode_t *parent = NULL; - int32_t ret = -1; - char *path = NULL; - uuid_t null_gfid = {0,}; - - /* resistance against multiple invocation of loc_fill not to get - reference leaks via inode_search() */ - - if (name) { - parent = 
loc->parent; - if (!parent) { - parent = fuse_ino_to_inode (par, state->this); - loc->parent = parent; - if (parent) - uuid_copy (loc->pargfid, parent->gfid); - } - - inode = loc->inode; - if (!inode) { - inode = inode_grep (parent->table, parent, name); - loc->inode = inode; - } - - ret = inode_path (parent, name, &path); - if (ret <= 0) { - gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "inode_path failed for %s/%s", - (parent)?uuid_utoa (parent->gfid):"0", name); - goto fail; - } - loc->path = path; - } else { - inode = loc->inode; - if (!inode) { - inode = fuse_ino_to_inode (ino, state->this); - loc->inode = inode; - if (inode) - uuid_copy (loc->gfid, inode->gfid); - } - - parent = loc->parent; - if (!parent) { - parent = inode_parent (inode, null_gfid, NULL); - loc->parent = parent; - if (parent) - uuid_copy (loc->pargfid, parent->gfid); + inode_t *inode = NULL; + inode_t *parent = NULL; + int32_t ret = -1; + char *path = NULL; + uuid_t null_gfid = { + 0, + }; + + /* resistance against multiple invocation of loc_fill not to get + reference leaks via inode_search() */ + + if (name) { + parent = loc->parent; + if (!parent) { + parent = fuse_ino_to_inode(par, state->this); + loc->parent = parent; + if (parent) + gf_uuid_copy(loc->pargfid, parent->gfid); + } - } + inode = loc->inode; + if (!inode && parent) { + inode = inode_grep(parent->table, parent, name); + loc->inode = inode; + } - ret = inode_path (inode, NULL, &path); - if (ret <= 0) { - gf_log ("glusterfs-fuse", GF_LOG_DEBUG, - "inode_path failed for %s", - (inode) ? uuid_utoa (inode->gfid) : "0"); - goto fail; - } - loc->path = path; + ret = inode_path(parent, name, &path); + if (ret <= 0) { + gf_log("glusterfs-fuse", GF_LOG_DEBUG, + "inode_path failed for %s/%s", + (parent) ? 
uuid_utoa(parent->gfid) : "0", name); + goto fail; + } + loc->path = path; + } else { + inode = loc->inode; + if (!inode) { + inode = fuse_ino_to_inode(ino, state->this); + loc->inode = inode; + if (inode) + gf_uuid_copy(loc->gfid, inode->gfid); } - if (loc->path) { - loc->name = strrchr (loc->path, '/'); - if (loc->name) - loc->name++; - else - loc->name = ""; + parent = loc->parent; + if (!parent) { + parent = inode_parent(inode, null_gfid, NULL); + loc->parent = parent; + if (parent) + gf_uuid_copy(loc->pargfid, parent->gfid); } - if ((ino != 1) && (parent == NULL)) { - gf_log ("fuse-bridge", GF_LOG_DEBUG, - "failed to search parent for %"PRId64"/%s (%"PRId64")", - (ino_t)par, name, (ino_t)ino); - ret = -1; - goto fail; + ret = inode_path(inode, NULL, &path); + if (ret <= 0) { + gf_log("glusterfs-fuse", GF_LOG_DEBUG, "inode_path failed for %s", + (inode) ? uuid_utoa(inode->gfid) : "0"); + goto fail; } - ret = 0; + loc->path = path; + } + + if (loc->path) { + loc->name = strrchr(loc->path, '/'); + if (loc->name) + loc->name++; + else + loc->name = ""; + } + + if ((ino != 1) && (parent == NULL)) { + gf_log("fuse-bridge", GF_LOG_DEBUG, + "failed to search parent for %" PRId64 "/%s (%" PRId64 ")", + (ino_t)par, name, (ino_t)ino); + ret = -1; + goto fail; + } + ret = 0; fail: - return ret; + /* this should not happen as inode_path returns -1 when buf is NULL + for sure */ + if (path && !loc->path) + GF_FREE(path); + return ret; } - /* courtesy of folly */ void -gf_fuse_stat2attr (struct iatt *st, struct fuse_attr *fa) +gf_fuse_stat2attr(struct iatt *st, struct fuse_attr *fa, + gf_boolean_t enable_ino32) { - fa->ino = st->ia_ino; - fa->size = st->ia_size; - fa->blocks = st->ia_blocks; - fa->atime = st->ia_atime; - fa->mtime = st->ia_mtime; - fa->ctime = st->ia_ctime; - fa->atimensec = st->ia_atime_nsec; - fa->mtimensec = st->ia_mtime_nsec; - fa->ctimensec = st->ia_ctime_nsec; - fa->mode = st_mode_from_ia (st->ia_prot, st->ia_type); - fa->nlink = st->ia_nlink; - 
fa->uid = st->ia_uid; - fa->gid = st->ia_gid; - fa->rdev = makedev (ia_major (st->ia_rdev), - ia_minor (st->ia_rdev)); + if (enable_ino32) + fa->ino = GF_FUSE_SQUASH_INO(st->ia_ino); + else + fa->ino = st->ia_ino; + + fa->size = st->ia_size; + fa->blocks = st->ia_blocks; + fa->atime = st->ia_atime; + fa->mtime = st->ia_mtime; + fa->ctime = st->ia_ctime; + fa->atimensec = st->ia_atime_nsec; + fa->mtimensec = st->ia_mtime_nsec; + fa->ctimensec = st->ia_ctime_nsec; + fa->mode = st_mode_from_ia(st->ia_prot, st->ia_type); + fa->nlink = st->ia_nlink; + fa->uid = st->ia_uid; + fa->gid = st->ia_gid; + fa->rdev = makedev(ia_major(st->ia_rdev), ia_minor(st->ia_rdev)); #if FUSE_KERNEL_MINOR_VERSION >= 9 - fa->blksize = st->ia_blksize; + fa->blksize = st->ia_blksize; #endif #ifdef GF_DARWIN_HOST_OS - fa->crtime = (uint64_t)-1; - fa->crtimensec = (uint32_t)-1; - fa->flags = 0; + fa->crtime = (uint64_t)-1; + fa->crtimensec = (uint32_t)-1; + fa->flags = 0; #endif } -int -fuse_flip_user_to_trusted (char *okey, char **nkey) +void +gf_fuse_fill_dirent(gf_dirent_t *entry, struct fuse_dirent *fde, + gf_boolean_t enable_ino32) { - int ret = 0; - char *key = NULL; + if (enable_ino32) + fde->ino = GF_FUSE_SQUASH_INO(entry->d_ino); + else + fde->ino = entry->d_ino; + + fde->off = entry->d_off; + fde->type = entry->d_type; + fde->namelen = strlen(entry->d_name); + (void)memcpy(fde->name, entry->d_name, fde->namelen); +} - key = GF_CALLOC (1, strlen(okey) + 10, gf_common_mt_char); - if (!key) { - ret = -1; - goto out; - } +static int +fuse_do_flip_xattr_ns(char *okey, const char *nns, char **nkey) +{ + int ret = 0; + char *key = NULL; + + okey = strchr(okey, '.'); + GF_ASSERT(okey); - okey += 5; - strncpy(key, "trusted.", 8); - strncat(key+8, okey, strlen(okey)); + int key_len = strlen(nns) + strlen(okey); + key = GF_MALLOC(key_len + 1, gf_common_mt_char); + if (!key) { + ret = -1; + goto out; + } - *nkey = key; + strcpy(key, nns); + strcat(key, okey); - out: - return ret; + *nkey = key; + 
+out: + return ret; } -int -fuse_xattr_alloc_default (char *okey, char **nkey) +static int +fuse_xattr_alloc_default(char *okey, char **nkey) { - int ret = 0; + int ret = 0; - *nkey = gf_strdup (okey); - if (!*nkey) - ret = -1; - return ret; + *nkey = gf_strdup(okey); + if (!*nkey) + ret = -1; + return ret; } +#define PRIV_XA_NS "trusted" +#define UNPRIV_XA_NS "system" + int -fuse_flip_xattr_ns (fuse_private_t *priv, char *okey, char **nkey) +fuse_flip_xattr_ns(fuse_private_t *priv, char *okey, char **nkey) { - int ret = 0; - gf_boolean_t need_flip = _gf_false; - gf_client_pid_t npid = 0; + int ret = 0; + gf_boolean_t need_flip = _gf_false; + + if (GF_CLIENT_PID_GSYNCD == priv->client_pid) { + /* valid xattr(s): *xtime, volume-mark* */ + gf_log("glusterfs-fuse", GF_LOG_DEBUG, + "PID: %d, checking xattr(s): " + "volume-mark*, *xtime", + priv->client_pid); + if ((strcmp(okey, UNPRIV_XA_NS ".glusterfs.volume-mark") == 0) || + (fnmatch(UNPRIV_XA_NS ".glusterfs.volume-mark.*", okey, + FNM_PERIOD) == 0) || + (fnmatch(UNPRIV_XA_NS ".glusterfs.*.xtime", okey, FNM_PERIOD) == 0)) + need_flip = _gf_true; + } + + if (need_flip) { + gf_log("glusterfs-fuse", GF_LOG_DEBUG, + "flipping %s to " PRIV_XA_NS " equivalent", okey); + ret = fuse_do_flip_xattr_ns(okey, PRIV_XA_NS, nkey); + } else { + /* if we cannot match, continue with what we got */ + ret = fuse_xattr_alloc_default(okey, nkey); + } + + return ret; +} - npid = priv->client_pid; - if (gf_client_pid_check (npid)) { - ret = fuse_xattr_alloc_default (okey, nkey); - goto out; - } +int +fuse_ignore_xattr_set(fuse_private_t *priv, char *key) +{ + int ret = 0; - switch (npid) { - /* - * These two cases will never execute as we check the - * pid range above, but are kept to keep the compiler - * happy. 
- */ - case GF_CLIENT_PID_MAX: - case GF_CLIENT_PID_MIN: - goto out; + /* don't mess with user namespace */ + if (fnmatch("user.*", key, FNM_PERIOD) == 0) + goto out; - case GF_CLIENT_PID_GSYNCD: - /* valid xattr(s): *xtime, volume-mark* */ - gf_log("glusterfs-fuse", GF_LOG_DEBUG, "PID: %d, checking xattr(s): " - "volume-mark*, *xtime", npid); - if ( (strcmp (okey, "user.glusterfs.volume-mark") == 0) - || (fnmatch (okey, "user.glusterfs.volume-mark.*", FNM_PERIOD) == 0) - || (fnmatch ("user.glusterfs.*.xtime", okey, FNM_PERIOD) == 0) ) - need_flip = _gf_true; - break; + if (priv->client_pid != GF_CLIENT_PID_GSYNCD) + goto out; - case GF_CLIENT_PID_HADOOP: - /* valid xattr(s): pathinfo */ - gf_log("glusterfs-fuse", GF_LOG_DEBUG, "PID: %d, checking xattr(s): " - "pathinfo", npid); - if (strcmp (okey, "user.glusterfs.pathinfo") == 0) - need_flip = _gf_true; - break; - } + /* trusted NS check */ + if (!((fnmatch("*.glusterfs.*.xtime", key, FNM_PERIOD) == 0) || + (fnmatch("*.glusterfs.volume-mark", key, FNM_PERIOD) == 0) || + (fnmatch("*.glusterfs.volume-mark.*", key, FNM_PERIOD) == 0) || + (fnmatch("system.posix_acl_access", key, FNM_PERIOD) == 0) || + (fnmatch("glusterfs.gfid.newfile", key, FNM_PERIOD) == 0) || + (fnmatch("*.glusterfs.shard.block-size", key, FNM_PERIOD) == 0) || + (fnmatch("*.glusterfs.shard.file-size", key, FNM_PERIOD) == 0))) + ret = -1; - if (need_flip) { - gf_log ("glusterfs-fuse", GF_LOG_DEBUG, "flipping %s to trusted equivalent", - okey); - ret = fuse_flip_user_to_trusted (okey, nkey); - } else { - /* if we cannot match, continue with what we got */ - ret = fuse_xattr_alloc_default (okey, nkey); - } - out: - return ret; +out: + gf_log("glusterfs-fuse", GF_LOG_DEBUG, + "%s setxattr: key [%s], " + " client pid [%d]", + (ret ? 
"disallowing" : "allowing"), key, priv->client_pid); + + return ret; +} + +int +fuse_check_selinux_cap_xattr(fuse_private_t *priv, char *name) +{ + int ret = -1; + + if (strcmp(name, "security.selinux") && + strcmp(name, "security.capability")) { + /* if xattr name is not of interest, no validations needed */ + ret = 0; + goto out; + } + + if ((strcmp(name, "security.selinux") == 0) && (priv->selinux)) { + ret = 0; + } + + if ((strcmp(name, "security.capability") == 0) && + ((priv->capability) || (priv->selinux))) { + ret = 0; + } + +out: + return ret; } diff --git a/xlators/mount/fuse/src/fuse-mem-types.h b/xlators/mount/fuse/src/fuse-mem-types.h index 1fb959c3a44..4fd8e58c523 100644 --- a/xlators/mount/fuse/src/fuse-mem-types.h +++ b/xlators/mount/fuse/src/fuse-mem-types.h @@ -1,36 +1,31 @@ /* - Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ - #ifndef __FUSE_MEM_TYPES_H__ #define __FUSE_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_fuse_mem_types_ { - gf_fuse_mt_iovec = gf_common_mt_end + 1, - gf_fuse_mt_fuse_private_t, - gf_fuse_mt_char, - gf_fuse_mt_iov_base, - gf_fuse_mt_fuse_state_t, - gf_fuse_mt_fd_ctx_t, - gf_fuse_mt_end + gf_fuse_mt_iovec = gf_common_mt_end + 1, + gf_fuse_mt_fuse_private_t, + gf_fuse_mt_char, + gf_fuse_mt_iov_base, + gf_fuse_mt_fuse_state_t, + gf_fuse_mt_fd_ctx_t, + gf_fuse_mt_graph_switch_args_t, + gf_fuse_mt_gids_t, + gf_fuse_mt_invalidate_node_t, + gf_fuse_mt_pthread_t, + gf_fuse_mt_timed_message_t, + gf_fuse_mt_interrupt_record_t, + gf_fuse_mt_end }; #endif - diff --git a/xlators/mount/fuse/src/fuse-resolve.c b/xlators/mount/fuse/src/fuse-resolve.c index 755e2f429f1..6206fd47325 100644 --- a/xlators/mount/fuse/src/fuse-resolve.c +++ b/xlators/mount/fuse/src/fuse-resolve.c @@ -1,581 +1,683 @@ /* - Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif + Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #include "fuse-bridge.h" static int -fuse_resolve_all (fuse_state_t *state); +fuse_resolve_all(fuse_state_t *state); -int fuse_resolve_continue (fuse_state_t *state); -int fuse_resolve_entry_simple (fuse_state_t *state); -int fuse_resolve_inode_simple (fuse_state_t *state); +int +fuse_resolve_continue(fuse_state_t *state); +int +fuse_resolve_entry_simple(fuse_state_t *state); +int +fuse_resolve_inode_simple(fuse_state_t *state); +int +fuse_migrate_fd(xlator_t *this, fd_t *fd, xlator_t *old_subvol, + xlator_t *new_subvol); +fuse_fd_ctx_t * +fuse_fd_ctx_get(xlator_t *this, fd_t *fd); static int -fuse_resolve_loc_touchup (fuse_state_t *state) +fuse_resolve_loc_touchup(fuse_state_t *state) { - fuse_resolve_t *resolve = NULL; - loc_t *loc = NULL; - char *path = NULL; - int ret = 0; - - resolve = state->resolve_now; - loc = state->loc_now; - - if (!loc->path) { - if (loc->parent && resolve->bname) { - ret = inode_path (loc->parent, resolve->bname, &path); - } else if (loc->inode) { - ret = inode_path (loc->inode, NULL, &path); - } - if (ret) - gf_log (THIS->name, GF_LOG_TRACE, - "return value inode_path %d", ret); - loc->path = path; - } + fuse_resolve_t *resolve = NULL; + loc_t *loc = NULL; - return 0; -} + resolve = state->resolve_now; + loc = state->loc_now; + loc_touchup(loc, resolve->bname); + return 0; +} int -fuse_resolve_gfid_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, - struct iatt *buf, dict_t *xattr, - struct iatt *postparent) +fuse_resolve_entry_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *buf, dict_t *xattr, struct iatt *postparent) { - fuse_state_t *state = NULL; - 
fuse_resolve_t *resolve = NULL; - inode_t *link_inode = NULL; - loc_t *resolve_loc = NULL; - - state = frame->root->state; - resolve = state->resolve_now; - resolve_loc = &resolve->resolve_loc; - - STACK_DESTROY (frame->root); - - if (op_ret == -1) { - gf_log (this->name, ((op_errno == ENOENT) ? GF_LOG_DEBUG : - GF_LOG_WARNING), - "%s/%s: failed to resolve (%s)", - uuid_utoa (resolve_loc->pargfid), resolve_loc->name, - strerror (op_errno)); - goto out; - } + fuse_state_t *state = NULL; + fuse_resolve_t *resolve = NULL; + inode_t *link_inode = NULL; + loc_t *resolve_loc = NULL; + uint64_t ctx_value = LOOKUP_NOT_NEEDED; + + state = frame->root->state; + resolve = state->resolve_now; + resolve_loc = &resolve->resolve_loc; + + STACK_DESTROY(frame->root); + + if (op_ret == -1) { + gf_log(this->name, (op_errno == ENOENT) ? GF_LOG_DEBUG : GF_LOG_WARNING, + "%s/%s: failed to resolve (%s)", uuid_utoa(resolve_loc->pargfid), + resolve_loc->name, strerror(op_errno)); + resolve->op_ret = -1; + resolve->op_errno = op_errno; + goto out; + } + + link_inode = inode_link(inode, resolve_loc->parent, resolve_loc->name, buf); + if (link_inode == inode) + inode_ctx_set(link_inode, this, &ctx_value); + state->loc_now->inode = link_inode; + +out: + loc_wipe(resolve_loc); + + fuse_resolve_continue(state); + return 0; +} - link_inode = inode_link (inode, resolve_loc->parent, - resolve_loc->name, buf); +int +fuse_resolve_entry(fuse_state_t *state) +{ + fuse_resolve_t *resolve = NULL; + loc_t *resolve_loc = NULL; - if (!link_inode) - goto out; + resolve = state->resolve_now; + resolve_loc = &resolve->resolve_loc; - inode_lookup (link_inode); + resolve_loc->parent = inode_ref(state->loc_now->parent); + gf_uuid_copy(resolve_loc->pargfid, state->loc_now->pargfid); + resolve_loc->name = resolve->bname; - inode_unref (link_inode); + resolve_loc->inode = inode_grep(state->itable, resolve->parhint, + resolve->bname); + if (!resolve_loc->inode) { + resolve_loc->inode = inode_new(state->itable); + } + 
inode_path(resolve_loc->parent, resolve_loc->name, + (char **)&resolve_loc->path); -out: - loc_wipe (resolve_loc); + FUSE_FOP(state, fuse_resolve_entry_cbk, GF_FOP_LOOKUP, lookup, resolve_loc, + NULL); - fuse_resolve_continue (state); - return 0; + return 0; } - int -fuse_resolve_gfid_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, inode_t *inode, struct iatt *buf, - dict_t *xattr, struct iatt *postparent) +fuse_resolve_gfid_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, + struct iatt *buf, dict_t *xattr, struct iatt *postparent) { - fuse_state_t *state = NULL; - fuse_resolve_t *resolve = NULL; - inode_t *link_inode = NULL; - loc_t *resolve_loc = NULL; - - state = frame->root->state; - resolve = state->resolve_now; - resolve_loc = &resolve->resolve_loc; - - STACK_DESTROY (frame->root); - - if (op_ret == -1) { - gf_log (this->name, ((op_errno == ENOENT) ? GF_LOG_DEBUG : - GF_LOG_WARNING), - "%s: failed to resolve (%s)", - uuid_utoa (resolve_loc->gfid), strerror (op_errno)); - loc_wipe (&resolve->resolve_loc); - goto out; + fuse_state_t *state = NULL; + fuse_resolve_t *resolve = NULL; + inode_t *link_inode = NULL; + loc_t *loc_now = NULL; + inode_t *tmp_inode = NULL; + uint64_t ctx_value = LOOKUP_NOT_NEEDED; + + state = frame->root->state; + resolve = state->resolve_now; + loc_now = state->loc_now; + + STACK_DESTROY(frame->root); + + if (op_ret == -1) { + gf_log(this->name, (op_errno == ENOENT) ? GF_LOG_DEBUG : GF_LOG_WARNING, + "%s: failed to resolve (%s)", + uuid_utoa(resolve->resolve_loc.gfid), strerror(op_errno)); + loc_wipe(&resolve->resolve_loc); + + /* resolve->op_ret can have 3 values: 0, -1, -2. + * 0 : resolution was successful. + * -1: parent inode could not be resolved. 
+ * -2: entry (inode corresponding to path) could not be resolved + */ + + if (gf_uuid_is_null(resolve->gfid)) { + resolve->op_ret = -1; + } else { + resolve->op_ret = -2; } - loc_wipe (resolve_loc); + resolve->op_errno = op_errno; + goto out; + } - link_inode = inode_link (inode, NULL, NULL, buf); + link_inode = inode_link(inode, NULL, NULL, buf); + if (link_inode == inode) + inode_ctx_set(link_inode, this, &ctx_value); - if (!link_inode) - goto out; + loc_wipe(&resolve->resolve_loc); - inode_lookup (link_inode); - - if (uuid_is_null (resolve->pargfid)) { - inode_unref (link_inode); - goto out; - } + if (!link_inode) + goto out; - resolve_loc->parent = link_inode; - uuid_copy (resolve_loc->pargfid, resolve_loc->parent->gfid); + if (!gf_uuid_is_null(resolve->gfid)) { + loc_now->inode = link_inode; + goto out; + } - resolve_loc->name = resolve->bname; + loc_now->parent = link_inode; + gf_uuid_copy(loc_now->pargfid, link_inode->gfid); - resolve_loc->inode = inode_new (state->itable); - inode_path (resolve_loc->parent, resolve_loc->name, - (char **) &resolve_loc->path); + tmp_inode = inode_grep(state->itable, link_inode, resolve->bname); + if (tmp_inode && (!inode_needs_lookup(tmp_inode, THIS))) { + loc_now->inode = tmp_inode; + goto out; + } - FUSE_FOP (state, fuse_resolve_gfid_entry_cbk, GF_FOP_LOOKUP, - lookup, &resolve->resolve_loc, NULL); + inode_unref(tmp_inode); + fuse_resolve_entry(state); - return 0; + return 0; out: - fuse_resolve_continue (state); - return 0; + fuse_resolve_continue(state); + return 0; +} + +int +fuse_resolve_gfid(fuse_state_t *state) +{ + fuse_resolve_t *resolve = NULL; + loc_t *resolve_loc = NULL; + int ret = 0; + + resolve = state->resolve_now; + resolve_loc = &resolve->resolve_loc; + + if (!gf_uuid_is_null(resolve->pargfid)) { + gf_uuid_copy(resolve_loc->gfid, resolve->pargfid); + } else if (!gf_uuid_is_null(resolve->gfid)) { + gf_uuid_copy(resolve_loc->gfid, resolve->gfid); + } + + /* inode may already exist in case we are looking up 
an inode which was + linked through readdirplus */ + resolve_loc->inode = inode_find(state->itable, resolve_loc->gfid); + if (!resolve_loc->inode) + resolve_loc->inode = inode_new(state->itable); + ret = loc_path(resolve_loc, NULL); + + if (ret <= 0) { + gf_log(THIS->name, GF_LOG_WARNING, + "failed to get the path for inode %s", uuid_utoa(resolve->gfid)); + } + + FUSE_FOP(state, fuse_resolve_gfid_cbk, GF_FOP_LOOKUP, lookup, resolve_loc, + NULL); + + return 0; } +/* + * Return value: + * 0 - resolved parent and entry (as necessary) + * -1 - resolved parent but not entry (though necessary) + * 1 - resolved neither parent nor entry + */ int -fuse_resolve_gfid (fuse_state_t *state) +fuse_resolve_parent_simple(fuse_state_t *state) { - fuse_resolve_t *resolve = NULL; - loc_t *resolve_loc = NULL; - int ret = 0; - - resolve = state->resolve_now; - resolve_loc = &resolve->resolve_loc; - - if (!uuid_is_null (resolve->pargfid)) { - uuid_copy (resolve_loc->gfid, resolve->pargfid); - resolve_loc->inode = inode_new (state->itable); - ret = inode_path (resolve_loc->inode, NULL, - (char **)&resolve_loc->path); - } else if (!uuid_is_null (resolve->gfid)) { - uuid_copy (resolve_loc->gfid, resolve->gfid); - resolve_loc->inode = inode_new (state->itable); - ret = inode_path (resolve_loc->inode, NULL, - (char **)&resolve_loc->path); - } - if (ret <= 0) { - gf_log (THIS->name, GF_LOG_WARNING, - "failed to get the path from inode %s", - uuid_utoa (resolve->gfid)); + fuse_resolve_t *resolve = NULL; + loc_t *loc = NULL; + inode_t *parent = NULL; + inode_t *inode = NULL; + xlator_t *this = NULL; + + resolve = state->resolve_now; + loc = state->loc_now; + this = state->this; + + loc->name = resolve->bname; + + parent = resolve->parhint; + if (parent->table == state->itable) { + if (inode_needs_lookup(parent, THIS)) + return 1; + + /* no graph switches since */ + loc->parent = inode_ref(parent); + gf_uuid_copy(loc->pargfid, parent->gfid); + loc->inode = inode_grep(state->itable, parent, 
loc->name); + + /* nodeid for root is 1 and we blindly take the latest graph's + * table->root as the parhint and because of this there is + * ambiguity whether the entry should have existed or not, and + * we took the conservative approach of assuming entry should + * have been there even though it need not have (bug #804592). + */ + + if (loc->inode && inode_needs_lookup(loc->inode, THIS)) { + inode_unref(loc->inode); + loc->inode = NULL; + return -1; } - FUSE_FOP (state, fuse_resolve_gfid_cbk, GF_FOP_LOOKUP, - lookup, &resolve->resolve_loc, NULL); + if ((loc->inode == NULL) && __is_root_gfid(parent->gfid)) { + /* non decisive result - entry missing */ + return -1; + } + /* decisive result - resolution success */ return 0; -} + } + + parent = inode_find(state->itable, resolve->pargfid); + if (!parent) { + /* non decisive result - parent missing */ + return 1; + } + if (inode_needs_lookup(parent, THIS)) { + inode_unref(parent); + return 1; + } + + loc->parent = parent; + gf_uuid_copy(loc->pargfid, resolve->pargfid); + + inode = inode_grep(state->itable, parent, loc->name); + if (inode && !inode_needs_lookup(inode, this)) { + loc->inode = inode; + /* decisive result - resolution success */ + return 0; + } + /* non decisive result - entry missing */ + return -1; +} int -fuse_resolve_continue (fuse_state_t *state) +fuse_resolve_parent(fuse_state_t *state) { - fuse_resolve_t *resolve = NULL; - int ret = 0; - - resolve = state->resolve_now; + int ret = 0; - resolve->op_ret = 0; - resolve->op_errno = 0; - - /* TODO: should we handle 'fd' here ? 
*/ - if (!uuid_is_null (resolve->pargfid)) - ret = fuse_resolve_entry_simple (state); - else if (!uuid_is_null (resolve->gfid)) - ret = fuse_resolve_inode_simple (state); - if (ret) - gf_log (THIS->name, GF_LOG_DEBUG, - "return value of resolve_*_simple %d", ret); + ret = fuse_resolve_parent_simple(state); + if (ret > 0) { + fuse_resolve_gfid(state); + return 0; + } - fuse_resolve_loc_touchup (state); + if (ret < 0) { + fuse_resolve_entry(state); + return 0; + } - fuse_resolve_all (state); + fuse_resolve_continue(state); - return 0; + return 0; } - -/* - Check if the requirements are fulfilled by entries in the inode cache itself - Return value: - <= 0 - simple resolution was decisive and complete (either success or failure) - > 0 - indecisive, need to perform deep resolution -*/ +int +fuse_resolve_inode_simple(fuse_state_t *state) +{ + fuse_resolve_t *resolve = NULL; + loc_t *loc = NULL; + inode_t *inode = NULL; + + resolve = state->resolve_now; + loc = state->loc_now; + + inode = resolve->hint; + if (inode->table == state->itable) + inode_ref(inode); + else + inode = inode_find(state->itable, resolve->gfid); + + if (inode) { + if (!inode_needs_lookup(inode, THIS)) + goto found; + /* inode was linked through readdirplus */ + inode_unref(inode); + } + + return 1; +found: + loc->inode = inode; + return 0; +} int -fuse_resolve_entry_simple (fuse_state_t *state) +fuse_resolve_inode(fuse_state_t *state) { - fuse_resolve_t *resolve = NULL; - inode_t *parent = NULL; - inode_t *inode = NULL; - int ret = 0; - - resolve = state->resolve_now; - - parent = inode_find (state->itable, resolve->pargfid); - if (!parent) { - /* simple resolution is indecisive. 
need to perform - deep resolution */ - resolve->op_ret = -1; - resolve->op_errno = ENOENT; - ret = 1; - goto out; - } + int ret = 0; - /* expected @parent was found from the inode cache */ - if (state->loc_now->parent) { - inode_unref (state->loc_now->parent); - } + ret = fuse_resolve_inode_simple(state); - state->loc_now->parent = inode_ref (parent); + if (ret > 0) { + fuse_resolve_gfid(state); + return 0; + } - inode = inode_grep (state->itable, parent, resolve->bname); - if (!inode) { - resolve->op_ret = -1; - resolve->op_errno = ENOENT; - ret = 1; - goto out; - } + fuse_resolve_continue(state); - ret = 0; + return 0; +} - if (state->loc_now->inode) { - inode_unref (state->loc_now->inode); - state->loc_now->inode = NULL; +int +fuse_migrate_fd_task(void *data) +{ + int ret = -1; + fuse_state_t *state = NULL; + fd_t *basefd = NULL, *oldfd = NULL; + fuse_fd_ctx_t *basefd_ctx = NULL; + xlator_t *old_subvol = NULL; + + state = data; + if (state == NULL) { + goto out; + } + + basefd = state->fd; + + basefd_ctx = fuse_fd_ctx_get(state->this, basefd); + if (!basefd_ctx) + goto out; + + LOCK(&basefd->lock); + { + oldfd = basefd_ctx->activefd ? 
basefd_ctx->activefd : basefd; + fd_ref(oldfd); + } + UNLOCK(&basefd->lock); + + old_subvol = oldfd->inode->table->xl; + + ret = fuse_migrate_fd(state->this, basefd, old_subvol, + state->active_subvol); + + LOCK(&basefd->lock); + { + if (ret < 0) { + basefd_ctx->migration_failed = 1; + } else { + basefd_ctx->migration_failed = 0; } + } + UNLOCK(&basefd->lock); - state->loc_now->inode = inode_ref (inode); - uuid_copy (state->loc_now->gfid, resolve->gfid); + ret = 0; out: - if (parent) - inode_unref (parent); + if (oldfd) + fd_unref(oldfd); - if (inode) - inode_unref (inode); - - return ret; + return ret; } - -int -fuse_resolve_entry (fuse_state_t *state) +static int +fuse_migrate_fd_error(xlator_t *this, fd_t *fd) { - int ret = 0; - loc_t *loc = NULL; + fuse_fd_ctx_t *fdctx = NULL; + char error = 0; - loc = state->loc_now; - - ret = fuse_resolve_entry_simple (state); - if (ret > 0) { - loc_wipe (loc); - fuse_resolve_gfid (state); - return 0; + fdctx = fuse_fd_ctx_get(this, fd); + if (fdctx != NULL) { + if (fdctx->migration_failed) { + error = 1; } + } - if (ret == 0) - fuse_resolve_loc_touchup (state); - - fuse_resolve_all (state); - - return 0; + return error; } +#define FUSE_FD_GET_ACTIVE_FD(activefd, basefd) \ + do { \ + LOCK(&basefd->lock); \ + { \ + activefd = basefd_ctx->activefd ? 
basefd_ctx->activefd : basefd; \ + if (activefd != basefd) { \ + fd_ref(activefd); \ + } \ + } \ + UNLOCK(&basefd->lock); \ + \ + if (activefd == basefd) { \ + fd_ref(activefd); \ + } \ + } while (0); -int -fuse_resolve_inode_simple (fuse_state_t *state) +static int +fuse_resolve_fd(fuse_state_t *state) { - fuse_resolve_t *resolve = NULL; - inode_t *inode = NULL; - int ret = 0; - - resolve = state->resolve_now; - - inode = inode_find (state->itable, resolve->gfid); - if (!inode) { - resolve->op_ret = -1; - resolve->op_errno = ENOENT; - ret = 1; - goto out; + fuse_resolve_t *resolve = NULL; + fd_t *basefd = NULL, *activefd = NULL; + xlator_t *active_subvol = NULL, *this = NULL; + int ret = 0; + char fd_migration_error = 0; + fuse_fd_ctx_t *basefd_ctx = NULL; + + resolve = state->resolve_now; + + this = state->this; + + basefd = resolve->fd; + basefd_ctx = fuse_fd_ctx_get(this, basefd); + if (basefd_ctx == NULL) { + gf_log(state->this->name, GF_LOG_WARNING, + "fdctx is NULL for basefd (ptr:%p inode-gfid:%s), " + "resolver erroring out with errno EINVAL", + basefd, uuid_utoa(basefd->inode->gfid)); + resolve->op_ret = -1; + resolve->op_errno = EINVAL; + goto resolve_continue; + } + + FUSE_FD_GET_ACTIVE_FD(activefd, basefd); + + active_subvol = activefd->inode->table->xl; + + fd_migration_error = fuse_migrate_fd_error(state->this, basefd); + if (fd_migration_error) { + resolve->op_ret = -1; + resolve->op_errno = EBADF; + } else if (state->active_subvol != active_subvol) { + ret = synctask_new(state->this->ctx->env, fuse_migrate_fd_task, NULL, + NULL, state); + + fd_migration_error = fuse_migrate_fd_error(state->this, basefd); + fd_unref(activefd); + + FUSE_FD_GET_ACTIVE_FD(activefd, basefd); + active_subvol = activefd->inode->table->xl; + + if ((ret == -1) || fd_migration_error || + (state->active_subvol != active_subvol)) { + if (ret == -1) { + gf_log(state->this->name, GF_LOG_WARNING, + "starting sync-task to migrate " + "basefd (ptr:%p inode-gfid:%s) failed " + 
"(old-subvolume:%s-%d " + "new-subvolume:%s-%d)", + basefd, uuid_utoa(basefd->inode->gfid), + active_subvol->name, active_subvol->graph->id, + state->active_subvol->name, + state->active_subvol->graph->id); + } else { + gf_log(state->this->name, GF_LOG_WARNING, + "fd migration of basefd " + "(ptr:%p inode-gfid:%s) failed " + "(old-subvolume:%s-%d " + "new-subvolume:%s-%d)", + basefd, uuid_utoa(basefd->inode->gfid), + active_subvol->name, active_subvol->graph->id, + state->active_subvol->name, + state->active_subvol->graph->id); + } + + resolve->op_ret = -1; + resolve->op_errno = EBADF; + } else { + gf_log(state->this->name, GF_LOG_DEBUG, + "basefd (ptr:%p inode-gfid:%s) migrated " + "successfully in resolver " + "(old-subvolume:%s-%d new-subvolume:%s-%d)", + basefd, uuid_utoa(basefd->inode->gfid), active_subvol->name, + active_subvol->graph->id, state->active_subvol->name, + state->active_subvol->graph->id); } + } - ret = 0; + if ((resolve->op_ret == -1) && (resolve->op_errno == EBADF)) { + gf_log("fuse-resolve", GF_LOG_WARNING, + "migration of basefd (ptr:%p inode-gfid:%s) " + "did not complete, failing fop with EBADF " + "(old-subvolume:%s-%d new-subvolume:%s-%d)", + basefd, uuid_utoa(basefd->inode->gfid), active_subvol->name, + active_subvol->graph->id, state->active_subvol->name, + state->active_subvol->graph->id); + } - if (state->loc_now->inode) { - inode_unref (state->loc_now->inode); - } + if (activefd != basefd) { + state->fd = fd_ref(activefd); + fd_unref(basefd); + } - state->loc_now->inode = inode_ref (inode); - uuid_copy (state->loc_now->gfid, resolve->gfid); + /* state->active_subvol = active_subvol; */ -out: - if (inode) - inode_unref (inode); +resolve_continue: + if (activefd != NULL) { + fd_unref(activefd); + } - return ret; -} + fuse_resolve_continue(state); + return 0; +} int -fuse_resolve_inode (fuse_state_t *state) +fuse_gfid_set(fuse_state_t *state) { - int ret = 0; - loc_t *loc = NULL; + int ret = 0; - loc = state->loc_now; + if 
(gf_uuid_is_null(state->gfid)) + goto out; - ret = fuse_resolve_inode_simple (state); + if (!state->xdata) + state->xdata = dict_new(); - if (ret > 0) { - loc_wipe (loc); - fuse_resolve_gfid (state); - return 0; - } - - if (ret == 0) - fuse_resolve_loc_touchup (state); + if (!state->xdata) { + ret = -1; + goto out; + } - fuse_resolve_all (state); - - return 0; + ret = dict_set_gfuuid(state->xdata, "gfid-req", state->gfid, true); +out: + return ret; } -static int -fuse_resolve_fd (fuse_state_t *state) +int +fuse_resolve_entry_init(fuse_state_t *state, fuse_resolve_t *resolve, ino_t par, + char *name) { - fuse_resolve_t *resolve = NULL; - fd_t *fd = NULL; - int ret = 0; - uint64_t tmp_fd_ctx = 0; - char *path = NULL; - char *name = NULL; - - resolve = state->resolve_now; - - fd = resolve->fd; - - ret = fd_ctx_get (fd, state->this, &tmp_fd_ctx); - if (!ret) { - state->fd = (fd_t *)(long)tmp_fd_ctx; - fd_ref (state->fd); - fuse_resolve_all (state); - goto out; - } + inode_t *parent = NULL; - ret = inode_path (fd->inode, 0, &path); - if (ret <= 0) - gf_log ("", GF_LOG_WARNING, - "failed to do inode-path on fd %d %s", ret, path); + parent = fuse_ino_to_inode(par, state->this); + gf_uuid_copy(resolve->pargfid, parent->gfid); + resolve->parhint = parent; + resolve->bname = gf_strdup(name); - name = strrchr (path, '/'); - if (name) - name++; - - resolve->path = path; - resolve->bname = gf_strdup (name); - - state->loc_now = &state->loc; - -out: - return 0; + return 0; } +int +fuse_resolve_inode_init(fuse_state_t *state, fuse_resolve_t *resolve, ino_t ino) +{ + inode_t *inode = NULL; -static int -fuse_resolve (fuse_state_t *state) - { - fuse_resolve_t *resolve = NULL; - - resolve = state->resolve_now; + inode = fuse_ino_to_inode(ino, state->this); + gf_uuid_copy(resolve->gfid, inode->gfid); + resolve->hint = inode; - if (resolve->fd) { + return 0; +} - fuse_resolve_fd (state); +int +fuse_resolve_fd_init(fuse_state_t *state, fuse_resolve_t *resolve, fd_t *fd) +{ + resolve->fd 
= fd_ref(fd); - } else if (!uuid_is_null (resolve->pargfid)) { + return 0; +} - fuse_resolve_entry (state); +static int +fuse_resolve(fuse_state_t *state) +{ + fuse_resolve_t *resolve = NULL; - } else if (!uuid_is_null (resolve->gfid)) { + resolve = state->resolve_now; - fuse_resolve_inode (state); + if (resolve->fd) { + fuse_resolve_fd(state); - } else { + } else if (!gf_uuid_is_null(resolve->pargfid)) { + fuse_resolve_parent(state); - resolve->op_ret = 0; - resolve->op_errno = EINVAL; + } else if (!gf_uuid_is_null(resolve->gfid)) { + fuse_resolve_inode(state); - fuse_resolve_all (state); - } + } else { + fuse_resolve_all(state); + } - return 0; + return 0; } - static int -fuse_resolve_done (fuse_state_t *state) +fuse_resolve_done(fuse_state_t *state) { - fuse_resume_fn_t fn = NULL; - - if (state->resolve.op_ret || state->resolve2.op_ret) { - send_fuse_err (state->this, state->finh, - state->resolve.op_errno); - free_fuse_state (state); - goto out; - } - fn = state->resume_fn; - if (fn) - fn (state); - -out: - return 0; + fuse_fop_resume(state); + return 0; } - /* * This function is called multiple times, once per resolving one location/fd. 
* state->resolve_now is used to decide which location/fd is to be resolved now */ static int -fuse_resolve_all (fuse_state_t *state) +fuse_resolve_all(fuse_state_t *state) { - if (state->resolve_now == NULL) { - - state->resolve_now = &state->resolve; - state->loc_now = &state->loc; - - fuse_resolve (state); + if (state->resolve_now == NULL) { + state->resolve_now = &state->resolve; + state->loc_now = &state->loc; - } else if (state->resolve_now == &state->resolve) { + fuse_resolve(state); - state->resolve_now = &state->resolve2; - state->loc_now = &state->loc2; + } else if (state->resolve_now == &state->resolve) { + state->resolve_now = &state->resolve2; + state->loc_now = &state->loc2; - fuse_resolve (state); + fuse_resolve(state); - } else if (state->resolve_now == &state->resolve2) { + } else if (state->resolve_now == &state->resolve2) { + fuse_resolve_done(state); - fuse_resolve_done (state); + } else { + gf_log("fuse-resolve", GF_LOG_ERROR, + "Invalid pointer for state->resolve_now"); + } - } else { - gf_log ("fuse-resolve", GF_LOG_ERROR, - "Invalid pointer for state->resolve_now"); - } - - return 0; + return 0; } - int -fuse_gfid_set (fuse_state_t *state) +fuse_resolve_continue(fuse_state_t *state) { - int ret = 0; - - if (uuid_is_null (state->gfid)) - goto out; + fuse_resolve_loc_touchup(state); - if (!state->dict) - state->dict = dict_new (); - - if (!state->dict) { - ret = -1; - goto out; - } + fuse_resolve_all(state); - ret = dict_set_static_bin (state->dict, "gfid-req", - state->gfid, sizeof (state->gfid)); -out: - return ret; + return 0; } - int -fuse_resolve_and_resume (fuse_state_t *state, fuse_resume_fn_t fn) +fuse_resolve_and_resume(fuse_state_t *state, fuse_resume_fn_t fn) { - xlator_t *inode_xl = NULL; - xlator_t *active_xl = NULL; - - fuse_gfid_set (state); - - state->resume_fn = fn; - - active_xl = fuse_active_subvol (state->this); - inode_xl = fuse_state_subvol (state); - if (!inode_xl && state->loc.parent) - inode_xl = 
state->loc.parent->table->xl; - - /* If inode or fd is already in new graph, goto resume */ - if (inode_xl == active_xl) { - /* Lets move to resume if there is no other inode to check */ - if (!(state->loc2.parent || state->loc2.inode)) - goto resume; - - inode_xl = NULL; - /* We have to make sure both inodes we are - working on are in same inode table */ - if (state->loc2.inode) - inode_xl = state->loc2.inode->table->xl; - if (!inode_xl && state->loc2.parent) - inode_xl = state->loc2.parent->table->xl; - - if (inode_xl == active_xl) - goto resume; - } - + fuse_gfid_set(state); - /* If the resolve is for 'fd' and its open with 'write' flag - set, don't switch to new graph yet */ + state->resume_fn = fn; - /* TODO: fix it later */ - /* if (state->fd && ((state->fd->flags & O_RDWR) || - (state->fd->flags & O_WRONLY))) - */ - if (state->fd) - goto resume; + fuse_resolve_all(state); - /* - if (state->fd) { - state->resolve.fd = state->fd; - state->fd = NULL; // TODO: we may need a 'fd_unref()' here, not very sure' - } - */ - - /* now we have to resolve the inode to 'itable' */ - state->itable = active_xl->itable; - - fuse_resolve_all (state); - - return 0; -resume: - fn (state); - - return 0; + return 0; } diff --git a/xlators/mount/fuse/utils/Makefile.am b/xlators/mount/fuse/utils/Makefile.am index c626e2769fe..fdad27ad103 100644 --- a/xlators/mount/fuse/utils/Makefile.am +++ b/xlators/mount/fuse/utils/Makefile.am @@ -1,10 +1,9 @@ utildir = @mountutildir@ -if GF_DARWIN_HOST_OS -util_SCRIPTS = mount_glusterfs -else +if GF_LINUX_HOST_OS util_SCRIPTS = mount.glusterfs +else +util_SCRIPTS = mount_glusterfs endif -CLEANFILES = - +CLEANFILES = diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in index daf8ba92b23..ac4d94cb743 100755 --- a/xlators/mount/fuse/utils/mount.glusterfs.in +++ b/xlators/mount/fuse/utils/mount.glusterfs.in @@ -1,20 +1,19 @@ -#!/bin/bash -# (C) 2006, 2007, 2008 Gluster Inc. 
<http://www.gluster.com> +#!/bin/sh # -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; either version 2 of -# the License, or (at your option) any later version. +# Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> +# Copyright (c) 2015 ungleich GmbH <http://www.ungleich.ch> # -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. +# This file is part of GlusterFS. # -# You should have received a copy of the GNU General Public -# License along with this program; if not, write to the Free -# Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -# Boston, MA 02110-1301 USA +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. + +warn () +{ + echo "$@" >&2 +} _init () { @@ -27,328 +26,812 @@ _init () LOG_DEBUG=DEBUG; LOG_TRACE=TRACE; + HOST_NAME_MAX=64; + prefix="@prefix@"; exec_prefix=@exec_prefix@; cmd_line=$(echo "@sbindir@/glusterfs"); + # check whether getfattr exists + export PATH + getfattr=$(command -v getfattr 2>/dev/null) + if [ $? -ne 0 ]; then + warn "WARNING: getfattr not found, certain checks will be skipped.." 
+ fi + + mounttab=/proc/mounts + uname_s=`uname -s` + case ${uname_s} in + NetBSD) + getinode="stat -f %i" + getdev="stat -f %d" + lgetinode="${getinode} -L" + lgetdev="${getdev} -L" + ;; + Linux) + getinode="stat -c %i" + getdev="stat -c %d" + lgetinode="${getinode} -L" + lgetdev="${getdev} -L" + ;; + esac + UPDATEDBCONF=/etc/updatedb.conf } +is_valid_hostname () +{ + local server=$1 + + length=$(echo $server | wc -c) + if [ ${length} -gt ${HOST_NAME_MAX} ]; then + return 1 + fi +} + +parse_backup_volfile_servers () +{ + local server_list=$1 + local servers="" + local new_servers="" + + servers=$(echo ${server_list} | sed 's/\:/ /g') + for server in ${servers}; do + is_valid_hostname ${server} + if [ $? -eq 1 ]; then + continue + fi + new_servers=$(echo "${new_servers} ${server}") + done + + echo ${new_servers} +} + +parse_volfile_servers () +{ + local server_list=$1 + local servers="" + local new_servers="" + + servers=$(echo ${server_list} | sed 's/,/ /g') + for server in ${servers}; do + is_valid_hostname ${server} + if [ $? 
-eq 1 ]; then + continue + fi + new_servers=$(echo "${new_servers} ${server}") + done + + echo ${new_servers} +} + start_glusterfs () { if [ -n "$log_level_str" ]; then - case "$log_level_str" in - "ERROR") - log_level=$LOG_ERROR; - ;; + case "$( echo $log_level_str | awk '{print toupper($0)}')" in + "ERROR") + log_level=$LOG_ERROR; + ;; "INFO") - log_level=$LOG_INFO + log_level=$LOG_INFO; ;; - "DEBUG") - log_level=$LOG_DEBUG; - ;; - "CRITICAL") - log_level=$LOG_CRITICAL; - ;; - "WARNING") - log_level=$LOG_WARNING; - ;; - "TRACE") - log_level=$LOG_TRACE; - ;; - "NONE") - log_level=$LOG_NONE; - ;; - *) - echo "invalid log level $log_level_str, using INFO"; - log_level=$LOG_INFO; - ;; - esac - fi - if [ -n "$log_level" ]; then - cmd_line=$(echo "$cmd_line --log-level=$log_level"); + "DEBUG") + log_level=$LOG_DEBUG; + ;; + "CRITICAL") + log_level=$LOG_CRITICAL; + ;; + "WARNING") + log_level=$LOG_WARNING; + ;; + "TRACE") + log_level=$LOG_TRACE; + ;; + "NONE") + log_level=$LOG_NONE; + ;; + *) + warn "invalid log level $log_level_str, using INFO"; + log_level=$LOG_INFO; + ;; + esac fi + # options without values start here if [ -n "$read_only" ]; then - cmd_line=$(echo "$cmd_line --read-only"); + cmd_line=$(echo "$cmd_line --read-only"); fi if [ -n "$acl" ]; then - cmd_line=$(echo "$cmd_line --acl"); + cmd_line=$(echo "$cmd_line --acl"); + fi + + if [ -n "$selinux" ]; then + cmd_line=$(echo "$cmd_line --selinux"); + fi + + if [ -n "$enable_ino32" ]; then + cmd_line=$(echo "$cmd_line --enable-ino32"); fi if [ -n "$worm" ]; then cmd_line=$(echo "$cmd_line --worm"); fi - - if [ -n "$log_file" ]; then - cmd_line=$(echo "$cmd_line --log-file=$log_file"); + if [ -n "$volfile_max_fetch_attempts" ]; then + cmd_line=$(echo "$cmd_line --volfile-max-fetch-attempts=$volfile_max_fetch_attempts") fi if [ -n "$volfile_check" ]; then - cmd_line=$(echo "$cmd_line --volfile-check"); + cmd_line=$(echo "$cmd_line --volfile-check"); + fi + + if [ -n "$mem_accounting" ]; then + cmd_line=$(echo 
"$cmd_line --mem-accounting"); + fi + + if [ -n "$aux_gfid_mount" ]; then + cmd_line=$(echo "$cmd_line --aux-gfid-mount"); + fi + + if [ -n "$resolve_gids" ]; then + cmd_line=$(echo "$cmd_line --resolve-gids"); + fi + + if [ -n "$no_root_squash" ]; then + cmd_line=$(echo "$cmd_line --no-root-squash"); + fi + + if [ -n "$thin_client" ]; then + cmd_line=$(echo "$cmd_line --thin-client"); + fi + + if [ -n "$global_threading" ]; then + cmd_line=$(echo "$cmd_line --global-threading"); + fi + +#options with optional values start here + if [ -n "$fopen_keep_cache" ]; then + cmd_line=$(echo "$cmd_line --fopen-keep-cache=$fopen_keep_cache"); + fi + +#options with mandatory values start here + if [ -n "$halo_max_latency" ]; then + cmd_line=$(echo "$cmd_line --xlator-option \ + *replicate*.halo-max-latency=$halo_max_latency"); + fi + + if [ -n "$halo_max_replicas" ]; then + cmd_line=$(echo "$cmd_line --xlator-option \ + *replicate*.halo-max-replicas=$halo_max_replicas"); + fi + + if [ -n "$halo_min_replicas" ]; then + cmd_line=$(echo "$cmd_line --xlator-option \ + *replicate*.halo-min-replicas=$halo_min_replicas"); + fi + + if [ -n "$log_level" ]; then + cmd_line=$(echo "$cmd_line --log-level=$log_level"); + fi + + if [ -n "$log_file" ]; then + cmd_line=$(echo "$cmd_line --log-file=$log_file"); fi if [ -n "$direct_io_mode" ]; then - cmd_line=$(echo "$cmd_line --direct-io-mode=$direct_io_mode"); + cmd_line=$(echo "$cmd_line --direct-io-mode=$direct_io_mode"); + fi + + if [ -n "$use_readdirp" ]; then + cmd_line=$(echo "$cmd_line --use-readdirp=$use_readdirp"); + fi + + if [ -n "$event_history" ]; then + cmd_line=$(echo "$cmd_line --event-history=$event_history"); + fi + + if [ -n "$reader_thread_count" ]; then + cmd_line=$(echo "$cmd_line --reader-thread-count=$reader_thread_count"); + fi + + if [ -n "$fuse_auto_invalidation" ]; then + cmd_line=$(echo "$cmd_line --auto-invalidation=$fuse_auto_invalidation"); fi if [ -n "$volume_name" ]; then cmd_line=$(echo "$cmd_line 
--volume-name=$volume_name"); fi - if [ -n "$log_server" ]; then - if [ -n "$log_server_port" ]; then - cmd_line=$(echo "$cmd_line \ ---log-server=$log_server \ ---log-server-port=$log_server_port"); - fi + if [ -n "$attribute_timeout" ]; then + cmd_line=$(echo "$cmd_line --attribute-timeout=$attribute_timeout"); + fi + + if [ -n "$entry_timeout" ]; then + cmd_line=$(echo "$cmd_line --entry-timeout=$entry_timeout"); + fi + + if [ -n "$negative_timeout" ]; then + cmd_line=$(echo "$cmd_line --negative-timeout=$negative_timeout"); + fi + + if [ -n "$gid_timeout" ]; then + cmd_line=$(echo "$cmd_line --gid-timeout=$gid_timeout"); + fi + + if [ -n "$lru_limit" ]; then + cmd_line=$(echo "$cmd_line --lru-limit=$lru_limit"); + fi + + if [ -n "$invalidate_limit" ]; then + cmd_line=$(echo "$cmd_line --invalidate-limit=$invalidate_limit"); + fi + + if [ -n "$bg_qlen" ]; then + cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen"); + fi + + if [ -n "$cong_threshold" ]; then + cmd_line=$(echo "$cmd_line --congestion-threshold=$cong_threshold"); + fi + + if [ -n "$oom_score_adj" ]; then + cmd_line=$(echo "$cmd_line --oom-score-adj=$oom_score_adj"); + fi + + if [ -n "$fuse_mountopts" ]; then + cmd_line=$(echo "$cmd_line --fuse-mountopts=$fuse_mountopts"); + fi + + if [ -n "$xlator_option" ]; then + cmd_line=$(echo "$cmd_line --xlator-option=$xlator_option"); + fi + + if [ -n "$kernel_writeback_cache" ]; then + cmd_line=$(echo "$cmd_line --kernel-writeback-cache=$kernel_writeback_cache"); + fi + + if [ -n "$attr_times_granularity" ]; then + cmd_line=$(echo "$cmd_line --attr-times-granularity=$attr_times_granularity"); + fi + + if [ -n "$dump_fuse" ]; then + cmd_line=$(echo "$cmd_line --dump-fuse=$dump_fuse"); + fi + + if [ -n "$fuse_flush_handle_interrupt" ]; then + cmd_line=$(echo "$cmd_line --fuse-flush-handle-interrupt=$fuse_flush_handle_interrupt"); + fi + + if [ -n "$process_name" ]; then + cmd_line=$(echo "$cmd_line --process-name fuse.$process_name"); + else + 
cmd_line=$(echo "$cmd_line --process-name fuse"); fi + # if trasnport type is specified, we have to append it to + # volume name, so that it fetches the right client vol file + if [ -z "$volfile_loc" ]; then if [ -n "$server_ip" ]; then - if [ -n "$server_port" ]; then - cmd_line=$(echo "$cmd_line --volfile-server-port=$server_port"); + + servers=$(parse_volfile_servers ${server_ip}); + if [ -n "$servers" ]; then + for i in $(echo ${servers}); do + cmd_line=$(echo "$cmd_line --volfile-server=$i"); + done + else + warn "ERROR: No valid servers found on command line.. exiting" + print_usage + exit 1 fi - if [ -n "$transport" ]; then - cmd_line=$(echo "$cmd_line --volfile-server-transport=$transport"); + + if [ -n "$backupvolfile_server" ]; then + if [ -z "$backup_volfile_servers" ]; then + is_valid_hostname ${backupvolfile_server}; + if [ $? -eq 1 ]; then + warn "ERROR: Invalid backup server specified.. exiting" + exit 1 + fi + cmd_line=$(echo "$cmd_line --volfile-server=$backupvolfile_server"); + fi fi - if [ -n "$volume_id" ]; then - cmd_line=$(echo "$cmd_line --volfile-id=$volume_id"); + + if [ -n "$backup_volfile_servers" ]; then + backup_servers=$(parse_backup_volfile_servers ${backup_volfile_servers}) + for i in $(echo ${backup_servers}); do + cmd_line=$(echo "$cmd_line --volfile-server=$i"); + done fi - if [ -n "$backupvolfile_server" ]; then - cmd_line1=$(echo "$cmd_line --volfile-server=$backupvolfile_server"); + if [ -n "$server_port" ]; then + cmd_line=$(echo "$cmd_line --volfile-server-port=$server_port"); fi - if [ -n "$volfile_max_fetch_attempts" ]; then - cmd_line=$(echo "$cmd_line --volfile-max-fetch-attempts=$volfile_max_fetch_attempts"); + + if [ -n "$volume_id" ]; then + if [ -n "$transport" ]; then + volume_id="$volume_id.$transport"; + cmd_line=$(echo "$cmd_line --volfile-server-transport=$transport"); + fi + cmd_line=$(echo "$cmd_line --volfile-id=$volume_id"); fi - cmd_line=$(echo "$cmd_line --volfile-server=$server_ip"); fi else 
cmd_line=$(echo "$cmd_line --volfile=$volfile_loc"); fi - cmd_line=$(echo "$cmd_line $mount_point"); - err=0; - $cmd_line; + if [ -n "$fuse_mountopts" ]; then + cmd_line=$(echo "$cmd_line --fuse-mountopts=$fuse_mountopts"); + fi + if [ -n "$subdir_mount" ]; then + cmd_line=$(echo "$cmd_line --subdir-mount=/$subdir_mount"); + fi - inode=$(stat -c %i $mount_point 2>/dev/null); + if [ -n "$fuse_dev_eperm_ratelimit_ns" ]; then + cmd_line=$(echo "$cmd_line --fuse-dev-eperm-ratelimit-ns=$fuse_dev_eperm_ratelimit_ns"); + fi - # this is required if the stat returns error - if [ -z "$inode" ]; then - inode="0"; - fi - - # retry the failover - # if [ $? != "0" ]; then # <--- TODO: Once glusterfs returns proper error code, change it. - if [ $inode -ne 1 ]; then - err=1; - if [ -n "$cmd_line1" ]; then - cmd_line1=$(echo "$cmd_line1 $mount_point"); - $cmd_line1 - - inode=$(stat -c %i $mount_point 2>/dev/null); - # this is required if the stat returns error - if [ -z "$inode" ]; then - inode="0"; - fi - if [ $inode -ne 1]; then - err=1; - fi - fi + cmd_line=$(echo "$cmd_line $mount_point"); + $cmd_line; + if [ $? -ne 0 ]; then + # If this is true, then glusterfs process returned error without + # getting daemonized. We have made sure the logs are posted to + # 'stderr', so no need to point them to logfile. + warn "Mounting glusterfs on $mount_point failed." + exit 1; fi - if [ $err -eq "1" ]; then - echo "Mount failed. Please check the log file for more details." - exit 1; + + inode=$( ${getinode} $mount_point 2>/dev/null); + # this is required if the stat returns error + if [ $? -ne 0 ]; then + # At this time, glusterfs got daemonized, and then later exited. + # These failures are only logged in log file. + warn "Mount failed. Check the log file ${log_file} for more details." 
+ umount $mount_point > /dev/null 2>&1; + exit 1; fi } -usage () +print_usage () { - -echo "Usage: mount.glusterfs <volumeserver>:<volumeid/volumeport> -o <options> <mountpoint> +cat << EOF +Usage: $0 <server>:<volume/subdir> <mountpoint> -o<options> Options: -man 8 mount.glusterfs - -To display the version number of the mount helper: -mount.glusterfs --version" - +man 8 $(basename $0) +To display the version number of the mount helper: $0 -V +EOF } # check for recursive mounts. i.e, mounting over an existing brick check_recursive_mount () { - if [ $2 = "/" ]; then - echo Cannot mount over root; + if [ $1 = "/" ]; then + warn "Cannot mount over root"; exit 2; fi + # GFID check first # remove trailing / from mount point - mnt_dir=${2%/}; + mnt_dir=${1%/}; - export PATH; - # check whether getfattr exists - which getfattr > /dev/null; - if [ $? -ne 0 ]; then - return; + if [ -n "${getfattr}" ]; then + ${getfattr} -n trusted.gfid $mnt_dir 2>/dev/null | grep -iq "trusted.gfid="; + if [ $? -eq 0 ]; then + warn "ERROR: $mnt_dir is in use as a brick of a gluster volume"; + exit 2; + fi fi - getfattr -n trusted.gfid $mnt_dir 2>/dev/null | grep -iq "trusted.gfid="; - if [ $? -eq 0 ]; then - echo "ERROR: $mnt_dir is in use as a brick of a gluster volume"; - exit 2; + # check if the mount point is a brick's parent directory + GLUSTERD_WORKDIR="@GLUSTERD_WORKDIR@"; + + ls -L "${GLUSTERD_WORKDIR}"/vols/*/bricks/* > /dev/null 2>&1; + if [ $? 
-ne 0 ]; then + return; fi - # check if the mount point is a brick's parent directory - brick_path=(`cat /etc/glusterd/vols/*/bricks/* | grep ^path | cut -d "=" -f 2`); - root_inode=`stat -Lc %i /`; - root_dev=`stat -Lc %d /`; - mnt_inode=`stat -Lc %i $mnt_dir`; - mnt_dev=`stat -Lc %d $mnt_dir`; - for brick in "$brick_path"; - do + brick_path=`grep ^path "$GLUSTERD_WORKDIR"/vols/*/bricks/* 2>/dev/null | cut -d "=" -f 2`; + root_inode=`${lgetinode} /`; + root_dev=`${lgetdev} /`; + mnt_inode=`${lgetinode} $mnt_dir`; + mnt_dev=`${lgetdev} $mnt_dir`; + for brick in "$brick_path"; do # evaluate brick path to see if this is local, if non-local, skip iteration ls $brick > /dev/null 2>&1; if [ $? -ne 0 ]; then continue; fi - getfattr -n trusted.gfid "$brick" 2>/dev/null | grep -iq "trusted.gfid="; - if [ $? -ne 0 ]; then - continue; + + if [ -n "${getfattr}" ]; then + ${getfattr} -n trusted.gfid "$brick" 2>/dev/null | grep -iq "trusted.gfid="; + if [ $? -eq 0 ]; then + # brick is local + while [ 1 ]; do + tmp_brick="$brick"; + brick="$brick"/..; + brick_dev=`${lgetdev} $brick`; + brick_inode=`${lgetinode} $brick`; + if [ "$mnt_inode" -eq "$brick_inode" \ + -a "$mnt_dev" -eq "$brick_dev" ]; then + warn "ERROR: ${mnt_dir} is a parent of the brick ${tmp_brick}"; + exit 2; + fi + [ "$root_inode" -ne "$brick_inode" \ + -o "$root_dev" -ne "$brick_dev" ] || break; + done; + else + continue; + fi else - # brick is local - while [ 1 ]; - do - tmp_brick="$brick"; - brick="$brick"/..; - brick_dev=`stat -Lc %d $brick`; - brick_inode=`stat -Lc %i $brick`; - if [ "$mnt_inode" -eq "$brick_inode" -a "$mnt_dev" -eq "$brick_dev" ]; then - echo ERROR: $mnt_dir is a parent of the brick $tmp_brick; - exit 2; - fi - [ "$root_inode" -ne "$brick_inode" -o "$root_dev" -ne "$brick_dev" ] || break; - done; + continue; fi done; } -main () +with_options() { - helper=$(echo "$@" | sed -n 's/.*\--[ ]*\([^ ]*\).*/\1/p'); + local key=$1 + local value=$2 + + # Handle options with values. 
+ case "$key" in + "log-level") + log_level_str=$value + ;; + "log-file") + log_file=$value + ;; + "transport") + transport=$value + ;; + "direct-io-mode") + direct_io_mode=$value + ;; + "volume-name") + volume_name=$value + ;; + "volume-id") + volume_id=$value + ;; + "subdir-mount") + subdir_mount=$value + ;; + "volfile-check") + volfile_check=$value + ;; + "server-port") + server_port=$value + ;; + "attribute-timeout") + attribute_timeout=$value + ;; + "entry-timeout") + entry_timeout=$value + ;; + "negative-timeout") + negative_timeout=$value + ;; + "gid-timeout") + gid_timeout=$value + ;; + "lru-limit") + lru_limit=$value + ;; + "invalidate-limit") + invalidate_limit=$value + ;; + "background-qlen") + bg_qlen=$value + ;; + "backup-volfile-servers") + backup_volfile_servers=$value + ;; + "backupvolfile-server") + backupvolfile_server=$value + ;; + "fetch-attempts") + volfile_max_fetch_attempts=$value + ;; + "congestion-threshold") + cong_threshold=$value + ;; + "oom-score-adj") + oom_score_adj=$value + ;; + "xlator-option") + xlator_option=$value + ;; + "fuse-mountopts") + fuse_mountopts=$value + ;; + "use-readdirp") + use_readdirp=$value + ;; + "event-history") + event_history=$value + ;; + "reader-thread-count") + reader_thread_count=$value + ;; + "auto-invalidation") + fuse_auto_invalidation=$value + ;; + "no-root-squash") + if [ $value = "yes" ] || + [ $value = "on" ] || + [ $value = "enable" ] || + [ $value = "true" ] ; then + no_root_squash=1; + fi ;; + "root-squash") + if [ $value = "no" ] || + [ $value = "off" ] || + [ $value = "disable" ] || + [ $value = "false" ] ; then + no_root_squash=1; + fi ;; + "kernel-writeback-cache") + kernel_writeback_cache=$value + ;; + "attr-times-granularity") + attr_times_granularity=$value + ;; + "dump-fuse") + dump_fuse=$value + ;; + "fuse-flush-handle-interrupt") + fuse_flush_handle_interrupt=$value + ;; + "fuse-dev-eperm-ratelimit-ns") + fuse_dev_eperm_ratelimit_ns=$value + ;; + 
"context"|"fscontext"|"defcontext"|"rootcontext") + # standard SElinux mount options to pass to the kernel + [ -z "$fuse_mountopts" ] || fuse_mountopts="$fuse_mountopts," + fuse_mountopts="${fuse_mountopts}$key=\"$value\"" + ;; + "halo-max-latency") + halo_max_latency=$value + ;; + "halo-max-replicas") + halo_max_replicas=$value + ;; + "halo-min-replicas") + halo_min_replicas=$value + ;; + "process-name") + process_name=$value + ;; + # Values that are optional + "fopen-keep-cache") + fopen_keep_cache="=$value" + ;; + x-*) + # comments or userspace application-specific options, drop them + ;; + *) + warn "Invalid option: $key" + exit 1 + ;; + esac +} - options=$(echo "$@" | sed -n 's/.*\-o[ ]*\([^ ]*\).*/\1/p'); +without_options() +{ + local option=$1 + # Handle options without values. + case "$option" in + "ro") + read_only=1 + ;; + "acl") + acl=1 + ;; + "selinux") + selinux=1 + ;; + "worm") + worm=1 + ;; + "enable-ino32") + enable_ino32=1 + ;; + "mem-accounting") + mem_accounting=1 + ;; + "aux-gfid-mount") + if [ ${uname_s} = "Linux" ]; then + aux_gfid_mount=1 + fi + ;; + "thin-client") + thin_client=1 + ;; + "resolve-gids") + resolve_gids=1 + ;; + # "mount -t glusterfs" sends this, but it's useless. 
+ "rw") + ;; + "global-threading") + global_threading=1 + ;; + # TODO: not sure how to handle this yet + "async"|"sync"|"dirsync"|\ + "mand"|"nomand"|\ + "silent"|"loud"|\ + "iversion"|"noiversion"|\ + "nofail") + warn "mount option '${option}' is not handled (yet?)" + ;; + # standard mount options to pass to the kernel + "atime"|"noatime"|"diratime"|"nodiratime"|\ + "relatime"|"norelatime"|\ + "strictatime"|"nostrictatime"|"lazyatime"|"nolazyatime"|\ + "dev"|"nodev"|"exec"|"noexec"|"suid"|"nosuid"|"auto_unmount") + [ -z "$fuse_mountopts" ] || fuse_mountopts="$fuse_mountopts," + fuse_mountopts="${fuse_mountopts}${option}" + ;; + # these ones are interpreted during system initialization + "auto"|"noauto") + ;; + "_netdev") + ;; + # Values that are optional + "fopen-keep-cache") + fopen_keep_cache="true" + ;; + x-*) + # comments or userspace application-specific options, drop them + ;; + *) + warn "Invalid option $option"; + exit 1 + ;; + esac +} - new_log_level=$(echo "$options" | sed -n 's/.*log-level=\([^,]*\).*/\1/p'); +parse_options() +{ + local optarg=${1} + for pair in $(echo ${optarg}|sed 's/,/ /g'); do + key=$(echo "$pair" | cut -f1 -d'='); + value=$(echo "$pair" | cut -f2- -d'='); + if [ "$key" = "$value" ]; then + without_options $pair; + else + with_options $key $value; + fi + done +} - [ -n "$new_log_level" ] && { - log_level_str="$new_log_level"; +update_updatedb() +{ + # Append fuse.glusterfs to PRUNEFS variable in updatedb.conf(5). + # updatedb(8) should not index files under GlusterFS, indexing + # GlusterFS is not necessary and should be avoided. + # Following code disables updatedb crawl on 'glusterfs' + test -f $UPDATEDBCONF && { + if ! 
grep -q 'glusterfs' $UPDATEDBCONF; then + sed 's/\(PRUNEFS.*\)"/\1 fuse.glusterfs"/' $UPDATEDBCONF \ + > ${UPDATEDBCONF}.bak + mv -f ${UPDATEDBCONF}.bak $UPDATEDBCONF + fi } +} - log_file=$(echo "$options" | sed -n 's/.*log-file=\([^,]*\).*/\1/p'); - - read_only=$(echo "$options" | sed -n 's/.*\(ro\)[^,]*.*/\1/p'); - - acl=$(echo "$options" | sed -n 's/.*\(acl\)[^,]*.*/\1/p'); - - worm=$(echo "$options" | sed -n 's/.*\(worm\)[^,]*.*/\1/p'); - - transport=$(echo "$options" | sed -n 's/.*transport=\([^,]*\).*/\1/p'); - - direct_io_mode=$(echo "$options" | sed -n 's/.*direct-io-mode=\([^,]*\).*/\1/p'); - - volume_name=$(echo "$options" | sed -n 's/.*volume-name=\([^,]*\).*/\1/p'); - - volume_id=$(echo "$options" | sed -n 's/.*volume_id=\([^,]*\).*/\1/p'); - - volfile_check=$(echo "$options" | sed -n 's/.*volfile-check=\([^,]*\).*/\1/p'); - - volfile_max_fetch_attempts=$(echo "$options" | sed -n 's/.*fetch-attempts=\([^,]*\).*/\1/p'); - - server_port=$(echo "$options" | sed -n 's/.*server-port=\([^,]*\).*/\1/p'); - - backupvolfile_server=$(echo "$options" | sed -n 's/.*backupvolfile-server=\([^,]*\).*/\1/p'); - - log_server=$(echo "$options" | sed -n 's/.*log-server=\([^,]*\).*/\1/p'); +main () +{ + if [ "x${uname_s}" = "xLinux" -a $# -ge 2 ] ; then + volfile_loc=$1 + mount_point=$2 - log_server_port=$(echo "$options" | sed -n 's/.*log-server-port=\([^,]*\).*/\1/p'); + ## `mount` specifies options as a last argument + shift 2; + fi + while getopts "Vo:hns" opt; do + case "${opt}" in + o) + parse_options ${OPTARG}; + shift 2; + ;; + n) + ;; + s) + # accept+ignore sloppy mount, passed by autofs + ;; + V) + ${cmd_line} -V; + exit 0; + ;; + h) + print_usage; + exit 0; + ;; + ?) 
+ print_usage; + exit 0; + ;; + esac + done - volfile_loc="$1"; + if [ "x${uname_s}" = "xNetBSD" ] ; then + volfile_loc=$1 + mount_point=$2 + fi [ -r "$volfile_loc" ] || { - server_ip=$(echo "$volfile_loc" | sed -n 's/\([a-zA-Z0-9:.\-]*\):.*/\1/p'); - test_str=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p'); - [ -n "$test_str" ] && { - volume_id="$test_str"; + # '%' included to support ipv6 link local addresses + server_ip=$(echo "$volfile_loc" | sed -n 's/\([a-zA-Z0-9:%,.\-]*\):.*/\1/p'); + volume_str=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p'); + [ -n "$volume_str" ] && { + volume_id=$volume_str + volume_str_temp=$volume_str + first_char=$(echo "$volume_str" | cut -c 1) + [ ${first_char} = '/' ] && { + volume_str_temp=$(echo "$volume_str" | cut -c 2-) + } + volume_id_temp=$(echo "$volume_str_temp" | cut -f1 -d '/'); + [ $(echo $volume_str_temp | grep -c "/") -eq 1 ] && + [ "$volume_id_temp" != "snaps" ] && { + volume_id=$volume_id_temp; + [ ${first_char} = '/' ] && volume_id=/$volume_id; + subdir_mount=$(echo "$volume_str_temp" | cut -f2- -d '/'); + } + } + volfile_loc=""; + [ -z "$volume_id" -o -z "$server_ip" ] && { + cat <<EOF >&2 +ERROR: Server name/volume name unspecified cannot proceed further.. 
+Please specify correct format +Usage: +man 8 $0 +EOF + exit 1; } - volfile_loc=""; } - new_fs_options=$(echo "$options" | sed -e 's/[,]*log-file=[^,]*//' \ - -e 's/[,]*log-level=[^,]*//' \ - -e 's/[,]*volume-name=[^,]*//' \ - -e 's/[,]*direct-io-mode=[^,]*//' \ - -e 's/[,]*volfile-check=[^,]*//' \ - -e 's/[,]*transport=[^,]*//' \ - -e 's/[,]*backupvolfile-server=[^,]*//' \ - -e 's/[,]*server-port=[^,]*//' \ - -e 's/[,]*volume-id=[^,]*//' \ - -e 's/[,]*fetch-attempts=[^,]*//' \ - -e 's/[,]*log-server=[^,]*//' \ - -e 's/[,]*ro[^,]*//' \ - -e 's/[,]*acl[^,]*//' \ - -e 's/[,]*worm[^,]*//' \ - -e 's/[,]*log-server-port=[^,]*//'); - - # - [ -n "$helper" ] && { - cmd_line=$(echo "$cmd_line --$helper"); - exec $cmd_line; - exit 0; + grep_ret=$(echo ${mount_point} | grep '^\-o'); + [ "x" != "x${grep_ret}" ] && { + cat <<EOF >&2 +ERROR: -o options cannot be specified in either first two arguments.. +Please specify correct style +Usage: +man 8 $0 +EOF + exit 1; } - mount_point="" - for arg in "$@"; do - [ -d "$arg" ] && { - mount_point=$arg - } - done - # No need to do a ! -d test, it is taken care while initializing the # variable mount_point - [ -z "$mount_point" ] && { - echo "ERROR: Mount point does not exist." - usage; - exit 0; + [ -z "$mount_point" -o ! 
-d "$mount_point" ] && { + cat <<EOF >&2 +ERROR: Mount point does not exist +Please specify a mount point +Usage: +man 8 $0 +EOF + exit 1; } # Simple check to avoid multiple identical mounts - if grep -q " $mount_point fuse" /etc/mtab; then - echo -n "$0: according to mtab, GlusterFS is already mounted on " - echo "$mount_point" - exit 0; + if grep -q "[[:space:]+]${mount_point}[[:space:]+]fuse.glusterfs" $mounttab; then + warn "$0: according to mtab, GlusterFS is already mounted on" \ + "$mount_point" + exit 32; fi - check_recursive_mount "$@"; + #Snapshot volumes are mounted read only + case $volume_id in + /snaps/* ) read_only=1 + esac - fs_options=$(echo "$fs_options,$new_fs_options"); + check_recursive_mount "$mount_point"; - # Append fuse.glusterfs to PRUNEFS variable in updatedb.conf(5). updatedb(8) - # should not index files under GlusterFS, indexing will slow down GlusteFS - # if the filesystem is several TB in size. - test -f $UPDATEDBCONF && { - if ! grep -q 'glusterfs' $UPDATEDBCONF; then - sed 's/\(PRUNEFS.*\)"/\1 fuse.glusterfs"/' $UPDATEDBCONF \ - > ${UPDATEDBCONF}.bak - mv -f ${UPDATEDBCONF}.bak $UPDATEDBCONF - fi - } + update_updatedb; start_glusterfs; } _init "$@" && main "$@"; - diff --git a/xlators/mount/fuse/utils/mount_glusterfs.in b/xlators/mount/fuse/utils/mount_glusterfs.in index 3d4d286201e..3a5feb606d7 100755 --- a/xlators/mount/fuse/utils/mount_glusterfs.in +++ b/xlators/mount/fuse/utils/mount_glusterfs.in @@ -1,187 +1,554 @@ #!/bin/sh -# (C) 2008 Gluster Inc. <http://www.gluster.com> -# +# (C) 2014 Red Hat Inc. <http://www.redhat.com> +# (C) 2015 ungleich GmbH <http://www.ungleich.ch> +# # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License as # published by the Free Software Foundation; either version 2 of # the License, or (at your option) any later version. 
-# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free # Software Foundation, Inc., 51 Franklin Street, Fifth Floor, # Boston, MA 02110-1301 USA +warn () +{ + echo "$@" >&2 +} _init () { + # log level definitions LOG_NONE=NONE; LOG_CRITICAL=CRITICAL; LOG_ERROR=ERROR; LOG_WARNING=WARNING; - LOG_INFO=INFO; + LOG_INFO=INFO LOG_DEBUG=DEBUG; + LOG_TRACE=TRACE; - # set default log level to ERROR - log_level=$LOG_INFO; -} + HOST_NAME_MAX=64; -start_glusterfs () -{ prefix="@prefix@"; exec_prefix=@exec_prefix@; cmd_line=$(echo "@sbindir@/glusterfs"); - + + alias lsL='ls -L' + uname_s=`uname -s` + case ${uname_s} in + Darwin) + getinode="stat -f %i" + getdev="stat -f %d" + ;; + esac +} + +is_valid_hostname () +{ + local server=$1 + + length=$(echo $server | wc -c) + if [ ${length} -gt ${HOST_NAME_MAX} ]; then + return 1 + fi +} + +parse_backup_volfile_servers () +{ + local server_list=$1 + local servers="" + local new_servers="" + + servers=$(echo ${server_list} | sed 's/\:/ /g') + for server in ${servers}; do + is_valid_hostname ${server} + if [ $? -eq 1 ]; then + continue + fi + new_servers=$(echo "${new_servers} ${server}") + done + + echo ${new_servers} +} + +parse_volfile_servers () +{ + local server_list=$1 + local servers="" + local new_servers="" + + servers=$(echo ${server_list} | sed 's/,/ /g') + for server in ${servers}; do + is_valid_hostname ${server} + if [ $? 
-eq 1 ]; then + continue + fi + new_servers=$(echo "${new_servers} ${server}") + done + + echo ${new_servers} +} + +start_glusterfs () +{ if [ -n "$log_level_str" ]; then - case "$log_level_str" in - "ERROR") - log_level=$LOG_ERROR; - ;; + case "$( echo $log_level_str | awk '{print toupper($0)}')" in + "ERROR") + log_level=$LOG_ERROR; + ;; "INFO") log_level=$LOG_INFO; ;; - "DEBUG") - log_level=$LOG_DEBUG; - ;; - "CRITICAL") - log_level=$LOG_CRITICAL; - ;; - "WARNING") - log_level=$LOG_WARNING; - ;; - "NONE") - log_level=$LOG_NONE; - ;; - *) - echo "invalid log level $log_level_str, using INFO"; - log_level=$LOG_INFO; - ;; - esac - fi - cmd_line=$(echo "$cmd_line --log-level=$log_level"); - - if [ -n "$log_file" ]; then - cmd_line=$(echo "$cmd_line --log-file=$log_file"); + "DEBUG") + log_level=$LOG_DEBUG; + ;; + "CRITICAL") + log_level=$LOG_CRITICAL; + ;; + "WARNING") + log_level=$LOG_WARNING; + ;; + "TRACE") + log_level=$LOG_TRACE; + ;; + "NONE") + log_level=$LOG_NONE; + ;; + *) + warn "invalid log level $log_level_str, using INFO"; + log_level=$LOG_INFO; + ;; + esac + fi + + # options without values start here + if [ -n "$read_only" ]; then + cmd_line=$(echo "$cmd_line --read-only"); + fi + + if [ -n "$acl" ]; then + cmd_line=$(echo "$cmd_line --acl"); + fi + + if [ -n "$selinux" ]; then + cmd_line=$(echo "$cmd_line --selinux"); + fi + + if [ -n "$enable_ino32" ]; then + cmd_line=$(echo "$cmd_line --enable-ino32"); + fi + + if [ -n "$worm" ]; then + cmd_line=$(echo "$cmd_line --worm"); + fi + if [ -n "$volfile_max_fetch_attempts" ]; then + cmd_line=$(echo "$cmd_line --volfile-max-fetch-attempts=$volfile_max_fetch_attempts") + fi + + if [ -n "$fopen_keep_cache" ]; then + cmd_line=$(echo "$cmd_line --fopen-keep-cache"); fi if [ -n "$volfile_check" ]; then - cmd_line=$(echo "$cmd_line --volfile-check"); + cmd_line=$(echo "$cmd_line --volfile-check"); + fi + + if [ -n "$mem_accounting" ]; then + cmd_line=$(echo "$cmd_line --mem-accounting"); + fi + + if [ -n 
"$aux_gfid_mount" ]; then + cmd_line=$(echo "$cmd_line --aux-gfid-mount"); + fi + + if [ -n "$no_root_squash" ]; then + cmd_line=$(echo "$cmd_line --no-root-squash"); + fi + + if [ -n "$capability" ]; then + cmd_line=$(echo "$cmd_line --capability"); + fi + +#options with values start here + if [ -n "$log_level" ]; then + cmd_line=$(echo "$cmd_line --log-level=$log_level"); + fi + + if [ -n "$log_file" ]; then + cmd_line=$(echo "$cmd_line --log-file=$log_file"); fi if [ -n "$direct_io_mode" ]; then - cmd_line=$(echo "$cmd_line --disable-direct-io-mode"); + cmd_line=$(echo "$cmd_line --direct-io-mode=$direct_io_mode"); fi - - if [ -z "$volfile_loc" ]; then - if [ -n "$transport" ]; then - cmd_line=$(echo "$cmd_line \ ---volfile-server=$server_ip \ ---volfile-server-transport=$transport"); - else - cmd_line=$(echo "$cmd_line \ ---volfile-server=$server_ip \ - fi - else - cmd_line=$(echo "$cmd_line --volfile=$volfile_loc"); + + if [ -n "$mac_compat" ]; then + cmd_line=$(echo "$cmd_line --mac-compat=$mac_compat"); + fi + + if [ -n "$use_readdirp" ]; then + cmd_line=$(echo "$cmd_line --use-readdirp=$use_readdirp"); fi if [ -n "$volume_name" ]; then cmd_line=$(echo "$cmd_line --volume-name=$volume_name"); fi - - if [ -n "$volume_id" ]; then - cmd_line=$(echo "$cmd_line --volfile-id=$volume_id"); + + if [ -n "$attribute_timeout" ]; then + cmd_line=$(echo "$cmd_line --attribute-timeout=$attribute_timeout"); + fi + + if [ -n "$entry_timeout" ]; then + cmd_line=$(echo "$cmd_line --entry-timeout=$entry_timeout"); + fi + + if [ -n "$negative_timeout" ]; then + cmd_line=$(echo "$cmd_line --negative-timeout=$negative_timeout"); + fi + + if [ -n "$gid_timeout" ]; then + cmd_line=$(echo "$cmd_line --gid-timeout=$gid_timeout"); + fi + + if [ -n "$bg_qlen" ]; then + cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen"); + fi + + if [ -n "$cong_threshold" ]; then + cmd_line=$(echo "$cmd_line --congestion-threshold=$cong_threshold"); + fi + + if [ -n "$fuse_mountopts" ]; then + 
cmd_line=$(echo "$cmd_line --fuse-mountopts=$fuse_mountopts"); + fi + + if [ -n "$xlator_option" ]; then + cmd_line=$(echo "$cmd_line --xlator-option=$xlator_option"); + fi + + if [ -n "$process_name" ]; then + cmd_line=$(echo "$cmd_line --process-name fuse.$process_name"); + else + cmd_line=$(echo "$cmd_line --process-name fuse"); + fi + + if [ -z "$volfile_loc" ]; then + if [ -n "$server_ip" ]; then + + servers=$(parse_volfile_servers ${server_ip}); + if [ -n "$servers" ]; then + for i in $(echo ${servers}); do + cmd_line=$(echo "$cmd_line --volfile-server=$i"); + done + else + warn "ERROR: No valid servers found on command line.. exiting" + print_usage + exit 1 + fi + + if [ -n "$backupvolfile_server" ]; then + if [ -z "$backup_volfile_servers" ]; then + is_valid_hostname ${backupvolfile_server}; + if [ $? -eq 1 ]; then + warn "ERROR: Invalid backup server specified.. exiting" + exit 1 + fi + cmd_line=$(echo "$cmd_line --volfile-server=$backupvolfile_server"); + fi + fi + + if [ -n "$backup_volfile_servers" ]; then + backup_servers=$(parse_backup_volfile_servers ${backup_volfile_servers}) + for i in $(echo ${backup_servers}); do + cmd_line=$(echo "$cmd_line --volfile-server=$i"); + done + fi + + if [ -n "$server_port" ]; then + cmd_line=$(echo "$cmd_line --volfile-server-port=$server_port"); + fi + + if [ -n "$transport" ]; then + cmd_line=$(echo "$cmd_line --volfile-server-transport=$transport"); + fi + + if [ -n "$volume_id" ]; then + cmd_line=$(echo "$cmd_line --volfile-id=$volume_id"); + fi + fi + else + cmd_line=$(echo "$cmd_line --volfile=$volfile_loc"); + fi + + if [ -n "$fuse_mountopts" ]; then + cmd_line=$(echo "$cmd_line --fuse-mountopts=$fuse_mountopts"); fi cmd_line=$(echo "$cmd_line $mount_point"); - exec $cmd_line; + $cmd_line; + + if [ $? 
-ne 0 ]; then + exit 1; + fi +} + +print_usage () +{ +cat << EOF >&2 +Usage: $0 <volumeserver>:<volumeid/volumeport> -o<options> <mountpoint> +Options: +man 8 $0 +To display the version number of the mount helper: $0 -V +EOF +} + +with_options() +{ + local key=$1 + local value=$2 + + # Handle options with values. + case "$key" in + "log-level") + log_level_str=$value + ;; + "log-file") + log_file=$value + ;; + "transport") + transport=$value + ;; + "direct-io-mode") + direct_io_mode=$value + ;; + "mac-compat") + mac_compat=$value + ;; + "volume-name") + volume_name=$value + ;; + "volume-id") + volume_id=$value + ;; + "volfile-check") + volfile_check=$value + ;; + "server-port") + server_port=$value + ;; + "attribute-timeout") + attribute_timeout=$value + ;; + "entry-timeout") + entry_timeout=$value + ;; + "negative-timeout") + negative_timeout=$value + ;; + "gid-timeout") + gid_timeout=$value + ;; + "background-qlen") + bg_qlen=$value + ;; + "backup-volfile-servers") + backup_volfile_servers=$value + ;; + "backupvolfile-server") + backupvolfile_server=$value + ;; + "fetch-attempts") + volfile_max_fetch_attempts=$value + ;; + "congestion-threshold") + cong_threshold=$value + ;; + "xlator-option") + xlator_option=$value + ;; + "fuse-mountopts") + fuse_mountopts=$value + ;; + "use-readdirp") + use_readdirp=$value + ;; + "no-root-squash") + if [ $value = "yes" ] || + [ $value = "on" ] || + [ $value = "enable" ] || + [ $value = "true" ] ; then + no_root_squash=1; + fi ;; + "root-squash") + if [ $value = "no" ] || + [ $value = "off" ] || + [ $value = "disable" ] || + [ $value = "false" ] ; then + no_root_squash=1; + fi ;; + "process-name") + process_name=$value + ;; + *) + warn "Invalid option: $key" + exit 1 + ;; + esac } +without_options() +{ + local option=$1 + # Handle options without values. 
+ case "$option" in + "ro") + read_only=1 + ;; + "acl") + acl=1 + ;; + "selinux") + selinux=1 + ;; + "worm") + worm=1 + ;; + "fopen-keep-cache") + fopen_keep_cache=1 + ;; + "enable-ino32") + enable_ino32=1 + ;; + "mem-accounting") + mem_accounting=1 + ;; + "aux-gfid-mount") + if [ ${uname_s} = "Linux" ]; then + aux_gfid_mount=1 + fi + ;; + # "mount -t glusterfs" sends this, but it's useless. + "rw") + ;; + # these ones are interpreted during system initialization + "noauto") + ;; + "_netdev") + ;; + "capability") + capability=1 + ;; + *) + warn "Invalid option $option"; + exit 1 + ;; + esac +} + +parse_options() +{ + local optarg=${1} + for pair in $(echo $optarg | sed 's/,/ /g'); do + key=$(echo "$pair" | cut -f1 -d'='); + value=$(echo "$pair" | cut -f2- -d'='); + if [ "$key" = "$value" ]; then + without_options $pair; + else + with_options $key $value; + fi + done +} main () { - - new_log_level="" - log_file="" - transport="" - direct_io_mode="" - volume_name="" - new_fs_options="" - volfile_check="" - - while getopts o: opt; do - case "$opt" in - o) - options=$(echo $OPTARG | sed -n 's/.*\-o[ ]*\([^ ]*\).*/\1/p'); - [ -z $new_log_level ] && { - new_log_level=$(echo "$options" | sed -n 's/.*log-level=\([^,]*\).*/\1/p'); - } - - [ -z $log_file ] && { - log_file=$(echo "$options" | sed -n 's/.*log-file=\([^,]*\).*/\1/p'); - } - - [ -z $transport ] && { - transport=$(echo "$options" | sed -n 's/.*transport=\([^,]*\).*/\1/p'); - } - - [ -z $direct_io_mode ] && { - direct_io_mode=$(echo "$options" | sed -n 's/.*direct-io-mode=\([^,]*\).*/\1/p'); - } - - [ -z $volfile_check ] && { - volfile_check=$(echo "$options" | sed -n 's/.*volfile-check=\([^,]*\).*/\1/p'); - } - - [ -z $volume_name ] && { - volume_name=$(echo "$options" | sed -n 's/.*volume-name=\([^,]*\).*/\1/p'); - } - - [ -z $volume_id ] && { - volume_id=$(echo "$options" | sed -n 's/.*volume-id=\([^,]*\).*/\1/p'); - } - - this_option=$(echo "$options" | sed -e 's/[,]*log-file=[^,]*//' \ - -e 
's/[,]*log-level=[^,]*//' \ - -e 's/[,]*volume-name=[^,]*//' \ - -e 's/[,]*volfile-check=[^,]*//' \ - -e 's/[,]*direct-io-mode=[^,]*//' \ - -e 's/[,]*transport=[^,]*//' \ - -e 's/[,]*volume-id=[^,]*//'); - new_fs_options="$new_fs_options $this_option"; - ;; - esac +#if !defined(__FreeBSD__) + ## `mount` on OSX specifies options as first argument + echo $1|grep -q -- "-o" + if [ $? -eq 0 ]; then + volfile_loc=$3 + mount_point=$4 + else + volfile_loc=$1 + mount_point=$2 + fi +#endif /* __FreeBSD__ */ + while getopts "Vo:h" opt; do + case "${opt}" in + o) + parse_options ${OPTARG}; + ;; + V) + ${cmd_line} -V; + exit 0; + ;; + h) + print_usage; + exit 0; + ;; + ?) + print_usage; + exit 0; + ;; + esac done - [ -n "$new_log_level" ] && { - log_level_str="$new_log_level"; - } +#ifdef __FreeBSD__ + shift $((OPTIND - 1)) + volfile_loc="$1" + mount_point="$2" +#endif /* __FreeBSD__ */ - # TODO: use getopt. This is very much darwin specific - volfile_loc="$1"; - while [ "$volfile_loc" = "-o" ] ; do - shift ; - shift ; - volfile_loc="$1"; - done - [ -r "$volfile_loc" ] || { - server_ip=$(echo "$volfile_loc" | sed -n 's/\([a-zA-Z0-9:.\-]*\):.*/\1/p'); - volfile_loc=""; + # '%' included to support ipv6 link local addresses + server_ip=$(echo "$volfile_loc" | sed -n 's/\([a-zA-Z0-9:%.\-]*\):.*/\1/p'); + volume_str=$(echo "$volfile_loc" | sed -n 's/.*:\([^ ]*\).*/\1/p'); + [ -n "$volume_str" ] && { + volume_id="$volume_str"; + } + volfile_loc=""; + } + + [ -z "$volume_id" -o -z "$server_ip" ] && { + cat <<EOF >&2 +ERROR: Server name/volume name unspecified cannot proceed further.. +Please specify correct format +Usage: +man 8 $0 +EOF + exit 1; + } + + grep_ret=$(echo ${mount_point} | grep '^\-o'); + [ "x" != "x${grep_ret}" ] && { + cat <<EOF >&2 +ERROR: -o options cannot be specified in either first two arguments.. +Please specify correct style +Usage: +man 8 $0 +EOF + exit 1; + } + + # No need to do a ! 
-d test, it is taken care while initializing the + # variable mount_point + [ -z "$mount_point" -o ! -d "$mount_point" ] && { + cat <<EOF >&2 +ERROR: Mount point does not exist +Please specify a mount point +Usage: +man 8 $0 +EOF + exit 1; } - # following line is product of love towards sed - # $2=$(echo "$@" | sed -n 's/[^ ]* \([^ ]*\).*/\1/p'); - - mount_point="$2"; - fs_options=$(echo "$fs_options,$new_fs_options"); - start_glusterfs; } |
