diff options
Diffstat (limited to 'xlators/storage/posix/src/posix.c')
| -rw-r--r-- | xlators/storage/posix/src/posix.c | 1897 |
1 files changed, 1413 insertions, 484 deletions
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 2df5702cb..fb45c7a67 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" @@ -24,6 +14,7 @@ #define __XOPEN_SOURCE 500 +#include <openssl/md5.h> #include <stdint.h> #include <sys/time.h> #include <sys/resource.h> @@ -32,6 +23,8 @@ #include <pthread.h> #include <ftw.h> #include <sys/stat.h> +#include <signal.h> +#include <sys/uio.h> #ifndef GF_BSD_HOST_OS #include <alloca.h> @@ -42,7 +35,6 @@ #endif /* HAVE_LINKAT */ #include "glusterfs.h" -#include "md5.h" #include "checksum.h" #include "dict.h" #include "logging.h" @@ -59,7 +51,11 @@ #include "timer.h" #include "glusterfs3-xdr.h" #include "hashfn.h" +#include "posix-aio.h" +#include "glusterfs-acl.h" +extern char *marker_xattrs[]; +#define ALIGN_SIZE 4096 #undef HAVE_SET_FSID #ifdef HAVE_SET_FSID @@ -98,7 +94,7 @@ posix_forget (xlator_t *this, inode_t *inode) int32_t posix_lookup (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xattr_req) + loc_t *loc, dict_t *xdata) { struct iatt buf = {0, }; int32_t op_ret = -1; @@ -108,11 +104,11 @@ posix_lookup (call_frame_t *frame, xlator_t *this, char * real_path = NULL; char * par_path = NULL; struct iatt postparent = {0,}; + int32_t gfidless = 0; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (loc, out); - VALIDATE_OR_GOTO (loc->path, out); /* The Hidden directory should be for housekeeping purpose and it should not get any gfid on it */ @@ -126,14 +122,19 @@ posix_lookup (call_frame_t *frame, xlator_t *this, goto out; } + op_ret = dict_get_int32 (xdata, GF_GFIDLESS_LOOKUP, &gfidless); + op_ret = -1; if (uuid_is_null (loc->pargfid)) { /* nameless lookup */ MAKE_INODE_HANDLE (real_path, this, loc, &buf); } else { MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &buf); - if (uuid_is_null (loc->inode->gfid)) - posix_gfid_set (this, real_path, loc, xattr_req); + if (uuid_is_null (loc->inode->gfid)) { + posix_gfid_heal (this, real_path, loc, xdata); + MAKE_ENTRY_HANDLE (real_path, par_path, this, + loc, &buf); + } } op_errno = errno; @@ -149,9 +150,9 @@ posix_lookup (call_frame_t *frame, xlator_t *this, goto parent; } - if (xattr_req && (op_ret == 0)) { + if (xdata && (op_ret == 0)) { xattr = posix_lookup_xattr_fill (this, real_path, loc, - xattr_req, &buf); + xdata, &buf); } parent: @@ -171,11 +172,11 @@ out: if (xattr) dict_ref (xattr); - if (!op_ret && uuid_is_null (buf.ia_gfid)) { + if (!op_ret && !gfidless && uuid_is_null (buf.ia_gfid)) { gf_log (this->name, GF_LOG_ERROR, "buf->ia_gfid is null for " "%s", (real_path) ? real_path: ""); op_ret = -1; - op_errno = ENOENT; + op_errno = ENODATA; } STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, (loc)?loc->inode:NULL, &buf, xattr, &postparent); @@ -188,7 +189,7 @@ out: int32_t -posix_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) +posix_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { struct iatt buf = {0,}; int32_t op_ret = -1; @@ -211,7 +212,8 @@ posix_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, + gf_log (this->name, (op_errno == ENOENT)? + GF_LOG_DEBUG:GF_LOG_ERROR, "lstat on %s failed: %s", real_path, strerror (op_errno)); goto out; @@ -221,7 +223,7 @@ posix_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) out: SET_TO_OLD_FS_ID(); - STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf); + STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf, NULL); return 0; } @@ -327,7 +329,7 @@ out: int posix_setattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, struct iatt *stbuf, int32_t valid) + loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -412,7 +414,7 @@ out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, - &statpre, &statpost); + &statpre, &statpost, NULL); return 0; } @@ -462,7 +464,7 @@ posix_do_futimes (xlator_t *this, int posix_fsetattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, struct iatt *stbuf, int32_t valid) + fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -557,14 +559,297 @@ out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, - &statpre, &statpost); + &statpre, &statpost, NULL); + + return 0; +} + +static int32_t +posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + off_t offset, size_t len, struct iatt *statpre, + struct iatt *statpost) +{ + struct posix_fd *pfd = NULL; + int32_t ret = -1; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = posix_fd_ctx_get (fd, this, &pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "pfd is NULL from fd=%p", fd); + goto out; + } + + ret = posix_fdstat (this, pfd->fd, statpre); + if (ret == -1) { + ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "fallocate (fstat) failed on fd=%p: %s", fd, + strerror (errno)); + goto out; + } + + ret = sys_fallocate(pfd->fd, flags, offset, len); + if (ret == -1) { + ret = -errno; + goto out; + } + + ret = posix_fdstat (this, pfd->fd, statpost); + if (ret == -1) { + ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "fallocate (fstat) failed on fd=%p: %s", fd, + strerror (errno)); + goto out; + } + +out: + SET_TO_OLD_FS_ID (); + + return ret; +} + +char* +_page_aligned_alloc (size_t size, char **aligned_buf) +{ + char *alloc_buf = NULL; + char *buf = NULL; + + alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char); + if (!alloc_buf) + goto out; + /* page aligned buffer */ + buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE); + *aligned_buf = buf; +out: + return alloc_buf; +} + +static int32_t +_posix_do_zerofill(int fd, off_t offset, size_t len, int o_direct) +{ + size_t num_vect = 0; + int32_t num_loop = 1; + int32_t idx = 0; + int32_t op_ret = -1; + int32_t vect_size = VECTOR_SIZE; + size_t remain = 0; + size_t extra = 0; + struct iovec *vector = NULL; + char *iov_base = NULL; + char *alloc_buf = NULL; + + if (len == 0) + return 0; + if (len < VECTOR_SIZE) + vect_size = len; + + num_vect = len / (vect_size); + remain = len % vect_size ; + if (num_vect > MAX_NO_VECT) { + extra = num_vect % MAX_NO_VECT; + num_loop = num_vect / MAX_NO_VECT; + num_vect = MAX_NO_VECT; + } + + vector = GF_CALLOC (num_vect, sizeof(struct iovec), + gf_common_mt_iovec); + if (!vector) + return -1; + if (o_direct) { + alloc_buf = _page_aligned_alloc(vect_size, &iov_base); + if (!alloc_buf) { + gf_log ("_posix_do_zerofill", GF_LOG_DEBUG, + "memory alloc failed, vect_size %d: %s", + vect_size, strerror(errno)); + GF_FREE(vector); + return -1; + } + } else { + iov_base = GF_CALLOC (vect_size, sizeof(char), + gf_common_mt_char); + if (!iov_base) { + GF_FREE(vector); + return -1; + } + } + + for (idx = 0; idx < num_vect; idx++) { + vector[idx].iov_base = iov_base; + vector[idx].iov_len = vect_size; + } + lseek(fd, offset, SEEK_SET); + for (idx = 0; idx < num_loop; idx++) { + op_ret = writev(fd, vector, num_vect); + if (op_ret < 0) + goto err; + } + if (extra) { + op_ret = writev(fd, vector, extra); + if (op_ret < 0) + goto err; + } + if (remain) { + vector[0].iov_len = remain; + op_ret = writev(fd, vector , 1); + if (op_ret < 0) + goto err; + } +err: + if (o_direct) + GF_FREE(alloc_buf); + else + GF_FREE(iov_base); + GF_FREE(vector); + return op_ret; +} + +static int32_t +posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, + off_t offset, size_t len, struct iatt *statpre, + struct iatt *statpost) +{ + struct posix_fd *pfd = NULL; + int32_t ret = -1; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = posix_fd_ctx_get (fd, this, &pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "pfd is NULL from fd=%p", fd); + goto out; + } + + ret = posix_fdstat (this, pfd->fd, statpre); + if (ret == -1) { + ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "pre-operation fstat failed on fd = %p: %s", fd, + strerror (errno)); + goto out; + } + ret = _posix_do_zerofill(pfd->fd, offset, len, pfd->flags & O_DIRECT); + if (ret < 0) { + ret = -errno; + gf_log(this->name, GF_LOG_ERROR, + "zerofill failed on fd %d length %ld %s", + pfd->fd, len, strerror(errno)); + goto out; + } + if (pfd->flags & (O_SYNC|O_DSYNC)) { + ret = fsync (pfd->fd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "fsync() in writev on fd %d failed: %s", + pfd->fd, strerror (errno)); + ret = -errno; + goto out; + } + } + + ret = posix_fdstat (this, pfd->fd, statpost); + if (ret == -1) { + ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "post operation fstat failed on fd=%p: %s", fd, + strerror (errno)); + goto out; + } +out: + SET_TO_OLD_FS_ID (); + + return ret; +} + +static int32_t +_posix_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size, + off_t offset, size_t len, dict_t *xdata) +{ + int32_t ret; + int32_t flags = 0; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; + + if (keep_size) + flags = FALLOC_FL_KEEP_SIZE; + + ret = posix_do_fallocate(frame, this, fd, flags, offset, len, + &statpre, &statpost); + if (ret < 0) + goto err; + + STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, NULL); + return 0; + +err: + STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, NULL); + return 0; +} + +static int32_t +posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + int32_t ret; + int32_t flags = FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; + + ret = posix_do_fallocate(frame, this, fd, flags, offset, len, + &statpre, &statpost); + if (ret < 0) + goto err; + + STACK_UNWIND_STRICT(discard, frame, 0, 0, &statpre, &statpost, NULL); + return 0; + +err: + STACK_UNWIND_STRICT(discard, frame, -1, -ret, NULL, NULL, NULL); + return 0; + +} + +static int32_t +posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + int32_t ret = 0; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; + + ret = posix_do_zerofill(frame, this, fd, offset, len, + &statpre, &statpost); + if (ret < 0) + goto err; + + STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL); return 0; + +err: + STACK_UNWIND_STRICT(zerofill, frame, -1, -ret, NULL, NULL, NULL); + return 0; + } int32_t posix_opendir (call_frame_t *frame, xlator_t *this, - loc_t *loc, fd_t *fd) + loc_t *loc, fd_t *fd, dict_t *xdata) { char * real_path = NULL; int32_t op_ret = -1; @@ -633,7 +918,7 @@ out: } SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd); + STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, NULL); return 0; } @@ -681,7 +966,7 @@ out: int32_t posix_readlink (call_frame_t *frame, xlator_t *this, - loc_t *loc, size_t size) + loc_t *loc, size_t size, dict_t *xdata) { char * dest = NULL; int32_t op_ret = -1; @@ -719,7 +1004,7 @@ posix_readlink (call_frame_t *frame, xlator_t *this, out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, dest, &stbuf); + STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, dest, &stbuf, NULL); return 0; } @@ -727,7 +1012,7 @@ out: int posix_mknod (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, dev_t dev, dict_t *params) + loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata) { int tmp_fd = 0; int32_t op_ret = -1; @@ -773,9 +1058,9 @@ posix_mknod (call_frame_t *frame, xlator_t *this, /* Check if the 'gfid' already exists, because this mknod may be an internal call from distribute for creating 'linkfile', and that linkfile may be for a hardlinked file */ - if (dict_get (params, GLUSTERFS_INTERNAL_FOP_KEY)) { - dict_del (params, GLUSTERFS_INTERNAL_FOP_KEY); - op_ret = dict_get_ptr (params, "gfid-req", &uuid_req); + if (dict_get (xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { + dict_del (xdata, GLUSTERFS_INTERNAL_FOP_KEY); + op_ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); if (op_ret) { gf_log (this->name, GF_LOG_DEBUG, "failed to get the gfid from dict for %s", @@ -818,7 +1103,7 @@ real_op: } } - op_ret = posix_gfid_set (this, real_path, loc, params); + op_ret = posix_gfid_set (this, real_path, loc, xdata); if (op_ret) { gf_log (this->name, GF_LOG_ERROR, "setting gfid on %s failed", real_path); @@ -836,14 +1121,14 @@ real_op: #endif post_op: - op_ret = posix_acl_xattr_set (this, real_path, params); + op_ret = posix_acl_xattr_set (this, real_path, xdata); if (op_ret) { gf_log (this->name, GF_LOG_ERROR, "setting ACLs on %s failed (%s)", real_path, strerror (errno)); } - op_ret = posix_entry_create_xattr_set (this, real_path, params); + op_ret = posix_entry_create_xattr_set (this, real_path, xdata); if (op_ret) { gf_log (this->name, GF_LOG_ERROR, "setting xattrs on %s failed (%s)", real_path, @@ -874,7 +1159,8 @@ out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, - (loc)?loc->inode:NULL, &stbuf, &preparent, &postparent); + (loc)?loc->inode:NULL, &stbuf, &preparent, + &postparent, NULL); if ((op_ret == -1) && (!was_present)) { unlink (real_path); @@ -886,7 +1172,7 @@ out: int posix_mkdir (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, dict_t *params) + loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -905,6 +1191,18 @@ posix_mkdir (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (loc, out); + /* The Hidden directory should be for housekeeping purpose and it + should not get created from a user request */ + if (__is_root_gfid (loc->pargfid) && + (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) { + gf_log (this->name, GF_LOG_WARNING, + "mkdir issued on %s, which is not permitted", + GF_HIDDEN_PATH); + op_errno = EPERM; + op_ret = -1; + goto out; + } + priv = this->private; VALIDATE_OR_GOTO (priv, out); @@ -942,7 +1240,7 @@ posix_mkdir (call_frame_t *frame, xlator_t *this, goto out; } - op_ret = posix_gfid_set (this, real_path, loc, params); + op_ret = posix_gfid_set (this, real_path, loc, xdata); if (op_ret) { gf_log (this->name, GF_LOG_ERROR, "setting gfid on %s failed", real_path); @@ -959,14 +1257,14 @@ posix_mkdir (call_frame_t *frame, xlator_t *this, } #endif - op_ret = posix_acl_xattr_set (this, real_path, params); + op_ret = posix_acl_xattr_set (this, real_path, xdata); if (op_ret) { gf_log (this->name, GF_LOG_ERROR, "setting ACLs on %s failed (%s)", real_path, strerror (errno)); } - op_ret = posix_entry_create_xattr_set (this, real_path, params); + op_ret = posix_entry_create_xattr_set (this, real_path, xdata); if (op_ret) { gf_log (this->name, GF_LOG_ERROR, "setting xattrs on %s failed (%s)", real_path, @@ -997,7 +1295,8 @@ out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, - (loc)?loc->inode:NULL, &stbuf, &preparent, &postparent); + (loc)?loc->inode:NULL, &stbuf, &preparent, + &postparent, NULL); if ((op_ret == -1) && (!was_present)) { unlink (real_path); @@ -1009,17 +1308,17 @@ out: int32_t posix_unlink (call_frame_t *frame, xlator_t *this, - loc_t *loc) + loc_t *loc, int xflag, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_path = NULL; - char *par_path = NULL; - int32_t fd = -1; - struct iatt stbuf; - struct posix_private *priv = NULL; - struct iatt preparent = {0,}; - struct iatt postparent = {0,}; + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL; + char *par_path = NULL; + int32_t fd = -1; + struct iatt stbuf = {0,}; + struct posix_private *priv = NULL; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; DECLARE_OLD_FS_ID_VAR; @@ -1081,7 +1380,7 @@ out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, - &preparent, &postparent); + &preparent, &postparent, NULL); if (fd != -1) { close (fd); @@ -1093,12 +1392,13 @@ out: int posix_rmdir (call_frame_t *frame, xlator_t *this, - loc_t *loc, int flags) + loc_t *loc, int flags, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char * real_path = NULL; char * par_path = NULL; + char * gfid_str = NULL; struct iatt preparent = {0,}; struct iatt postparent = {0,}; struct iatt stbuf; @@ -1138,12 +1438,13 @@ posix_rmdir (call_frame_t *frame, xlator_t *this, } if (flags) { - uint32_t hashval = 0; - char *tmp_path = alloca (strlen (priv->trash_path) + 16); + gfid_str = uuid_utoa (stbuf.ia_gfid); + char *tmp_path = alloca (strlen (priv->trash_path) + + strlen ("/") + + strlen (gfid_str) + 1); mkdir (priv->trash_path, 0755); - hashval = gf_dm_hashfn (real_path, strlen (real_path)); - sprintf (tmp_path, "%s/%u", priv->trash_path, hashval); + sprintf (tmp_path, "%s/%s", priv->trash_path, gfid_str); op_ret = rename (real_path, tmp_path); } else { op_ret = rmdir (real_path); @@ -1186,7 +1487,7 @@ out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, - &preparent, &postparent); + &preparent, &postparent, NULL); return 0; } @@ -1194,7 +1495,7 @@ out: int posix_symlink (call_frame_t *frame, xlator_t *this, - const char *linkname, loc_t *loc, dict_t *params) + const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -1250,7 +1551,7 @@ posix_symlink (call_frame_t *frame, xlator_t *this, goto out; } - op_ret = posix_gfid_set (this, real_path, loc, params); + op_ret = posix_gfid_set (this, real_path, loc, xdata); if (op_ret) { gf_log (this->name, GF_LOG_ERROR, "setting gfid on %s failed", real_path); @@ -1267,14 +1568,14 @@ posix_symlink (call_frame_t *frame, xlator_t *this, } #endif - op_ret = posix_acl_xattr_set (this, real_path, params); + op_ret = posix_acl_xattr_set (this, real_path, xdata); if (op_ret) { gf_log (this->name, GF_LOG_ERROR, "setting ACLs on %s failed (%s)", real_path, strerror (errno)); } - op_ret = posix_entry_create_xattr_set (this, real_path, params); + op_ret = posix_entry_create_xattr_set (this, real_path, xdata); if (op_ret) { gf_log (this->name, GF_LOG_ERROR, "setting xattrs on %s failed (%s)", real_path, @@ -1305,7 +1606,8 @@ out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, - (loc)?loc->inode:NULL, &stbuf, &preparent, &postparent); + (loc)?loc->inode:NULL, &stbuf, &preparent, + &postparent, NULL); if ((op_ret == -1) && (!was_present)) { unlink (real_path); @@ -1317,7 +1619,7 @@ out: int posix_rename (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc) + loc_t *oldloc, loc_t *newloc, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -1461,7 +1763,7 @@ out: STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, &stbuf, &preoldparent, &postoldparent, - &prenewparent, &postnewparent); + &prenewparent, &postnewparent, NULL); if ((op_ret == -1) && !was_present) { unlink (real_newpath); @@ -1473,7 +1775,7 @@ out: int posix_link (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc) + loc_t *oldloc, loc_t *newloc, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -1516,7 +1818,7 @@ posix_link (call_frame_t *frame, xlator_t *this, /* * On most systems (Linux being the notable exception), link(2) * first resolves symlinks. If the target is a directory or - * is nonexistent, it will fail. linkat(2) operates on the + * is nonexistent, it will fail. linkat(2) operates on the * symlink instead of its target when the AT_SYMLINK_FOLLOW * flag is not supplied. */ @@ -1556,7 +1858,7 @@ out: STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, (oldloc)?oldloc->inode:NULL, &stbuf, &preparent, - &postparent); + &postparent, NULL); if ((op_ret == -1) && (!was_present)) { unlink (real_newpath); @@ -1567,7 +1869,8 @@ out: int32_t -posix_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) +posix_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -1619,7 +1922,7 @@ out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, - &prebuf, &postbuf); + &prebuf, &postbuf, NULL); return 0; } @@ -1628,7 +1931,7 @@ out: int posix_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, mode_t mode, - fd_t *fd, dict_t *params) + mode_t umask, fd_t *fd, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -1701,7 +2004,10 @@ posix_create (call_frame_t *frame, xlator_t *this, goto out; } - op_ret = posix_gfid_set (this, real_path, loc, params); + if (was_present) + goto fill_stat; + + op_ret = posix_gfid_set (this, real_path, loc, xdata); if (op_ret) { gf_log (this->name, GF_LOG_ERROR, "setting gfid on %s failed", real_path); @@ -1717,20 +2023,21 @@ posix_create (call_frame_t *frame, xlator_t *this, } #endif - op_ret = posix_acl_xattr_set (this, real_path, params); + op_ret = posix_acl_xattr_set (this, real_path, xdata); if (op_ret) { gf_log (this->name, GF_LOG_ERROR, "setting ACLs on %s failed (%s)", real_path, strerror (errno)); } - op_ret = posix_entry_create_xattr_set (this, real_path, params); + op_ret = posix_entry_create_xattr_set (this, real_path, xdata); if (op_ret) { gf_log (this->name, GF_LOG_ERROR, "setting xattrs on %s failed (%s)", real_path, strerror (errno)); } +fill_stat: op_ret = posix_fdstat (this, _fd, &stbuf); if (op_ret == -1) { op_errno = errno; @@ -1785,14 +2092,14 @@ out: STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, (loc)?loc->inode:NULL, &stbuf, &preparent, - &postparent); + &postparent, xdata); return 0; } int32_t posix_open (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, fd_t *fd, int wbflags) + loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -1838,8 +2145,6 @@ posix_open (call_frame_t *frame, xlator_t *this, pfd->flags = flags; pfd->fd = _fd; - if (wbflags == GF_OPEN_FSYNC) - pfd->flushwrites = 1; op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd); if (op_ret) @@ -1864,17 +2169,14 @@ out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd); + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, NULL); return 0; } -#define ALIGN_BUF(ptr,bound) ((void *)((unsigned long)(ptr + bound - 1) & \ - (unsigned long)(~(bound - 1)))) - int posix_readv (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t offset, uint32_t flags) + fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -1953,18 +2255,14 @@ posix_readv (call_frame_t *frame, xlator_t *this, } /* Hack to notify higher layers of EOF. */ - if (stbuf.ia_size == 0) - op_errno = ENOENT; - else if ((offset + vec.iov_len) == stbuf.ia_size) - op_errno = ENOENT; - else if (offset > stbuf.ia_size) + if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size) op_errno = ENOENT; op_ret = vec.iov_len; out: STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, - &vec, 1, &stbuf, iobref); + &vec, 1, &stbuf, iobref, NULL); if (iobref) iobref_unref (iobref); @@ -2002,14 +2300,12 @@ err: return op_ret; } - int32_t __posix_writev (int fd, struct iovec *vector, int count, off_t startoff, int odirect) { int32_t op_ret = 0; int idx = 0; - int align = 4096; int max_buf_size = 0; int retval = 0; char *buf = NULL; @@ -2025,7 +2321,7 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff, max_buf_size = vector[idx].iov_len; } - alloc_buf = GF_MALLOC (1 * (max_buf_size + align), gf_posix_mt_char); + alloc_buf = _page_aligned_alloc (max_buf_size, &buf); if (!alloc_buf) { op_ret = -errno; goto err; @@ -2033,9 +2329,6 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff, internal_off = startoff; for (idx = 0; idx < count; idx++) { - /* page aligned buffer */ - buf = ALIGN_BUF (alloc_buf, align); - memcpy (buf, vector[idx].iov_base, vector[idx].iov_len); /* not sure whether writev works on O_DIRECT'd fd */ @@ -2050,17 +2343,58 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff, } err: - if (alloc_buf) - GF_FREE (alloc_buf); + GF_FREE (alloc_buf); return op_ret; } +dict_t* +_fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append) +{ + dict_t *rsp_xdata = NULL; + int32_t ret = 0; + inode_t *inode = NULL; + + if (fd) + inode = fd->inode; + + if (!fd || !fd->inode || uuid_is_null (fd->inode->gfid)) { + gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid Args: " + "fd: %p inode: %p gfid:%s", fd, inode?inode:0, + inode?uuid_utoa(inode->gfid):"N/A"); + goto out; + } + + if (!xdata || !dict_get (xdata, GLUSTERFS_OPEN_FD_COUNT)) + goto out; + + rsp_xdata = dict_new(); + if (!rsp_xdata) + goto out; + + ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_OPEN_FD_COUNT, + fd->inode->fd_count); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set " + "dictionary value for %s", uuid_utoa (fd->inode->gfid), + GLUSTERFS_OPEN_FD_COUNT); + } + + ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND, + is_append); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set " + "dictionary value for %s", uuid_utoa (fd->inode->gfid), + GLUSTERFS_WRITE_IS_APPEND); + } +out: + return rsp_xdata; +} int32_t posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int32_t count, off_t offset, - uint32_t flags, struct iobref *iobref) + uint32_t flags, struct iobref *iobref, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -2070,7 +2404,9 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt preop = {0,}; struct iatt postop = {0,}; int ret = -1; - + dict_t *rsp_xdata = NULL; + int is_append = 0; + gf_boolean_t locked = _gf_false; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -2092,6 +2428,17 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, _fd = pfd->fd; + if (xdata && dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) { + /* The write_is_append check and write must happen + atomically. Else another write can overtake this + write after the check and get written earlier. + + So lock before preop-stat and unlock after write. + */ + locked = _gf_true; + LOCK(&fd->inode->lock); + } + op_ret = posix_fdstat (this, _fd, &preop); if (op_ret == -1) { op_errno = errno; @@ -2101,8 +2448,19 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, goto out; } + if (locked) { + if (preop.ia_size == offset || (fd->flags & O_APPEND)) + is_append = 1; + } + op_ret = __posix_writev (_fd, vector, count, offset, (pfd->flags & O_DIRECT)); + + if (locked) { + UNLOCK (&fd->inode->lock); + locked = _gf_false; + } + if (op_ret < 0) { op_errno = -op_ret; op_ret = -1; @@ -2118,14 +2476,21 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, UNLOCK (&priv->lock); if (op_ret >= 0) { + rsp_xdata = _fill_writev_xdata (fd, xdata, this, is_append); /* wiretv successful, we also need to get the stat of * the file we wrote to */ - if (pfd->flushwrites) { - /* NOTE: ignore the error, if one occurs at this - * point */ - fsync (_fd); + if (flags & (O_SYNC|O_DSYNC)) { + ret = fsync (_fd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "fsync() in writev on fd %d failed: %s", + _fd, strerror (errno)); + op_ret = -1; + op_errno = errno; + goto out; + } } ret = posix_fdstat (this, _fd, &postop); @@ -2141,15 +2506,23 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, out: - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &preop, &postop); + if (locked) { + UNLOCK (&fd->inode->lock); + locked = _gf_false; + } + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &preop, &postop, + rsp_xdata); + + if (rsp_xdata) + dict_unref (rsp_xdata); return 0; } int32_t posix_statfs (call_frame_t *frame, xlator_t *this, - loc_t *loc) + loc_t *loc, dict_t *xdata) { char * real_path = NULL; int32_t op_ret = -1; @@ -2188,14 +2561,14 @@ posix_statfs (call_frame_t *frame, xlator_t *this, op_ret = 0; out: - STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf); + STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf, NULL); return 0; } int32_t posix_flush (call_frame_t *frame, xlator_t *this, - fd_t *fd) + fd_t *fd, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -2217,15 +2590,14 @@ posix_flush (call_frame_t *frame, xlator_t *this, op_ret = 0; out: - STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, NULL); return 0; } int32_t -posix_release (xlator_t *this, - fd_t *fd) +posix_release (xlator_t *this, fd_t *fd) { struct posix_private * priv = NULL; struct posix_fd * pfd = NULL; @@ -2270,9 +2642,36 @@ out: } +int +posix_batch_fsync (call_frame_t *frame, xlator_t *this, + fd_t *fd, int datasync, dict_t *xdata) +{ + call_stub_t *stub = NULL; + struct posix_private *priv = NULL; + + priv = this->private; + + stub = fop_fsync_stub (frame, default_fsync, fd, datasync, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0); + return 0; + } + + pthread_mutex_lock (&priv->fsync_mutex); + { + list_add_tail (&stub->list, &priv->fsyncs); + priv->fsync_queue_count++; + pthread_cond_signal (&priv->fsync_cond); + } + pthread_mutex_unlock (&priv->fsync_mutex); + + return 0; +} + + int32_t posix_fsync (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t datasync) + fd_t *fd, int32_t datasync, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -2281,6 +2680,7 @@ posix_fsync (call_frame_t *frame, xlator_t *this, int ret = -1; struct iatt preop = {0,}; struct iatt postop = {0,}; + struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; @@ -2296,6 +2696,12 @@ posix_fsync (call_frame_t *frame, xlator_t *this, goto out; #endif + priv = this->private; + if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) { + posix_batch_fsync (frame, this, fd, datasync, xdata); + return 0; + } + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; @@ -2350,22 +2756,34 @@ posix_fsync (call_frame_t *frame, xlator_t *this, out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, &preop, &postop); + STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, &preop, &postop, + NULL); return 0; } static int gf_posix_xattr_enotsup_log; +static int +_handle_setxattr_keyvalue_pair (dict_t *d, char *k, data_t *v, + void *tmp) +{ + posix_xattr_filler_t *filler = NULL; + + filler = tmp; + + return posix_handle_pair (filler->this, filler->real_path, k, v, + filler->flags); +} int32_t posix_setxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *dict, int flags) + loc_t *loc, dict_t *dict, int flags, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char * real_path = NULL; - data_pair_t * trav = NULL; - int ret = -1; + + posix_xattr_filler_t filler = {0,}; DECLARE_OLD_FS_ID_VAR; SET_FS_ID (frame->root->uid, frame->root->gid); @@ -2380,27 +2798,69 @@ posix_setxattr (call_frame_t *frame, xlator_t *this, op_ret = -1; dict_del (dict, GFID_XATTR_KEY); - trav = dict->members_list; - - while (trav) { - ret = posix_handle_pair (this, real_path, trav, flags); - if (ret < 0) { - op_errno = -ret; - goto out; - } - trav = trav->next; - } - - op_ret = 0; + filler.real_path = real_path; + filler.this = this; + filler.flags = flags; + op_ret = dict_foreach (dict, _handle_setxattr_keyvalue_pair, + &filler); + if (op_ret < 0) + op_errno = -op_ret; out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL); return 0; } + +int +posix_xattr_get_real_filename (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *key, dict_t *dict, dict_t *xdata) +{ + char *real_path = NULL; + struct dirent *dirent = NULL; + DIR *fd = NULL; + const char *fname = NULL; + char *found = NULL; + int ret = -1; + int op_ret = -1; + + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + + fd = opendir (real_path); + if (!fd) + return -errno; + + fname = key + strlen (GF_XATTR_GET_REAL_FILENAME_KEY); + + while ((dirent = readdir (fd))) { + if (strcasecmp (dirent->d_name, fname) == 0) { + found = gf_strdup (dirent->d_name); + if (!found) { + closedir (fd); + return -ENOMEM; + } + break; + } + } + + closedir (fd); + + if (!found) + return -ENOENT; + + ret = dict_set_dynstr (dict, (char *)key, found); + if (ret) { + GF_FREE (found); + return -ENOMEM; + } + ret = strlen (found) + 1; + + return ret; +} + /** * posix_getxattr - this function returns a dictionary with all the * key:value pair present as xattr. used for @@ -2408,13 +2868,13 @@ out: */ int32_t posix_getxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name) + loc_t *loc, const char *name, dict_t *xdata) { struct posix_private *priv = NULL; int32_t op_ret = -1; int32_t op_errno = 0; int32_t list_offset = 0; - size_t size = 0; + ssize_t size = 0; size_t remaining_size = 0; char key[4096] = {0,}; char host_buf[1024] = {0,}; @@ -2453,12 +2913,31 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, } } - /* Get the total size */ - dict = get_new_dict (); + dict = dict_new (); if (!dict) { + op_errno = ENOMEM; goto out; } + if (loc->inode && name && + (strncmp (name, GF_XATTR_GET_REAL_FILENAME_KEY, + strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)) { + ret = posix_xattr_get_real_filename (frame, this, loc, + name, dict, xdata); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + gf_log (this->name, (op_errno == ENOENT) ? + GF_LOG_DEBUG : GF_LOG_WARNING, + "Failed to get real filename (%s, %s): %s", + loc->path, name, strerror (op_errno)); + goto out; + } + + size = ret; + goto done; + } + if (loc->inode && name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) { if (!list_empty (&loc->inode->fd_list)) { ret = dict_set_uint32 (dict, (char *)name, 1); @@ -2482,8 +2961,13 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, else rpath = real_path; - (void) snprintf (host_buf, 1024, "<POSIX(%s):%s:%s>", - priv->base_path, priv->hostname, rpath); + (void) snprintf (host_buf, 1024, + "<POSIX(%s):%s:%s>", priv->base_path, + ((priv->node_uuid_pathinfo + && !uuid_is_null(priv->glusterd_uuid)) + ? uuid_utoa (priv->glusterd_uuid) + : priv->hostname), + rpath); dyn_rpath = gf_strdup (host_buf); if (!dyn_rpath) { @@ -2497,6 +2981,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, gf_log (this->name, GF_LOG_WARNING, "could not set value (%s) in dictionary", dyn_rpath); + GF_FREE (dyn_rpath); } goto done; @@ -2521,6 +3006,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, gf_log (this->name, GF_LOG_WARNING, "could not set value (%s) in dictionary", dyn_rpath); + GF_FREE (dyn_rpath); } goto done; } @@ -2539,6 +3025,7 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, gf_log (this->name, GF_LOG_WARNING, "could not set value (%s) in dictionary", host_buf); + GF_FREE (path); } goto done; } @@ -2547,24 +3034,49 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, strcpy (key, name); size = sys_lgetxattr (real_path, key, NULL, 0); - if (size == -1) { - op_ret = -1; + if (size <= 0) { op_errno = errno; - goto out; + if ((op_errno == ENOTSUP) || (op_errno == ENOSYS)) { + GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, + this->name, GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting" + " brick with 'user_xattr' " + "flag)"); + } else if (op_errno == ENOATTR || + op_errno == ENODATA) { + gf_log (this->name, GF_LOG_DEBUG, + "No such attribute:%s for file %s", + key, real_path); + } else { + gf_log (this->name, GF_LOG_ERROR, + "getxattr failed on %s: %s (%s)", + real_path, key, strerror (op_errno)); + } + + goto done; } value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); if (!value) { op_ret = -1; goto out; } - op_ret = sys_lgetxattr (real_path, key, value, size); - if (op_ret == -1) { + size = sys_lgetxattr (real_path, key, value, size); + if (size == -1) { + op_ret = -1; op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "getxattr failed on " + "%s: key = %s (%s)", real_path, key, + strerror (op_errno)); + GF_FREE (value); goto out; } - value [op_ret] = '\0'; - op_ret = dict_set_dynptr (dict, key, value, op_ret); + value [size] = '\0'; + op_ret = dict_set_dynptr (dict, key, value, size); if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "dict set operation " + "on %s for the key %s failed.", real_path, key); + GF_FREE (value); goto out; } @@ -2578,7 +3090,9 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, this->name, GF_LOG_WARNING, "Extended attributes not " - "supported."); + "supported (try remounting" + " brick with 'user_xattr' " + "flag)"); } else { gf_log (this->name, GF_LOG_ERROR, @@ -2606,26 +3120,40 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, break; strcpy (key, list + list_offset); - op_ret = sys_lgetxattr (real_path, key, NULL, 0); - if (op_ret == -1) + size = sys_lgetxattr (real_path, key, NULL, 0); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "getxattr failed on " + "%s: key = %s (%s)", real_path, key, + strerror (op_errno)); break; + } - value = GF_CALLOC (op_ret + 1, sizeof(char), + value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); if (!value) { op_errno = errno; goto out; } - op_ret = sys_lgetxattr (real_path, key, value, op_ret); - if (op_ret == -1) { + size = sys_lgetxattr (real_path, key, value, size); + if (size == -1) { + op_ret = -1; op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "getxattr failed on " + "%s: key = %s (%s)", real_path, key, + strerror (op_errno)); + GF_FREE (value); break; } - value [op_ret] = '\0'; - op_ret = dict_set_dynptr (dict, key, value, op_ret); + value [size] = '\0'; + op_ret = dict_set_dynptr (dict, key, value, size); if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "dict set operation " + "on %s for the key %s failed.", real_path, key); + GF_FREE (value); goto out; } @@ -2639,13 +3167,12 @@ done: if (dict) { dict_del (dict, GFID_XATTR_KEY); - dict_ref (dict); } out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict); + STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, NULL); if (dict) dict_unref (dict); @@ -2656,14 +3183,14 @@ out: int32_t posix_fgetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *name) + fd_t *fd, const char *name, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = ENOENT; struct posix_fd * pfd = NULL; int _fd = -1; int32_t list_offset = 0; - size_t size = 0; + ssize_t size = 0; size_t remaining_size = 0; char key[4096] = {0,}; char * value = NULL; @@ -2708,19 +3235,34 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this, strcpy (key, name); size = sys_fgetxattr (_fd, key, NULL, 0); + if (size <= 0) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on " + "key %s (%s)", key, strerror (op_errno)); + goto done; + } + value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); if (!value) { op_ret = -1; goto out; } - op_ret = sys_fgetxattr (_fd, key, value, op_ret); - if (op_ret == -1) { + size = sys_fgetxattr (_fd, key, value, size); + if (size == -1) { + op_ret = -1; op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on " + "fd %p for the key %s (%s)", fd, key, + strerror (op_errno)); + GF_FREE (value); goto out; } - value [op_ret] = '\0'; - op_ret = dict_set_dynptr (dict, key, value, op_ret); + value [size] = '\0'; + op_ret = dict_set_dynptr (dict, key, value, size); if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "dict set operation " + "on key %s failed", key); + GF_FREE (value); goto out; } goto done; @@ -2733,7 +3275,8 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this, GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, this->name, GF_LOG_WARNING, "Extended attributes not " - "supported."); + "supported (try remounting " + "brick with 'user_xattr' flag)"); } else { gf_log (this->name, GF_LOG_ERROR, @@ -2761,24 +3304,41 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this, break; strcpy (key, list + list_offset); - op_ret = sys_fgetxattr (_fd, key, NULL, 0); - if (op_ret == -1) + size = sys_fgetxattr (_fd, key, NULL, 0); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on " + "fd %p for the key %s (%s)", fd, key, + strerror (op_errno)); break; + } - value = GF_CALLOC (op_ret + 1, sizeof(char), + value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); if (!value) { + op_ret = -1; op_errno = errno; goto out; } - op_ret = sys_fgetxattr (_fd, key, value, op_ret); - if (op_ret == -1) + size = sys_fgetxattr (_fd, key, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on " + "the fd %p for the key %s (%s)", fd, key, + strerror (op_errno)); + GF_FREE (value); break; + } - value [op_ret] = '\0'; - op_ret = dict_set_dynptr (dict, key, value, op_ret); + value [size] = '\0'; + op_ret = dict_set_dynptr (dict, key, value, size); if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, "dict set operation " + "failed on key %s", key); + GF_FREE (value); goto out; } remaining_size -= strlen (key) + 1; @@ -2797,7 +3357,7 @@ done: out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict); + STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL); if (dict) dict_unref (dict); @@ -2805,17 +3365,29 @@ out: return 0; } +static int +_handle_fsetxattr_keyvalue_pair (dict_t *d, char *k, data_t *v, + void *tmp) +{ + posix_xattr_filler_t *filler = NULL; + + filler = tmp; + + return posix_fhandle_pair (filler->this, filler->fd, k, v, + filler->flags); +} int32_t posix_fsetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *dict, int flags) + fd_t *fd, dict_t *dict, int flags, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; struct posix_fd * pfd = NULL; int _fd = -1; - data_pair_t * trav = NULL; - int ret = -1; + int ret = -1; + + posix_xattr_filler_t filler = {0,}; DECLARE_OLD_FS_ID_VAR; SET_FS_ID (frame->root->uid, frame->root->gid); @@ -2836,35 +3408,53 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this, dict_del (dict, GFID_XATTR_KEY); - trav = dict->members_list; - - while (trav) { - ret = posix_fhandle_pair (this, _fd, trav, flags); - if (ret < 0) { - op_errno = -ret; - goto out; - } - trav = trav->next; - } - - op_ret = 0; + filler.fd = _fd; + filler.this = this; + filler.flags = flags; + op_ret = dict_foreach (dict, _handle_fsetxattr_keyvalue_pair, + &filler); + if (op_ret < 0) + op_errno = -op_ret; out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL); return 0; } +int +_posix_remove_xattr (dict_t *dict, char *key, data_t *value, void *data) +{ + int32_t op_ret = 0; + xlator_t *this = NULL; + posix_xattr_filler_t *filler = NULL; + + filler = (posix_xattr_filler_t *) data; + this = filler->this; + + op_ret = sys_lremovexattr (filler->real_path, key); + if (op_ret == -1) { + filler->op_errno = errno; + if (errno != ENOATTR && errno != EPERM) + gf_log (this->name, GF_LOG_ERROR, + "removexattr failed on %s (for %s): %s", + filler->real_path, key, strerror (errno)); + } + + return op_ret; +} + int32_t posix_removexattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name) + loc_t *loc, const char *name, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char * real_path = NULL; + posix_xattr_filler_t filler = {0,}; DECLARE_OLD_FS_ID_VAR; @@ -2880,6 +3470,22 @@ posix_removexattr (call_frame_t *frame, xlator_t *this, SET_FS_ID (frame->root->uid, frame->root->gid); + /** + * sending an empty key name with xdata containing the + * list of key(s) to be removed implies "bulk remove request" + * for removexattr. + */ + if (name && (strcmp (name, "") == 0) && xdata) { + filler.real_path = real_path; + filler.this = this; + op_ret = dict_foreach (xdata, _posix_remove_xattr, &filler); + if (op_ret) { + op_errno = filler.op_errno; + } + + goto out; + } + op_ret = sys_lremovexattr (real_path, name); if (op_ret == -1) { op_errno = errno; @@ -2895,19 +3501,18 @@ posix_removexattr (call_frame_t *frame, xlator_t *this, out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, NULL); return 0; } int32_t posix_fremovexattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *name) + fd_t *fd, const char *name, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; struct posix_fd * pfd = NULL; int _fd = -1; - uint64_t tmp_pfd = 0; int ret = -1; DECLARE_OLD_FS_ID_VAR; @@ -2918,15 +3523,13 @@ posix_fremovexattr (call_frame_t *frame, xlator_t *this, goto out; } - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; gf_log (this->name, GF_LOG_WARNING, "pfd is NULL from fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; - _fd = pfd->fd; @@ -2948,14 +3551,14 @@ posix_fremovexattr (call_frame_t *frame, xlator_t *this, out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, NULL); return 0; } int32_t posix_fsyncdir (call_frame_t *frame, xlator_t *this, - fd_t *fd, int datasync) + fd_t *fd, int datasync, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -2977,7 +3580,7 @@ posix_fsyncdir (call_frame_t *frame, xlator_t *this, op_ret = 0; out: - STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, NULL); return 0; } @@ -3004,9 +3607,31 @@ posix_print_xattr (dict_t *this, static void __add_array (int32_t *dest, int32_t *src, int count) { + int i = 0; + int32_t destval = 0; + for (i = 0; i < count; i++) { + destval = ntoh32 (dest[i]); + if (destval == 0xffffffff) + continue; + dest[i] = hton32 (destval + ntoh32 (src[i])); + } +} + +static void +__or_array (int32_t *dest, int32_t *src, int count) +{ + int i = 0; + for (i = 0; i < count; i++) { + dest[i] = hton32 (ntoh32 (dest[i]) | ntoh32 (src[i])); + } +} + +static void +__and_array (int32_t *dest, int32_t *src, int count) +{ int i = 0; for (i = 0; i < count; i++) { - dest[i] = hton32 (ntoh32 (dest[i]) + ntoh32 (src[i])); + dest[i] = hton32 (ntoh32 (dest[i]) & ntoh32 (src[i])); } } @@ -3019,6 +3644,159 @@ __add_long_array (int64_t *dest, int64_t *src, int count) } } +static int +_posix_handle_xattr_keyvalue_pair (dict_t *d, char *k, data_t *v, + void *tmp) +{ + int size = 0; + int count = 0; + int op_ret = 0; + int op_errno = 0; + gf_xattrop_flags_t optype = 0; + char *array = NULL; + inode_t *inode = NULL; + xlator_t *this = NULL; + posix_xattr_filler_t *filler = NULL; + + filler = tmp; + + optype = (gf_xattrop_flags_t)(filler->flags); + this = filler->this; + inode = filler->inode; + + count = v->len; + array = GF_CALLOC (count, sizeof (char), gf_posix_mt_char); + + LOCK (&inode->lock); + { + if (filler->real_path) { + size = sys_lgetxattr (filler->real_path, k, + (char *)array, v->len); + } else { + size = sys_fgetxattr (filler->fd, k, (char *)array, + v->len); + } + + op_errno = errno; + if ((size == -1) && (op_errno != ENODATA) && + (op_errno != ENOATTR)) { + if (op_errno == ENOTSUP) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, + this->name, GF_LOG_WARNING, + "Extended attributes not " + "supported by filesystem"); + } else if (op_errno != ENOENT || + !posix_special_xattr (marker_xattrs, + k)) { + if (filler->real_path) + gf_log (this->name, GF_LOG_ERROR, + "getxattr failed on %s while doing " + "xattrop: Key:%s (%s)", + filler->real_path, + k, strerror (op_errno)); + else + gf_log (this->name, GF_LOG_ERROR, + "fgetxattr failed on fd=%d while doing " + "xattrop: Key:%s (%s)", + filler->fd, + k, strerror (op_errno)); + } + + op_ret = -1; + goto unlock; + } + + switch (optype) { + + case GF_XATTROP_ADD_ARRAY: + __add_array ((int32_t *) array, (int32_t *) v->data, + v->len / 4); + break; + + case GF_XATTROP_ADD_ARRAY64: + __add_long_array ((int64_t *) array, (int64_t *) v->data, + v->len / 8); + break; + + case GF_XATTROP_OR_ARRAY: + __or_array ((int32_t *) array, + (int32_t *) v->data, + v->len / 4); + break; + + case GF_XATTROP_AND_ARRAY: + __and_array ((int32_t *) array, + (int32_t *) v->data, + v->len / 4); + break; + + default: + gf_log (this->name, GF_LOG_ERROR, + "Unknown xattrop type (%d) on %s. Please send " + "a bug report to gluster-devel@nongnu.org", + optype, filler->real_path); + op_ret = -1; + op_errno = EINVAL; + goto unlock; + } + + if (filler->real_path) { + size = sys_lsetxattr (filler->real_path, k, array, + v->len, 0); + } else { + size = sys_fsetxattr (filler->fd, k, (char *)array, + v->len, 0); + } + } +unlock: + UNLOCK (&inode->lock); + + if (op_ret == -1) + goto out; + + op_errno = errno; + if (size == -1) { + if (filler->real_path) + gf_log (this->name, GF_LOG_ERROR, + "setxattr failed on %s while doing xattrop: " + "key=%s (%s)", filler->real_path, + k, strerror (op_errno)); + else + gf_log (this->name, GF_LOG_ERROR, + "fsetxattr failed on fd=%d while doing xattrop: " + "key=%s (%s)", filler->fd, + k, strerror (op_errno)); + + op_ret = -1; + goto out; + } else { + size = dict_set_bin (d, k, array, v->len); + + if (size != 0) { + if (filler->real_path) + gf_log (this->name, GF_LOG_DEBUG, + "dict_set_bin failed (path=%s): " + "key=%s (%s)", filler->real_path, + k, strerror (-size)); + else + gf_log (this->name, GF_LOG_DEBUG, + "dict_set_bin failed (fd=%d): " + "key=%s (%s)", filler->fd, + k, strerror (-size)); + + op_ret = -1; + op_errno = EINVAL; + goto out; + } + array = NULL; + } + + array = NULL; + +out: + return op_ret; +} + /** * xattrop - xattr operations - for internal use by GlusterFS * @optype: ADD_ARRAY: @@ -3027,39 +3805,27 @@ __add_long_array (int64_t *dest, int64_t *src, int count) */ int -do_xattrop (call_frame_t *frame, xlator_t *this, - loc_t *loc, fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr) +do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr) { - char *real_path = NULL; - char *array = NULL; - int size = 0; - int count = 0; - - int op_ret = 0; - int op_errno = 0; - - int ret = 0; - int _fd = -1; - struct posix_fd *pfd = NULL; - - data_pair_t *trav = NULL; - - char * path = NULL; - inode_t * inode = NULL; + int op_ret = 0; + int op_errno = 0; + int _fd = -1; + char *real_path = NULL; + struct posix_fd *pfd = NULL; + inode_t *inode = NULL; + posix_xattr_filler_t filler = {0,}; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (xattr, out); VALIDATE_OR_GOTO (this, out); - trav = xattr->members_list; - if (fd) { - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) { + op_ret = posix_fd_ctx_get (fd, this, &pfd); + if (op_ret < 0) { gf_log (this->name, GF_LOG_WARNING, "failed to get pfd from fd=%p", fd); - op_ret = -1; op_errno = EBADFD; goto out; } @@ -3070,145 +3836,30 @@ do_xattrop (call_frame_t *frame, xlator_t *this, MAKE_INODE_HANDLE (real_path, this, loc, NULL); if (real_path) { - path = gf_strdup (real_path); inode = loc->inode; } else if (fd) { inode = fd->inode; } - while (trav && inode) { - count = trav->value->len; - array = GF_CALLOC (count, sizeof (char), - gf_posix_mt_char); - - LOCK (&inode->lock); - { - if (loc) { - size = sys_lgetxattr (real_path, trav->key, (char *)array, - trav->value->len); - } else { - size = sys_fgetxattr (_fd, trav->key, (char *)array, - trav->value->len); - } - - op_errno = errno; - if ((size == -1) && (op_errno != ENODATA) && - (op_errno != ENOATTR)) { - if (op_errno == ENOTSUP) { - GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, - this->name,GF_LOG_WARNING, - "Extended attributes not " - "supported by filesystem"); - } else { - if (loc) - gf_log (this->name, GF_LOG_ERROR, - "getxattr failed on %s while doing " - "xattrop: Key:%s (%s)", path, - trav->key, strerror (op_errno)); - else - gf_log (this->name, GF_LOG_ERROR, - "fgetxattr failed on fd=%d while doing " - "xattrop: Key:%s (%s)", _fd, - trav->key, strerror (op_errno)); - } - - op_ret = -1; - goto unlock; - } - - switch (optype) { + filler.this = this; + filler.fd = _fd; + filler.real_path = real_path; + filler.flags = (int)optype; + filler.inode = inode; - case GF_XATTROP_ADD_ARRAY: - __add_array ((int32_t *) array, (int32_t *) trav->value->data, - trav->value->len / 4); - break; - - case GF_XATTROP_ADD_ARRAY64: - __add_long_array ((int64_t *) array, (int64_t *) trav->value->data, - trav->value->len / 8); - break; - - default: - gf_log (this->name, GF_LOG_ERROR, - "Unknown xattrop type (%d) on %s. Please send " - "a bug report to gluster-devel@nongnu.org", - optype, path); - op_ret = -1; - op_errno = EINVAL; - goto unlock; - } - - if (loc) { - size = sys_lsetxattr (real_path, trav->key, array, - trav->value->len, 0); - } else { - size = sys_fsetxattr (_fd, trav->key, (char *)array, - trav->value->len, 0); - } - } - unlock: - UNLOCK (&inode->lock); - - if (op_ret == -1) - goto out; - - op_errno = errno; - if (size == -1) { - if (loc) - gf_log (this->name, GF_LOG_ERROR, - "setxattr failed on %s while doing xattrop: " - "key=%s (%s)", path, - trav->key, strerror (op_errno)); - else - gf_log (this->name, GF_LOG_ERROR, - "fsetxattr failed on fd=%d while doing xattrop: " - "key=%s (%s)", _fd, - trav->key, strerror (op_errno)); - - op_ret = -1; - goto out; - } else { - size = dict_set_bin (xattr, trav->key, array, - trav->value->len); - - if (size != 0) { - if (loc) - gf_log (this->name, GF_LOG_DEBUG, - "dict_set_bin failed (path=%s): " - "key=%s (%s)", path, - trav->key, strerror (-size)); - else - gf_log (this->name, GF_LOG_DEBUG, - "dict_set_bin failed (fd=%d): " - "key=%s (%s)", _fd, - trav->key, strerror (-size)); - - op_ret = -1; - op_errno = EINVAL; - goto out; - } - array = NULL; - } - - array = NULL; - trav = trav->next; - } + op_ret = dict_foreach (xattr, _posix_handle_xattr_keyvalue_pair, + &filler); out: - if (array) - GF_FREE (array); - - if (path) - GF_FREE (path); - STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr); + STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr, NULL); return 0; } int posix_xattrop (call_frame_t *frame, xlator_t *this, - loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr) + loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { do_xattrop (frame, this, loc, NULL, optype, xattr); return 0; @@ -3217,7 +3868,7 @@ posix_xattrop (call_frame_t *frame, xlator_t *this, int posix_fxattrop (call_frame_t *frame, xlator_t *this, - fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr) + fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { do_xattrop (frame, this, NULL, fd, optype, xattr); return 0; @@ -3226,7 +3877,7 @@ posix_fxattrop (call_frame_t *frame, xlator_t *this, int posix_access (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t mask) + loc_t *loc, int32_t mask, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -3253,14 +3904,14 @@ posix_access (call_frame_t *frame, xlator_t *this, out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (access, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, NULL); return 0; } int32_t posix_ftruncate (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset) + fd_t *fd, off_t offset, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -3324,7 +3975,8 @@ posix_ftruncate (call_frame_t *frame, xlator_t *this, out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, &preop, &postop); + STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, &preop, + &postop, NULL); return 0; } @@ -3332,7 +3984,7 @@ out: int32_t posix_fstat (call_frame_t *frame, xlator_t *this, - fd_t *fd) + fd_t *fd, dict_t *xdata) { int _fd = -1; int32_t op_ret = -1; @@ -3375,7 +4027,7 @@ posix_fstat (call_frame_t *frame, xlator_t *this, out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, &buf); + STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, &buf, NULL); return 0; } @@ -3383,7 +4035,7 @@ static int gf_posix_lk_log; int32_t posix_lk (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t cmd, struct gf_flock *lock) + fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata) { struct gf_flock nullock = {0, }; @@ -3392,33 +4044,35 @@ posix_lk (call_frame_t *frame, xlator_t *this, "not loaded. You need to use it for proper " "functioning of your application."); - STACK_UNWIND_STRICT (lk, frame, -1, ENOSYS, &nullock); + STACK_UNWIND_STRICT (lk, frame, -1, ENOSYS, &nullock, NULL); return 0; } int32_t posix_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock) + const char *volume, loc_t *loc, int32_t cmd, + struct gf_flock *lock, dict_t *xdata) { GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, "\"features/locks\" translator is " "not loaded. You need to use it for proper " "functioning of your application."); - STACK_UNWIND_STRICT (inodelk, frame, -1, ENOSYS); + STACK_UNWIND_STRICT (inodelk, frame, -1, ENOSYS, NULL); return 0; } int32_t posix_finodelk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock) + const char *volume, fd_t *fd, int32_t cmd, + struct gf_flock *lock, dict_t *xdata) { GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, "\"features/locks\" translator is " "not loaded. You need to use it for proper " "functioning of your application."); - STACK_UNWIND_STRICT (finodelk, frame, -1, ENOSYS); + STACK_UNWIND_STRICT (finodelk, frame, -1, ENOSYS, NULL); return 0; } @@ -3426,35 +4080,35 @@ posix_finodelk (call_frame_t *frame, xlator_t *this, int32_t posix_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type) + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, "\"features/locks\" translator is " "not loaded. You need to use it for proper " "functioning of your application."); - STACK_UNWIND_STRICT (entrylk, frame, -1, ENOSYS); + STACK_UNWIND_STRICT (entrylk, frame, -1, ENOSYS, NULL); return 0; } int32_t posix_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, const char *basename, - entrylk_cmd cmd, entrylk_type type) + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, "\"features/locks\" translator is " "not loaded. You need to use it for proper " "functioning of your application."); - STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOSYS); + STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOSYS, NULL); return 0; } int posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, - gf_dirent_t *entries) + gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs) { off_t in_case = -1; size_t filled = 0; @@ -3464,6 +4118,18 @@ posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, int32_t this_size = -1; gf_dirent_t *this_entry = NULL; uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; + struct stat stbuf = {0,}; + char *hpath = NULL; + int len = 0; + int ret = 0; + + if (skip_dirs) { + len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0); + hpath = alloca (len + 256); /* NAME_MAX */ + posix_handle_path (this, fd->inode->gfid, NULL, hpath, len); + len = strlen (hpath); + hpath[len] = '/'; + } if (!off) { rewinddir (dir); @@ -3495,10 +4161,6 @@ posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, break; } - if ((uuid_compare (fd->inode->gfid, rootgfid) == 0) - && (!strcmp (entry->d_name, GF_REPLICATE_TRASH_DIR))) - continue; - #ifdef __NetBSD__ /* * NetBSD with UFS1 backend uses backing files for @@ -3518,6 +4180,17 @@ posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, continue; } + if (skip_dirs) { + if (DT_ISDIR (entry->d_type)) { + continue; + } else if (hpath) { + strcpy (&hpath[len+1],entry->d_name); + ret = lstat (hpath, &stbuf); + if (!ret && S_ISDIR (stbuf.st_mode)) + continue; + } + } + this_size = max (sizeof (gf_dirent_t), sizeof (gfs3_dirplist)) + strlen (entry->d_name) + 1; @@ -3537,6 +4210,7 @@ posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, } this_entry->d_off = telldir (dir); this_entry->d_ino = entry->d_ino; + this_entry->d_type = entry->d_type; list_add_tail (&this_entry->list, &entries->list); @@ -3569,6 +4243,66 @@ posix_entry_xattr_fill (xlator_t *this, inode_t *inode, } + +int +posix_readdirp_fill (xlator_t *this, fd_t *fd, gf_dirent_t *entries, dict_t *dict) +{ + gf_dirent_t *entry = NULL; + inode_table_t *itable = NULL; + inode_t *inode = NULL; + char *hpath = NULL; + int len = 0; + struct iatt stbuf = {0, }; + uuid_t gfid; + + if (list_empty(&entries->list)) + return 0; + + itable = fd->inode->table; + + len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0); + hpath = alloca (len + 256); /* NAME_MAX */ + posix_handle_path (this, fd->inode->gfid, NULL, hpath, len); + len = strlen (hpath); + hpath[len] = '/'; + + list_for_each_entry (entry, &entries->list, list) { + memset (gfid, 0, 16); + inode = inode_grep (fd->inode->table, fd->inode, + entry->d_name); + if (inode) + uuid_copy (gfid, inode->gfid); + + strcpy (&hpath[len+1], entry->d_name); + + posix_pstat (this, gfid, hpath, &stbuf); + + if (!inode) + inode = inode_find (itable, stbuf.ia_gfid); + + if (!inode) + inode = inode_new (itable); + + entry->inode = inode; + + if (dict) { + entry->dict = + posix_entry_xattr_fill (this, entry->inode, + fd, entry->d_name, + dict, &stbuf); + dict_ref (entry->dict); + } + + entry->d_stat = stbuf; + if (stbuf.ia_ino) + entry->d_ino = stbuf.ia_ino; + inode = NULL; + } + + return 0; +} + + int32_t posix_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off, int whichop, dict_t *dict) @@ -3580,13 +4314,8 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this, int32_t op_ret = -1; int32_t op_errno = 0; gf_dirent_t entries; - struct iatt stbuf = {0, }; - gf_dirent_t *tmp_entry = NULL; - inode_table_t *itable = NULL; -#ifdef IGNORE_READDIRP_ATTRS - uuid_t gfid; - ia_type_t entry_type = 0; -#endif + int32_t skip_dirs = 0; + VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -3609,9 +4338,30 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this, "dir is NULL for fd=%p", fd); op_errno = EINVAL; goto out; - } + } - count = posix_fill_readdir (fd, dir, off, size, &entries); + /* When READDIR_FILTER option is set to on, we can filter out + * directory's entry from the entry->list. + */ + ret = dict_get_int32 (dict, GF_READDIR_SKIP_DIRS, &skip_dirs); + + LOCK (&fd->lock); + { + /* posix_fill_readdir performs multiple separate individual + readdir() calls to fill up the buffer. + + In case of NFS where the same anonymous FD is shared between + different applications, reading a common directory can + result in the anonymous fd getting re-used unsafely between + the two readdir requests (in two different io-threads). + + It would also help, in the future, to replace the loop + around readdir() with a single large getdents() call. + */ + count = posix_fill_readdir (fd, dir, off, size, &entries, this, + skip_dirs); + } + UNLOCK (&fd->lock); /* pick ENOENT to indicate EOF */ op_errno = errno; @@ -3620,46 +4370,10 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this, if (whichop != GF_FOP_READDIRP) goto out; - itable = fd->inode->table; - - list_for_each_entry (tmp_entry, &entries.list, list) { -#ifdef IGNORE_READDIRP_ATTRS - ret = inode_grep_for_gfid (fd->inode->table, fd->inode, - tmp_entry->d_name, gfid, - &entry_type); - if (ret == 0) { - memset (&stbuf, 0, sizeof (stbuf)); - uuid_copy (stbuf.ia_gfid, gfid); - posix_fill_ino_from_gfid (this, &stbuf); - stbuf.ia_type = entry_type; - } else { - posix_istat (this, fd->inode->gfid, - tmp_entry->d_name, &stbuf); - } -#else - posix_istat (this, fd->inode->gfid, - tmp_entry->d_name, &stbuf); -#endif - if (stbuf.ia_ino) - tmp_entry->d_ino = stbuf.ia_ino; - - if (dict) { - tmp_entry->inode = inode_find (itable, stbuf.ia_gfid); - if (!tmp_entry->inode) - tmp_entry->inode = inode_new (itable); - - tmp_entry->dict = - posix_entry_xattr_fill (this, tmp_entry->inode, - fd, tmp_entry->d_name, - dict, &stbuf); - dict_ref (tmp_entry->dict); - } - - tmp_entry->d_stat = stbuf; - } + posix_readdirp_fill (this, fd, &entries, dict); out: - STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries); + STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, NULL); gf_dirent_free (&entries); @@ -3669,9 +4383,9 @@ out: int32_t posix_readdir (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off) + fd_t *fd, size_t size, off_t off, dict_t *xdata) { - posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR, NULL); + posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR, xdata); return 0; } @@ -3720,30 +4434,28 @@ posix_inode (xlator_t *this) int32_t posix_rchecksum (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset, int32_t len) + fd_t *fd, off_t offset, int32_t len, dict_t *xdata) { - char *buf = NULL; - - int _fd = -1; - - struct posix_fd *pfd = NULL; - - int op_ret = -1; - int op_errno = 0; - - int ret = 0; - - int32_t weak_checksum = 0; - uint8_t strong_checksum[MD5_DIGEST_LEN]; + char *alloc_buf = NULL; + char *buf = NULL; + int _fd = -1; + struct posix_fd *pfd = NULL; + int op_ret = -1; + int op_errno = 0; + int ret = 0; + int32_t weak_checksum = 0; + unsigned char strong_checksum[MD5_DIGEST_LENGTH] = {0}; + struct posix_private *priv = NULL; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); - memset (strong_checksum, 0, MD5_DIGEST_LEN); - buf = GF_CALLOC (1, len, gf_posix_mt_char); + priv = this->private; + memset (strong_checksum, 0, MD5_DIGEST_LENGTH); - if (!buf) { + alloc_buf = _page_aligned_alloc (len, &buf); + if (!alloc_buf) { op_errno = ENOMEM; goto out; } @@ -3758,25 +4470,36 @@ posix_rchecksum (call_frame_t *frame, xlator_t *this, _fd = pfd->fd; - ret = pread (_fd, buf, len, offset); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "pread of %d bytes returned %d (%s)", - len, ret, strerror (errno)); + LOCK (&fd->lock); + { + if (priv->aio_capable && priv->aio_init_done) + __posix_fd_set_odirect (fd, pfd, 0, offset, len); + + ret = pread (_fd, buf, len, offset); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "pread of %d bytes returned %d (%s)", + len, ret, strerror (errno)); + + op_errno = errno; + } - op_errno = errno; - goto out; } + UNLOCK (&fd->lock); - weak_checksum = gf_rsync_weak_checksum (buf, len); - gf_rsync_strong_checksum (buf, len, strong_checksum); + if (ret < 0) + goto out; - GF_FREE (buf); + weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf, (size_t) len); + gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) len, (unsigned char *) strong_checksum); op_ret = 0; out: STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, - weak_checksum, strong_checksum); + weak_checksum, strong_checksum, NULL); + + GF_FREE (alloc_buf); + return 0; } @@ -3824,6 +4547,99 @@ mem_acct_init (xlator_t *this) return ret; } +static int +posix_set_owner (xlator_t *this, uid_t uid, gid_t gid) +{ + struct posix_private *priv = NULL; + int ret = -1; + + priv = this->private; + + ret = sys_chown (priv->base_path, uid, gid); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "uid/gid for brick path %s, %s", + priv->base_path, strerror (errno)); + + return ret; +} + + +static int +set_batch_fsync_mode (struct posix_private *priv, const char *str) +{ + if (strcmp (str, "none") == 0) + priv->batch_fsync_mode = BATCH_NONE; + else if (strcmp (str, "syncfs") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS; + else if (strcmp (str, "syncfs-single-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_SINGLE_FSYNC; + else if (strcmp (str, "syncfs-reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_REVERSE_FSYNC; + else if (strcmp (str, "reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_REVERSE_FSYNC; + else + return -1; + + return 0; +} + + +int +reconfigure (xlator_t *this, dict_t *options) +{ + int ret = -1; + struct posix_private *priv = NULL; + uid_t uid = -1; + gid_t gid = -1; + char *batch_fsync_mode_str = NULL; + + priv = this->private; + + GF_OPTION_RECONF ("brick-uid", uid, options, uint32, out); + GF_OPTION_RECONF ("brick-gid", gid, options, uint32, out); + posix_set_owner (this, uid, gid); + + GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec, + options, uint32, out); + + GF_OPTION_RECONF ("batch-fsync-mode", batch_fsync_mode_str, + options, str, out); + + if (set_batch_fsync_mode (priv, batch_fsync_mode_str) != 0) { + gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s", + batch_fsync_mode_str); + goto out; + } + + GF_OPTION_RECONF ("linux-aio", priv->aio_configured, + options, bool, out); + + if (priv->aio_configured) + posix_aio_on (this); + else + posix_aio_off (this); + + GF_OPTION_RECONF ("node-uuid-pathinfo", priv->node_uuid_pathinfo, + options, bool, out); + + if (priv->node_uuid_pathinfo && + (uuid_is_null (priv->glusterd_uuid))) { + gf_log (this->name, GF_LOG_INFO, + "glusterd uuid is NULL, pathinfo xattr would" + " fallback to <hostname>:<export>"); + } + + GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval, + options, uint32, out); + posix_spawn_health_check_thread (this); + + ret = 0; +out: + return ret; +} + + /** * init - */ @@ -3838,12 +4654,16 @@ init (xlator_t *this) int dict_ret = 0; int ret = 0; int op_ret = -1; + ssize_t size = -1; int32_t janitor_sleep = 0; uuid_t old_uuid = {0,}; uuid_t dict_uuid = {0,}; uuid_t gfid = {0,}; uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; char *guuid = NULL; + uid_t uid = -1; + gid_t gid = -1; + char *batch_fsync_mode_str; dir_data = dict_get (this->options, "directory"); @@ -3924,33 +4744,31 @@ init (xlator_t *this) ret = -1; goto out; } - op_ret = sys_lgetxattr (dir_data->data, - "trusted.glusterfs.volume-id", old_uuid, 16); - if (op_ret == 16) { + size = sys_lgetxattr (dir_data->data, + "trusted.glusterfs.volume-id", old_uuid, 16); + if (size == 16) { if (uuid_compare (old_uuid, dict_uuid)) { gf_log (this->name, GF_LOG_ERROR, - "mismatching volume-id (%s) recieved. " + "mismatching volume-id (%s) received. " "already is a part of volume %s ", tmp_data->data, uuid_utoa (old_uuid)); ret = -1; goto out; } - } else if ((op_ret == -1) && (errno == ENODATA)) { - /* Using the export for first time */ - op_ret = sys_lsetxattr (dir_data->data, - "trusted.glusterfs.volume-id", - dict_uuid, 16, 0); - if (op_ret == -1) { + } else if ((size == -1) && (errno == ENODATA)) { + gf_log (this->name, GF_LOG_ERROR, - "failed to set volume id on export"); + "Extended attribute trusted.glusterfs." + "volume-id is absent"); ret = -1; goto out; - } - } else if ((op_ret == -1) && (errno != ENODATA)) { + + } else if ((size == -1) && (errno != ENODATA)) { /* Wrong 'volume-id' is set, it should be error */ gf_log (this->name, GF_LOG_WARNING, "%s: failed to fetch volume-id (%s)", dir_data->data, strerror (errno)); + ret = -1; goto out; } else { ret = -1; @@ -3962,8 +4780,8 @@ init (xlator_t *this) /* Now check if the export directory has some other 'gfid', other than that of root '/' */ - ret = sys_lgetxattr (dir_data->data, "trusted.gfid", gfid, 16); - if (ret == 16) { + size = sys_lgetxattr (dir_data->data, "trusted.gfid", gfid, 16); + if (size == 16) { if (!__is_root_gfid (gfid)) { gf_log (this->name, GF_LOG_WARNING, "%s: gfid (%s) is not that of glusterfs '/' ", @@ -3971,34 +4789,36 @@ init (xlator_t *this) ret = -1; goto out; } - } else if (ret != -1) { + } else if (size != -1) { /* Wrong 'gfid' is set, it should be error */ gf_log (this->name, GF_LOG_WARNING, "%s: wrong value set as gfid", dir_data->data); ret = -1; goto out; - } else if ((ret == -1) && (errno != ENODATA)) { + } else if ((size == -1) && (errno != ENODATA)) { /* Wrong 'gfid' is set, it should be error */ gf_log (this->name, GF_LOG_WARNING, "%s: failed to fetch gfid (%s)", dir_data->data, strerror (errno)); + ret = -1; goto out; } else { /* First time volume, set the GFID */ - ret = sys_lsetxattr (dir_data->data, "trusted.gfid", rootgfid, + size = sys_lsetxattr (dir_data->data, "trusted.gfid", rootgfid, 16, XATTR_CREATE); - if (ret) { + if (size) { gf_log (this->name, GF_LOG_ERROR, "%s: failed to set gfid (%s)", dir_data->data, strerror (errno)); + ret = -1; goto out; } } - op_ret = sys_lgetxattr (dir_data->data, "system.posix_acl_access", - NULL, 0); - if ((op_ret < 0) && (errno == ENOTSUP)) + size = sys_lgetxattr (dir_data->data, POSIX_ACL_ACCESS_XATTR, + NULL, 0); + if ((size < 0) && (errno == ENOTSUP)) gf_log (this->name, GF_LOG_WARNING, "Posix access control list is not supported."); @@ -4013,20 +4833,6 @@ init (xlator_t *this) _private->base_path = gf_strdup (dir_data->data); _private->base_path_length = strlen (_private->base_path); - _private->trash_path = GF_CALLOC (1, _private->base_path_length - + strlen ("/") - + strlen (GF_REPLICATE_TRASH_DIR) - + 1, - gf_posix_mt_trash_path); - - if (!_private->trash_path) { - ret = -1; - goto out; - } - - strncpy (_private->trash_path, _private->base_path, _private->base_path_length); - strcat (_private->trash_path, "/" GF_REPLICATE_TRASH_DIR); - LOCK_INIT (&_private->lock); ret = dict_get_str (this->options, "hostname", &_private->hostname); @@ -4162,11 +4968,76 @@ init (xlator_t *this) goto out; } + op_ret = posix_handle_trash_init (this); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Posix landfill setup failed"); + ret = -1; + goto out; + } + + _private->aio_init_done = _gf_false; + _private->aio_capable = _gf_false; + + GF_OPTION_INIT ("brick-uid", uid, uint32, out); + GF_OPTION_INIT ("brick-gid", gid, uint32, out); + posix_set_owner (this, uid, gid); + + GF_OPTION_INIT ("linux-aio", _private->aio_configured, bool, out); + + if (_private->aio_configured) { + op_ret = posix_aio_on (this); + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "Posix AIO init failed"); + ret = -1; + goto out; + } + } + + GF_OPTION_INIT ("node-uuid-pathinfo", + _private->node_uuid_pathinfo, bool, out); + if (_private->node_uuid_pathinfo && + (uuid_is_null (_private->glusterd_uuid))) { + gf_log (this->name, GF_LOG_INFO, + "glusterd uuid is NULL, pathinfo xattr would" + " fallback to <hostname>:<export>"); + } + + _private->health_check_active = _gf_false; + GF_OPTION_INIT ("health-check-interval", + _private->health_check_interval, uint32, out); + if (_private->health_check_interval) + posix_spawn_health_check_thread (this); + pthread_mutex_init (&_private->janitor_lock, NULL); pthread_cond_init (&_private->janitor_cond, NULL); INIT_LIST_HEAD (&_private->janitor_fds); posix_spawn_janitor_thread (this); + + pthread_mutex_init (&_private->fsync_mutex, NULL); + pthread_cond_init (&_private->fsync_cond, NULL); + INIT_LIST_HEAD (&_private->fsyncs); + + ret = gf_thread_create (&_private->fsyncer, NULL, posix_fsyncer, this); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "fsyncer thread" + " creation failed (%s)", strerror (errno)); + goto out; + } + + GF_OPTION_INIT ("batch-fsync-mode", batch_fsync_mode_str, str, out); + + if (set_batch_fsync_mode (_private, batch_fsync_mode_str) != 0) { + gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s", + batch_fsync_mode_str); + goto out; + } + + GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec, + uint32, out); out: return ret; } @@ -4232,6 +5103,9 @@ struct xlator_fops fops = { .fxattrop = posix_fxattrop, .setattr = posix_setattr, .fsetattr = posix_fsetattr, + .fallocate = _posix_fallocate, + .discard = posix_discard, + .zerofill = posix_zerofill, }; struct xlator_cbks cbks = { @@ -4259,5 +5133,60 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_ANY }, { .key = {"glusterd-uuid"}, .type = GF_OPTION_TYPE_STR }, + { + .key = {"linux-aio"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Support for native Linux AIO" + }, + { + .key = {"brick-uid"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .validate = GF_OPT_VALIDATE_MIN, + .description = "Support for setting uid of brick's owner" + }, + { + .key = {"brick-gid"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .validate = GF_OPT_VALIDATE_MIN, + .description = "Support for setting gid of brick's owner" + }, + { .key = {"node-uuid-pathinfo"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "return glusterd's node-uuid in pathinfo xattr" + " string instead of hostname" + }, + { + .key = {"health-check-interval"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "30", + .validate = GF_OPT_VALIDATE_MIN, + .description = "Interval in seconds for a filesystem health check, " + "set to 0 to disable" + }, + { .key = {"batch-fsync-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "reverse-fsync", + .description = "Possible values:\n" + "\t- syncfs: Perform one syncfs() on behalf oa batch" + "of fsyncs.\n" + "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch" + " of fsyncs and one fsync() per batch.\n" + "\t- syncfs-reverse-fsync: Preform one syncfs() on behalf of a batch" + " of fsyncs and fsync() each file in the batch in reverse order.\n" + " in reverse order.\n" + "\t- reverse-fsync: Perform fsync() of each file in the batch in" + " reverse order." + }, + { .key = {"batch-fsync-delay-usec"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "0", + .description = "Num of usecs to wait for aggregating fsync" + " requests", + }, { .key = {NULL} } }; |
