diff options
Diffstat (limited to 'xlators/storage/posix/src/posix.c')
| -rw-r--r-- | xlators/storage/posix/src/posix.c | 3759 |
1 files changed, 2161 insertions, 1598 deletions
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 7f2c9ab1f..fb45c7a67 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -1,22 +1,12 @@ /* - Copyright (c) 2006-2010 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" @@ -24,6 +14,7 @@ #define __XOPEN_SOURCE 500 +#include <openssl/md5.h> #include <stdint.h> #include <sys/time.h> #include <sys/resource.h> @@ -32,13 +23,18 @@ #include <pthread.h> #include <ftw.h> #include <sys/stat.h> +#include <signal.h> +#include <sys/uio.h> #ifndef GF_BSD_HOST_OS #include <alloca.h> #endif /* GF_BSD_HOST_OS */ +#ifdef HAVE_LINKAT +#include <fcntl.h> +#endif /* HAVE_LINKAT */ + #include "glusterfs.h" -#include "md5.h" #include "checksum.h" #include "dict.h" #include "logging.h" @@ -55,7 +51,11 @@ #include "timer.h" #include "glusterfs3-xdr.h" #include "hashfn.h" +#include "posix-aio.h" +#include "glusterfs-acl.h" +extern char *marker_xattrs[]; +#define ALIGN_SIZE 4096 #undef HAVE_SET_FSID #ifdef HAVE_SET_FSID @@ -80,14 +80,6 @@ #endif -typedef struct { - xlator_t *this; - const char *real_path; - dict_t *xattr; - struct iatt *stbuf; - loc_t *loc; -} posix_xattr_filler_t; - int posix_forget (xlator_t *this, inode_t *inode) { @@ -98,379 +90,94 @@ posix_forget (xlator_t *this, inode_t *inode) return 0; } -static void -_posix_xattr_get_set (dict_t *xattr_req, - char *key, - data_t *data, - void *xattrargs) -{ - posix_xattr_filler_t *filler = xattrargs; - char *value = NULL; - ssize_t xattr_size = -1; - int ret = -1; - char *databuf = NULL; - int _fd = -1; - loc_t *loc = NULL; - ssize_t req_size = 0; - - - /* should size be put into the data_t ? */ - if (!strcmp (key, GF_CONTENT_KEY) - && IA_ISREG (filler->stbuf->ia_type)) { - - /* file content request */ - req_size = data_to_uint64 (data); - if (req_size >= filler->stbuf->ia_size) { - _fd = open (filler->real_path, O_RDONLY); - if (_fd == -1) { - gf_log (filler->this->name, GF_LOG_ERROR, - "Opening file %s failed: %s", - filler->real_path, strerror (errno)); - goto err; - } - - databuf = GF_CALLOC (1, filler->stbuf->ia_size, - gf_posix_mt_char); - if (!databuf) { - goto err; - } - - ret = read (_fd, databuf, filler->stbuf->ia_size); - if (ret == -1) { - gf_log (filler->this->name, GF_LOG_ERROR, - "Read on file %s failed: %s", - filler->real_path, strerror (errno)); - goto err; - } - - ret = close (_fd); - _fd = -1; - if (ret == -1) { - gf_log (filler->this->name, GF_LOG_ERROR, - "Close on file %s failed: %s", - filler->real_path, strerror (errno)); - goto err; - } - - ret = dict_set_bin (filler->xattr, key, - databuf, filler->stbuf->ia_size); - if (ret < 0) { - gf_log (filler->this->name, GF_LOG_ERROR, - "failed to set dict value. key: %s, path: %s", - key, filler->real_path); - goto err; - } - - /* To avoid double free in cleanup below */ - databuf = NULL; - err: - if (_fd != -1) - close (_fd); - if (databuf) - GF_FREE (databuf); - } - } else if (!strcmp (key, GLUSTERFS_OPEN_FD_COUNT)) { - loc = filler->loc; - if (!list_empty (&loc->inode->fd_list)) { - ret = dict_set_uint32 (filler->xattr, key, 1); - if (ret < 0) - gf_log (filler->this->name, GF_LOG_WARNING, - "Failed to set dictionary value for %s", - key); - } else { - ret = dict_set_uint32 (filler->xattr, key, 0); - if (ret < 0) - gf_log (filler->this->name, GF_LOG_WARNING, - "Failed to set dictionary value for %s", - key); - } - } else { - xattr_size = sys_lgetxattr (filler->real_path, key, NULL, 0); - - if (xattr_size > 0) { - value = GF_CALLOC (1, xattr_size + 1, - gf_posix_mt_char); - if (!value) - return; - - sys_lgetxattr (filler->real_path, key, value, - xattr_size); - - value[xattr_size] = '\0'; - ret = dict_set_bin (filler->xattr, key, - value, xattr_size); - if (ret < 0) - gf_log (filler->this->name, GF_LOG_DEBUG, - "dict set failed. path: %s, key: %s", - filler->real_path, key); - } - } -} - - -int -posix_fill_gfid_path (xlator_t *this, const char *path, struct iatt *iatt) -{ - int ret = 0; - - if (!iatt) - return 0; - - ret = sys_lgetxattr (path, GFID_XATTR_KEY, iatt->ia_gfid, 16); - /* Return value of getxattr */ - if (ret == 16) - ret = 0; - - return ret; -} - - -int -posix_fill_gfid_fd (xlator_t *this, int fd, struct iatt *iatt) -{ - int ret = 0; - - if (!iatt) - return 0; - - ret = sys_fgetxattr (fd, GFID_XATTR_KEY, iatt->ia_gfid, 16); - /* Return value of getxattr */ - if (ret == 16) - ret = 0; - - return ret; -} - - -int -posix_lstat_with_gfid (xlator_t *this, const char *path, struct iatt *stbuf_p) -{ - struct posix_private *priv = NULL; - int ret = 0; - struct stat lstatbuf = {0, }; - struct iatt stbuf = {0, }; - - priv = this->private; - - ret = lstat (path, &lstatbuf); - if (ret == -1) - goto out; - - iatt_from_stat (&stbuf, &lstatbuf); - - ret = posix_fill_gfid_path (this, path, &stbuf); - if (ret) - gf_log_callingfn (this->name, GF_LOG_DEBUG, "failed to get gfid"); - - if (stbuf_p) - *stbuf_p = stbuf; -out: - return ret; -} - - -int -posix_fstat_with_gfid (xlator_t *this, int fd, struct iatt *stbuf_p) -{ - struct posix_private *priv = NULL; - int ret = 0; - struct stat fstatbuf = {0, }; - struct iatt stbuf = {0, }; - - priv = this->private; - - ret = fstat (fd, &fstatbuf); - if (ret == -1) - goto out; - - iatt_from_stat (&stbuf, &fstatbuf); - - ret = posix_fill_gfid_fd (this, fd, &stbuf); - if (ret) - gf_log_callingfn (this->name, GF_LOG_DEBUG, "failed to get gfid"); - - if (stbuf_p) - *stbuf_p = stbuf; - -out: - return ret; -} - - -dict_t * -posix_lookup_xattr_fill (xlator_t *this, const char *real_path, loc_t *loc, - dict_t *xattr_req, struct iatt *buf) -{ - dict_t *xattr = NULL; - posix_xattr_filler_t filler = {0, }; - - xattr = get_new_dict(); - if (!xattr) { - goto out; - } - - filler.this = this; - filler.real_path = real_path; - filler.xattr = xattr; - filler.stbuf = buf; - filler.loc = loc; - - dict_foreach (xattr_req, _posix_xattr_get_set, &filler); -out: - return xattr; -} - - -/* - * If the parent directory of {real_path} has the setgid bit set, - * then set {gid} to the gid of the parent. Otherwise, - * leave {gid} unchanged. - */ - -int -setgid_override (xlator_t *this, char *real_path, gid_t *gid) -{ - char * tmp_path = NULL; - char * parent_path = NULL; - struct iatt parent_stbuf; - - int op_ret = 0; - - tmp_path = gf_strdup (real_path); - if (!tmp_path) { - op_ret = -ENOMEM; - goto out; - } - - parent_path = dirname (tmp_path); - - op_ret = posix_lstat_with_gfid (this, parent_path, &parent_stbuf); - if (op_ret == -1) { - op_ret = -errno; - gf_log_callingfn (this->name, GF_LOG_ERROR, - "lstat on parent directory (%s) failed: %s", - parent_path, strerror (errno)); - goto out; - } - - if (parent_stbuf.ia_prot.sgid) { - /* - * Entries created inside a setgid directory - * should inherit the gid from the parent - */ - - *gid = parent_stbuf.ia_gid; - } -out: - - if (tmp_path) - GF_FREE (tmp_path); - - return op_ret; -} - - -int -posix_gfid_set (xlator_t *this, const char *path, dict_t *xattr_req) -{ - void *uuid_req = NULL; - uuid_t uuid_curr; - int ret = 0; - struct stat stat = {0, }; - - if (!xattr_req) - goto out; - - if (sys_lstat (path, &stat) != 0) - goto out; - - ret = sys_lgetxattr (path, GFID_XATTR_KEY, uuid_curr, 16); - if (ret == 16) { - ret = 0; - goto out; - } - - ret = dict_get_ptr (xattr_req, "gfid-req", &uuid_req); - if (ret) { - gf_log_callingfn (this->name, GF_LOG_DEBUG, - "failed to get the gfid from dict"); - goto out; - } - - ret = sys_lsetxattr (path, GFID_XATTR_KEY, uuid_req, 16, XATTR_CREATE); - -out: - return ret; -} - +/* Regular fops */ int32_t posix_lookup (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xattr_req) + loc_t *loc, dict_t *xdata) { struct iatt buf = {0, }; - char * real_path = NULL; int32_t op_ret = -1; int32_t entry_ret = 0; int32_t op_errno = 0; dict_t * xattr = NULL; - char * pathdup = NULL; - char * parentpath = NULL; + char * real_path = NULL; + char * par_path = NULL; struct iatt postparent = {0,}; + int32_t gfidless = 0; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (loc, out); - VALIDATE_OR_GOTO (loc->path, out); - MAKE_REAL_PATH (real_path, this, loc->path); + /* The Hidden directory should be for housekeeping purpose and it + should not get any gfid on it */ + if (__is_root_gfid (loc->pargfid) && + (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) { + gf_log (this->name, GF_LOG_WARNING, + "Lookup issued on %s, which is not permitted", + GF_HIDDEN_PATH); + op_errno = EPERM; + op_ret = -1; + goto out; + } + + op_ret = dict_get_int32 (xdata, GF_GFIDLESS_LOOKUP, &gfidless); + op_ret = -1; + if (uuid_is_null (loc->pargfid)) { + /* nameless lookup */ + MAKE_INODE_HANDLE (real_path, this, loc, &buf); + } else { + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &buf); - posix_gfid_set (this, real_path, xattr_req); + if (uuid_is_null (loc->inode->gfid)) { + posix_gfid_heal (this, real_path, loc, xdata); + MAKE_ENTRY_HANDLE (real_path, par_path, this, + loc, &buf); + } + } - op_ret = posix_lstat_with_gfid (this, real_path, &buf); op_errno = errno; if (op_ret == -1) { if (op_errno != ENOENT) { gf_log (this->name, GF_LOG_ERROR, "lstat on %s failed: %s", - loc->path, strerror (op_errno)); + real_path, strerror (op_errno)); } entry_ret = -1; goto parent; } - if (xattr_req && (op_ret == 0)) { + if (xdata && (op_ret == 0)) { xattr = posix_lookup_xattr_fill (this, real_path, loc, - xattr_req, &buf); + xdata, &buf); } parent: - if (loc->parent) { - pathdup = gf_strdup (real_path); - GF_VALIDATE_OR_GOTO (this->name, pathdup, out); - - parentpath = dirname (pathdup); - - op_ret = posix_lstat_with_gfid (this, parentpath, &postparent); + if (par_path) { + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "post-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } } op_ret = entry_ret; out: - if (pathdup) - GF_FREE (pathdup); - if (xattr) dict_ref (xattr); + if (!op_ret && !gfidless && uuid_is_null (buf.ia_gfid)) { + gf_log (this->name, GF_LOG_ERROR, "buf->ia_gfid is null for " + "%s", (real_path) ? real_path: ""); + op_ret = -1; + op_errno = ENODATA; + } STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, (loc)?loc->inode:NULL, &buf, xattr, &postparent); @@ -482,13 +189,13 @@ out: int32_t -posix_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) +posix_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { struct iatt buf = {0,}; - char * real_path = NULL; int32_t op_ret = -1; int32_t op_errno = 0; struct posix_private *priv = NULL; + char *real_path = NULL; DECLARE_OLD_FS_ID_VAR; @@ -500,13 +207,14 @@ posix_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) VALIDATE_OR_GOTO (priv, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); - op_ret = posix_lstat_with_gfid (this, real_path, &buf); + MAKE_INODE_HANDLE (real_path, this, loc, &buf); + if (op_ret == -1) { op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "lstat on %s failed: %s", loc->path, + gf_log (this->name, (op_errno == ENOENT)? + GF_LOG_DEBUG:GF_LOG_ERROR, + "lstat on %s failed: %s", real_path, strerror (op_errno)); goto out; } @@ -515,7 +223,7 @@ posix_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) out: SET_TO_OLD_FS_ID(); - STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf); + STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf, NULL); return 0; } @@ -621,7 +329,7 @@ out: int posix_setattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, struct iatt *stbuf, int32_t valid) + loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -636,9 +344,8 @@ posix_setattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_path, this, loc, &statpre); - op_ret = posix_lstat_with_gfid (this, real_path, &statpre); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -692,7 +399,7 @@ posix_setattr (call_frame_t *frame, xlator_t *this, } } - op_ret = posix_lstat_with_gfid (this, real_path, &statpost); + op_ret = posix_pstat (this, loc->gfid, real_path, &statpost); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -707,7 +414,7 @@ out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, - &statpre, &statpost); + &statpre, &statpost, NULL); return 0; } @@ -757,14 +464,13 @@ posix_do_futimes (xlator_t *this, int posix_fsetattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, struct iatt *stbuf, int32_t valid) + fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; struct iatt statpre = {0,}; struct iatt statpost = {0,}; struct posix_fd *pfd = NULL; - uint64_t tmp_pfd = 0; int32_t ret = -1; DECLARE_OLD_FS_ID_VAR; @@ -775,16 +481,15 @@ posix_fsetattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; gf_log (this->name, GF_LOG_DEBUG, "pfd is NULL from fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; - op_ret = posix_fstat_with_gfid (this, pfd->fd, &statpre); + op_ret = posix_fdstat (this, pfd->fd, &statpre); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -839,7 +544,7 @@ posix_fsetattr (call_frame_t *frame, xlator_t *this, } } - op_ret = posix_fstat_with_gfid (this, pfd->fd, &statpost); + op_ret = posix_fdstat (this, pfd->fd, &statpost); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -854,14 +559,297 @@ out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, - &statpre, &statpost); + &statpre, &statpost, NULL); return 0; } +static int32_t +posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + off_t offset, size_t len, struct iatt *statpre, + struct iatt *statpost) +{ + struct posix_fd *pfd = NULL; + int32_t ret = -1; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = posix_fd_ctx_get (fd, this, &pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "pfd is NULL from fd=%p", fd); + goto out; + } + + ret = posix_fdstat (this, pfd->fd, statpre); + if (ret == -1) { + ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "fallocate (fstat) failed on fd=%p: %s", fd, + strerror (errno)); + goto out; + } + + ret = sys_fallocate(pfd->fd, flags, offset, len); + if (ret == -1) { + ret = -errno; + goto out; + } + + ret = posix_fdstat (this, pfd->fd, statpost); + if (ret == -1) { + ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "fallocate (fstat) failed on fd=%p: %s", fd, + strerror (errno)); + goto out; + } + +out: + SET_TO_OLD_FS_ID (); + + return ret; +} + +char* +_page_aligned_alloc (size_t size, char **aligned_buf) +{ + char *alloc_buf = NULL; + char *buf = NULL; + + alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char); + if (!alloc_buf) + goto out; + /* page aligned buffer */ + buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE); + *aligned_buf = buf; +out: + return alloc_buf; +} + +static int32_t +_posix_do_zerofill(int fd, off_t offset, size_t len, int o_direct) +{ + size_t num_vect = 0; + int32_t num_loop = 1; + int32_t idx = 0; + int32_t op_ret = -1; + int32_t vect_size = VECTOR_SIZE; + size_t remain = 0; + size_t extra = 0; + struct iovec *vector = NULL; + char *iov_base = NULL; + char *alloc_buf = NULL; + + if (len == 0) + return 0; + if (len < VECTOR_SIZE) + vect_size = len; + + num_vect = len / (vect_size); + remain = len % vect_size ; + if (num_vect > MAX_NO_VECT) { + extra = num_vect % MAX_NO_VECT; + num_loop = num_vect / MAX_NO_VECT; + num_vect = MAX_NO_VECT; + } + + vector = GF_CALLOC (num_vect, sizeof(struct iovec), + gf_common_mt_iovec); + if (!vector) + return -1; + if (o_direct) { + alloc_buf = _page_aligned_alloc(vect_size, &iov_base); + if (!alloc_buf) { + gf_log ("_posix_do_zerofill", GF_LOG_DEBUG, + "memory alloc failed, vect_size %d: %s", + vect_size, strerror(errno)); + GF_FREE(vector); + return -1; + } + } else { + iov_base = GF_CALLOC (vect_size, sizeof(char), + gf_common_mt_char); + if (!iov_base) { + GF_FREE(vector); + return -1; + } + } + + for (idx = 0; idx < num_vect; idx++) { + vector[idx].iov_base = iov_base; + vector[idx].iov_len = vect_size; + } + lseek(fd, offset, SEEK_SET); + for (idx = 0; idx < num_loop; idx++) { + op_ret = writev(fd, vector, num_vect); + if (op_ret < 0) + goto err; + } + if (extra) { + op_ret = writev(fd, vector, extra); + if (op_ret < 0) + goto err; + } + if (remain) { + vector[0].iov_len = remain; + op_ret = writev(fd, vector , 1); + if (op_ret < 0) + goto err; + } +err: + if (o_direct) + GF_FREE(alloc_buf); + else + GF_FREE(iov_base); + GF_FREE(vector); + return op_ret; +} + +static int32_t +posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, + off_t offset, size_t len, struct iatt *statpre, + struct iatt *statpost) +{ + struct posix_fd *pfd = NULL; + int32_t ret = -1; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = posix_fd_ctx_get (fd, this, &pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "pfd is NULL from fd=%p", fd); + goto out; + } + + ret = posix_fdstat (this, pfd->fd, statpre); + if (ret == -1) { + ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "pre-operation fstat failed on fd = %p: %s", fd, + strerror (errno)); + goto out; + } + ret = _posix_do_zerofill(pfd->fd, offset, len, pfd->flags & O_DIRECT); + if (ret < 0) { + ret = -errno; + gf_log(this->name, GF_LOG_ERROR, + "zerofill failed on fd %d length %ld %s", + pfd->fd, len, strerror(errno)); + goto out; + } + if (pfd->flags & (O_SYNC|O_DSYNC)) { + ret = fsync (pfd->fd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "fsync() in writev on fd %d failed: %s", + pfd->fd, strerror (errno)); + ret = -errno; + goto out; + } + } + + ret = posix_fdstat (this, pfd->fd, statpost); + if (ret == -1) { + ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "post operation fstat failed on fd=%p: %s", fd, + strerror (errno)); + goto out; + } + +out: + SET_TO_OLD_FS_ID (); + + return ret; +} + +static int32_t +_posix_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size, + off_t offset, size_t len, dict_t *xdata) +{ + int32_t ret; + int32_t flags = 0; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; + + if (keep_size) + flags = FALLOC_FL_KEEP_SIZE; + + ret = posix_do_fallocate(frame, this, fd, flags, offset, len, + &statpre, &statpost); + if (ret < 0) + goto err; + + STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, NULL); + return 0; + +err: + STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, NULL); + return 0; +} + +static int32_t +posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + int32_t ret; + int32_t flags = FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; + + ret = posix_do_fallocate(frame, this, fd, flags, offset, len, + &statpre, &statpost); + if (ret < 0) + goto err; + + STACK_UNWIND_STRICT(discard, frame, 0, 0, &statpre, &statpost, NULL); + return 0; + +err: + STACK_UNWIND_STRICT(discard, frame, -1, -ret, NULL, NULL, NULL); + return 0; + +} + +static int32_t +posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + int32_t ret = 0; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; + + ret = posix_do_zerofill(frame, this, fd, offset, len, + &statpre, &statpost); + if (ret < 0) + goto err; + + STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL); + return 0; + +err: + STACK_UNWIND_STRICT(zerofill, frame, -1, -ret, NULL, NULL, NULL); + return 0; + +} + int32_t posix_opendir (call_frame_t *frame, xlator_t *this, - loc_t *loc, fd_t *fd) + loc_t *loc, fd_t *fd, dict_t *xdata) { char * real_path = NULL; int32_t op_ret = -1; @@ -878,15 +866,16 @@ posix_opendir (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (fd, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + op_ret = -1; dir = opendir (real_path); if (dir == NULL) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "opendir failed on %s: %s", - loc->path, strerror (op_errno)); + real_path, strerror (op_errno)); goto out; } @@ -895,7 +884,7 @@ posix_opendir (call_frame_t *frame, xlator_t *this, op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "dirfd() failed on %s: %s", - loc->path, strerror (op_errno)); + real_path, strerror (op_errno)); goto out; } @@ -907,16 +896,12 @@ posix_opendir (call_frame_t *frame, xlator_t *this, pfd->dir = dir; pfd->fd = dirfd (dir); - pfd->path = gf_strdup (real_path); - if (!pfd->path) { - goto out; - } op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd); if (op_ret) gf_log (this->name, GF_LOG_WARNING, "failed to set the fd context path=%s fd=%p", - loc->path, fd); + real_path, fd); op_ret = 0; @@ -927,15 +912,13 @@ out: dir = NULL; } if (pfd) { - if (pfd->path) - GF_FREE (pfd->path); GF_FREE (pfd); pfd = NULL; } } SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd); + STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, NULL); return 0; } @@ -962,19 +945,12 @@ posix_releasedir (xlator_t *this, pfd = (struct posix_fd *)(long)tmp_pfd; if (!pfd->dir) { gf_log (this->name, GF_LOG_WARNING, - "pfd->dir is NULL for fd=%p path=%s", - fd, pfd->path ? pfd->path : "<NULL>"); + "pfd->dir is NULL for fd=%p", fd); goto out; } priv = this->private; - if (!pfd->path) { - gf_log (this->name, GF_LOG_WARNING, - "pfd->path was NULL. fd=%p pfd=%p", - fd, pfd); - } - pthread_mutex_lock (&priv->janitor_lock); { INIT_LIST_HEAD (&pfd->list); @@ -990,11 +966,10 @@ out: int32_t posix_readlink (call_frame_t *frame, xlator_t *this, - loc_t *loc, size_t size) + loc_t *loc, size_t size, dict_t *xdata) { char * dest = NULL; int32_t op_ret = -1; - int32_t lstat_ret = -1; int32_t op_errno = 0; char * real_path = NULL; struct iatt stbuf = {0,}; @@ -1007,33 +982,29 @@ posix_readlink (call_frame_t *frame, xlator_t *this, dest = alloca (size + 1); - MAKE_REAL_PATH (real_path, this, loc->path); - - op_ret = readlink (real_path, dest, size); + MAKE_INODE_HANDLE (real_path, this, loc, &stbuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "readlink on %s failed: %s", loc->path, + "lstat on %s failed: %s", real_path, strerror (op_errno)); goto out; } - dest[op_ret] = 0; - - lstat_ret = posix_lstat_with_gfid (this, real_path, &stbuf); - if (lstat_ret == -1) { - op_ret = -1; + op_ret = readlink (real_path, dest, size); + if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "lstat on %s failed: %s", loc->path, + "readlink on %s failed: %s", real_path, strerror (op_errno)); goto out; } + dest[op_ret] = 0; out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, dest, &stbuf); + STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, dest, &stbuf, NULL); return 0; } @@ -1041,20 +1012,20 @@ out: int posix_mknod (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, dev_t dev, dict_t *params) + loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata) { int tmp_fd = 0; int32_t op_ret = -1; int32_t op_errno = 0; char *real_path = 0; + char *par_path = 0; struct iatt stbuf = { 0, }; char was_present = 1; struct posix_private *priv = NULL; gid_t gid = 0; - char *pathdup = NULL; struct iatt preparent = {0,}; struct iatt postparent = {0,}; - char *parentpath = NULL; + void * uuid_req = NULL; DECLARE_OLD_FS_ID_VAR; @@ -1065,32 +1036,49 @@ posix_mknod (call_frame_t *frame, xlator_t *this, priv = this->private; VALIDATE_OR_GOTO (priv, out); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL); gid = frame->root->gid; - op_ret = setgid_override (this, real_path, &gid); - if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; - goto out; - } - SET_FS_ID (frame->root->uid, gid); - pathdup = gf_strdup (real_path); - GF_VALIDATE_OR_GOTO (this->name, pathdup, out); - - parentpath = dirname (pathdup); - op_ret = posix_lstat_with_gfid (this, parentpath, &preparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "pre-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + real_path, strerror (op_errno)); goto out; } + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + } + + /* Check if the 'gfid' already exists, because this mknod may be an + internal call from distribute for creating 'linkfile', and that + linkfile may be for a hardlinked file */ + if (dict_get (xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { + dict_del (xdata, GLUSTERFS_INTERNAL_FOP_KEY); + op_ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); + if (op_ret) { + gf_log (this->name, GF_LOG_DEBUG, + "failed to get the gfid from dict for %s", + loc->path); + goto real_op; + } + op_ret = posix_create_link_if_gfid_exists (this, uuid_req, + real_path); + if (!op_ret) + goto post_op; + } + +real_op: +#ifdef __NetBSD__ + if (S_ISFIFO(mode)) + op_ret = mkfifo (real_path, mode); + else +#endif /* __NetBSD__ */ op_ret = mknod (real_path, mode, dev); if (op_ret == -1) { @@ -1102,23 +1090,23 @@ posix_mknod (call_frame_t *frame, xlator_t *this, if (tmp_fd == -1) { gf_log (this->name, GF_LOG_ERROR, "create failed on %s: %s", - loc->path, strerror (errno)); + real_path, strerror (errno)); goto out; } close (tmp_fd); } else { gf_log (this->name, GF_LOG_ERROR, - "mknod on %s failed: %s", loc->path, + "mknod on %s failed: %s", real_path, strerror (op_errno)); goto out; } } - op_ret = posix_gfid_set (this, real_path, params); + op_ret = posix_gfid_set (this, real_path, loc, xdata); if (op_ret) { gf_log (this->name, GF_LOG_ERROR, - "setting gfid on %s failed", loc->path); + "setting gfid on %s failed", real_path); } #ifndef HAVE_SET_FSID @@ -1126,40 +1114,53 @@ posix_mknod (call_frame_t *frame, xlator_t *this, if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "lchown on %s failed: %s", loc->path, + "lchown on %s failed: %s", real_path, strerror (op_errno)); goto out; } #endif - op_ret = posix_lstat_with_gfid (this, real_path, &stbuf); +post_op: + op_ret = posix_acl_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting ACLs on %s failed (%s)", real_path, + strerror (errno)); + } + + op_ret = posix_entry_create_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting xattrs on %s failed (%s)", real_path, + strerror (errno)); + } + + op_ret = posix_pstat (this, NULL, real_path, &stbuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "mknod on %s failed: %s", loc->path, + "mknod on %s failed: %s", real_path, strerror (op_errno)); goto out; } - op_ret = posix_lstat_with_gfid (this, parentpath, &postparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "post-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } op_ret = 0; out: - if (pathdup) - GF_FREE (pathdup); - SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, - (loc)?loc->inode:NULL, &stbuf, &preparent, &postparent); + (loc)?loc->inode:NULL, &stbuf, &preparent, + &postparent, NULL); if ((op_ret == -1) && (!was_present)) { unlink (real_path); @@ -1169,164 +1170,18 @@ out: } -static int -janitor_walker (const char *fpath, const struct stat *sb, - int typeflag, struct FTW *ftwbuf) -{ - switch (sb->st_mode & S_IFMT) { - case S_IFREG: - case S_IFBLK: - case S_IFLNK: - case S_IFCHR: - case S_IFIFO: - case S_IFSOCK: - gf_log (THIS->name, GF_LOG_TRACE, - "unlinking %s", fpath); - unlink (fpath); - break; - - case S_IFDIR: - if (ftwbuf->level) { /* don't remove top level dir */ - gf_log (THIS->name, GF_LOG_TRACE, - "removing directory %s", fpath); - - rmdir (fpath); - } - break; - } - - return 0; /* 0 = FTW_CONTINUE */ -} - - -static struct posix_fd * -janitor_get_next_fd (xlator_t *this) -{ - struct posix_private *priv = NULL; - struct posix_fd *pfd = NULL; - - struct timespec timeout; - - priv = this->private; - - pthread_mutex_lock (&priv->janitor_lock); - { - if (list_empty (&priv->janitor_fds)) { - time (&timeout.tv_sec); - timeout.tv_sec += priv->janitor_sleep_duration; - timeout.tv_nsec = 0; - - pthread_cond_timedwait (&priv->janitor_cond, - &priv->janitor_lock, - &timeout); - goto unlock; - } - - pfd = list_entry (priv->janitor_fds.next, struct posix_fd, - list); - - list_del (priv->janitor_fds.next); - } -unlock: - pthread_mutex_unlock (&priv->janitor_lock); - - return pfd; -} - - -static void * -posix_janitor_thread_proc (void *data) -{ - xlator_t * this = NULL; - struct posix_private *priv = NULL; - struct posix_fd *pfd; - - time_t now; - - this = data; - priv = this->private; - - THIS = this; - - while (1) { - time (&now); - if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) { - gf_log (this->name, GF_LOG_TRACE, - "janitor cleaning out /" GF_REPLICATE_TRASH_DIR); - - nftw (priv->trash_path, - janitor_walker, - 32, - FTW_DEPTH | FTW_PHYS); - - priv->last_landfill_check = now; - } - - pfd = janitor_get_next_fd (this); - if (pfd) { - if (pfd->dir == NULL) { - gf_log (this->name, GF_LOG_TRACE, - "janitor: closing file fd=%d", pfd->fd); - close (pfd->fd); - } else { - gf_log (this->name, GF_LOG_TRACE, - "janitor: closing dir fd=%p", pfd->dir); - closedir (pfd->dir); - } - - if (pfd->path) - GF_FREE (pfd->path); - - GF_FREE (pfd); - } - } - - return NULL; -} - - -static void -posix_spawn_janitor_thread (xlator_t *this) -{ - struct posix_private *priv = NULL; - int ret = 0; - - priv = this->private; - - LOCK (&priv->lock); - { - if (!priv->janitor_present) { - ret = pthread_create (&priv->janitor, NULL, - posix_janitor_thread_proc, this); - - if (ret < 0) { - gf_log (this->name, GF_LOG_ERROR, - "spawning janitor thread failed: %s", - strerror (errno)); - goto unlock; - } - - priv->janitor_present = _gf_true; - } - } -unlock: - UNLOCK (&priv->lock); -} - - int posix_mkdir (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, dict_t *params) + loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char *real_path = NULL; + char *par_path = NULL; struct iatt stbuf = {0, }; char was_present = 1; struct posix_private *priv = NULL; gid_t gid = 0; - char *pathdup = NULL; - char *parentpath = NULL; struct iatt preparent = {0,}; struct iatt postparent = {0,}; @@ -1336,54 +1191,59 @@ posix_mkdir (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (loc, out); + /* The Hidden directory should be for housekeeping purpose and it + should not get created from a user request */ + if (__is_root_gfid (loc->pargfid) && + (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) { + gf_log (this->name, GF_LOG_WARNING, + "mkdir issued on %s, which is not permitted", + GF_HIDDEN_PATH); + op_errno = EPERM; + op_ret = -1; + goto out; + } + priv = this->private; VALIDATE_OR_GOTO (priv, out); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL); gid = frame->root->gid; - op_ret = posix_lstat_with_gfid (this, real_path, &stbuf); + op_ret = posix_pstat (this, NULL, real_path, &stbuf); if ((op_ret == -1) && (errno == ENOENT)) { was_present = 0; } - op_ret = setgid_override (this, real_path, &gid); - if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; - goto out; - } - SET_FS_ID (frame->root->uid, gid); - pathdup = gf_strdup (real_path); - if (!pathdup) - goto out; - - parentpath = dirname (pathdup); - op_ret = posix_lstat_with_gfid (this, parentpath, &preparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "pre-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + mode |= S_ISGID; + } + op_ret = mkdir (real_path, mode); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "mkdir of %s failed: %s", loc->path, + "mkdir of %s failed: %s", real_path, strerror (op_errno)); goto out; } - op_ret = posix_gfid_set (this, real_path, params); + op_ret = posix_gfid_set (this, real_path, loc, xdata); if (op_ret) { gf_log (this->name, GF_LOG_ERROR, - "setting gfid on %s failed", loc->path); + "setting gfid on %s failed", real_path); } #ifndef HAVE_SET_FSID @@ -1391,40 +1251,52 @@ posix_mkdir (call_frame_t *frame, xlator_t *this, if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "chown on %s failed: %s", loc->path, + "chown on %s failed: %s", real_path, strerror (op_errno)); goto out; } #endif - op_ret = posix_lstat_with_gfid (this, real_path, &stbuf); + op_ret = posix_acl_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting ACLs on %s failed (%s)", real_path, + strerror (errno)); + } + + op_ret = posix_entry_create_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting xattrs on %s failed (%s)", real_path, + strerror (errno)); + } + + op_ret = posix_pstat (this, NULL, real_path, &stbuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "lstat on %s failed: %s", loc->path, + "lstat on %s failed: %s", real_path, strerror (op_errno)); goto out; } - op_ret = posix_lstat_with_gfid (this, parentpath, &postparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "post-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + real_path, strerror (op_errno)); goto out; } op_ret = 0; out: - if (pathdup) - GF_FREE (pathdup); - SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, - (loc)?loc->inode:NULL, &stbuf, &preparent, &postparent); + (loc)?loc->inode:NULL, &stbuf, &preparent, + &postparent, NULL); if ((op_ret == -1) && (!was_present)) { unlink (real_path); @@ -1436,17 +1308,17 @@ out: int32_t posix_unlink (call_frame_t *frame, xlator_t *this, - loc_t *loc) + loc_t *loc, int xflag, dict_t *xdata) { - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_path = NULL; - char *pathdup = NULL; - char *parentpath = NULL; - int32_t fd = -1; - struct posix_private *priv = NULL; - struct iatt preparent = {0,}; - struct iatt postparent = {0,}; + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL; + char *par_path = NULL; + int32_t fd = -1; + struct iatt stbuf = {0,}; + struct posix_private *priv = NULL; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; DECLARE_OLD_FS_ID_VAR; @@ -1455,23 +1327,20 @@ posix_unlink (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); - - pathdup = gf_strdup (real_path); - if (!pathdup) - goto out; - - parentpath = dirname (pathdup); + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); - op_ret = posix_lstat_with_gfid (this, parentpath, &preparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "pre-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } + if (stbuf.ia_nlink == 1) + posix_handle_unset (this, stbuf.ia_gfid, NULL); + priv = this->private; if (priv->background_unlink) { if (IA_ISREG (loc->inode->ia_type)) { @@ -1480,7 +1349,7 @@ posix_unlink (call_frame_t *frame, xlator_t *this, op_ret = -1; op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "open of %s failed: %s", loc->path, + "open of %s failed: %s", real_path, strerror (op_errno)); goto out; } @@ -1491,30 +1360,27 @@ posix_unlink (call_frame_t *frame, xlator_t *this, if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "unlink of %s failed: %s", loc->path, + "unlink of %s failed: %s", real_path, strerror (op_errno)); goto out; } - op_ret = posix_lstat_with_gfid (this, parentpath, &postparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "post-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } op_ret = 0; out: - if (pathdup) - GF_FREE (pathdup); - SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, - &preparent, &postparent); + &preparent, &postparent, NULL); if (fd != -1) { close (fd); @@ -1526,15 +1392,16 @@ out: int posix_rmdir (call_frame_t *frame, xlator_t *this, - loc_t *loc, int flags) + loc_t *loc, int flags, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char * real_path = NULL; - char * pathdup = NULL; - char * parentpath = NULL; + char * par_path = NULL; + char * gfid_str = NULL; struct iatt preparent = {0,}; struct iatt postparent = {0,}; + struct iatt stbuf; struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; @@ -1543,39 +1410,51 @@ posix_rmdir (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (loc, out); - priv = this->private; - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); - pathdup = gf_strdup (real_path); - if (!pathdup) + /* The Hidden directory should be for housekeeping purpose and it + should not get deleted from inside process */ + if (__is_root_gfid (loc->pargfid) && + (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) { + gf_log (this->name, GF_LOG_WARNING, + "rmdir issued on %s, which is not permitted", + GF_HIDDEN_PATH); + op_errno = EPERM; + op_ret = -1; goto out; + } - parentpath = dirname (pathdup); + priv = this->private; - op_ret = posix_lstat_with_gfid (this, parentpath, &preparent); + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); + + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "pre-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } if (flags) { - uint32_t hashval = 0; - char *tmp_path = alloca (strlen (priv->trash_path) + 16); + gfid_str = uuid_utoa (stbuf.ia_gfid); + char *tmp_path = alloca (strlen (priv->trash_path) + + strlen ("/") + + strlen (gfid_str) + 1); mkdir (priv->trash_path, 0755); - hashval = gf_dm_hashfn (real_path, strlen (real_path)); - sprintf (tmp_path, "%s/%u", priv->trash_path, hashval); + sprintf (tmp_path, "%s/%s", priv->trash_path, gfid_str); op_ret = rename (real_path, tmp_path); } else { op_ret = rmdir (real_path); } op_errno = errno; + if (op_ret == 0) { + posix_handle_unset (this, stbuf.ia_gfid, NULL); + } + if (op_errno == EEXIST) /* Solaris sets errno = EEXIST instead of ENOTEMPTY */ op_errno = ENOTEMPTY; @@ -1583,7 +1462,7 @@ posix_rmdir (call_frame_t *frame, xlator_t *this, /* No need to log a common error as ENOTEMPTY */ if (op_ret == -1 && op_errno != ENOTEMPTY) { gf_log (this->name, GF_LOG_ERROR, - "rmdir of %s failed: %s", loc->path, + "rmdir of %s failed: %s", real_path, strerror (op_errno)); } @@ -1591,27 +1470,24 @@ posix_rmdir (call_frame_t *frame, xlator_t *this, gf_log (this->name, (op_errno == ENOTEMPTY) ? GF_LOG_DEBUG : GF_LOG_ERROR, "%s on %s failed", (flags) ? "rename" : "rmdir", - loc->path); + real_path); goto out; } - op_ret = posix_lstat_with_gfid (this, parentpath, &postparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "post-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + par_path, strerror (op_errno)); goto out; } out: - if (pathdup) - GF_FREE (pathdup); - SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, - &preparent, &postparent); + &preparent, &postparent, NULL); return 0; } @@ -1619,17 +1495,16 @@ out: int posix_symlink (call_frame_t *frame, xlator_t *this, - const char *linkname, loc_t *loc, dict_t *params) + const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char * real_path = 0; + char * par_path = 0; struct iatt stbuf = { 0, }; struct posix_private *priv = NULL; gid_t gid = 0; char was_present = 1; - char *pathdup = NULL; - char *parentpath = NULL; struct iatt preparent = {0,}; struct iatt postparent = {0,}; @@ -1643,52 +1518,43 @@ posix_symlink (call_frame_t *frame, xlator_t *this, priv = this->private; VALIDATE_OR_GOTO (priv, out); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); - op_ret = posix_lstat_with_gfid (this, real_path, &stbuf); if ((op_ret == -1) && (errno == ENOENT)){ was_present = 0; } - gid = frame->root->gid; - - op_ret = setgid_override (this, real_path, &gid); - if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; - goto out; - } - SET_FS_ID (frame->root->uid, gid); - pathdup = gf_strdup (real_path); - if (!pathdup) - goto out; - parentpath = dirname (pathdup); + gid = frame->root->gid; - op_ret = posix_lstat_with_gfid (this, parentpath, &preparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "pre-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + } + op_ret = symlink (linkname, real_path); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "symlink of %s --> %s failed: %s", - loc->path, linkname, strerror (op_errno)); + real_path, linkname, strerror (op_errno)); goto out; } - op_ret = posix_gfid_set (this, real_path, params); + op_ret = posix_gfid_set (this, real_path, loc, xdata); if (op_ret) { gf_log (this->name, GF_LOG_ERROR, - "setting gfid on %s failed", loc->path); + "setting gfid on %s failed", real_path); } #ifndef HAVE_SET_FSID @@ -1697,38 +1563,51 @@ posix_symlink (call_frame_t *frame, xlator_t *this, op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "lchown failed on %s: %s", - loc->path, strerror (op_errno)); + real_path, strerror (op_errno)); goto out; } #endif - op_ret = posix_lstat_with_gfid (this, real_path, &stbuf); + + op_ret = posix_acl_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting ACLs on %s failed (%s)", real_path, + strerror (errno)); + } + + op_ret = posix_entry_create_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting xattrs on %s failed (%s)", real_path, + strerror (errno)); + } + + op_ret = posix_pstat (this, NULL, real_path, &stbuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "lstat failed on %s: %s", - loc->path, strerror (op_errno)); + real_path, strerror (op_errno)); goto out; } - op_ret = posix_lstat_with_gfid (this, parentpath, &postparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "post-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } op_ret = 0; out: - if (pathdup) - GF_FREE (pathdup); - SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, - (loc)?loc->inode:NULL, &stbuf, &preparent, &postparent); + (loc)?loc->inode:NULL, &stbuf, &preparent, + &postparent, NULL); if ((op_ret == -1) && (!was_present)) { unlink (real_path); @@ -1740,23 +1619,26 @@ out: int posix_rename (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc) + loc_t *oldloc, loc_t *newloc, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char *real_oldpath = NULL; char *real_newpath = NULL; + char *par_oldpath = NULL; + char *par_newpath = NULL; struct iatt stbuf = {0, }; struct posix_private *priv = NULL; char was_present = 1; - char *oldpathdup = NULL; - char *oldparentpath = NULL; - char *newpathdup = NULL; - char *newparentpath = NULL; struct iatt preoldparent = {0, }; struct iatt postoldparent = {0, }; struct iatt prenewparent = {0, }; struct iatt postnewparent = {0, }; + char olddirid[64]; + char newdirid[64]; + uuid_t victim = {0}; + int was_dir = 0; + int nlink = 0; DECLARE_OLD_FS_ID_VAR; @@ -1769,42 +1651,60 @@ posix_rename (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (priv, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_oldpath, this, oldloc->path); - MAKE_REAL_PATH (real_newpath, this, newloc->path); - - oldpathdup = gf_strdup (real_oldpath); - if (!oldpathdup) - goto out; + MAKE_ENTRY_HANDLE (real_oldpath, par_oldpath, this, oldloc, NULL); + MAKE_ENTRY_HANDLE (real_newpath, par_newpath, this, newloc, &stbuf); - oldparentpath = dirname (oldpathdup); - - op_ret = posix_lstat_with_gfid (this, oldparentpath, &preoldparent); + op_ret = posix_pstat (this, oldloc->pargfid, par_oldpath, &preoldparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent of %s failed: %s", - oldloc->path, strerror (op_errno)); + "pre-operation lstat on parent %s failed: %s", + par_oldpath, strerror (op_errno)); goto out; } - newpathdup = gf_strdup (real_newpath); - if (!newpathdup) - goto out; - - newparentpath = dirname (newpathdup); - - op_ret = posix_lstat_with_gfid (this, newparentpath, &prenewparent); + op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &prenewparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "pre-operation lstat on parent of %s failed: %s", - newloc->path, strerror (op_errno)); + par_newpath, strerror (op_errno)); goto out; } - op_ret = posix_lstat_with_gfid (this, real_newpath, &stbuf); + op_ret = posix_pstat (this, NULL, real_newpath, &stbuf); if ((op_ret == -1) && (errno == ENOENT)){ was_present = 0; + } else { + uuid_copy (victim, stbuf.ia_gfid); + if (IA_ISDIR (stbuf.ia_type)) + was_dir = 1; + nlink = stbuf.ia_nlink; + } + + if (was_present && IA_ISDIR(stbuf.ia_type) && !newloc->inode) { + gf_log (this->name, GF_LOG_WARNING, + "found directory at %s while expecting ENOENT", + real_newpath); + op_ret = -1; + op_errno = EEXIST; + goto out; + } + + if (was_present && IA_ISDIR(stbuf.ia_type) && + uuid_compare (newloc->inode->gfid, stbuf.ia_gfid)) { + gf_log (this->name, GF_LOG_WARNING, + "found directory %s at %s while renaming %s", + uuid_utoa_r (newloc->inode->gfid, olddirid), + real_newpath, + uuid_utoa_r (stbuf.ia_gfid, newdirid)); + op_ret = -1; + op_errno = EEXIST; + goto out; + } + + if (IA_ISDIR (oldloc->inode->ia_type)) { + posix_handle_unset (this, oldloc->inode->gfid, NULL); } op_ret = sys_rename (real_oldpath, real_newpath); @@ -1813,11 +1713,22 @@ posix_rename (call_frame_t *frame, xlator_t *this, gf_log (this->name, (op_errno == ENOTEMPTY ? GF_LOG_DEBUG : GF_LOG_ERROR), "rename of %s to %s failed: %s", - oldloc->path, newloc->path, strerror (op_errno)); + real_oldpath, real_newpath, strerror (op_errno)); goto out; } - op_ret = posix_lstat_with_gfid (this, real_newpath, &stbuf); + if (was_dir) + posix_handle_unset (this, victim, NULL); + + if (was_present && !was_dir && nlink == 1) + posix_handle_unset (this, victim, NULL); + + if (IA_ISDIR (oldloc->inode->ia_type)) { + posix_handle_soft (this, real_newpath, newloc, + oldloc->inode->gfid, NULL); + } + + op_ret = posix_pstat (this, NULL, real_newpath, &stbuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -1826,39 +1737,33 @@ posix_rename (call_frame_t *frame, xlator_t *this, goto out; } - op_ret = posix_lstat_with_gfid (this, oldparentpath, &postoldparent); + op_ret = posix_pstat (this, oldloc->pargfid, par_oldpath, &postoldparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent of %s failed: %s", - oldloc->path, strerror (op_errno)); + "post-operation lstat on parent %s failed: %s", + par_oldpath, strerror (op_errno)); goto out; } - op_ret = posix_lstat_with_gfid (this, newparentpath, &postnewparent); + op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &postnewparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent of %s failed: %s", - newloc->path, strerror (op_errno)); + "post-operation lstat on parent %s failed: %s", + par_newpath, strerror (op_errno)); goto out; } op_ret = 0; out: - if (oldpathdup) - GF_FREE (oldpathdup); - - if (newpathdup) - GF_FREE (newpathdup); - SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, &stbuf, &preoldparent, &postoldparent, - &prenewparent, &postnewparent); + &prenewparent, &postnewparent, NULL); if ((op_ret == -1) && !was_present) { unlink (real_newpath); @@ -1870,17 +1775,16 @@ out: int posix_link (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc) + loc_t *oldloc, loc_t *newloc, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char *real_oldpath = 0; char *real_newpath = 0; + char *par_newpath = 0; struct iatt stbuf = {0, }; struct posix_private *priv = NULL; char was_present = 1; - char *newpathdup = NULL; - char *newparentpath = NULL; struct iatt preparent = {0,}; struct iatt postparent = {0,}; @@ -1895,39 +1799,42 @@ posix_link (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (priv, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_oldpath, this, oldloc->path); - MAKE_REAL_PATH (real_newpath, this, newloc->path); + MAKE_INODE_HANDLE (real_oldpath, this, oldloc, &stbuf); - op_ret = posix_lstat_with_gfid (this, real_newpath, &stbuf); + MAKE_ENTRY_HANDLE (real_newpath, par_newpath, this, newloc, &stbuf); if ((op_ret == -1) && (errno == ENOENT)) { was_present = 0; } - newpathdup = gf_strdup (real_newpath); - if (!newpathdup) { - op_errno = ENOMEM; - goto out; - } - - newparentpath = dirname (newpathdup); - op_ret = posix_lstat_with_gfid (this, newparentpath, &preparent); + op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &preparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "lstat failed: %s: %s", - newparentpath, strerror (op_errno)); + par_newpath, strerror (op_errno)); goto out; } +#ifdef HAVE_LINKAT + /* + * On most systems (Linux being the notable exception), link(2) + * first resolves symlinks. If the target is a directory or + * is nonexistent, it will fail. linkat(2) operates on the + * symlink instead of its target when the AT_SYMLINK_FOLLOW + * flag is not supplied. + */ + op_ret = linkat (AT_FDCWD, real_oldpath, AT_FDCWD, real_newpath, 0); +#else op_ret = link (real_oldpath, real_newpath); +#endif if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "link %s to %s failed: %s", - oldloc->path, newloc->path, strerror (op_errno)); + real_oldpath, real_newpath, strerror (op_errno)); goto out; } - op_ret = posix_lstat_with_gfid (this, real_newpath, &stbuf); + op_ret = posix_pstat (this, NULL, real_newpath, &stbuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -1936,24 +1843,22 @@ posix_link (call_frame_t *frame, xlator_t *this, goto out; } - op_ret = posix_lstat_with_gfid (this, newparentpath, &postparent); + op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &postparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "lstat failed: %s: %s", - newparentpath, strerror (op_errno)); + par_newpath, strerror (op_errno)); goto out; } op_ret = 0; out: - if (newpathdup) - GF_FREE (newpathdup); SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, (oldloc)?oldloc->inode:NULL, &stbuf, &preparent, - &postparent); + &postparent, NULL); if ((op_ret == -1) && (!was_present)) { unlink (real_newpath); @@ -1964,7 +1869,8 @@ out: int32_t -posix_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) +posix_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -1983,14 +1889,13 @@ posix_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) VALIDATE_OR_GOTO (priv, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); - op_ret = posix_lstat_with_gfid (this, real_path, &prebuf); + MAKE_INODE_HANDLE (real_path, this, loc, &prebuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "pre-operation lstat on %s failed: %s", - loc->path, strerror (op_errno)); + real_path, strerror (op_errno)); goto out; } @@ -1999,11 +1904,11 @@ posix_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "truncate on %s failed: %s", - loc->path, strerror (op_errno)); + real_path, strerror (op_errno)); goto out; } - op_ret = posix_lstat_with_gfid (this, real_path, &postbuf); + op_ret = posix_pstat (this, loc->gfid, real_path, &postbuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "lstat on %s failed: %s", @@ -2017,30 +1922,29 @@ out: SET_TO_OLD_FS_ID (); STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, - &prebuf, &postbuf); + &prebuf, &postbuf, NULL); return 0; } -int32_t +int posix_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, mode_t mode, - fd_t *fd, dict_t *params) + mode_t umask, fd_t *fd, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; int32_t _fd = -1; int _flags = 0; char * real_path = NULL; + char * par_path = NULL; struct iatt stbuf = {0, }; struct posix_fd * pfd = NULL; struct posix_private * priv = NULL; char was_present = 1; gid_t gid = 0; - char *pathdup = NULL; - char *parentpath = NULL; struct iatt preparent = {0,}; struct iatt postparent = {0,}; @@ -2055,33 +1959,25 @@ posix_create (call_frame_t *frame, xlator_t *this, priv = this->private; VALIDATE_OR_GOTO (priv, out); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); gid = frame->root->gid; - op_ret = setgid_override (this, real_path, &gid); - if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; - goto out; - } - SET_FS_ID (frame->root->uid, gid); - pathdup = gf_strdup (real_path); - if (!pathdup) - goto out; - parentpath = dirname (pathdup); - - op_ret = posix_lstat_with_gfid (this, parentpath, &preparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "pre-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "pre-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + } + if (!flags) { _flags = O_CREAT | O_RDWR | O_EXCL; } @@ -2089,7 +1985,7 @@ posix_create (call_frame_t *frame, xlator_t *this, _flags = flags | O_CREAT; } - op_ret = posix_lstat_with_gfid (this, real_path, &stbuf); + op_ret = posix_pstat (this, NULL, real_path, &stbuf); if ((op_ret == -1) && (errno == ENOENT)) { was_present = 0; } @@ -2103,15 +1999,18 @@ posix_create (call_frame_t *frame, xlator_t *this, op_errno = errno; op_ret = -1; gf_log (this->name, GF_LOG_ERROR, - "open on %s failed: %s", loc->path, + "open on %s failed: %s", real_path, strerror (op_errno)); goto out; } - op_ret = posix_gfid_set (this, real_path, params); + if (was_present) + goto fill_stat; + + op_ret = posix_gfid_set (this, real_path, loc, xdata); if (op_ret) { gf_log (this->name, GF_LOG_ERROR, - "setting gfid on %s failed", loc->path); + "setting gfid on %s failed", real_path); } #ifndef HAVE_SET_FSID @@ -2124,7 +2023,22 @@ posix_create (call_frame_t *frame, xlator_t *this, } #endif - op_ret = posix_fstat_with_gfid (this, _fd, &stbuf); + op_ret = posix_acl_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting ACLs on %s failed (%s)", real_path, + strerror (errno)); + } + + op_ret = posix_entry_create_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, + "setting xattrs on %s failed (%s)", real_path, + strerror (errno)); + } + +fill_stat: + op_ret = posix_fdstat (this, _fd, &stbuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -2132,12 +2046,12 @@ posix_create (call_frame_t *frame, xlator_t *this, goto out; } - op_ret = posix_lstat_with_gfid (this, parentpath, &postparent); + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "post-operation lstat on parent of %s failed: %s", - loc->path, strerror (op_errno)); + "post-operation lstat on parent %s failed: %s", + par_path, strerror (op_errno)); goto out; } @@ -2155,7 +2069,7 @@ posix_create (call_frame_t *frame, xlator_t *this, if (op_ret) gf_log (this->name, GF_LOG_WARNING, "failed to set the fd context path=%s fd=%p", - loc->path, fd); + real_path, fd); LOCK (&priv->lock); { @@ -2166,8 +2080,6 @@ posix_create (call_frame_t *frame, xlator_t *this, op_ret = 0; out: - if (pathdup) - GF_FREE (pathdup); SET_TO_OLD_FS_ID (); if ((-1 == op_ret) && (_fd != -1)) { @@ -2180,14 +2092,14 @@ out: STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, (loc)?loc->inode:NULL, &stbuf, &preparent, - &postparent); + &postparent, xdata); return 0; } int32_t posix_open (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, fd_t *fd, int wbflags) + loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -2195,8 +2107,6 @@ posix_open (call_frame_t *frame, xlator_t *this, int32_t _fd = -1; struct posix_fd *pfd = NULL; struct posix_private *priv = NULL; - char was_present = 1; - gid_t gid = 0; struct iatt stbuf = {0, }; DECLARE_OLD_FS_ID_VAR; @@ -2210,25 +2120,14 @@ posix_open (call_frame_t *frame, xlator_t *this, priv = this->private; VALIDATE_OR_GOTO (priv, out); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_path, this, loc, &stbuf); - op_ret = setgid_override (this, real_path, &gid); - if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; - goto out; - } - - SET_FS_ID (frame->root->uid, gid); + op_ret = -1; + SET_FS_ID (frame->root->uid, frame->root->gid); if (priv->o_direct) flags |= O_DIRECT; - op_ret = posix_lstat_with_gfid (this, real_path, &stbuf); - if ((op_ret == -1) && (errno == ENOENT)) { - was_present = 0; - } - _fd = open (real_path, flags, 0); if (_fd == -1) { op_ret = -1; @@ -2246,37 +2145,12 @@ posix_open (call_frame_t *frame, xlator_t *this, pfd->flags = flags; pfd->fd = _fd; - if (wbflags == GF_OPEN_FSYNC) - pfd->flushwrites = 1; op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd); if (op_ret) gf_log (this->name, GF_LOG_WARNING, "failed to set the fd context path=%s fd=%p", - loc->path, fd); - -#ifndef HAVE_SET_FSID - if (flags & O_CREAT) { - op_ret = chown (real_path, frame->root->uid, gid); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, - "chown on %s failed: %s", - real_path, strerror (op_errno)); - goto out; - } - } -#endif - - if (flags & O_CREAT) { - op_ret = posix_lstat_with_gfid (this, real_path, &stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, "lstat on (%s) " - "failed: %s", real_path, strerror (op_errno)); - goto out; - } - } + real_path, fd); LOCK (&priv->lock); { @@ -2295,19 +2169,15 @@ out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd); + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, NULL); return 0; } -#define ALIGN_BUF(ptr,bound) ((void *)((unsigned long)(ptr + bound - 1) & \ - (unsigned long)(~(bound - 1)))) - int posix_readv (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t offset) + fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) { - uint64_t tmp_pfd = 0; int32_t op_ret = -1; int32_t op_errno = 0; int _fd = -1; @@ -2317,7 +2187,6 @@ posix_readv (call_frame_t *frame, xlator_t *this, struct iovec vec = {0,}; struct posix_fd * pfd = NULL; struct iatt stbuf = {0,}; - int align = 1; int ret = -1; VALIDATE_OR_GOTO (frame, out); @@ -2328,14 +2197,13 @@ posix_readv (call_frame_t *frame, xlator_t *this, priv = this->private; VALIDATE_OR_GOTO (priv, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; gf_log (this->name, GF_LOG_WARNING, "pfd is NULL from fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; if (!size) { op_errno = EINVAL; @@ -2343,11 +2211,7 @@ posix_readv (call_frame_t *frame, xlator_t *this, goto out; } - if (pfd->flags & O_DIRECT) { - align = 4096; /* align to page boundary */ - } - - iobuf = iobuf_get (this->ctx->iobuf_pool); + iobuf = iobuf_get2 (this->ctx->iobuf_pool, size); if (!iobuf) { op_errno = ENOMEM; goto out; @@ -2381,7 +2245,7 @@ posix_readv (call_frame_t *frame, xlator_t *this, * we read from */ - op_ret = posix_fstat_with_gfid (this, _fd, &stbuf); + op_ret = posix_fdstat (this, _fd, &stbuf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -2391,18 +2255,14 @@ posix_readv (call_frame_t *frame, xlator_t *this, } /* Hack to notify higher layers of EOF. */ - if (stbuf.ia_size == 0) - op_errno = ENOENT; - else if ((offset + vec.iov_len) == stbuf.ia_size) - op_errno = ENOENT; - else if (offset > stbuf.ia_size) + if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size) op_errno = ENOENT; op_ret = vec.iov_len; out: STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, - &vec, 1, &stbuf, iobref); + &vec, 1, &stbuf, iobref, NULL); if (iobref) iobref_unref (iobref); @@ -2440,14 +2300,12 @@ err: return op_ret; } - int32_t __posix_writev (int fd, struct iovec *vector, int count, off_t startoff, int odirect) { int32_t op_ret = 0; int idx = 0; - int align = 4096; int max_buf_size = 0; int retval = 0; char *buf = NULL; @@ -2463,7 +2321,7 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff, max_buf_size = vector[idx].iov_len; } - alloc_buf = GF_MALLOC (1 * (max_buf_size + align), gf_posix_mt_char); + alloc_buf = _page_aligned_alloc (max_buf_size, &buf); if (!alloc_buf) { op_ret = -errno; goto err; @@ -2471,9 +2329,6 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff, internal_off = startoff; for (idx = 0; idx < count; idx++) { - /* page aligned buffer */ - buf = ALIGN_BUF (alloc_buf, align); - memcpy (buf, vector[idx].iov_base, vector[idx].iov_len); /* not sure whether writev works on O_DIRECT'd fd */ @@ -2488,17 +2343,58 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff, } err: - if (alloc_buf) - GF_FREE (alloc_buf); + GF_FREE (alloc_buf); return op_ret; } +dict_t* +_fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append) +{ + dict_t *rsp_xdata = NULL; + int32_t ret = 0; + inode_t *inode = NULL; + + if (fd) + inode = fd->inode; + + if (!fd || !fd->inode || uuid_is_null (fd->inode->gfid)) { + gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid Args: " + "fd: %p inode: %p gfid:%s", fd, inode?inode:0, + inode?uuid_utoa(inode->gfid):"N/A"); + goto out; + } + + if (!xdata || !dict_get (xdata, GLUSTERFS_OPEN_FD_COUNT)) + goto out; + + rsp_xdata = dict_new(); + if (!rsp_xdata) + goto out; + + ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_OPEN_FD_COUNT, + fd->inode->fd_count); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set " + "dictionary value for %s", uuid_utoa (fd->inode->gfid), + GLUSTERFS_OPEN_FD_COUNT); + } + + ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND, + is_append); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set " + "dictionary value for %s", uuid_utoa (fd->inode->gfid), + GLUSTERFS_WRITE_IS_APPEND); + } +out: + return rsp_xdata; +} int32_t -posix_writev (call_frame_t *frame, xlator_t *this, - fd_t *fd, struct iovec *vector, int32_t count, off_t offset, - struct iobref *iobref) +posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, + uint32_t flags, struct iobref *iobref, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -2508,8 +2404,9 @@ posix_writev (call_frame_t *frame, xlator_t *this, struct iatt preop = {0,}; struct iatt postop = {0,}; int ret = -1; - - uint64_t tmp_pfd = 0; + dict_t *rsp_xdata = NULL; + int is_append = 0; + gf_boolean_t locked = _gf_false; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -2521,18 +2418,28 @@ posix_writev (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (priv, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { gf_log (this->name, GF_LOG_WARNING, "pfd is NULL from fd=%p", fd); op_errno = -ret; goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; _fd = pfd->fd; - op_ret = posix_fstat_with_gfid (this, _fd, &preop); + if (xdata && dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) { + /* The write_is_append check and write must happen + atomically. Else another write can overtake this + write after the check and get written earlier. + + So lock before preop-stat and unlock after write. + */ + locked = _gf_true; + LOCK(&fd->inode->lock); + } + + op_ret = posix_fdstat (this, _fd, &preop); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -2541,8 +2448,19 @@ posix_writev (call_frame_t *frame, xlator_t *this, goto out; } + if (locked) { + if (preop.ia_size == offset || (fd->flags & O_APPEND)) + is_append = 1; + } + op_ret = __posix_writev (_fd, vector, count, offset, (pfd->flags & O_DIRECT)); + + if (locked) { + UNLOCK (&fd->inode->lock); + locked = _gf_false; + } + if (op_ret < 0) { op_errno = -op_ret; op_ret = -1; @@ -2558,17 +2476,24 @@ posix_writev (call_frame_t *frame, xlator_t *this, UNLOCK (&priv->lock); if (op_ret >= 0) { + rsp_xdata = _fill_writev_xdata (fd, xdata, this, is_append); /* wiretv successful, we also need to get the stat of * the file we wrote to */ - if (pfd->flushwrites) { - /* NOTE: ignore the error, if one occurs at this - * point */ - fsync (_fd); + if (flags & (O_SYNC|O_DSYNC)) { + ret = fsync (_fd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "fsync() in writev on fd %d failed: %s", + _fd, strerror (errno)); + op_ret = -1; + op_errno = errno; + goto out; + } } - ret = posix_fstat_with_gfid (this, _fd, &postop); + ret = posix_fdstat (this, _fd, &postop); if (ret == -1) { op_ret = -1; op_errno = errno; @@ -2581,15 +2506,23 @@ posix_writev (call_frame_t *frame, xlator_t *this, out: - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &preop, &postop); + if (locked) { + UNLOCK (&fd->inode->lock); + locked = _gf_false; + } + + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &preop, &postop, + rsp_xdata); + if (rsp_xdata) + dict_unref (rsp_xdata); return 0; } int32_t posix_statfs (call_frame_t *frame, xlator_t *this, - loc_t *loc) + loc_t *loc, dict_t *xdata) { char * real_path = NULL; int32_t op_ret = -1; @@ -2602,7 +2535,7 @@ posix_statfs (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc, out); VALIDATE_OR_GOTO (this->private, out); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); priv = this->private; @@ -2628,46 +2561,43 @@ posix_statfs (call_frame_t *frame, xlator_t *this, op_ret = 0; out: - STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf); + STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf, NULL); return 0; } int32_t posix_flush (call_frame_t *frame, xlator_t *this, - fd_t *fd) + fd_t *fd, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; - struct posix_fd * pfd = NULL; int ret = -1; - uint64_t tmp_pfd = 0; + struct posix_fd *pfd = NULL; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; gf_log (this->name, GF_LOG_WARNING, "pfd is NULL on fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; op_ret = 0; out: - STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, NULL); return 0; } int32_t -posix_release (xlator_t *this, - fd_t *fd) +posix_release (xlator_t *this, fd_t *fd) { struct posix_private * priv = NULL; struct posix_fd * pfd = NULL; @@ -2679,7 +2609,7 @@ posix_release (xlator_t *this, priv = this->private; - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = fd_ctx_del (fd, this, &tmp_pfd); if (ret < 0) { gf_log (this->name, GF_LOG_WARNING, "pfd is NULL from fd=%p", fd); @@ -2712,18 +2642,45 @@ out: } +int +posix_batch_fsync (call_frame_t *frame, xlator_t *this, + fd_t *fd, int datasync, dict_t *xdata) +{ + call_stub_t *stub = NULL; + struct posix_private *priv = NULL; + + priv = this->private; + + stub = fop_fsync_stub (frame, default_fsync, fd, datasync, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0); + return 0; + } + + pthread_mutex_lock (&priv->fsync_mutex); + { + list_add_tail (&stub->list, &priv->fsyncs); + priv->fsync_queue_count++; + pthread_cond_signal (&priv->fsync_cond); + } + pthread_mutex_unlock (&priv->fsync_mutex); + + return 0; +} + + int32_t posix_fsync (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t datasync) + fd_t *fd, int32_t datasync, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; int _fd = -1; struct posix_fd * pfd = NULL; int ret = -1; - uint64_t tmp_pfd = 0; struct iatt preop = {0,}; struct iatt postop = {0,}; + struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; @@ -2739,18 +2696,23 @@ posix_fsync (call_frame_t *frame, xlator_t *this, goto out; #endif - ret = fd_ctx_get (fd, this, &tmp_pfd); + priv = this->private; + if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) { + posix_batch_fsync (frame, this, fd, datasync, xdata); + return 0; + } + + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; gf_log (this->name, GF_LOG_WARNING, "pfd not found in fd's ctx"); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; _fd = pfd->fd; - op_ret = posix_fstat_with_gfid (this, _fd, &preop); + op_ret = posix_fdstat (this, _fd, &preop); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_WARNING, @@ -2780,7 +2742,7 @@ posix_fsync (call_frame_t *frame, xlator_t *this, } } - op_ret = posix_fstat_with_gfid (this, _fd, &postop); + op_ret = posix_fdstat (this, _fd, &postop); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_WARNING, @@ -2794,152 +2756,34 @@ posix_fsync (call_frame_t *frame, xlator_t *this, out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, &preop, &postop); + STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, &preop, &postop, + NULL); return 0; } static int gf_posix_xattr_enotsup_log; - -int -set_file_contents (xlator_t *this, char *real_path, - data_pair_t *trav, int flags) -{ - char * key = NULL; - char real_filepath[ZR_PATH_MAX] = {0,}; - int32_t file_fd = -1; - int op_ret = 0; - int ret = -1; - - key = &(trav->key[15]); - sprintf (real_filepath, "%s/%s", real_path, key); - - if (flags & XATTR_REPLACE) { - /* if file exists, replace it - * else, error out */ - file_fd = open (real_filepath, O_TRUNC|O_WRONLY); - - if (file_fd == -1) { - goto create; - } - - if (trav->value->len) { - ret = write (file_fd, trav->value->data, - trav->value->len); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "write failed while doing setxattr " - "for key %s on path %s: %s", - key, real_filepath, strerror (errno)); - goto out; - } - - ret = close (file_fd); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "close failed on %s: %s", - real_filepath, strerror (errno)); - goto out; - } - } - - create: /* we know file doesn't exist, create it */ - - file_fd = open (real_filepath, O_CREAT|O_WRONLY, 0644); - - if (file_fd == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "failed to open file %s with O_CREAT: %s", - key, strerror (errno)); - goto out; - } - - ret = write (file_fd, trav->value->data, trav->value->len); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "write failed on %s while setxattr with " - "key %s: %s", - real_filepath, key, strerror (errno)); - goto out; - } - - ret = close (file_fd); - if (ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, - "close failed on %s while setxattr with " - "key %s: %s", - real_filepath, key, strerror (errno)); - goto out; - } - } - -out: - return op_ret; -} - -int -handle_pair (xlator_t *this, char *real_path, - data_pair_t *trav, int flags) +static int +_handle_setxattr_keyvalue_pair (dict_t *d, char *k, data_t *v, + void *tmp) { - int sys_ret = -1; - int ret = 0; + posix_xattr_filler_t *filler = NULL; - if (ZR_FILE_CONTENT_REQUEST(trav->key)) { - ret = set_file_contents (this, real_path, trav, flags); - } else { - sys_ret = sys_lsetxattr (real_path, trav->key, - trav->value->data, - trav->value->len, flags); - - if (sys_ret < 0) { - if (errno == ENOTSUP) { - GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, - this->name,GF_LOG_WARNING, - "Extended attributes not " - "supported"); - } else if (errno == ENOENT) { - gf_log (this->name, GF_LOG_ERROR, - "setxattr on %s failed: %s", real_path, - strerror (errno)); - } else { + filler = tmp; -#ifdef GF_DARWIN_HOST_OS - gf_log (this->name, - ((errno == EINVAL) ? - GF_LOG_DEBUG : GF_LOG_ERROR), - "%s: key:%s error:%s", - real_path, trav->key, - strerror (errno)); -#else /* ! DARWIN */ - gf_log (this->name, GF_LOG_ERROR, - "%s: key:%s error:%s", - real_path, trav->key, - strerror (errno)); -#endif /* DARWIN */ - } - - ret = -errno; - goto out; - } - } -out: - return ret; + return posix_handle_pair (filler->this, filler->real_path, k, v, + filler->flags); } int32_t posix_setxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *dict, int flags) + loc_t *loc, dict_t *dict, int flags, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char * real_path = NULL; - data_pair_t * trav = NULL; - int ret = -1; + + posix_xattr_filler_t filler = {0,}; DECLARE_OLD_FS_ID_VAR; SET_FS_ID (frame->root->uid, frame->root->gid); @@ -2949,97 +2793,72 @@ posix_setxattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc, out); VALIDATE_OR_GOTO (dict, out); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + op_ret = -1; dict_del (dict, GFID_XATTR_KEY); - trav = dict->members_list; - - while (trav) { - ret = handle_pair (this, real_path, trav, flags); - if (ret < 0) { - op_errno = -ret; - goto out; - } - trav = trav->next; - } - - op_ret = 0; + filler.real_path = real_path; + filler.this = this; + filler.flags = flags; + op_ret = dict_foreach (dict, _handle_setxattr_keyvalue_pair, + &filler); + if (op_ret < 0) + op_errno = -op_ret; out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL); return 0; } + int -get_file_contents (xlator_t *this, char *real_path, - const char *name, char **contents) +posix_xattr_get_real_filename (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *key, dict_t *dict, dict_t *xdata) { - char real_filepath[ZR_PATH_MAX] = {0,}; - char * key = NULL; - int32_t file_fd = -1; - struct iatt stbuf = {0,}; - int op_ret = 0; - int ret = -1; - - key = (char *) &(name[15]); - sprintf (real_filepath, "%s/%s", real_path, key); - - op_ret = posix_lstat_with_gfid (this, real_filepath, &stbuf); - if (op_ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, "lstat failed on %s: %s", - real_filepath, strerror (errno)); - goto out; - } - - file_fd = open (real_filepath, O_RDONLY); - - if (file_fd == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, "open failed on %s: %s", - real_filepath, strerror (errno)); - goto out; - } - - *contents = GF_CALLOC (stbuf.ia_size + 1, sizeof(char), - gf_posix_mt_char); - if (! *contents) { - op_ret = -errno; - goto out; - } - - ret = read (file_fd, *contents, stbuf.ia_size); - if (ret <= 0) { - op_ret = -1; - gf_log (this->name, GF_LOG_ERROR, "read on %s failed: %s", - real_filepath, strerror (errno)); - goto out; - } - - *contents[stbuf.ia_size] = '\0'; - - op_ret = close (file_fd); - file_fd = -1; - if (op_ret == -1) { - op_ret = -errno; - gf_log (this->name, GF_LOG_ERROR, "close on %s failed: %s", - real_filepath, strerror (errno)); - goto out; - } - -out: - if (op_ret < 0) { - if (*contents) - GF_FREE (*contents); - if (file_fd != -1) - close (file_fd); - } - - return op_ret; + char *real_path = NULL; + struct dirent *dirent = NULL; + DIR *fd = NULL; + const char *fname = NULL; + char *found = NULL; + int ret = -1; + int op_ret = -1; + + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + + fd = opendir (real_path); + if (!fd) + return -errno; + + fname = key + strlen (GF_XATTR_GET_REAL_FILENAME_KEY); + + while ((dirent = readdir (fd))) { + if (strcasecmp (dirent->d_name, fname) == 0) { + found = gf_strdup (dirent->d_name); + if (!found) { + closedir (fd); + return -ENOMEM; + } + break; + } + } + + closedir (fd); + + if (!found) + return -ENOENT; + + ret = dict_set_dynstr (dict, (char *)key, found); + if (ret) { + GF_FREE (found); + return -ENOMEM; + } + ret = strlen (found) + 1; + + return ret; } /** @@ -3049,22 +2868,25 @@ out: */ int32_t posix_getxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name) + loc_t *loc, const char *name, dict_t *xdata) { - struct posix_private *priv = NULL; - int32_t op_ret = -1; - int32_t op_errno = 0; - int32_t list_offset = 0; - size_t size = 0; - size_t remaining_size = 0; - char key[1024] = {0,}; - char host_buf[1024] = {0,}; - char * value = NULL; - char * list = NULL; - char * real_path = NULL; - dict_t * dict = NULL; - char * file_contents = NULL; - int ret = -1; + struct posix_private *priv = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + int32_t list_offset = 0; + ssize_t size = 0; + size_t remaining_size = 0; + char key[4096] = {0,}; + char host_buf[1024] = {0,}; + char *value = NULL; + char *list = NULL; + char *real_path = NULL; + dict_t *dict = NULL; + char *file_contents = NULL; + int ret = -1; + char *path = NULL; + char *rpath = NULL; + char *dyn_rpath = NULL; DECLARE_OLD_FS_ID_VAR; @@ -3073,14 +2895,15 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (loc, out); SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + op_ret = -1; priv = this->private; if (loc->inode && IA_ISDIR(loc->inode->ia_type) && name && ZR_FILE_CONTENT_REQUEST(name)) { - ret = get_file_contents (this, real_path, name, - &file_contents); + ret = posix_get_file_contents (this, loc->gfid, &name[15], + &file_contents); if (ret < 0) { op_errno = -ret; gf_log (this->name, GF_LOG_ERROR, @@ -3090,12 +2913,31 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, } } - /* Get the total size */ - dict = get_new_dict (); + dict = dict_new (); if (!dict) { + op_errno = ENOMEM; goto out; } + if (loc->inode && name && + (strncmp (name, GF_XATTR_GET_REAL_FILENAME_KEY, + strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)) { + ret = posix_xattr_get_real_filename (frame, this, loc, + name, dict, xdata); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + gf_log (this->name, (op_errno == ENOENT) ? + GF_LOG_DEBUG : GF_LOG_WARNING, + "Failed to get real filename (%s, %s): %s", + loc->path, name, strerror (op_errno)); + goto out; + } + + size = ret; + goto done; + } + if (loc->inode && name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) { if (!list_empty (&loc->inode->fd_list)) { ret = dict_set_uint32 (dict, (char *)name, 1); @@ -3112,20 +2954,135 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, } goto done; } - if (loc->inode && IA_ISREG (loc->inode->ia_type) && name && + if (loc->inode && name && (strcmp (name, GF_XATTR_PATHINFO_KEY) == 0)) { - snprintf (host_buf, 1024, "%s:%s", priv->hostname, - real_path); - ret = dict_set_str (dict, GF_XATTR_PATHINFO_KEY, - host_buf); + if (LOC_HAS_ABSPATH (loc)) + MAKE_REAL_PATH (rpath, this, loc->path); + else + rpath = real_path; + + (void) snprintf (host_buf, 1024, + "<POSIX(%s):%s:%s>", priv->base_path, + ((priv->node_uuid_pathinfo + && !uuid_is_null(priv->glusterd_uuid)) + ? uuid_utoa (priv->glusterd_uuid) + : priv->hostname), + rpath); + + dyn_rpath = gf_strdup (host_buf); + if (!dyn_rpath) { + ret = -1; + goto done; + } + size = strlen (dyn_rpath) + 1; + ret = dict_set_dynstr (dict, GF_XATTR_PATHINFO_KEY, + dyn_rpath); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "could not set value (%s) in dictionary", + dyn_rpath); + GF_FREE (dyn_rpath); + } + + goto done; + } + + if (loc->inode && name && + (strcmp (name, GF_XATTR_NODE_UUID_KEY) == 0) + && !uuid_is_null (priv->glusterd_uuid)) { + (void) snprintf (host_buf, 1024, "%s", + uuid_utoa (priv->glusterd_uuid)); + + dyn_rpath = gf_strdup (host_buf); + if (!dyn_rpath) { + ret = -1; + goto done; + } + + size = strlen (dyn_rpath) + 1; + ret = dict_set_dynstr (dict, GF_XATTR_NODE_UUID_KEY, + dyn_rpath); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "could not set value (%s) in dictionary", + dyn_rpath); + GF_FREE (dyn_rpath); + } + goto done; + } + + if (loc->inode && name && + (strcmp (name, GFID_TO_PATH_KEY) == 0)) { + ret = inode_path (loc->inode, NULL, &path); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "%s: could not get " + "inode path", uuid_utoa (loc->inode->gfid)); + goto done; + } + + ret = dict_set_dynstr (dict, GFID_TO_PATH_KEY, path); if (ret < 0) { gf_log (this->name, GF_LOG_WARNING, "could not set value (%s) in dictionary", host_buf); + GF_FREE (path); } goto done; } + if (name) { + strcpy (key, name); + + size = sys_lgetxattr (real_path, key, NULL, 0); + if (size <= 0) { + op_errno = errno; + if ((op_errno == ENOTSUP) || (op_errno == ENOSYS)) { + GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, + this->name, GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting" + " brick with 'user_xattr' " + "flag)"); + } else if (op_errno == ENOATTR || + op_errno == ENODATA) { + gf_log (this->name, GF_LOG_DEBUG, + "No such attribute:%s for file %s", + key, real_path); + } else { + gf_log (this->name, GF_LOG_ERROR, + "getxattr failed on %s: %s (%s)", + real_path, key, strerror (op_errno)); + } + + goto done; + } + value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); + if (!value) { + op_ret = -1; + goto out; + } + size = sys_lgetxattr (real_path, key, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "getxattr failed on " + "%s: key = %s (%s)", real_path, key, + strerror (op_errno)); + GF_FREE (value); + goto out; + } + value [size] = '\0'; + op_ret = dict_set_dynptr (dict, key, value, size); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "dict set operation " + "on %s for the key %s failed.", real_path, key); + GF_FREE (value); + goto out; + } + + goto done; + } + size = sys_llistxattr (real_path, NULL, 0); if (size == -1) { op_errno = errno; @@ -3133,7 +3090,9 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, this->name, GF_LOG_WARNING, "Extended attributes not " - "supported."); + "supported (try remounting" + " brick with 'user_xattr' " + "flag)"); } else { gf_log (this->name, GF_LOG_ERROR, @@ -3161,25 +3120,42 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, break; strcpy (key, list + list_offset); - op_ret = sys_lgetxattr (real_path, key, NULL, 0); - if (op_ret == -1) + size = sys_lgetxattr (real_path, key, NULL, 0); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "getxattr failed on " + "%s: key = %s (%s)", real_path, key, + strerror (op_errno)); break; + } - value = GF_CALLOC (op_ret + 1, sizeof(char), + value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); if (!value) { op_errno = errno; goto out; } - op_ret = sys_lgetxattr (real_path, key, value, op_ret); - if (op_ret == -1) { + size = sys_lgetxattr (real_path, key, value, size); + if (size == -1) { + op_ret = -1; op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "getxattr failed on " + "%s: key = %s (%s)", real_path, key, + strerror (op_errno)); + GF_FREE (value); break; } - value [op_ret] = '\0'; - dict_set (dict, key, data_from_dynptr (value, op_ret)); + value [size] = '\0'; + op_ret = dict_set_dynptr (dict, key, value, size); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "dict set operation " + "on %s for the key %s failed.", real_path, key); + GF_FREE (value); + goto out; + } remaining_size -= strlen (key) + 1; list_offset += strlen (key) + 1; @@ -3191,13 +3167,12 @@ done: if (dict) { dict_del (dict, GFID_XATTR_KEY); - dict_ref (dict); } out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict); + STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, NULL); if (dict) dict_unref (dict); @@ -3208,17 +3183,16 @@ out: int32_t posix_fgetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *name) + fd_t *fd, const char *name, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = ENOENT; - uint64_t tmp_pfd = 0; struct posix_fd * pfd = NULL; int _fd = -1; int32_t list_offset = 0; - size_t size = 0; + ssize_t size = 0; size_t remaining_size = 0; - char key[1024] = {0,}; + char key[4096] = {0,}; char * value = NULL; char * list = NULL; dict_t * dict = NULL; @@ -3232,14 +3206,13 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this, SET_FS_ID (frame->root->uid, frame->root->gid); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; gf_log (this->name, GF_LOG_WARNING, "pfd is NULL from fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; _fd = pfd->fd; @@ -3258,6 +3231,43 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this, goto done; } + if (name) { + strcpy (key, name); + + size = sys_fgetxattr (_fd, key, NULL, 0); + if (size <= 0) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on " + "key %s (%s)", key, strerror (op_errno)); + goto done; + } + + value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); + if (!value) { + op_ret = -1; + goto out; + } + size = sys_fgetxattr (_fd, key, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on " + "fd %p for the key %s (%s)", fd, key, + strerror (op_errno)); + GF_FREE (value); + goto out; + } + value [size] = '\0'; + op_ret = dict_set_dynptr (dict, key, value, size); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "dict set operation " + "on key %s failed", key); + GF_FREE (value); + goto out; + } + goto done; + } + size = sys_flistxattr (_fd, NULL, 0); if (size == -1) { op_errno = errno; @@ -3265,7 +3275,8 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this, GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, this->name, GF_LOG_WARNING, "Extended attributes not " - "supported."); + "supported (try remounting " + "brick with 'user_xattr' flag)"); } else { gf_log (this->name, GF_LOG_ERROR, @@ -3293,23 +3304,43 @@ posix_fgetxattr (call_frame_t *frame, xlator_t *this, break; strcpy (key, list + list_offset); - op_ret = sys_fgetxattr (_fd, key, NULL, 0); - if (op_ret == -1) + size = sys_fgetxattr (_fd, key, NULL, 0); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on " + "fd %p for the key %s (%s)", fd, key, + strerror (op_errno)); break; + } - value = GF_CALLOC (op_ret + 1, sizeof(char), + value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); if (!value) { + op_ret = -1; op_errno = errno; goto out; } - op_ret = sys_fgetxattr (_fd, key, value, op_ret); - if (op_ret == -1) + size = sys_fgetxattr (_fd, key, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, "fgetxattr failed on " + "the fd %p for the key %s (%s)", fd, key, + strerror (op_errno)); + GF_FREE (value); break; + } - value [op_ret] = '\0'; - dict_set (dict, key, data_from_dynptr (value, op_ret)); + value [size] = '\0'; + op_ret = dict_set_dynptr (dict, key, value, size); + if (op_ret) { + gf_log (this->name, GF_LOG_ERROR, "dict set operation " + "failed on key %s", key); + GF_FREE (value); + goto out; + } remaining_size -= strlen (key) + 1; list_offset += strlen (key) + 1; @@ -3326,7 +3357,7 @@ done: out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict); + STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL); if (dict) dict_unref (dict); @@ -3334,64 +3365,29 @@ out: return 0; } - -int -fhandle_pair (xlator_t *this, int fd, - data_pair_t *trav, int flags) +static int +_handle_fsetxattr_keyvalue_pair (dict_t *d, char *k, data_t *v, + void *tmp) { - int sys_ret = -1; - int ret = 0; - - sys_ret = sys_fsetxattr (fd, trav->key, trav->value->data, - trav->value->len, flags); - - if (sys_ret < 0) { - if (errno == ENOTSUP) { - GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, - this->name,GF_LOG_WARNING, - "Extended attributes not " - "supported"); - } else if (errno == ENOENT) { - gf_log (this->name, GF_LOG_ERROR, - "fsetxattr on fd=%d failed: %s", fd, - strerror (errno)); - } else { + posix_xattr_filler_t *filler = NULL; -#ifdef GF_DARWIN_HOST_OS - gf_log (this->name, - ((errno == EINVAL) ? - GF_LOG_DEBUG : GF_LOG_ERROR), - "fd=%d: key:%s error:%s", - fd, trav->key, - strerror (errno)); -#else /* ! DARWIN */ - gf_log (this->name, GF_LOG_ERROR, - "fd=%d: key:%s error:%s", - fd, trav->key, - strerror (errno)); -#endif /* DARWIN */ - } - - ret = -errno; - goto out; - } + filler = tmp; -out: - return ret; + return posix_fhandle_pair (filler->this, filler->fd, k, v, + filler->flags); } - int32_t posix_fsetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *dict, int flags) + fd_t *fd, dict_t *dict, int flags, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; struct posix_fd * pfd = NULL; - uint64_t tmp_pfd = 0; int _fd = -1; - data_pair_t * trav = NULL; - int ret = -1; + int ret = -1; + + posix_xattr_filler_t filler = {0,}; DECLARE_OLD_FS_ID_VAR; SET_FS_ID (frame->root->uid, frame->root->gid); @@ -3401,67 +3397,152 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (fd, out); VALIDATE_OR_GOTO (dict, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; gf_log (this->name, GF_LOG_WARNING, "pfd is NULL from fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; _fd = pfd->fd; dict_del (dict, GFID_XATTR_KEY); - trav = dict->members_list; - - while (trav) { - ret = fhandle_pair (this, _fd, trav, flags); - if (ret < 0) { - op_errno = -ret; - goto out; - } - trav = trav->next; - } - - op_ret = 0; + filler.fd = _fd; + filler.this = this; + filler.flags = flags; + op_ret = dict_foreach (dict, _handle_fsetxattr_keyvalue_pair, + &filler); + if (op_ret < 0) + op_errno = -op_ret; out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, NULL); return 0; } +int +_posix_remove_xattr (dict_t *dict, char *key, data_t *value, void *data) +{ + int32_t op_ret = 0; + xlator_t *this = NULL; + posix_xattr_filler_t *filler = NULL; + + filler = (posix_xattr_filler_t *) data; + this = filler->this; + + op_ret = sys_lremovexattr (filler->real_path, key); + if (op_ret == -1) { + filler->op_errno = errno; + if (errno != ENOATTR && errno != EPERM) + gf_log (this->name, GF_LOG_ERROR, + "removexattr failed on %s (for %s): %s", + filler->real_path, key, strerror (errno)); + } + + return op_ret; +} + int32_t posix_removexattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name) + loc_t *loc, const char *name, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; char * real_path = NULL; + posix_xattr_filler_t filler = {0,}; DECLARE_OLD_FS_ID_VAR; + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + if (!strcmp (GFID_XATTR_KEY, name)) { gf_log (this->name, GF_LOG_WARNING, "Remove xattr called" - " on gfid for file %s", loc->path); + " on gfid for file %s", real_path); + op_ret = -1; goto out; } - MAKE_REAL_PATH (real_path, this, loc->path); SET_FS_ID (frame->root->uid, frame->root->gid); + /** + * sending an empty key name with xdata containing the + * list of key(s) to be removed implies "bulk remove request" + * for removexattr. + */ + if (name && (strcmp (name, "") == 0) && xdata) { + filler.real_path = real_path; + filler.this = this; + op_ret = dict_foreach (xdata, _posix_remove_xattr, &filler); + if (op_ret) { + op_errno = filler.op_errno; + } + + goto out; + } + op_ret = sys_lremovexattr (real_path, name); if (op_ret == -1) { op_errno = errno; if (op_errno != ENOATTR && op_errno != EPERM) gf_log (this->name, GF_LOG_ERROR, - "removexattr on %s: %s", loc->path, - strerror (op_errno)); + "removexattr on %s (for %s): %s", real_path, + name, strerror (op_errno)); + goto out; + } + + op_ret = 0; + +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, NULL); + return 0; +} + +int32_t +posix_fremovexattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *name, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + struct posix_fd * pfd = NULL; + int _fd = -1; + int ret = -1; + + DECLARE_OLD_FS_ID_VAR; + + if (!strcmp (GFID_XATTR_KEY, name)) { + gf_log (this->name, GF_LOG_WARNING, "Remove xattr called" + " on gfid for file"); + goto out; + } + + ret = posix_fd_ctx_get (fd, this, &pfd); + if (ret < 0) { + op_errno = -ret; + gf_log (this->name, GF_LOG_WARNING, + "pfd is NULL from fd=%p", fd); + goto out; + } + _fd = pfd->fd; + + + + SET_FS_ID (frame->root->uid, frame->root->gid); + + op_ret = sys_fremovexattr (_fd, name); + if (op_ret == -1) { + op_errno = errno; + if (op_errno != ENOATTR && op_errno != EPERM) + gf_log (this->name, GF_LOG_ERROR, + "fremovexattr (for %s): %s", + name, strerror (op_errno)); goto out; } @@ -3470,38 +3551,36 @@ posix_removexattr (call_frame_t *frame, xlator_t *this, out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, NULL); return 0; } int32_t posix_fsyncdir (call_frame_t *frame, xlator_t *this, - fd_t *fd, int datasync) + fd_t *fd, int datasync, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; - struct posix_fd * pfd = NULL; int ret = -1; - uint64_t tmp_pfd = 0; + struct posix_fd *pfd = NULL; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; gf_log (this->name, GF_LOG_WARNING, "pfd is NULL, fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; op_ret = 0; out: - STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, NULL); return 0; } @@ -3528,9 +3607,31 @@ posix_print_xattr (dict_t *this, static void __add_array (int32_t *dest, int32_t *src, int count) { + int i = 0; + int32_t destval = 0; + for (i = 0; i < count; i++) { + destval = ntoh32 (dest[i]); + if (destval == 0xffffffff) + continue; + dest[i] = hton32 (destval + ntoh32 (src[i])); + } +} + +static void +__or_array (int32_t *dest, int32_t *src, int count) +{ int i = 0; for (i = 0; i < count; i++) { - dest[i] = hton32 (ntoh32 (dest[i]) + ntoh32 (src[i])); + dest[i] = hton32 (ntoh32 (dest[i]) | ntoh32 (src[i])); + } +} + +static void +__and_array (int32_t *dest, int32_t *src, int count) +{ + int i = 0; + for (i = 0; i < count; i++) { + dest[i] = hton32 (ntoh32 (dest[i]) & ntoh32 (src[i])); } } @@ -3543,6 +3644,159 @@ __add_long_array (int64_t *dest, int64_t *src, int count) } } +static int +_posix_handle_xattr_keyvalue_pair (dict_t *d, char *k, data_t *v, + void *tmp) +{ + int size = 0; + int count = 0; + int op_ret = 0; + int op_errno = 0; + gf_xattrop_flags_t optype = 0; + char *array = NULL; + inode_t *inode = NULL; + xlator_t *this = NULL; + posix_xattr_filler_t *filler = NULL; + + filler = tmp; + + optype = (gf_xattrop_flags_t)(filler->flags); + this = filler->this; + inode = filler->inode; + + count = v->len; + array = GF_CALLOC (count, sizeof (char), gf_posix_mt_char); + + LOCK (&inode->lock); + { + if (filler->real_path) { + size = sys_lgetxattr (filler->real_path, k, + (char *)array, v->len); + } else { + size = sys_fgetxattr (filler->fd, k, (char *)array, + v->len); + } + + op_errno = errno; + if ((size == -1) && (op_errno != ENODATA) && + (op_errno != ENOATTR)) { + if (op_errno == ENOTSUP) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, + this->name, GF_LOG_WARNING, + "Extended attributes not " + "supported by filesystem"); + } else if (op_errno != ENOENT || + !posix_special_xattr (marker_xattrs, + k)) { + if (filler->real_path) + gf_log (this->name, GF_LOG_ERROR, + "getxattr failed on %s while doing " + "xattrop: Key:%s (%s)", + filler->real_path, + k, strerror (op_errno)); + else + gf_log (this->name, GF_LOG_ERROR, + "fgetxattr failed on fd=%d while doing " + "xattrop: Key:%s (%s)", + filler->fd, + k, strerror (op_errno)); + } + + op_ret = -1; + goto unlock; + } + + switch (optype) { + + case GF_XATTROP_ADD_ARRAY: + __add_array ((int32_t *) array, (int32_t *) v->data, + v->len / 4); + break; + + case GF_XATTROP_ADD_ARRAY64: + __add_long_array ((int64_t *) array, (int64_t *) v->data, + v->len / 8); + break; + + case GF_XATTROP_OR_ARRAY: + __or_array ((int32_t *) array, + (int32_t *) v->data, + v->len / 4); + break; + + case GF_XATTROP_AND_ARRAY: + __and_array ((int32_t *) array, + (int32_t *) v->data, + v->len / 4); + break; + + default: + gf_log (this->name, GF_LOG_ERROR, + "Unknown xattrop type (%d) on %s. Please send " + "a bug report to gluster-devel@nongnu.org", + optype, filler->real_path); + op_ret = -1; + op_errno = EINVAL; + goto unlock; + } + + if (filler->real_path) { + size = sys_lsetxattr (filler->real_path, k, array, + v->len, 0); + } else { + size = sys_fsetxattr (filler->fd, k, (char *)array, + v->len, 0); + } + } +unlock: + UNLOCK (&inode->lock); + + if (op_ret == -1) + goto out; + + op_errno = errno; + if (size == -1) { + if (filler->real_path) + gf_log (this->name, GF_LOG_ERROR, + "setxattr failed on %s while doing xattrop: " + "key=%s (%s)", filler->real_path, + k, strerror (op_errno)); + else + gf_log (this->name, GF_LOG_ERROR, + "fsetxattr failed on fd=%d while doing xattrop: " + "key=%s (%s)", filler->fd, + k, strerror (op_errno)); + + op_ret = -1; + goto out; + } else { + size = dict_set_bin (d, k, array, v->len); + + if (size != 0) { + if (filler->real_path) + gf_log (this->name, GF_LOG_DEBUG, + "dict_set_bin failed (path=%s): " + "key=%s (%s)", filler->real_path, + k, strerror (-size)); + else + gf_log (this->name, GF_LOG_DEBUG, + "dict_set_bin failed (fd=%d): " + "key=%s (%s)", filler->fd, + k, strerror (-size)); + + op_ret = -1; + op_errno = EINVAL; + goto out; + } + array = NULL; + } + + array = NULL; + +out: + return op_ret; +} + /** * xattrop - xattr operations - for internal use by GlusterFS * @optype: ADD_ARRAY: @@ -3551,190 +3805,61 @@ __add_long_array (int64_t *dest, int64_t *src, int count) */ int -do_xattrop (call_frame_t *frame, xlator_t *this, - loc_t *loc, fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr) +do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr) { - char *real_path = NULL; - char *array = NULL; - int size = 0; - int count = 0; - - int op_ret = 0; - int op_errno = 0; - - int ret = 0; - int _fd = -1; - uint64_t tmp_pfd = 0; - struct posix_fd *pfd = NULL; - - data_pair_t *trav = NULL; - - char * path = NULL; - inode_t * inode = NULL; + int op_ret = 0; + int op_errno = 0; + int _fd = -1; + char *real_path = NULL; + struct posix_fd *pfd = NULL; + inode_t *inode = NULL; + posix_xattr_filler_t filler = {0,}; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (xattr, out); VALIDATE_OR_GOTO (this, out); - trav = xattr->members_list; - if (fd) { - ret = fd_ctx_get (fd, this, &tmp_pfd); - if (ret < 0) { + op_ret = posix_fd_ctx_get (fd, this, &pfd); + if (op_ret < 0) { gf_log (this->name, GF_LOG_WARNING, "failed to get pfd from fd=%p", fd); - op_ret = -1; op_errno = EBADFD; goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; _fd = pfd->fd; } - if (loc && loc->path) - MAKE_REAL_PATH (real_path, this, loc->path); + if (loc && !uuid_is_null (loc->gfid)) + MAKE_INODE_HANDLE (real_path, this, loc, NULL); - if (loc) { - path = gf_strdup (loc->path); + if (real_path) { inode = loc->inode; } else if (fd) { inode = fd->inode; } - while (trav && inode) { - count = trav->value->len; - array = GF_CALLOC (count, sizeof (char), - gf_posix_mt_char); - - LOCK (&inode->lock); - { - if (loc) { - size = sys_lgetxattr (real_path, trav->key, (char *)array, - trav->value->len); - } else { - size = sys_fgetxattr (_fd, trav->key, (char *)array, - trav->value->len); - } - - op_errno = errno; - if ((size == -1) && (op_errno != ENODATA) && - (op_errno != ENOATTR)) { - if (op_errno == ENOTSUP) { - GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, - this->name,GF_LOG_WARNING, - "Extended attributes not " - "supported by filesystem"); - } else { - if (loc) - gf_log (this->name, GF_LOG_ERROR, - "getxattr failed on %s while doing " - "xattrop: %s", path, - strerror (op_errno)); - else - gf_log (this->name, GF_LOG_ERROR, - "fgetxattr failed on fd=%d while doing " - "xattrop: %s", _fd, - strerror (op_errno)); - } - - op_ret = -1; - goto unlock; - } - - switch (optype) { - - case GF_XATTROP_ADD_ARRAY: - __add_array ((int32_t *) array, (int32_t *) trav->value->data, - trav->value->len / 4); - break; - - case GF_XATTROP_ADD_ARRAY64: - __add_long_array ((int64_t *) array, (int64_t *) trav->value->data, - trav->value->len / 8); - break; - - default: - gf_log (this->name, GF_LOG_ERROR, - "Unknown xattrop type (%d) on %s. Please send " - "a bug report to gluster-devel@nongnu.org", - optype, path); - op_ret = -1; - op_errno = EINVAL; - goto unlock; - } - - if (loc) { - size = sys_lsetxattr (real_path, trav->key, array, - trav->value->len, 0); - } else { - size = sys_fsetxattr (_fd, trav->key, (char *)array, - trav->value->len, 0); - } - } - unlock: - UNLOCK (&inode->lock); - - if (op_ret == -1) - goto out; - - op_errno = errno; - if (size == -1) { - if (loc) - gf_log (this->name, GF_LOG_ERROR, - "setxattr failed on %s while doing xattrop: " - "key=%s (%s)", path, - trav->key, strerror (op_errno)); - else - gf_log (this->name, GF_LOG_ERROR, - "fsetxattr failed on fd=%d while doing xattrop: " - "key=%s (%s)", _fd, - trav->key, strerror (op_errno)); - - op_ret = -1; - goto out; - } else { - size = dict_set_bin (xattr, trav->key, array, - trav->value->len); - - if (size != 0) { - if (loc) - gf_log (this->name, GF_LOG_DEBUG, - "dict_set_bin failed (path=%s): " - "key=%s (%s)", path, - trav->key, strerror (-size)); - else - gf_log (this->name, GF_LOG_DEBUG, - "dict_set_bin failed (fd=%d): " - "key=%s (%s)", _fd, - trav->key, strerror (-size)); - - op_ret = -1; - op_errno = EINVAL; - goto out; - } - array = NULL; - } + filler.this = this; + filler.fd = _fd; + filler.real_path = real_path; + filler.flags = (int)optype; + filler.inode = inode; - array = NULL; - trav = trav->next; - } + op_ret = dict_foreach (xattr, _posix_handle_xattr_keyvalue_pair, + &filler); out: - if (array) - GF_FREE (array); - if (path) - GF_FREE (path); - - STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr); + STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr, NULL); return 0; } int posix_xattrop (call_frame_t *frame, xlator_t *this, - loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr) + loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { do_xattrop (frame, this, loc, NULL, optype, xattr); return 0; @@ -3743,7 +3868,7 @@ posix_xattrop (call_frame_t *frame, xlator_t *this, int posix_fxattrop (call_frame_t *frame, xlator_t *this, - fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr) + fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { do_xattrop (frame, this, NULL, fd, optype, xattr); return 0; @@ -3752,7 +3877,7 @@ posix_fxattrop (call_frame_t *frame, xlator_t *this, int posix_access (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t mask) + loc_t *loc, int32_t mask, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -3765,13 +3890,13 @@ posix_access (call_frame_t *frame, xlator_t *this, VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (loc, out); - MAKE_REAL_PATH (real_path, this, loc->path); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); op_ret = access (real_path, mask & 07); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "access failed on %s: %s", - loc->path, strerror (op_errno)); + real_path, strerror (op_errno)); goto out; } op_ret = 0; @@ -3779,14 +3904,14 @@ posix_access (call_frame_t *frame, xlator_t *this, out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (access, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, NULL); return 0; } int32_t posix_ftruncate (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset) + fd_t *fd, off_t offset, dict_t *xdata) { int32_t op_ret = -1; int32_t op_errno = 0; @@ -3795,7 +3920,6 @@ posix_ftruncate (call_frame_t *frame, xlator_t *this, struct iatt postop = {0,}; struct posix_fd *pfd = NULL; int ret = -1; - uint64_t tmp_pfd = 0; struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; @@ -3808,18 +3932,17 @@ posix_ftruncate (call_frame_t *frame, xlator_t *this, priv = this->private; VALIDATE_OR_GOTO (priv, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { gf_log (this->name, GF_LOG_WARNING, "pfd is NULL, fd=%p", fd); op_errno = -ret; goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; _fd = pfd->fd; - op_ret = posix_fstat_with_gfid (this, _fd, &preop); + op_ret = posix_fdstat (this, _fd, &preop); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -3833,12 +3956,12 @@ posix_ftruncate (call_frame_t *frame, xlator_t *this, if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, - "ftruncate failed on fd=%p: %s", - fd, strerror (errno)); + "ftruncate failed on fd=%p (%"PRId64": %s", + fd, offset, strerror (errno)); goto out; } - op_ret = posix_fstat_with_gfid (this, _fd, &postop); + op_ret = posix_fdstat (this, _fd, &postop); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, @@ -3852,7 +3975,8 @@ posix_ftruncate (call_frame_t *frame, xlator_t *this, out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, &preop, &postop); + STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, &preop, + &postop, NULL); return 0; } @@ -3860,14 +3984,13 @@ out: int32_t posix_fstat (call_frame_t *frame, xlator_t *this, - fd_t *fd) + fd_t *fd, dict_t *xdata) { int _fd = -1; int32_t op_ret = -1; int32_t op_errno = 0; struct iatt buf = {0,}; struct posix_fd *pfd = NULL; - uint64_t tmp_pfd = 0; int ret = -1; struct posix_private *priv = NULL; @@ -3881,18 +4004,17 @@ posix_fstat (call_frame_t *frame, xlator_t *this, priv = this->private; VALIDATE_OR_GOTO (priv, out); - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { gf_log (this->name, GF_LOG_WARNING, "pfd is NULL, fd=%p", fd); op_errno = -ret; goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; _fd = pfd->fd; - op_ret = posix_fstat_with_gfid (this, _fd, &buf); + op_ret = posix_fdstat (this, _fd, &buf); if (op_ret == -1) { op_errno = errno; gf_log (this->name, GF_LOG_ERROR, "fstat failed on fd=%p: %s", @@ -3905,7 +4027,7 @@ posix_fstat (call_frame_t *frame, xlator_t *this, out: SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, &buf); + STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, &buf, NULL); return 0; } @@ -3913,7 +4035,7 @@ static int gf_posix_lk_log; int32_t posix_lk (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t cmd, struct gf_flock *lock) + fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata) { struct gf_flock nullock = {0, }; @@ -3922,33 +4044,35 @@ posix_lk (call_frame_t *frame, xlator_t *this, "not loaded. You need to use it for proper " "functioning of your application."); - STACK_UNWIND_STRICT (lk, frame, -1, ENOSYS, &nullock); + STACK_UNWIND_STRICT (lk, frame, -1, ENOSYS, &nullock, NULL); return 0; } int32_t posix_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock) + const char *volume, loc_t *loc, int32_t cmd, + struct gf_flock *lock, dict_t *xdata) { GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, "\"features/locks\" translator is " "not loaded. You need to use it for proper " "functioning of your application."); - STACK_UNWIND_STRICT (inodelk, frame, -1, ENOSYS); + STACK_UNWIND_STRICT (inodelk, frame, -1, ENOSYS, NULL); return 0; } int32_t posix_finodelk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock) + const char *volume, fd_t *fd, int32_t cmd, + struct gf_flock *lock, dict_t *xdata) { GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, "\"features/locks\" translator is " "not loaded. You need to use it for proper " "functioning of your application."); - STACK_UNWIND_STRICT (finodelk, frame, -1, ENOSYS); + STACK_UNWIND_STRICT (finodelk, frame, -1, ENOSYS, NULL); return 0; } @@ -3956,111 +4080,57 @@ posix_finodelk (call_frame_t *frame, xlator_t *this, int32_t posix_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type) + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, "\"features/locks\" translator is " "not loaded. You need to use it for proper " "functioning of your application."); - STACK_UNWIND_STRICT (entrylk, frame, -1, ENOSYS); + STACK_UNWIND_STRICT (entrylk, frame, -1, ENOSYS, NULL); return 0; } int32_t posix_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, const char *basename, - entrylk_cmd cmd, entrylk_type type) + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, "\"features/locks\" translator is " "not loaded. You need to use it for proper " "functioning of your application."); - STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOSYS); + STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOSYS, NULL); return 0; } -int32_t -posix_do_readdir (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off, int whichop) +int +posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, + gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs) { - uint64_t tmp_pfd = 0; - struct posix_fd *pfd = NULL; - DIR *dir = NULL; - int ret = -1; - size_t filled = 0; - int count = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - gf_dirent_t *this_entry = NULL; - gf_dirent_t entries; - struct dirent *entry = NULL; - off_t in_case = -1; + off_t in_case = -1; + size_t filled = 0; + int count = 0; + char entrybuf[sizeof(struct dirent) + 256 + 8]; + struct dirent *entry = NULL; int32_t this_size = -1; - char *real_path = NULL; - int real_path_len = -1; - char *entry_path = NULL; - int entry_path_len = -1; - struct posix_private *priv = NULL; - struct iatt stbuf = {0, }; - char base_path[PATH_MAX] = {0,}; - gf_dirent_t *tmp_entry = NULL; - struct stat statbuf = {0, }; - char hidden_path[PATH_MAX] = {0, }; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - INIT_LIST_HEAD (&entries.list); - - priv = this->private; - - ret = fd_ctx_get (fd, this, &tmp_pfd); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "pfd is NULL, fd=%p", fd); - op_errno = -ret; - goto out; - } - pfd = (struct posix_fd *)(long)tmp_pfd; - if (!pfd->path) { - op_errno = EBADFD; - gf_log (this->name, GF_LOG_WARNING, - "pfd does not have path set (possibly file " - "fd, fd=%p)", fd); - goto out; - } - - real_path = pfd->path; - real_path_len = strlen (real_path); - - entry_path_len = real_path_len + NAME_MAX; - entry_path = alloca (entry_path_len); - - strncpy(base_path, POSIX_BASE_PATH(this), sizeof(base_path)); - base_path[strlen(base_path)] = '/'; + gf_dirent_t *this_entry = NULL; + uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; + struct stat stbuf = {0,}; + char *hpath = NULL; + int len = 0; + int ret = 0; - if (!entry_path) { - op_errno = errno; - goto out; + if (skip_dirs) { + len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0); + hpath = alloca (len + 256); /* NAME_MAX */ + posix_handle_path (this, fd->inode->gfid, NULL, hpath, len); + len = strlen (hpath); + hpath[len] = '/'; } - strncpy (entry_path, real_path, entry_path_len); - entry_path[real_path_len] = '/'; - - dir = pfd->dir; - - if (!dir) { - gf_log (this->name, GF_LOG_WARNING, - "dir is NULL for fd=%p", fd); - op_errno = EINVAL; - goto out; - } - - if (!off) { rewinddir (dir); } else { @@ -4071,40 +4141,56 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this, in_case = telldir (dir); if (in_case == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_ERROR, + gf_log (THIS->name, GF_LOG_ERROR, "telldir failed on dir=%p: %s", dir, strerror (errno)); goto out; } errno = 0; - entry = readdir (dir); + entry = NULL; + readdir_r (dir, (struct dirent *)entrybuf, &entry); if (!entry) { if (errno == EBADF) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, + gf_log (THIS->name, GF_LOG_WARNING, "readdir failed on dir=%p: %s", - dir, strerror (op_errno)); + dir, strerror (errno)); goto out; } break; } - if ((!strcmp(real_path, base_path)) - && (!strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR))) +#ifdef __NetBSD__ + /* + * NetBSD with UFS1 backend uses backing files for + * extended attributes. They can be found in a + * .attribute file located at the root of the filesystem + * We hide it to glusterfs clients, since chaos will occur + * when the cluster/dht xlator decides to distribute + * exended attribute backing file accross storage servers. + */ + if ((uuid_compare (fd->inode->gfid, rootgfid) == 0) + && (!strcmp(entry->d_name, ".attribute"))) + continue; +#endif /* __NetBSD__ */ + + if ((uuid_compare (fd->inode->gfid, rootgfid) == 0) + && (!strcmp (GF_HIDDEN_PATH, entry->d_name))) { continue; + } - if ((!strcmp (real_path, base_path)) - && (!strncmp (GF_HIDDEN_PATH, entry->d_name, - strlen(GF_HIDDEN_PATH)))) { - snprintf (hidden_path, PATH_MAX, "%s/%s", real_path, - entry->d_name); - ret = lstat (hidden_path, &statbuf); - if (!ret && S_ISDIR (statbuf.st_mode)) + if (skip_dirs) { + if (DT_ISDIR (entry->d_type)) { continue; + } else if (hpath) { + strcpy (&hpath[len+1],entry->d_name); + ret = lstat (hpath, &stbuf); + if (!ret && S_ISDIR (stbuf.st_mode)) + continue; + } } + this_size = max (sizeof (gf_dirent_t), sizeof (gfs3_dirplist)) + strlen (entry->d_name) + 1; @@ -4114,42 +4200,180 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this, break; } - stbuf.ia_ino = entry->d_ino; - - entry->d_ino = stbuf.ia_ino; - this_entry = gf_dirent_for_name (entry->d_name); if (!this_entry) { - gf_log (this->name, GF_LOG_ERROR, + gf_log (THIS->name, GF_LOG_ERROR, "could not create gf_dirent for entry %s: (%s)", entry->d_name, strerror (errno)); goto out; } this_entry->d_off = telldir (dir); this_entry->d_ino = entry->d_ino; + this_entry->d_type = entry->d_type; - list_add_tail (&this_entry->list, &entries.list); + list_add_tail (&this_entry->list, &entries->list); filled += this_size; count ++; } - if (whichop == GF_FOP_READDIRP) { - list_for_each_entry (tmp_entry, &entries.list, list) { - strcpy (entry_path + real_path_len + 1, - tmp_entry->d_name); - posix_lstat_with_gfid (this, entry_path, &stbuf); - tmp_entry->d_stat = stbuf; + if ((!readdir (dir) && (errno == 0))) + /* Indicate EOF */ + errno = ENOENT; +out: + return count; +} + +dict_t * +posix_entry_xattr_fill (xlator_t *this, inode_t *inode, + fd_t *fd, char *name, dict_t *dict, + struct iatt *stbuf) +{ + loc_t tmp_loc = {0,}; + char *entry_path = NULL; + + /* if we don't send the 'loc', open-fd-count be a problem. */ + tmp_loc.inode = inode; + + MAKE_HANDLE_PATH (entry_path, this, fd->inode->gfid, name); + + return posix_lookup_xattr_fill (this, entry_path, + &tmp_loc, dict, stbuf); + +} + + +int +posix_readdirp_fill (xlator_t *this, fd_t *fd, gf_dirent_t *entries, dict_t *dict) +{ + gf_dirent_t *entry = NULL; + inode_table_t *itable = NULL; + inode_t *inode = NULL; + char *hpath = NULL; + int len = 0; + struct iatt stbuf = {0, }; + uuid_t gfid; + + if (list_empty(&entries->list)) + return 0; + + itable = fd->inode->table; + + len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0); + hpath = alloca (len + 256); /* NAME_MAX */ + posix_handle_path (this, fd->inode->gfid, NULL, hpath, len); + len = strlen (hpath); + hpath[len] = '/'; + + list_for_each_entry (entry, &entries->list, list) { + memset (gfid, 0, 16); + inode = inode_grep (fd->inode->table, fd->inode, + entry->d_name); + if (inode) + uuid_copy (gfid, inode->gfid); + + strcpy (&hpath[len+1], entry->d_name); + + posix_pstat (this, gfid, hpath, &stbuf); + + if (!inode) + inode = inode_find (itable, stbuf.ia_gfid); + + if (!inode) + inode = inode_new (itable); + + entry->inode = inode; + + if (dict) { + entry->dict = + posix_entry_xattr_fill (this, entry->inode, + fd, entry->d_name, + dict, &stbuf); + dict_ref (entry->dict); } + + entry->d_stat = stbuf; + if (stbuf.ia_ino) + entry->d_ino = stbuf.ia_ino; + inode = NULL; + } + + return 0; +} + + +int32_t +posix_do_readdir (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off, int whichop, dict_t *dict) +{ + struct posix_fd *pfd = NULL; + DIR *dir = NULL; + int ret = -1; + int count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + gf_dirent_t entries; + int32_t skip_dirs = 0; + + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + INIT_LIST_HEAD (&entries.list); + + ret = posix_fd_ctx_get (fd, this, &pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "pfd is NULL, fd=%p", fd); + op_errno = -ret; + goto out; } + + dir = pfd->dir; + + if (!dir) { + gf_log (this->name, GF_LOG_WARNING, + "dir is NULL for fd=%p", fd); + op_errno = EINVAL; + goto out; + } + + /* When READDIR_FILTER option is set to on, we can filter out + * directory's entry from the entry->list. + */ + ret = dict_get_int32 (dict, GF_READDIR_SKIP_DIRS, &skip_dirs); + + LOCK (&fd->lock); + { + /* posix_fill_readdir performs multiple separate individual + readdir() calls to fill up the buffer. + + In case of NFS where the same anonymous FD is shared between + different applications, reading a common directory can + result in the anonymous fd getting re-used unsafely between + the two readdir requests (in two different io-threads). + + It would also help, in the future, to replace the loop + around readdir() with a single large getdents() call. + */ + count = posix_fill_readdir (fd, dir, off, size, &entries, this, + skip_dirs); + } + UNLOCK (&fd->lock); + + /* pick ENOENT to indicate EOF */ + op_errno = errno; op_ret = count; - errno = 0; - if ((!readdir (dir) && (errno == 0))) - op_errno = ENOENT; + + if (whichop != GF_FOP_READDIRP) + goto out; + + posix_readdirp_fill (this, fd, &entries, dict); out: - STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries); + STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, NULL); gf_dirent_free (&entries); @@ -4159,18 +4383,18 @@ out: int32_t posix_readdir (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off) + fd_t *fd, size_t size, off_t off, dict_t *xdata) { - posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR); + posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR, xdata); return 0; } int32_t posix_readdirp (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off) + fd_t *fd, size_t size, off_t off, dict_t *dict) { - posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP); + posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP, dict); return 0; } @@ -4179,7 +4403,6 @@ posix_priv (xlator_t *this) { struct posix_private *priv = NULL; char key_prefix[GF_DUMP_MAX_BUF_LEN]; - char key[GF_DUMP_MAX_BUF_LEN]; snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); @@ -4193,16 +4416,11 @@ posix_priv (xlator_t *this) if (!priv) return 0; - gf_proc_dump_build_key(key, key_prefix, "base_path"); - gf_proc_dump_write(key,"%s", priv->base_path); - gf_proc_dump_build_key(key, key_prefix, "base_path_length"); - gf_proc_dump_write(key,"%d", priv->base_path_length); - gf_proc_dump_build_key(key, key_prefix, "max_read"); - gf_proc_dump_write(key,"%d", priv->read_value); - gf_proc_dump_build_key(key, key_prefix, "max_write"); - gf_proc_dump_write(key,"%d", priv->write_value); - gf_proc_dump_build_key(key, key_prefix, "nr_files"); - gf_proc_dump_write(key,"%ld", priv->nr_files); + gf_proc_dump_write("base_path","%s", priv->base_path); + gf_proc_dump_write("base_path_length","%d", priv->base_path_length); + gf_proc_dump_write("max_read","%d", priv->read_value); + gf_proc_dump_write("max_write","%d", priv->write_value); + gf_proc_dump_write("nr_files","%ld", priv->nr_files); return 0; } @@ -4216,65 +4434,72 @@ posix_inode (xlator_t *this) int32_t posix_rchecksum (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset, int32_t len) + fd_t *fd, off_t offset, int32_t len, dict_t *xdata) { - char *buf = NULL; - - int _fd = -1; - uint64_t tmp_pfd = 0; - - struct posix_fd *pfd = NULL; - - int op_ret = -1; - int op_errno = 0; - - int ret = 0; - - int32_t weak_checksum = 0; - uint8_t strong_checksum[MD5_DIGEST_LEN]; + char *alloc_buf = NULL; + char *buf = NULL; + int _fd = -1; + struct posix_fd *pfd = NULL; + int op_ret = -1; + int op_errno = 0; + int ret = 0; + int32_t weak_checksum = 0; + unsigned char strong_checksum[MD5_DIGEST_LENGTH] = {0}; + struct posix_private *priv = NULL; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); VALIDATE_OR_GOTO (fd, out); - memset (strong_checksum, 0, MD5_DIGEST_LEN); - buf = GF_CALLOC (1, len, gf_posix_mt_char); + priv = this->private; + memset (strong_checksum, 0, MD5_DIGEST_LENGTH); - if (!buf) { + alloc_buf = _page_aligned_alloc (len, &buf); + if (!alloc_buf) { op_errno = ENOMEM; goto out; } - ret = fd_ctx_get (fd, this, &tmp_pfd); + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { gf_log (this->name, GF_LOG_WARNING, "pfd is NULL, fd=%p", fd); op_errno = -ret; goto out; } - pfd = (struct posix_fd *)(long) tmp_pfd; _fd = pfd->fd; - ret = pread (_fd, buf, len, offset); - if (ret < 0) { - gf_log (this->name, GF_LOG_WARNING, - "pread of %d bytes returned %d (%s)", - len, ret, strerror (errno)); + LOCK (&fd->lock); + { + if (priv->aio_capable && priv->aio_init_done) + __posix_fd_set_odirect (fd, pfd, 0, offset, len); + + ret = pread (_fd, buf, len, offset); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "pread of %d bytes returned %d (%s)", + len, ret, strerror (errno)); + + op_errno = errno; + } - op_errno = errno; - goto out; } + UNLOCK (&fd->lock); - weak_checksum = gf_rsync_weak_checksum (buf, len); - gf_rsync_strong_checksum (buf, len, strong_checksum); + if (ret < 0) + goto out; - GF_FREE (buf); + weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf, (size_t) len); + gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) len, (unsigned char *) strong_checksum); op_ret = 0; out: STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, - weak_checksum, strong_checksum); + weak_checksum, strong_checksum, NULL); + + GF_FREE (alloc_buf); + return 0; } @@ -4322,21 +4547,123 @@ mem_acct_init (xlator_t *this) return ret; } +static int +posix_set_owner (xlator_t *this, uid_t uid, gid_t gid) +{ + struct posix_private *priv = NULL; + int ret = -1; + + priv = this->private; + + ret = sys_chown (priv->base_path, uid, gid); + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Failed to set " + "uid/gid for brick path %s, %s", + priv->base_path, strerror (errno)); + + return ret; +} + + +static int +set_batch_fsync_mode (struct posix_private *priv, const char *str) +{ + if (strcmp (str, "none") == 0) + priv->batch_fsync_mode = BATCH_NONE; + else if (strcmp (str, "syncfs") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS; + else if (strcmp (str, "syncfs-single-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_SINGLE_FSYNC; + else if (strcmp (str, "syncfs-reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_REVERSE_FSYNC; + else if (strcmp (str, "reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_REVERSE_FSYNC; + else + return -1; + + return 0; +} + + +int +reconfigure (xlator_t *this, dict_t *options) +{ + int ret = -1; + struct posix_private *priv = NULL; + uid_t uid = -1; + gid_t gid = -1; + char *batch_fsync_mode_str = NULL; + + priv = this->private; + + GF_OPTION_RECONF ("brick-uid", uid, options, uint32, out); + GF_OPTION_RECONF ("brick-gid", gid, options, uint32, out); + posix_set_owner (this, uid, gid); + + GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec, + options, uint32, out); + + GF_OPTION_RECONF ("batch-fsync-mode", batch_fsync_mode_str, + options, str, out); + + if (set_batch_fsync_mode (priv, batch_fsync_mode_str) != 0) { + gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s", + batch_fsync_mode_str); + goto out; + } + + GF_OPTION_RECONF ("linux-aio", priv->aio_configured, + options, bool, out); + + if (priv->aio_configured) + posix_aio_on (this); + else + posix_aio_off (this); + + GF_OPTION_RECONF ("node-uuid-pathinfo", priv->node_uuid_pathinfo, + options, bool, out); + + if (priv->node_uuid_pathinfo && + (uuid_is_null (priv->glusterd_uuid))) { + gf_log (this->name, GF_LOG_INFO, + "glusterd uuid is NULL, pathinfo xattr would" + " fallback to <hostname>:<export>"); + } + + GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval, + options, uint32, out); + posix_spawn_health_check_thread (this); + + ret = 0; +out: + return ret; +} + + /** * init - */ int init (xlator_t *this) { - struct posix_private *_private = NULL; - data_t *dir_data = NULL; - data_t *tmp_data = NULL; - struct stat buf = {0,}; - gf_boolean_t tmp_bool = 0; - int dict_ret = 0; - int ret = 0; - int op_ret = -1; - int32_t janitor_sleep = 0; + struct posix_private *_private = NULL; + data_t *dir_data = NULL; + data_t *tmp_data = NULL; + struct stat buf = {0,}; + gf_boolean_t tmp_bool = 0; + int dict_ret = 0; + int ret = 0; + int op_ret = -1; + ssize_t size = -1; + int32_t janitor_sleep = 0; + uuid_t old_uuid = {0,}; + uuid_t dict_uuid = {0,}; + uuid_t gfid = {0,}; + uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; + char *guuid = NULL; + uid_t uid = -1; + gid_t gid = -1; + char *batch_fsync_mode_str; dir_data = dict_get (this->options, "directory"); @@ -4371,11 +4698,12 @@ init (xlator_t *this) goto out; } - /* Check for Extended attribute support, if not present, log it */ op_ret = sys_lsetxattr (dir_data->data, "trusted.glusterfs.test", "working", 8, 0); - if (op_ret < 0) { + if (op_ret == 0) { + sys_lremovexattr (dir_data->data, "trusted.glusterfs.test"); + } else { tmp_data = dict_get (this->options, "mandate-attribute"); if (tmp_data) { @@ -4406,29 +4734,104 @@ init (xlator_t *this) } } - _private = GF_CALLOC (1, sizeof (*_private), - gf_posix_mt_posix_private); - if (!_private) { + tmp_data = dict_get (this->options, "volume-id"); + if (tmp_data) { + op_ret = uuid_parse (tmp_data->data, dict_uuid); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "wrong volume-id (%s) set in volume file", + tmp_data->data); + ret = -1; + goto out; + } + size = sys_lgetxattr (dir_data->data, + "trusted.glusterfs.volume-id", old_uuid, 16); + if (size == 16) { + if (uuid_compare (old_uuid, dict_uuid)) { + gf_log (this->name, GF_LOG_ERROR, + "mismatching volume-id (%s) received. " + "already is a part of volume %s ", + tmp_data->data, uuid_utoa (old_uuid)); + ret = -1; + goto out; + } + } else if ((size == -1) && (errno == ENODATA)) { + + gf_log (this->name, GF_LOG_ERROR, + "Extended attribute trusted.glusterfs." + "volume-id is absent"); + ret = -1; + goto out; + + } else if ((size == -1) && (errno != ENODATA)) { + /* Wrong 'volume-id' is set, it should be error */ + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to fetch volume-id (%s)", + dir_data->data, strerror (errno)); + ret = -1; + goto out; + } else { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "failed to fetch proper volume id from export"); + goto out; + } + } + + /* Now check if the export directory has some other 'gfid', + other than that of root '/' */ + size = sys_lgetxattr (dir_data->data, "trusted.gfid", gfid, 16); + if (size == 16) { + if (!__is_root_gfid (gfid)) { + gf_log (this->name, GF_LOG_WARNING, + "%s: gfid (%s) is not that of glusterfs '/' ", + dir_data->data, uuid_utoa (gfid)); + ret = -1; + goto out; + } + } else if (size != -1) { + /* Wrong 'gfid' is set, it should be error */ + gf_log (this->name, GF_LOG_WARNING, + "%s: wrong value set as gfid", + dir_data->data); ret = -1; goto out; + } else if ((size == -1) && (errno != ENODATA)) { + /* Wrong 'gfid' is set, it should be error */ + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to fetch gfid (%s)", + dir_data->data, strerror (errno)); + ret = -1; + goto out; + } else { + /* First time volume, set the GFID */ + size = sys_lsetxattr (dir_data->data, "trusted.gfid", rootgfid, + 16, XATTR_CREATE); + if (size) { + gf_log (this->name, GF_LOG_ERROR, + "%s: failed to set gfid (%s)", + dir_data->data, strerror (errno)); + ret = -1; + goto out; + } } - _private->base_path = gf_strdup (dir_data->data); - _private->base_path_length = strlen (_private->base_path); - - _private->trash_path = GF_CALLOC (1, _private->base_path_length - + strlen ("/") - + strlen (GF_REPLICATE_TRASH_DIR) - + 1, - gf_posix_mt_trash_path); + size = sys_lgetxattr (dir_data->data, POSIX_ACL_ACCESS_XATTR, + NULL, 0); + if ((size < 0) && (errno == ENOTSUP)) + gf_log (this->name, GF_LOG_WARNING, + "Posix access control list is not supported."); - if (!_private->trash_path) { + ret = 0; + _private = GF_CALLOC (1, sizeof (*_private), + gf_posix_mt_posix_private); + if (!_private) { ret = -1; goto out; } - strncpy (_private->trash_path, _private->base_path, _private->base_path_length); - strcat (_private->trash_path, "/" GF_REPLICATE_TRASH_DIR); + _private->base_path = gf_strdup (dir_data->data); + _private->base_path_length = strlen (_private->base_path); LOCK_INIT (&_private->lock); @@ -4494,6 +4897,19 @@ init (xlator_t *this) "for every open)"); } + ret = dict_get_str (this->options, "glusterd-uuid", &guuid); + if (!ret) { + if (uuid_parse (guuid, _private->glusterd_uuid)) + gf_log (this->name, GF_LOG_WARNING, "Cannot parse " + "glusterd (node) UUID, node-uuid xattr " + "request would return - \"No such attribute\""); + } else { + gf_log (this->name, GF_LOG_DEBUG, "No glusterd (node) UUID " + "passed - node-uuid xattr request will return " + "\"No such attribute\""); + } + ret = 0; + _private->janitor_sleep_duration = 600; dict_ret = dict_get_int32 (this->options, "janitor-sleep-duration", @@ -4505,7 +4921,16 @@ init (xlator_t *this) _private->janitor_sleep_duration = janitor_sleep; } - + /* performing open dir on brick dir locks the brick dir + * and prevents it from being unmounted + */ + _private->mount_lock = opendir (dir_data->data); + if (!_private->mount_lock) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Could not lock brick directory"); + goto out; + } #ifndef GF_DARWIN_HOST_OS { struct rlimit lim; @@ -4535,11 +4960,84 @@ init (xlator_t *this) #endif this->private = (void *)_private; + op_ret = posix_handle_init (this); + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "Posix handle setup failed"); + ret = -1; + goto out; + } + + op_ret = posix_handle_trash_init (this); + if (op_ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "Posix landfill setup failed"); + ret = -1; + goto out; + } + + _private->aio_init_done = _gf_false; + _private->aio_capable = _gf_false; + + GF_OPTION_INIT ("brick-uid", uid, uint32, out); + GF_OPTION_INIT ("brick-gid", gid, uint32, out); + posix_set_owner (this, uid, gid); + + GF_OPTION_INIT ("linux-aio", _private->aio_configured, bool, out); + + if (_private->aio_configured) { + op_ret = posix_aio_on (this); + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "Posix AIO init failed"); + ret = -1; + goto out; + } + } + + GF_OPTION_INIT ("node-uuid-pathinfo", + _private->node_uuid_pathinfo, bool, out); + if (_private->node_uuid_pathinfo && + (uuid_is_null (_private->glusterd_uuid))) { + gf_log (this->name, GF_LOG_INFO, + "glusterd uuid is NULL, pathinfo xattr would" + " fallback to <hostname>:<export>"); + } + + _private->health_check_active = _gf_false; + GF_OPTION_INIT ("health-check-interval", + _private->health_check_interval, uint32, out); + if (_private->health_check_interval) + posix_spawn_health_check_thread (this); + pthread_mutex_init (&_private->janitor_lock, NULL); pthread_cond_init (&_private->janitor_cond, NULL); INIT_LIST_HEAD (&_private->janitor_fds); posix_spawn_janitor_thread (this); + + pthread_mutex_init (&_private->fsync_mutex, NULL); + pthread_cond_init (&_private->fsync_cond, NULL); + INIT_LIST_HEAD (&_private->fsyncs); + + ret = gf_thread_create (&_private->fsyncer, NULL, posix_fsyncer, this); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "fsyncer thread" + " creation failed (%s)", strerror (errno)); + goto out; + } + + GF_OPTION_INIT ("batch-fsync-mode", batch_fsync_mode_str, str, out); + + if (set_batch_fsync_mode (_private, batch_fsync_mode_str) != 0) { + gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s", + batch_fsync_mode_str); + goto out; + } + + GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec, + uint32, out); out: return ret; } @@ -4551,7 +5049,9 @@ fini (xlator_t *this) if (!priv) return; this->private = NULL; - sys_lremovexattr (priv->base_path, "trusted.glusterfs.test"); + /*unlock brick dir*/ + if (priv->mount_lock) + closedir (priv->mount_lock); GF_FREE (priv); return; } @@ -4588,6 +5088,7 @@ struct xlator_fops fops = { .getxattr = posix_getxattr, .fgetxattr = posix_fgetxattr, .removexattr = posix_removexattr, + .fremovexattr = posix_fremovexattr, .fsyncdir = posix_fsyncdir, .access = posix_access, .ftruncate = posix_ftruncate, @@ -4602,6 +5103,9 @@ struct xlator_fops fops = { .fxattrop = posix_fxattrop, .setattr = posix_setattr, .fsetattr = posix_fsetattr, + .fallocate = _posix_fallocate, + .discard = posix_discard, + .zerofill = posix_zerofill, }; struct xlator_cbks cbks = { @@ -4625,5 +5129,64 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_BOOL }, { .key = {"janitor-sleep-duration"}, .type = GF_OPTION_TYPE_INT }, + { .key = {"volume-id"}, + .type = GF_OPTION_TYPE_ANY }, + { .key = {"glusterd-uuid"}, + .type = GF_OPTION_TYPE_STR }, + { + .key = {"linux-aio"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Support for native Linux AIO" + }, + { + .key = {"brick-uid"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .validate = GF_OPT_VALIDATE_MIN, + .description = "Support for setting uid of brick's owner" + }, + { + .key = {"brick-gid"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .validate = GF_OPT_VALIDATE_MIN, + .description = "Support for setting gid of brick's owner" + }, + { .key = {"node-uuid-pathinfo"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "return glusterd's node-uuid in pathinfo xattr" + " string instead of hostname" + }, + { + .key = {"health-check-interval"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "30", + .validate = GF_OPT_VALIDATE_MIN, + .description = "Interval in seconds for a filesystem health check, " + "set to 0 to disable" + }, + { .key = {"batch-fsync-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "reverse-fsync", + .description = "Possible values:\n" + "\t- syncfs: Perform one syncfs() on behalf oa batch" + "of fsyncs.\n" + "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch" + " of fsyncs and one fsync() per batch.\n" + "\t- syncfs-reverse-fsync: Preform one syncfs() on behalf of a batch" + " of fsyncs and fsync() each file in the batch in reverse order.\n" + " in reverse order.\n" + "\t- reverse-fsync: Perform fsync() of each file in the batch in" + " reverse order." + }, + { .key = {"batch-fsync-delay-usec"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "0", + .description = "Num of usecs to wait for aggregating fsync" + " requests", + }, { .key = {NULL} } }; |
