diff options
Diffstat (limited to 'xlators/storage/posix/src/posix.c')
| -rw-r--r-- | xlators/storage/posix/src/posix.c | 1099 |
1 files changed, 886 insertions, 213 deletions
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 816011542..fb45c7a67 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -23,6 +23,8 @@ #include <pthread.h> #include <ftw.h> #include <sys/stat.h> +#include <signal.h> +#include <sys/uio.h> #ifndef GF_BSD_HOST_OS #include <alloca.h> @@ -50,6 +52,7 @@ #include "glusterfs3-xdr.h" #include "hashfn.h" #include "posix-aio.h" +#include "glusterfs-acl.h" extern char *marker_xattrs[]; #define ALIGN_SIZE 4096 @@ -128,7 +131,7 @@ posix_lookup (call_frame_t *frame, xlator_t *this, MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &buf); if (uuid_is_null (loc->inode->gfid)) { - posix_gfid_set (this, real_path, loc, xdata); + posix_gfid_heal (this, real_path, loc, xdata); MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &buf); } @@ -561,6 +564,289 @@ out: return 0; } +static int32_t +posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + off_t offset, size_t len, struct iatt *statpre, + struct iatt *statpost) +{ + struct posix_fd *pfd = NULL; + int32_t ret = -1; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = posix_fd_ctx_get (fd, this, &pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "pfd is NULL from fd=%p", fd); + goto out; + } + + ret = posix_fdstat (this, pfd->fd, statpre); + if (ret == -1) { + ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "fallocate (fstat) failed on fd=%p: %s", fd, + strerror (errno)); + goto out; + } + + ret = sys_fallocate(pfd->fd, flags, offset, len); + if (ret == -1) { + ret = -errno; + goto out; + } + + ret = posix_fdstat (this, pfd->fd, statpost); + if (ret == -1) { + ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "fallocate (fstat) failed on fd=%p: %s", fd, + strerror (errno)); + goto out; + } + +out: + SET_TO_OLD_FS_ID (); + + return ret; +} + +char* +_page_aligned_alloc (size_t size, char **aligned_buf) +{ + char *alloc_buf = NULL; + char *buf = NULL; + + alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char); + if (!alloc_buf) + goto out; + /* page aligned buffer */ + buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE); + *aligned_buf = buf; +out: + return alloc_buf; +} + +static int32_t +_posix_do_zerofill(int fd, off_t offset, size_t len, int o_direct) +{ + size_t num_vect = 0; + int32_t num_loop = 1; + int32_t idx = 0; + int32_t op_ret = -1; + int32_t vect_size = VECTOR_SIZE; + size_t remain = 0; + size_t extra = 0; + struct iovec *vector = NULL; + char *iov_base = NULL; + char *alloc_buf = NULL; + + if (len == 0) + return 0; + if (len < VECTOR_SIZE) + vect_size = len; + + num_vect = len / (vect_size); + remain = len % vect_size ; + if (num_vect > MAX_NO_VECT) { + extra = num_vect % MAX_NO_VECT; + num_loop = num_vect / MAX_NO_VECT; + num_vect = MAX_NO_VECT; + } + + vector = GF_CALLOC (num_vect, sizeof(struct iovec), + gf_common_mt_iovec); + if (!vector) + return -1; + if (o_direct) { + alloc_buf = _page_aligned_alloc(vect_size, &iov_base); + if (!alloc_buf) { + gf_log ("_posix_do_zerofill", GF_LOG_DEBUG, + "memory alloc failed, vect_size %d: %s", + vect_size, strerror(errno)); + GF_FREE(vector); + return -1; + } + } else { + iov_base = GF_CALLOC (vect_size, sizeof(char), + gf_common_mt_char); + if (!iov_base) { + GF_FREE(vector); + return -1; + } + } + + for (idx = 0; idx < num_vect; idx++) { + vector[idx].iov_base = iov_base; + vector[idx].iov_len = vect_size; + } + lseek(fd, offset, SEEK_SET); + for (idx = 0; idx < num_loop; idx++) { + op_ret = writev(fd, vector, num_vect); + if (op_ret < 0) + goto err; + } + if (extra) { + op_ret = writev(fd, vector, extra); + if (op_ret < 0) + goto err; + } + if (remain) { + vector[0].iov_len = remain; + op_ret = writev(fd, vector , 1); + if (op_ret < 0) + goto err; + } +err: + if (o_direct) + GF_FREE(alloc_buf); + else + GF_FREE(iov_base); + GF_FREE(vector); + return op_ret; +} + +static int32_t +posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, + off_t offset, size_t len, struct iatt *statpre, + struct iatt *statpost) +{ + struct posix_fd *pfd = NULL; + int32_t ret = -1; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = posix_fd_ctx_get (fd, this, &pfd); + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "pfd is NULL from fd=%p", fd); + goto out; + } + + ret = posix_fdstat (this, pfd->fd, statpre); + if (ret == -1) { + ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "pre-operation fstat failed on fd = %p: %s", fd, + strerror (errno)); + goto out; + } + ret = _posix_do_zerofill(pfd->fd, offset, len, pfd->flags & O_DIRECT); + if (ret < 0) { + ret = -errno; + gf_log(this->name, GF_LOG_ERROR, + "zerofill failed on fd %d length %ld %s", + pfd->fd, len, strerror(errno)); + goto out; + } + if (pfd->flags & (O_SYNC|O_DSYNC)) { + ret = fsync (pfd->fd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "fsync() in writev on fd %d failed: %s", + pfd->fd, strerror (errno)); + ret = -errno; + goto out; + } + } + + ret = posix_fdstat (this, pfd->fd, statpost); + if (ret == -1) { + ret = -errno; + gf_log (this->name, GF_LOG_ERROR, + "post operation fstat failed on fd=%p: %s", fd, + strerror (errno)); + goto out; + } + +out: + SET_TO_OLD_FS_ID (); + + return ret; +} + +static int32_t +_posix_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size, + off_t offset, size_t len, dict_t *xdata) +{ + int32_t ret; + int32_t flags = 0; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; + + if (keep_size) + flags = FALLOC_FL_KEEP_SIZE; + + ret = posix_do_fallocate(frame, this, fd, flags, offset, len, + &statpre, &statpost); + if (ret < 0) + goto err; + + STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, NULL); + return 0; + +err: + STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, NULL); + return 0; +} + +static int32_t +posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + int32_t ret; + int32_t flags = FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; + + ret = posix_do_fallocate(frame, this, fd, flags, offset, len, + &statpre, &statpost); + if (ret < 0) + goto err; + + STACK_UNWIND_STRICT(discard, frame, 0, 0, &statpre, &statpost, NULL); + return 0; + +err: + STACK_UNWIND_STRICT(discard, frame, -1, -ret, NULL, NULL, NULL); + return 0; + +} + +static int32_t +posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + int32_t ret = 0; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; + + ret = posix_do_zerofill(frame, this, fd, offset, len, + &statpre, &statpost); + if (ret < 0) + goto err; + + STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL); + return 0; + +err: + STACK_UNWIND_STRICT(zerofill, frame, -1, -ret, NULL, NULL, NULL); + return 0; + +} + int32_t posix_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata) @@ -1969,11 +2255,7 @@ posix_readv (call_frame_t *frame, xlator_t *this, } /* Hack to notify higher layers of EOF. */ - if (stbuf.ia_size == 0) - op_errno = ENOENT; - else if ((offset + vec.iov_len) == stbuf.ia_size) - op_errno = ENOENT; - else if (offset > stbuf.ia_size) + if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size) op_errno = ENOENT; op_ret = vec.iov_len; @@ -2018,22 +2300,6 @@ err: return op_ret; } -char* -_page_aligned_alloc (size_t size, char **aligned_buf) -{ - char *alloc_buf = NULL; - char *buf = NULL; - - alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char); - if (!alloc_buf) - goto out; - /* page aligned buffer */ - buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE); - *aligned_buf = buf; -out: - return alloc_buf; -} - int32_t __posix_writev (int fd, struct iovec *vector, int count, off_t startoff, int odirect) @@ -2082,6 +2348,48 @@ err: return op_ret; } +dict_t* +_fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append) +{ + dict_t *rsp_xdata = NULL; + int32_t ret = 0; + inode_t *inode = NULL; + + if (fd) + inode = fd->inode; + + if (!fd || !fd->inode || uuid_is_null (fd->inode->gfid)) { + gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid Args: " + "fd: %p inode: %p gfid:%s", fd, inode?inode:0, + inode?uuid_utoa(inode->gfid):"N/A"); + goto out; + } + + if (!xdata || !dict_get (xdata, GLUSTERFS_OPEN_FD_COUNT)) + goto out; + + rsp_xdata = dict_new(); + if (!rsp_xdata) + goto out; + + ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_OPEN_FD_COUNT, + fd->inode->fd_count); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set " + "dictionary value for %s", uuid_utoa (fd->inode->gfid), + GLUSTERFS_OPEN_FD_COUNT); + } + + ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND, + is_append); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set " + "dictionary value for %s", uuid_utoa (fd->inode->gfid), + GLUSTERFS_WRITE_IS_APPEND); + } +out: + return rsp_xdata; +} int32_t posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, @@ -2096,6 +2404,9 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt preop = {0,}; struct iatt postop = {0,}; int ret = -1; + dict_t *rsp_xdata = NULL; + int is_append = 0; + gf_boolean_t locked = _gf_false; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -2117,6 +2428,17 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, _fd = pfd->fd; + if (xdata && dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) { + /* The write_is_append check and write must happen + atomically. Else another write can overtake this + write after the check and get written earlier. + + So lock before preop-stat and unlock after write. + */ + locked = _gf_true; + LOCK(&fd->inode->lock); + } + op_ret = posix_fdstat (this, _fd, &preop); if (op_ret == -1) { op_errno = errno; @@ -2126,8 +2448,19 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, goto out; } + if (locked) { + if (preop.ia_size == offset || (fd->flags & O_APPEND)) + is_append = 1; + } + op_ret = __posix_writev (_fd, vector, count, offset, (pfd->flags & O_DIRECT)); + + if (locked) { + UNLOCK (&fd->inode->lock); + locked = _gf_false; + } + if (op_ret < 0) { op_errno = -op_ret; op_ret = -1; @@ -2143,14 +2476,21 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, UNLOCK (&priv->lock); if (op_ret >= 0) { + rsp_xdata = _fill_writev_xdata (fd, xdata, this, is_append); /* wiretv successful, we also need to get the stat of * the file we wrote to */ - if (pfd->flushwrites) { - /* NOTE: ignore the error, if one occurs at this - * point */ - fsync (_fd); + if (flags & (O_SYNC|O_DSYNC)) { + ret = fsync (_fd); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "fsync() in writev on fd %d failed: %s", + _fd, strerror (errno)); + op_ret = -1; + op_errno = errno; + goto out; + } } ret = posix_fdstat (this, _fd, &postop); @@ -2166,9 +2506,16 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, out: + if (locked) { + UNLOCK (&fd->inode->lock); + locked = _gf_false; + } + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &preop, &postop, - NULL); + rsp_xdata); + if (rsp_xdata) + dict_unref (rsp_xdata); return 0; } @@ -2295,6 +2642,33 @@ out: } +int +posix_batch_fsync (call_frame_t *frame, xlator_t *this, + fd_t *fd, int datasync, dict_t *xdata) +{ + call_stub_t *stub = NULL; + struct posix_private *priv = NULL; + + priv = this->private; + + stub = fop_fsync_stub (frame, default_fsync, fd, datasync, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0); + return 0; + } + + pthread_mutex_lock (&priv->fsync_mutex); + { + list_add_tail (&stub->list, &priv->fsyncs); + priv->fsync_queue_count++; + pthread_cond_signal (&priv->fsync_cond); + } + pthread_mutex_unlock (&priv->fsync_mutex); + + return 0; +} + + int32_t posix_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, dict_t *xdata) @@ -2306,6 +2680,7 @@ posix_fsync (call_frame_t *frame, xlator_t *this, int ret = -1; struct iatt preop = {0,}; struct iatt postop = {0,}; + struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; @@ -2321,6 +2696,12 @@ posix_fsync (call_frame_t *frame, xlator_t *this, goto out; #endif + priv = this->private; + if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) { + posix_batch_fsync (frame, this, fd, datasync, xdata); + return 0; + } + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; @@ -2382,6 +2763,17 @@ out: } static int gf_posix_xattr_enotsup_log; +static int +_handle_setxattr_keyvalue_pair (dict_t *d, char *k, data_t *v, + void *tmp) +{ + posix_xattr_filler_t *filler = NULL; + + filler = tmp; + + return posix_handle_pair (filler->this, filler->real_path, k, v, + filler->flags); +} int32_t posix_setxattr (call_frame_t *frame, xlator_t *this, @@ -2390,7 +2782,8 @@ posix_setxattr (call_frame_t *frame, xlator_t *this, int32_t op_ret = -1; int32_t op_errno = 0; char * real_path = NULL; - int ret = -1; + + posix_xattr_filler_t filler = {0,}; DECLARE_OLD_FS_ID_VAR; SET_FS_ID (frame->root->uid, frame->root->gid); @@ -2405,17 +2798,13 @@ posix_setxattr (call_frame_t *frame, xlator_t *this, op_ret = -1; dict_del (dict, GFID_XATTR_KEY); - - int _handle_every_keyvalue_pair (dict_t *d, char *k, data_t *v, - void *tmp) - { - ret = posix_handle_pair (this, real_path, k, v, flags); - if (ret < 0) { - op_errno = -ret; - } - return ret; - } - op_ret = dict_foreach (dict, _handle_every_keyvalue_pair, NULL); + filler.real_path = real_path; + filler.this = this; + filler.flags = flags; + op_ret = dict_foreach (dict, _handle_setxattr_keyvalue_pair, + &filler); + if (op_ret < 0) + op_errno = -op_ret; out: SET_TO_OLD_FS_ID (); @@ -2425,6 +2814,53 @@ out: return 0; } + +int +posix_xattr_get_real_filename (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *key, dict_t *dict, dict_t *xdata) +{ + char *real_path = NULL; + struct dirent *dirent = NULL; + DIR *fd = NULL; + const char *fname = NULL; + char *found = NULL; + int ret = -1; + int op_ret = -1; + + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + + fd = opendir (real_path); + if (!fd) + return -errno; + + fname = key + strlen (GF_XATTR_GET_REAL_FILENAME_KEY); + + while ((dirent = readdir (fd))) { + if (strcasecmp (dirent->d_name, fname) == 0) { + found = gf_strdup (dirent->d_name); + if (!found) { + closedir (fd); + return -ENOMEM; + } + break; + } + } + + closedir (fd); + + if (!found) + return -ENOENT; + + ret = dict_set_dynstr (dict, (char *)key, found); + if (ret) { + GF_FREE (found); + return -ENOMEM; + } + ret = strlen (found) + 1; + + return ret; +} + /** * posix_getxattr - this function returns a dictionary with all the * key:value pair present as xattr. used for @@ -2479,9 +2915,29 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, dict = dict_new (); if (!dict) { + op_errno = ENOMEM; goto out; } + if (loc->inode && name && + (strncmp (name, GF_XATTR_GET_REAL_FILENAME_KEY, + strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)) { + ret = posix_xattr_get_real_filename (frame, this, loc, + name, dict, xdata); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + gf_log (this->name, (op_errno == ENOENT) ? + GF_LOG_DEBUG : GF_LOG_WARNING, + "Failed to get real filename (%s, %s): %s", + loc->path, name, strerror (op_errno)); + goto out; + } + + size = ret; + goto done; + } + if (loc->inode && name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) { if (!list_empty (&loc->inode->fd_list)) { ret = dict_set_uint32 (dict, (char *)name, 1); @@ -2505,8 +2961,13 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, else rpath = real_path; - (void) snprintf (host_buf, 1024, "<POSIX(%s):%s:%s>", - priv->base_path, priv->hostname, rpath); + (void) snprintf (host_buf, 1024, + "<POSIX(%s):%s:%s>", priv->base_path, + ((priv->node_uuid_pathinfo + && !uuid_is_null(priv->glusterd_uuid)) + ? uuid_utoa (priv->glusterd_uuid) + : priv->hostname), + rpath); dyn_rpath = gf_strdup (host_buf); if (!dyn_rpath) { @@ -2582,6 +3043,11 @@ posix_getxattr (call_frame_t *frame, xlator_t *this, "supported (try remounting" " brick with 'user_xattr' " "flag)"); + } else if (op_errno == ENOATTR || + op_errno == ENODATA) { + gf_log (this->name, GF_LOG_DEBUG, + "No such attribute:%s for file %s", + key, real_path); } else { gf_log (this->name, GF_LOG_ERROR, "getxattr failed on %s: %s (%s)", @@ -2899,6 +3365,17 @@ out: return 0; } +static int +_handle_fsetxattr_keyvalue_pair (dict_t *d, char *k, data_t *v, + void *tmp) +{ + posix_xattr_filler_t *filler = NULL; + + filler = tmp; + + return posix_fhandle_pair (filler->this, filler->fd, k, v, + filler->flags); +} int32_t posix_fsetxattr (call_frame_t *frame, xlator_t *this, @@ -2908,7 +3385,9 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this, int32_t op_errno = 0; struct posix_fd * pfd = NULL; int _fd = -1; - int ret = -1; + int ret = -1; + + posix_xattr_filler_t filler = {0,}; DECLARE_OLD_FS_ID_VAR; SET_FS_ID (frame->root->uid, frame->root->gid); @@ -2929,17 +3408,13 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this, dict_del (dict, GFID_XATTR_KEY); - int _handle_every_keyvalue_pair (dict_t *d, char *k, data_t *v, - void *tmp) - { - ret = posix_fhandle_pair (this, _fd, k, v, flags); - if (ret < 0) { - op_errno = -ret; - } - return ret; - } - - op_ret = dict_foreach (dict, _handle_every_keyvalue_pair, NULL); + filler.fd = _fd; + filler.this = this; + filler.flags = flags; + op_ret = dict_foreach (dict, _handle_fsetxattr_keyvalue_pair, + &filler); + if (op_ret < 0) + op_errno = -op_ret; out: SET_TO_OLD_FS_ID (); @@ -2949,6 +3424,28 @@ out: return 0; } +int +_posix_remove_xattr (dict_t *dict, char *key, data_t *value, void *data) +{ + int32_t op_ret = 0; + xlator_t *this = NULL; + posix_xattr_filler_t *filler = NULL; + + filler = (posix_xattr_filler_t *) data; + this = filler->this; + + op_ret = sys_lremovexattr (filler->real_path, key); + if (op_ret == -1) { + filler->op_errno = errno; + if (errno != ENOATTR && errno != EPERM) + gf_log (this->name, GF_LOG_ERROR, + "removexattr failed on %s (for %s): %s", + filler->real_path, key, strerror (errno)); + } + + return op_ret; +} + int32_t posix_removexattr (call_frame_t *frame, xlator_t *this, @@ -2957,6 +3454,7 @@ posix_removexattr (call_frame_t *frame, xlator_t *this, int32_t op_ret = -1; int32_t op_errno = 0; char * real_path = NULL; + posix_xattr_filler_t filler = {0,}; DECLARE_OLD_FS_ID_VAR; @@ -2972,6 +3470,22 @@ posix_removexattr (call_frame_t *frame, xlator_t *this, SET_FS_ID (frame->root->uid, frame->root->gid); + /** + * sending an empty key name with xdata containing the + * list of key(s) to be removed implies "bulk remove request" + * for removexattr. + */ + if (name && (strcmp (name, "") == 0) && xdata) { + filler.real_path = real_path; + filler.this = this; + op_ret = dict_foreach (xdata, _posix_remove_xattr, &filler); + if (op_ret) { + op_errno = filler.op_errno; + } + + goto out; + } + op_ret = sys_lremovexattr (real_path, name); if (op_ret == -1) { op_errno = errno; @@ -3130,6 +3644,159 @@ __add_long_array (int64_t *dest, int64_t *src, int count) } } +static int +_posix_handle_xattr_keyvalue_pair (dict_t *d, char *k, data_t *v, + void *tmp) +{ + int size = 0; + int count = 0; + int op_ret = 0; + int op_errno = 0; + gf_xattrop_flags_t optype = 0; + char *array = NULL; + inode_t *inode = NULL; + xlator_t *this = NULL; + posix_xattr_filler_t *filler = NULL; + + filler = tmp; + + optype = (gf_xattrop_flags_t)(filler->flags); + this = filler->this; + inode = filler->inode; + + count = v->len; + array = GF_CALLOC (count, sizeof (char), gf_posix_mt_char); + + LOCK (&inode->lock); + { + if (filler->real_path) { + size = sys_lgetxattr (filler->real_path, k, + (char *)array, v->len); + } else { + size = sys_fgetxattr (filler->fd, k, (char *)array, + v->len); + } + + op_errno = errno; + if ((size == -1) && (op_errno != ENODATA) && + (op_errno != ENOATTR)) { + if (op_errno == ENOTSUP) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, + this->name, GF_LOG_WARNING, + "Extended attributes not " + "supported by filesystem"); + } else if (op_errno != ENOENT || + !posix_special_xattr (marker_xattrs, + k)) { + if (filler->real_path) + gf_log (this->name, GF_LOG_ERROR, + "getxattr failed on %s while doing " + "xattrop: Key:%s (%s)", + filler->real_path, + k, strerror (op_errno)); + else + gf_log (this->name, GF_LOG_ERROR, + "fgetxattr failed on fd=%d while doing " + "xattrop: Key:%s (%s)", + filler->fd, + k, strerror (op_errno)); + } + + op_ret = -1; + goto unlock; + } + + switch (optype) { + + case GF_XATTROP_ADD_ARRAY: + __add_array ((int32_t *) array, (int32_t *) v->data, + v->len / 4); + break; + + case GF_XATTROP_ADD_ARRAY64: + __add_long_array ((int64_t *) array, (int64_t *) v->data, + v->len / 8); + break; + + case GF_XATTROP_OR_ARRAY: + __or_array ((int32_t *) array, + (int32_t *) v->data, + v->len / 4); + break; + + case GF_XATTROP_AND_ARRAY: + __and_array ((int32_t *) array, + (int32_t *) v->data, + v->len / 4); + break; + + default: + gf_log (this->name, GF_LOG_ERROR, + "Unknown xattrop type (%d) on %s. Please send " + "a bug report to gluster-devel@nongnu.org", + optype, filler->real_path); + op_ret = -1; + op_errno = EINVAL; + goto unlock; + } + + if (filler->real_path) { + size = sys_lsetxattr (filler->real_path, k, array, + v->len, 0); + } else { + size = sys_fsetxattr (filler->fd, k, (char *)array, + v->len, 0); + } + } +unlock: + UNLOCK (&inode->lock); + + if (op_ret == -1) + goto out; + + op_errno = errno; + if (size == -1) { + if (filler->real_path) + gf_log (this->name, GF_LOG_ERROR, + "setxattr failed on %s while doing xattrop: " + "key=%s (%s)", filler->real_path, + k, strerror (op_errno)); + else + gf_log (this->name, GF_LOG_ERROR, + "fsetxattr failed on fd=%d while doing xattrop: " + "key=%s (%s)", filler->fd, + k, strerror (op_errno)); + + op_ret = -1; + goto out; + } else { + size = dict_set_bin (d, k, array, v->len); + + if (size != 0) { + if (filler->real_path) + gf_log (this->name, GF_LOG_DEBUG, + "dict_set_bin failed (path=%s): " + "key=%s (%s)", filler->real_path, + k, strerror (-size)); + else + gf_log (this->name, GF_LOG_DEBUG, + "dict_set_bin failed (fd=%d): " + "key=%s (%s)", filler->fd, + k, strerror (-size)); + + op_ret = -1; + op_errno = EINVAL; + goto out; + } + array = NULL; + } + + array = NULL; + +out: + return op_ret; +} + /** * xattrop - xattr operations - for internal use by GlusterFS * @optype: ADD_ARRAY: @@ -3141,32 +3808,24 @@ int do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr) { - char *real_path = NULL; - char *array = NULL; - int size = 0; - int count = 0; - - int op_ret = 0; - int op_errno = 0; - - int ret = 0; - int _fd = -1; - struct posix_fd *pfd = NULL; - - char * path = NULL; - inode_t * inode = NULL; + int op_ret = 0; + int op_errno = 0; + int _fd = -1; + char *real_path = NULL; + struct posix_fd *pfd = NULL; + inode_t *inode = NULL; + posix_xattr_filler_t filler = {0,}; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (xattr, out); VALIDATE_OR_GOTO (this, out); if (fd) { - ret = posix_fd_ctx_get (fd, this, &pfd); - if (ret < 0) { + op_ret = posix_fd_ctx_get (fd, this, &pfd); + if (op_ret < 0) { gf_log (this->name, GF_LOG_WARNING, "failed to get pfd from fd=%p", fd); - op_ret = -1; op_errno = EBADFD; goto out; } @@ -3177,152 +3836,21 @@ do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, MAKE_INODE_HANDLE (real_path, this, loc, NULL); if (real_path) { - path = gf_strdup (real_path); inode = loc->inode; } else if (fd) { inode = fd->inode; } - int _handle_every_keyvalue_pair (dict_t *d, char *k, data_t *v, - void *tmp) - { - - count = v->len; - array = GF_CALLOC (count, sizeof (char), gf_posix_mt_char); - - LOCK (&inode->lock); - { - if (loc) { - size = sys_lgetxattr (real_path, k, - (char *)array, v->len); - } else { - size = sys_fgetxattr (_fd, k, (char *)array, - v->len); - } - - op_errno = errno; - if ((size == -1) && (op_errno != ENODATA) && - (op_errno != ENOATTR)) { - if (op_errno == ENOTSUP) { - GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, - this->name,GF_LOG_WARNING, - "Extended attributes not " - "supported by filesystem"); - } else if (op_errno != ENOENT || - !posix_special_xattr (marker_xattrs, - k)) { - if (loc) - gf_log (this->name, GF_LOG_ERROR, - "getxattr failed on %s while doing " - "xattrop: Key:%s (%s)", path, - k, strerror (op_errno)); - else - gf_log (this->name, GF_LOG_ERROR, - "fgetxattr failed on fd=%d while doing " - "xattrop: Key:%s (%s)", _fd, - k, strerror (op_errno)); - } - - op_ret = -1; - goto unlock; - } - - switch (optype) { - - case GF_XATTROP_ADD_ARRAY: - __add_array ((int32_t *) array, (int32_t *) v->data, - v->len / 4); - break; - - case GF_XATTROP_ADD_ARRAY64: - __add_long_array ((int64_t *) array, (int64_t *) v->data, - v->len / 8); - break; - - case GF_XATTROP_OR_ARRAY: - __or_array ((int32_t *) array, - (int32_t *) v->data, - v->len / 4); - break; - - case GF_XATTROP_AND_ARRAY: - __and_array ((int32_t *) array, - (int32_t *) v->data, - v->len / 4); - break; - - default: - gf_log (this->name, GF_LOG_ERROR, - "Unknown xattrop type (%d) on %s. Please send " - "a bug report to gluster-devel@nongnu.org", - optype, path); - op_ret = -1; - op_errno = EINVAL; - goto unlock; - } - - if (loc) { - size = sys_lsetxattr (real_path, k, array, - v->len, 0); - } else { - size = sys_fsetxattr (_fd, k, (char *)array, - v->len, 0); - } - } - unlock: - UNLOCK (&inode->lock); - - if (op_ret == -1) - goto out; - - op_errno = errno; - if (size == -1) { - if (loc) - gf_log (this->name, GF_LOG_ERROR, - "setxattr failed on %s while doing xattrop: " - "key=%s (%s)", path, - k, strerror (op_errno)); - else - gf_log (this->name, GF_LOG_ERROR, - "fsetxattr failed on fd=%d while doing xattrop: " - "key=%s (%s)", _fd, - k, strerror (op_errno)); - - op_ret = -1; - goto out; - } else { - size = dict_set_bin (xattr, k, array, v->len); - - if (size != 0) { - if (loc) - gf_log (this->name, GF_LOG_DEBUG, - "dict_set_bin failed (path=%s): " - "key=%s (%s)", path, - k, strerror (-size)); - else - gf_log (this->name, GF_LOG_DEBUG, - "dict_set_bin failed (fd=%d): " - "key=%s (%s)", _fd, - k, strerror (-size)); - - op_ret = -1; - op_errno = EINVAL; - goto out; - } - array = NULL; - } + filler.this = this; + filler.fd = _fd; + filler.real_path = real_path; + filler.flags = (int)optype; + filler.inode = inode; - array = NULL; - - out: - return op_ret; - } - op_ret = dict_foreach (xattr, _handle_every_keyvalue_pair, NULL); + op_ret = dict_foreach (xattr, _posix_handle_xattr_keyvalue_pair, + &filler); out: - GF_FREE (array); - - GF_FREE (path); STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr, NULL); return 0; @@ -3727,6 +4255,8 @@ posix_readdirp_fill (xlator_t *this, fd_t *fd, gf_dirent_t *entries, dict_t *dic struct iatt stbuf = {0, }; uuid_t gfid; + if (list_empty(&entries->list)) + return 0; itable = fd->inode->table; @@ -3815,8 +4345,23 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this, */ ret = dict_get_int32 (dict, GF_READDIR_SKIP_DIRS, &skip_dirs); - count = posix_fill_readdir (fd, dir, off, size, &entries, this, - skip_dirs); + LOCK (&fd->lock); + { + /* posix_fill_readdir performs multiple separate individual + readdir() calls to fill up the buffer. + + In case of NFS where the same anonymous FD is shared between + different applications, reading a common directory can + result in the anonymous fd getting re-used unsafely between + the two readdir requests (in two different io-threads). + + It would also help, in the future, to replace the loop + around readdir() with a single large getdents() call. + */ + count = posix_fill_readdir (fd, dir, off, size, &entries, this, + skip_dirs); + } + UNLOCK (&fd->lock); /* pick ENOENT to indicate EOF */ op_errno = errno; @@ -4019,6 +4564,27 @@ posix_set_owner (xlator_t *this, uid_t uid, gid_t gid) return ret; } + +static int +set_batch_fsync_mode (struct posix_private *priv, const char *str) +{ + if (strcmp (str, "none") == 0) + priv->batch_fsync_mode = BATCH_NONE; + else if (strcmp (str, "syncfs") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS; + else if (strcmp (str, "syncfs-single-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_SINGLE_FSYNC; + else if (strcmp (str, "syncfs-reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_REVERSE_FSYNC; + else if (strcmp (str, "reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_REVERSE_FSYNC; + else + return -1; + + return 0; +} + + int reconfigure (xlator_t *this, dict_t *options) { @@ -4026,6 +4592,7 @@ reconfigure (xlator_t *this, dict_t *options) struct posix_private *priv = NULL; uid_t uid = -1; gid_t gid = -1; + char *batch_fsync_mode_str = NULL; priv = this->private; @@ -4033,6 +4600,18 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("brick-gid", gid, options, uint32, out); posix_set_owner (this, uid, gid); + GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec, + options, uint32, out); + + GF_OPTION_RECONF ("batch-fsync-mode", batch_fsync_mode_str, + options, str, out); + + if (set_batch_fsync_mode (priv, batch_fsync_mode_str) != 0) { + gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s", + batch_fsync_mode_str); + goto out; + } + GF_OPTION_RECONF ("linux-aio", priv->aio_configured, options, bool, out); @@ -4041,6 +4620,20 @@ reconfigure (xlator_t *this, dict_t *options) else posix_aio_off (this); + GF_OPTION_RECONF ("node-uuid-pathinfo", priv->node_uuid_pathinfo, + options, bool, out); + + if (priv->node_uuid_pathinfo && + (uuid_is_null (priv->glusterd_uuid))) { + gf_log (this->name, GF_LOG_INFO, + "glusterd uuid is NULL, pathinfo xattr would" + " fallback to <hostname>:<export>"); + } + + GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval, + options, uint32, out); + posix_spawn_health_check_thread (this); + ret = 0; out: return ret; @@ -4070,6 +4663,7 @@ init (xlator_t *this) char *guuid = NULL; uid_t uid = -1; gid_t gid = -1; + char *batch_fsync_mode_str; dir_data = dict_get (this->options, "directory"); @@ -4222,7 +4816,7 @@ init (xlator_t *this) } } - size = sys_lgetxattr (dir_data->data, "system.posix_acl_access", + size = sys_lgetxattr (dir_data->data, POSIX_ACL_ACCESS_XATTR, NULL, 0); if ((size < 0) && (errno == ENOTSUP)) gf_log (this->name, GF_LOG_WARNING, @@ -4402,11 +4996,48 @@ init (xlator_t *this) } } + GF_OPTION_INIT ("node-uuid-pathinfo", + _private->node_uuid_pathinfo, bool, out); + if (_private->node_uuid_pathinfo && + (uuid_is_null (_private->glusterd_uuid))) { + gf_log (this->name, GF_LOG_INFO, + "glusterd uuid is NULL, pathinfo xattr would" + " fallback to <hostname>:<export>"); + } + + _private->health_check_active = _gf_false; + GF_OPTION_INIT ("health-check-interval", + _private->health_check_interval, uint32, out); + if (_private->health_check_interval) + posix_spawn_health_check_thread (this); + pthread_mutex_init (&_private->janitor_lock, NULL); pthread_cond_init (&_private->janitor_cond, NULL); INIT_LIST_HEAD (&_private->janitor_fds); posix_spawn_janitor_thread (this); + + pthread_mutex_init (&_private->fsync_mutex, NULL); + pthread_cond_init (&_private->fsync_cond, NULL); + INIT_LIST_HEAD (&_private->fsyncs); + + ret = gf_thread_create (&_private->fsyncer, NULL, posix_fsyncer, this); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "fsyncer thread" + " creation failed (%s)", strerror (errno)); + goto out; + } + + GF_OPTION_INIT ("batch-fsync-mode", batch_fsync_mode_str, str, out); + + if (set_batch_fsync_mode (_private, batch_fsync_mode_str) != 0) { + gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s", + batch_fsync_mode_str); + goto out; + } + + GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec, + uint32, out); out: return ret; } @@ -4472,6 +5103,9 @@ struct xlator_fops fops = { .fxattrop = posix_fxattrop, .setattr = posix_setattr, .fsetattr = posix_fsetattr, + .fallocate = _posix_fallocate, + .discard = posix_discard, + .zerofill = posix_zerofill, }; struct xlator_cbks cbks = { @@ -4508,12 +5142,51 @@ struct volume_options options[] = { { .key = {"brick-uid"}, .type = GF_OPTION_TYPE_INT, - .description = "Support for setting uid of brick's root" + .min = 0, + .validate = GF_OPT_VALIDATE_MIN, + .description = "Support for setting uid of brick's owner" }, { .key = {"brick-gid"}, .type = GF_OPTION_TYPE_INT, - .description = "Support for setting gid of brick's root" + .min = 0, + .validate = GF_OPT_VALIDATE_MIN, + .description = "Support for setting gid of brick's owner" }, + { .key = {"node-uuid-pathinfo"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "return glusterd's node-uuid in pathinfo xattr" + " string instead of hostname" + }, + { + .key = {"health-check-interval"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "30", + .validate = GF_OPT_VALIDATE_MIN, + .description = "Interval in seconds for a filesystem health check, " + "set to 0 to disable" + }, + { .key = {"batch-fsync-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "reverse-fsync", + .description = "Possible values:\n" + "\t- syncfs: Perform one syncfs() on behalf oa batch" + "of fsyncs.\n" + "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch" + " of fsyncs and one fsync() per batch.\n" + "\t- syncfs-reverse-fsync: Preform one syncfs() on behalf of a batch" + " of fsyncs and fsync() each file in the batch in reverse order.\n" + " in reverse order.\n" + "\t- reverse-fsync: Perform fsync() of each file in the batch in" + " reverse order." + }, + { .key = {"batch-fsync-delay-usec"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "0", + .description = "Num of usecs to wait for aggregating fsync" + " requests", + }, { .key = {NULL} } }; |
