summaryrefslogtreecommitdiffstats
path: root/xlators/storage/posix/src/posix.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/storage/posix/src/posix.c')
-rw-r--r--xlators/storage/posix/src/posix.c1278
1 files changed, 1031 insertions, 247 deletions
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index e2f1bbd5f..fb45c7a67 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _CONFIG_H
#define _CONFIG_H
#include "config.h"
@@ -33,6 +23,8 @@
#include <pthread.h>
#include <ftw.h>
#include <sys/stat.h>
+#include <signal.h>
+#include <sys/uio.h>
#ifndef GF_BSD_HOST_OS
#include <alloca.h>
@@ -60,8 +52,10 @@
#include "glusterfs3-xdr.h"
#include "hashfn.h"
#include "posix-aio.h"
+#include "glusterfs-acl.h"
extern char *marker_xattrs[];
+#define ALIGN_SIZE 4096
#undef HAVE_SET_FSID
#ifdef HAVE_SET_FSID
@@ -115,7 +109,6 @@ posix_lookup (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (loc, out);
- VALIDATE_OR_GOTO (loc->path, out);
/* The Hidden directory should be for housekeeping purpose and it
should not get any gfid on it */
@@ -138,7 +131,7 @@ posix_lookup (call_frame_t *frame, xlator_t *this,
MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &buf);
if (uuid_is_null (loc->inode->gfid)) {
- posix_gfid_set (this, real_path, loc, xdata);
+ posix_gfid_heal (this, real_path, loc, xdata);
MAKE_ENTRY_HANDLE (real_path, par_path, this,
loc, &buf);
}
@@ -219,7 +212,8 @@ posix_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
if (op_ret == -1) {
op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
+ gf_log (this->name, (op_errno == ENOENT)?
+ GF_LOG_DEBUG:GF_LOG_ERROR,
"lstat on %s failed: %s", real_path,
strerror (op_errno));
goto out;
@@ -570,6 +564,289 @@ out:
return 0;
}
+static int32_t
+posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ off_t offset, size_t len, struct iatt *statpre,
+ struct iatt *statpost)
+{
+ struct posix_fd *pfd = NULL;
+ int32_t ret = -1;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ SET_FS_ID (frame->root->uid, frame->root->gid);
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (fd, out);
+
+ ret = posix_fd_ctx_get (fd, this, &pfd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "pfd is NULL from fd=%p", fd);
+ goto out;
+ }
+
+ ret = posix_fdstat (this, pfd->fd, statpre);
+ if (ret == -1) {
+ ret = -errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "fallocate (fstat) failed on fd=%p: %s", fd,
+ strerror (errno));
+ goto out;
+ }
+
+ ret = sys_fallocate(pfd->fd, flags, offset, len);
+ if (ret == -1) {
+ ret = -errno;
+ goto out;
+ }
+
+ ret = posix_fdstat (this, pfd->fd, statpost);
+ if (ret == -1) {
+ ret = -errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "fallocate (fstat) failed on fd=%p: %s", fd,
+ strerror (errno));
+ goto out;
+ }
+
+out:
+ SET_TO_OLD_FS_ID ();
+
+ return ret;
+}
+
+char*
+_page_aligned_alloc (size_t size, char **aligned_buf)
+{
+ char *alloc_buf = NULL;
+ char *buf = NULL;
+
+ alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char);
+ if (!alloc_buf)
+ goto out;
+ /* page aligned buffer */
+ buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE);
+ *aligned_buf = buf;
+out:
+ return alloc_buf;
+}
+
+static int32_t
+_posix_do_zerofill(int fd, off_t offset, size_t len, int o_direct)
+{
+ size_t num_vect = 0;
+ int32_t num_loop = 1;
+ int32_t idx = 0;
+ int32_t op_ret = -1;
+ int32_t vect_size = VECTOR_SIZE;
+ size_t remain = 0;
+ size_t extra = 0;
+ struct iovec *vector = NULL;
+ char *iov_base = NULL;
+ char *alloc_buf = NULL;
+
+ if (len == 0)
+ return 0;
+ if (len < VECTOR_SIZE)
+ vect_size = len;
+
+ num_vect = len / (vect_size);
+ remain = len % vect_size ;
+ if (num_vect > MAX_NO_VECT) {
+ extra = num_vect % MAX_NO_VECT;
+ num_loop = num_vect / MAX_NO_VECT;
+ num_vect = MAX_NO_VECT;
+ }
+
+ vector = GF_CALLOC (num_vect, sizeof(struct iovec),
+ gf_common_mt_iovec);
+ if (!vector)
+ return -1;
+ if (o_direct) {
+ alloc_buf = _page_aligned_alloc(vect_size, &iov_base);
+ if (!alloc_buf) {
+ gf_log ("_posix_do_zerofill", GF_LOG_DEBUG,
+ "memory alloc failed, vect_size %d: %s",
+ vect_size, strerror(errno));
+ GF_FREE(vector);
+ return -1;
+ }
+ } else {
+ iov_base = GF_CALLOC (vect_size, sizeof(char),
+ gf_common_mt_char);
+ if (!iov_base) {
+ GF_FREE(vector);
+ return -1;
+ }
+ }
+
+ for (idx = 0; idx < num_vect; idx++) {
+ vector[idx].iov_base = iov_base;
+ vector[idx].iov_len = vect_size;
+ }
+ lseek(fd, offset, SEEK_SET);
+ for (idx = 0; idx < num_loop; idx++) {
+ op_ret = writev(fd, vector, num_vect);
+ if (op_ret < 0)
+ goto err;
+ }
+ if (extra) {
+ op_ret = writev(fd, vector, extra);
+ if (op_ret < 0)
+ goto err;
+ }
+ if (remain) {
+ vector[0].iov_len = remain;
+ op_ret = writev(fd, vector , 1);
+ if (op_ret < 0)
+ goto err;
+ }
+err:
+ if (o_direct)
+ GF_FREE(alloc_buf);
+ else
+ GF_FREE(iov_base);
+ GF_FREE(vector);
+ return op_ret;
+}
+
+static int32_t
+posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, struct iatt *statpre,
+ struct iatt *statpost)
+{
+ struct posix_fd *pfd = NULL;
+ int32_t ret = -1;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ SET_FS_ID (frame->root->uid, frame->root->gid);
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+ VALIDATE_OR_GOTO (fd, out);
+
+ ret = posix_fd_ctx_get (fd, this, &pfd);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "pfd is NULL from fd=%p", fd);
+ goto out;
+ }
+
+ ret = posix_fdstat (this, pfd->fd, statpre);
+ if (ret == -1) {
+ ret = -errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "pre-operation fstat failed on fd = %p: %s", fd,
+ strerror (errno));
+ goto out;
+ }
+ ret = _posix_do_zerofill(pfd->fd, offset, len, pfd->flags & O_DIRECT);
+ if (ret < 0) {
+ ret = -errno;
+ gf_log(this->name, GF_LOG_ERROR,
+ "zerofill failed on fd %d length %ld %s",
+ pfd->fd, len, strerror(errno));
+ goto out;
+ }
+ if (pfd->flags & (O_SYNC|O_DSYNC)) {
+ ret = fsync (pfd->fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "fsync() in writev on fd %d failed: %s",
+ pfd->fd, strerror (errno));
+ ret = -errno;
+ goto out;
+ }
+ }
+
+ ret = posix_fdstat (this, pfd->fd, statpost);
+ if (ret == -1) {
+ ret = -errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "post operation fstat failed on fd=%p: %s", fd,
+ strerror (errno));
+ goto out;
+ }
+
+out:
+ SET_TO_OLD_FS_ID ();
+
+ return ret;
+}
+
+static int32_t
+_posix_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ int32_t ret;
+ int32_t flags = 0;
+ struct iatt statpre = {0,};
+ struct iatt statpost = {0,};
+
+ if (keep_size)
+ flags = FALLOC_FL_KEEP_SIZE;
+
+ ret = posix_do_fallocate(frame, this, fd, flags, offset, len,
+ &statpre, &statpost);
+ if (ret < 0)
+ goto err;
+
+ STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, NULL);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, NULL);
+ return 0;
+}
+
+static int32_t
+posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ int32_t ret;
+ int32_t flags = FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE;
+ struct iatt statpre = {0,};
+ struct iatt statpost = {0,};
+
+ ret = posix_do_fallocate(frame, this, fd, flags, offset, len,
+ &statpre, &statpost);
+ if (ret < 0)
+ goto err;
+
+ STACK_UNWIND_STRICT(discard, frame, 0, 0, &statpre, &statpost, NULL);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(discard, frame, -1, -ret, NULL, NULL, NULL);
+ return 0;
+
+}
+
+static int32_t
+posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ int32_t ret = 0;
+ struct iatt statpre = {0,};
+ struct iatt statpost = {0,};
+
+ ret = posix_do_zerofill(frame, this, fd, offset, len,
+ &statpre, &statpost);
+ if (ret < 0)
+ goto err;
+
+ STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(zerofill, frame, -1, -ret, NULL, NULL, NULL);
+ return 0;
+
+}
+
int32_t
posix_opendir (call_frame_t *frame, xlator_t *this,
loc_t *loc, fd_t *fd, dict_t *xdata)
@@ -1541,7 +1818,7 @@ posix_link (call_frame_t *frame, xlator_t *this,
/*
* On most systems (Linux being the notable exception), link(2)
* first resolves symlinks. If the target is a directory or
- * is nonexistent, it will fail. linkat(2) operates on the
+ * is nonexistent, it will fail. linkat(2) operates on the
* symlink instead of its target when the AT_SYMLINK_FOLLOW
* flag is not supplied.
*/
@@ -1727,6 +2004,9 @@ posix_create (call_frame_t *frame, xlator_t *this,
goto out;
}
+ if (was_present)
+ goto fill_stat;
+
op_ret = posix_gfid_set (this, real_path, loc, xdata);
if (op_ret) {
gf_log (this->name, GF_LOG_ERROR,
@@ -1757,6 +2037,7 @@ posix_create (call_frame_t *frame, xlator_t *this,
strerror (errno));
}
+fill_stat:
op_ret = posix_fdstat (this, _fd, &stbuf);
if (op_ret == -1) {
op_errno = errno;
@@ -1974,11 +2255,7 @@ posix_readv (call_frame_t *frame, xlator_t *this,
}
/* Hack to notify higher layers of EOF. */
- if (stbuf.ia_size == 0)
- op_errno = ENOENT;
- else if ((offset + vec.iov_len) == stbuf.ia_size)
- op_errno = ENOENT;
- else if (offset > stbuf.ia_size)
+ if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size)
op_errno = ENOENT;
op_ret = vec.iov_len;
@@ -2023,14 +2300,12 @@ err:
return op_ret;
}
-
int32_t
__posix_writev (int fd, struct iovec *vector, int count, off_t startoff,
int odirect)
{
int32_t op_ret = 0;
int idx = 0;
- int align = 4096;
int max_buf_size = 0;
int retval = 0;
char *buf = NULL;
@@ -2046,7 +2321,7 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff,
max_buf_size = vector[idx].iov_len;
}
- alloc_buf = GF_MALLOC (1 * (max_buf_size + align), gf_posix_mt_char);
+ alloc_buf = _page_aligned_alloc (max_buf_size, &buf);
if (!alloc_buf) {
op_ret = -errno;
goto err;
@@ -2054,9 +2329,6 @@ __posix_writev (int fd, struct iovec *vector, int count, off_t startoff,
internal_off = startoff;
for (idx = 0; idx < count; idx++) {
- /* page aligned buffer */
- buf = GF_ALIGN_BUF (alloc_buf, align);
-
memcpy (buf, vector[idx].iov_base, vector[idx].iov_len);
/* not sure whether writev works on O_DIRECT'd fd */
@@ -2076,6 +2348,48 @@ err:
return op_ret;
}
+dict_t*
+_fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append)
+{
+ dict_t *rsp_xdata = NULL;
+ int32_t ret = 0;
+ inode_t *inode = NULL;
+
+ if (fd)
+ inode = fd->inode;
+
+ if (!fd || !fd->inode || uuid_is_null (fd->inode->gfid)) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid Args: "
+ "fd: %p inode: %p gfid:%s", fd, inode?inode:0,
+ inode?uuid_utoa(inode->gfid):"N/A");
+ goto out;
+ }
+
+ if (!xdata || !dict_get (xdata, GLUSTERFS_OPEN_FD_COUNT))
+ goto out;
+
+ rsp_xdata = dict_new();
+ if (!rsp_xdata)
+ goto out;
+
+ ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_OPEN_FD_COUNT,
+ fd->inode->fd_count);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set "
+ "dictionary value for %s", uuid_utoa (fd->inode->gfid),
+ GLUSTERFS_OPEN_FD_COUNT);
+ }
+
+ ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND,
+ is_append);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING, "%s: Failed to set "
+ "dictionary value for %s", uuid_utoa (fd->inode->gfid),
+ GLUSTERFS_WRITE_IS_APPEND);
+ }
+out:
+ return rsp_xdata;
+}
int32_t
posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
@@ -2090,6 +2404,9 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iatt preop = {0,};
struct iatt postop = {0,};
int ret = -1;
+ dict_t *rsp_xdata = NULL;
+ int is_append = 0;
+ gf_boolean_t locked = _gf_false;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
@@ -2111,6 +2428,17 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
_fd = pfd->fd;
+ if (xdata && dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) {
+ /* The write_is_append check and write must happen
+ atomically. Else another write can overtake this
+ write after the check and get written earlier.
+
+ So lock before preop-stat and unlock after write.
+ */
+ locked = _gf_true;
+ LOCK(&fd->inode->lock);
+ }
+
op_ret = posix_fdstat (this, _fd, &preop);
if (op_ret == -1) {
op_errno = errno;
@@ -2120,8 +2448,19 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
goto out;
}
+ if (locked) {
+ if (preop.ia_size == offset || (fd->flags & O_APPEND))
+ is_append = 1;
+ }
+
op_ret = __posix_writev (_fd, vector, count, offset,
(pfd->flags & O_DIRECT));
+
+ if (locked) {
+ UNLOCK (&fd->inode->lock);
+ locked = _gf_false;
+ }
+
if (op_ret < 0) {
op_errno = -op_ret;
op_ret = -1;
@@ -2137,14 +2476,21 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
UNLOCK (&priv->lock);
if (op_ret >= 0) {
+ rsp_xdata = _fill_writev_xdata (fd, xdata, this, is_append);
/* wiretv successful, we also need to get the stat of
* the file we wrote to
*/
- if (pfd->flushwrites) {
- /* NOTE: ignore the error, if one occurs at this
- * point */
- fsync (_fd);
+ if (flags & (O_SYNC|O_DSYNC)) {
+ ret = fsync (_fd);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "fsync() in writev on fd %d failed: %s",
+ _fd, strerror (errno));
+ op_ret = -1;
+ op_errno = errno;
+ goto out;
+ }
}
ret = posix_fdstat (this, _fd, &postop);
@@ -2160,9 +2506,16 @@ posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
out:
+ if (locked) {
+ UNLOCK (&fd->inode->lock);
+ locked = _gf_false;
+ }
+
STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &preop, &postop,
- NULL);
+ rsp_xdata);
+ if (rsp_xdata)
+ dict_unref (rsp_xdata);
return 0;
}
@@ -2289,6 +2642,33 @@ out:
}
+int
+posix_batch_fsync (call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int datasync, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ struct posix_private *priv = NULL;
+
+ priv = this->private;
+
+ stub = fop_fsync_stub (frame, default_fsync, fd, datasync, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+ }
+
+ pthread_mutex_lock (&priv->fsync_mutex);
+ {
+ list_add_tail (&stub->list, &priv->fsyncs);
+ priv->fsync_queue_count++;
+ pthread_cond_signal (&priv->fsync_cond);
+ }
+ pthread_mutex_unlock (&priv->fsync_mutex);
+
+ return 0;
+}
+
+
int32_t
posix_fsync (call_frame_t *frame, xlator_t *this,
fd_t *fd, int32_t datasync, dict_t *xdata)
@@ -2300,6 +2680,7 @@ posix_fsync (call_frame_t *frame, xlator_t *this,
int ret = -1;
struct iatt preop = {0,};
struct iatt postop = {0,};
+ struct posix_private *priv = NULL;
DECLARE_OLD_FS_ID_VAR;
@@ -2315,6 +2696,12 @@ posix_fsync (call_frame_t *frame, xlator_t *this,
goto out;
#endif
+ priv = this->private;
+ if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) {
+ posix_batch_fsync (frame, this, fd, datasync, xdata);
+ return 0;
+ }
+
ret = posix_fd_ctx_get (fd, this, &pfd);
if (ret < 0) {
op_errno = -ret;
@@ -2376,6 +2763,17 @@ out:
}
static int gf_posix_xattr_enotsup_log;
+static int
+_handle_setxattr_keyvalue_pair (dict_t *d, char *k, data_t *v,
+ void *tmp)
+{
+ posix_xattr_filler_t *filler = NULL;
+
+ filler = tmp;
+
+ return posix_handle_pair (filler->this, filler->real_path, k, v,
+ filler->flags);
+}
int32_t
posix_setxattr (call_frame_t *frame, xlator_t *this,
@@ -2384,8 +2782,8 @@ posix_setxattr (call_frame_t *frame, xlator_t *this,
int32_t op_ret = -1;
int32_t op_errno = 0;
char * real_path = NULL;
- data_pair_t * trav = NULL;
- int ret = -1;
+
+ posix_xattr_filler_t filler = {0,};
DECLARE_OLD_FS_ID_VAR;
SET_FS_ID (frame->root->uid, frame->root->gid);
@@ -2400,18 +2798,13 @@ posix_setxattr (call_frame_t *frame, xlator_t *this,
op_ret = -1;
dict_del (dict, GFID_XATTR_KEY);
- trav = dict->members_list;
-
- while (trav) {
- ret = posix_handle_pair (this, real_path, trav, flags);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
- trav = trav->next;
- }
-
- op_ret = 0;
+ filler.real_path = real_path;
+ filler.this = this;
+ filler.flags = flags;
+ op_ret = dict_foreach (dict, _handle_setxattr_keyvalue_pair,
+ &filler);
+ if (op_ret < 0)
+ op_errno = -op_ret;
out:
SET_TO_OLD_FS_ID ();
@@ -2421,6 +2814,53 @@ out:
return 0;
}
+
+int
+posix_xattr_get_real_filename (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *key, dict_t *dict, dict_t *xdata)
+{
+ char *real_path = NULL;
+ struct dirent *dirent = NULL;
+ DIR *fd = NULL;
+ const char *fname = NULL;
+ char *found = NULL;
+ int ret = -1;
+ int op_ret = -1;
+
+ MAKE_INODE_HANDLE (real_path, this, loc, NULL);
+
+ fd = opendir (real_path);
+ if (!fd)
+ return -errno;
+
+ fname = key + strlen (GF_XATTR_GET_REAL_FILENAME_KEY);
+
+ while ((dirent = readdir (fd))) {
+ if (strcasecmp (dirent->d_name, fname) == 0) {
+ found = gf_strdup (dirent->d_name);
+ if (!found) {
+ closedir (fd);
+ return -ENOMEM;
+ }
+ break;
+ }
+ }
+
+ closedir (fd);
+
+ if (!found)
+ return -ENOENT;
+
+ ret = dict_set_dynstr (dict, (char *)key, found);
+ if (ret) {
+ GF_FREE (found);
+ return -ENOMEM;
+ }
+ ret = strlen (found) + 1;
+
+ return ret;
+}
+
/**
* posix_getxattr - this function returns a dictionary with all the
* key:value pair present as xattr. used for
@@ -2475,9 +2915,29 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
dict = dict_new ();
if (!dict) {
+ op_errno = ENOMEM;
goto out;
}
+ if (loc->inode && name &&
+ (strncmp (name, GF_XATTR_GET_REAL_FILENAME_KEY,
+ strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)) {
+ ret = posix_xattr_get_real_filename (frame, this, loc,
+ name, dict, xdata);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = -ret;
+ gf_log (this->name, (op_errno == ENOENT) ?
+ GF_LOG_DEBUG : GF_LOG_WARNING,
+ "Failed to get real filename (%s, %s): %s",
+ loc->path, name, strerror (op_errno));
+ goto out;
+ }
+
+ size = ret;
+ goto done;
+ }
+
if (loc->inode && name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) {
if (!list_empty (&loc->inode->fd_list)) {
ret = dict_set_uint32 (dict, (char *)name, 1);
@@ -2501,8 +2961,13 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
else
rpath = real_path;
- (void) snprintf (host_buf, 1024, "<POSIX(%s):%s:%s>",
- priv->base_path, priv->hostname, rpath);
+ (void) snprintf (host_buf, 1024,
+ "<POSIX(%s):%s:%s>", priv->base_path,
+ ((priv->node_uuid_pathinfo
+ && !uuid_is_null(priv->glusterd_uuid))
+ ? uuid_utoa (priv->glusterd_uuid)
+ : priv->hostname),
+ rpath);
dyn_rpath = gf_strdup (host_buf);
if (!dyn_rpath) {
@@ -2578,6 +3043,11 @@ posix_getxattr (call_frame_t *frame, xlator_t *this,
"supported (try remounting"
" brick with 'user_xattr' "
"flag)");
+ } else if (op_errno == ENOATTR ||
+ op_errno == ENODATA) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No such attribute:%s for file %s",
+ key, real_path);
} else {
gf_log (this->name, GF_LOG_ERROR,
"getxattr failed on %s: %s (%s)",
@@ -2895,6 +3365,17 @@ out:
return 0;
}
+static int
+_handle_fsetxattr_keyvalue_pair (dict_t *d, char *k, data_t *v,
+ void *tmp)
+{
+ posix_xattr_filler_t *filler = NULL;
+
+ filler = tmp;
+
+ return posix_fhandle_pair (filler->this, filler->fd, k, v,
+ filler->flags);
+}
int32_t
posix_fsetxattr (call_frame_t *frame, xlator_t *this,
@@ -2904,8 +3385,9 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this,
int32_t op_errno = 0;
struct posix_fd * pfd = NULL;
int _fd = -1;
- data_pair_t * trav = NULL;
- int ret = -1;
+ int ret = -1;
+
+ posix_xattr_filler_t filler = {0,};
DECLARE_OLD_FS_ID_VAR;
SET_FS_ID (frame->root->uid, frame->root->gid);
@@ -2926,18 +3408,13 @@ posix_fsetxattr (call_frame_t *frame, xlator_t *this,
dict_del (dict, GFID_XATTR_KEY);
- trav = dict->members_list;
-
- while (trav) {
- ret = posix_fhandle_pair (this, _fd, trav, flags);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
- trav = trav->next;
- }
-
- op_ret = 0;
+ filler.fd = _fd;
+ filler.this = this;
+ filler.flags = flags;
+ op_ret = dict_foreach (dict, _handle_fsetxattr_keyvalue_pair,
+ &filler);
+ if (op_ret < 0)
+ op_errno = -op_ret;
out:
SET_TO_OLD_FS_ID ();
@@ -2947,6 +3424,28 @@ out:
return 0;
}
+int
+_posix_remove_xattr (dict_t *dict, char *key, data_t *value, void *data)
+{
+ int32_t op_ret = 0;
+ xlator_t *this = NULL;
+ posix_xattr_filler_t *filler = NULL;
+
+ filler = (posix_xattr_filler_t *) data;
+ this = filler->this;
+
+ op_ret = sys_lremovexattr (filler->real_path, key);
+ if (op_ret == -1) {
+ filler->op_errno = errno;
+ if (errno != ENOATTR && errno != EPERM)
+ gf_log (this->name, GF_LOG_ERROR,
+ "removexattr failed on %s (for %s): %s",
+ filler->real_path, key, strerror (errno));
+ }
+
+ return op_ret;
+}
+
int32_t
posix_removexattr (call_frame_t *frame, xlator_t *this,
@@ -2955,6 +3454,7 @@ posix_removexattr (call_frame_t *frame, xlator_t *this,
int32_t op_ret = -1;
int32_t op_errno = 0;
char * real_path = NULL;
+ posix_xattr_filler_t filler = {0,};
DECLARE_OLD_FS_ID_VAR;
@@ -2970,6 +3470,22 @@ posix_removexattr (call_frame_t *frame, xlator_t *this,
SET_FS_ID (frame->root->uid, frame->root->gid);
+ /**
+ * sending an empty key name with xdata containing the
+ * list of key(s) to be removed implies "bulk remove request"
+ * for removexattr.
+ */
+ if (name && (strcmp (name, "") == 0) && xdata) {
+ filler.real_path = real_path;
+ filler.this = this;
+ op_ret = dict_foreach (xdata, _posix_remove_xattr, &filler);
+ if (op_ret) {
+ op_errno = filler.op_errno;
+ }
+
+ goto out;
+ }
+
op_ret = sys_lremovexattr (real_path, name);
if (op_ret == -1) {
op_errno = errno;
@@ -2997,7 +3513,6 @@ posix_fremovexattr (call_frame_t *frame, xlator_t *this,
int32_t op_errno = 0;
struct posix_fd * pfd = NULL;
int _fd = -1;
- uint64_t tmp_pfd = 0;
int ret = -1;
DECLARE_OLD_FS_ID_VAR;
@@ -3008,15 +3523,13 @@ posix_fremovexattr (call_frame_t *frame, xlator_t *this,
goto out;
}
- ret = fd_ctx_get (fd, this, &tmp_pfd);
+ ret = posix_fd_ctx_get (fd, this, &pfd);
if (ret < 0) {
op_errno = -ret;
gf_log (this->name, GF_LOG_WARNING,
"pfd is NULL from fd=%p", fd);
goto out;
}
- pfd = (struct posix_fd *)(long)tmp_pfd;
-
_fd = pfd->fd;
@@ -3094,9 +3607,31 @@ posix_print_xattr (dict_t *this,
static void
__add_array (int32_t *dest, int32_t *src, int count)
{
+ int i = 0;
+ int32_t destval = 0;
+ for (i = 0; i < count; i++) {
+ destval = ntoh32 (dest[i]);
+ if (destval == 0xffffffff)
+ continue;
+ dest[i] = hton32 (destval + ntoh32 (src[i]));
+ }
+}
+
+static void
+__or_array (int32_t *dest, int32_t *src, int count)
+{
int i = 0;
for (i = 0; i < count; i++) {
- dest[i] = hton32 (ntoh32 (dest[i]) + ntoh32 (src[i]));
+ dest[i] = hton32 (ntoh32 (dest[i]) | ntoh32 (src[i]));
+ }
+}
+
+static void
+__and_array (int32_t *dest, int32_t *src, int count)
+{
+ int i = 0;
+ for (i = 0; i < count; i++) {
+ dest[i] = hton32 (ntoh32 (dest[i]) & ntoh32 (src[i]));
}
}
@@ -3109,6 +3644,159 @@ __add_long_array (int64_t *dest, int64_t *src, int count)
}
}
+static int
+_posix_handle_xattr_keyvalue_pair (dict_t *d, char *k, data_t *v,
+ void *tmp)
+{
+ int size = 0;
+ int count = 0;
+ int op_ret = 0;
+ int op_errno = 0;
+ gf_xattrop_flags_t optype = 0;
+ char *array = NULL;
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+ posix_xattr_filler_t *filler = NULL;
+
+ filler = tmp;
+
+ optype = (gf_xattrop_flags_t)(filler->flags);
+ this = filler->this;
+ inode = filler->inode;
+
+ count = v->len;
+ array = GF_CALLOC (count, sizeof (char), gf_posix_mt_char);
+
+ LOCK (&inode->lock);
+ {
+ if (filler->real_path) {
+ size = sys_lgetxattr (filler->real_path, k,
+ (char *)array, v->len);
+ } else {
+ size = sys_fgetxattr (filler->fd, k, (char *)array,
+ v->len);
+ }
+
+ op_errno = errno;
+ if ((size == -1) && (op_errno != ENODATA) &&
+ (op_errno != ENOATTR)) {
+ if (op_errno == ENOTSUP) {
+ GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log,
+ this->name, GF_LOG_WARNING,
+ "Extended attributes not "
+ "supported by filesystem");
+ } else if (op_errno != ENOENT ||
+ !posix_special_xattr (marker_xattrs,
+ k)) {
+ if (filler->real_path)
+ gf_log (this->name, GF_LOG_ERROR,
+ "getxattr failed on %s while doing "
+ "xattrop: Key:%s (%s)",
+ filler->real_path,
+ k, strerror (op_errno));
+ else
+ gf_log (this->name, GF_LOG_ERROR,
+ "fgetxattr failed on fd=%d while doing "
+ "xattrop: Key:%s (%s)",
+ filler->fd,
+ k, strerror (op_errno));
+ }
+
+ op_ret = -1;
+ goto unlock;
+ }
+
+ switch (optype) {
+
+ case GF_XATTROP_ADD_ARRAY:
+ __add_array ((int32_t *) array, (int32_t *) v->data,
+ v->len / 4);
+ break;
+
+ case GF_XATTROP_ADD_ARRAY64:
+ __add_long_array ((int64_t *) array, (int64_t *) v->data,
+ v->len / 8);
+ break;
+
+ case GF_XATTROP_OR_ARRAY:
+ __or_array ((int32_t *) array,
+ (int32_t *) v->data,
+ v->len / 4);
+ break;
+
+ case GF_XATTROP_AND_ARRAY:
+ __and_array ((int32_t *) array,
+ (int32_t *) v->data,
+ v->len / 4);
+ break;
+
+ default:
+ gf_log (this->name, GF_LOG_ERROR,
+ "Unknown xattrop type (%d) on %s. Please send "
+ "a bug report to gluster-devel@nongnu.org",
+ optype, filler->real_path);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto unlock;
+ }
+
+ if (filler->real_path) {
+ size = sys_lsetxattr (filler->real_path, k, array,
+ v->len, 0);
+ } else {
+ size = sys_fsetxattr (filler->fd, k, (char *)array,
+ v->len, 0);
+ }
+ }
+unlock:
+ UNLOCK (&inode->lock);
+
+ if (op_ret == -1)
+ goto out;
+
+ op_errno = errno;
+ if (size == -1) {
+ if (filler->real_path)
+ gf_log (this->name, GF_LOG_ERROR,
+ "setxattr failed on %s while doing xattrop: "
+ "key=%s (%s)", filler->real_path,
+ k, strerror (op_errno));
+ else
+ gf_log (this->name, GF_LOG_ERROR,
+ "fsetxattr failed on fd=%d while doing xattrop: "
+ "key=%s (%s)", filler->fd,
+ k, strerror (op_errno));
+
+ op_ret = -1;
+ goto out;
+ } else {
+ size = dict_set_bin (d, k, array, v->len);
+
+ if (size != 0) {
+ if (filler->real_path)
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dict_set_bin failed (path=%s): "
+ "key=%s (%s)", filler->real_path,
+ k, strerror (-size));
+ else
+ gf_log (this->name, GF_LOG_DEBUG,
+ "dict_set_bin failed (fd=%d): "
+ "key=%s (%s)", filler->fd,
+ k, strerror (-size));
+
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+ array = NULL;
+ }
+
+ array = NULL;
+
+out:
+ return op_ret;
+}
+
/**
* xattrop - xattr operations - for internal use by GlusterFS
* @optype: ADD_ARRAY:
@@ -3120,36 +3808,24 @@ int
do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
gf_xattrop_flags_t optype, dict_t *xattr)
{
- char *real_path = NULL;
- char *array = NULL;
- int size = 0;
- int count = 0;
-
- int op_ret = 0;
- int op_errno = 0;
-
- int ret = 0;
- int _fd = -1;
- struct posix_fd *pfd = NULL;
-
- data_pair_t *trav = NULL;
-
- char * path = NULL;
- inode_t * inode = NULL;
+ int op_ret = 0;
+ int op_errno = 0;
+ int _fd = -1;
+ char *real_path = NULL;
+ struct posix_fd *pfd = NULL;
+ inode_t *inode = NULL;
+ posix_xattr_filler_t filler = {0,};
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (xattr, out);
VALIDATE_OR_GOTO (this, out);
- trav = xattr->members_list;
-
if (fd) {
- ret = posix_fd_ctx_get (fd, this, &pfd);
- if (ret < 0) {
+ op_ret = posix_fd_ctx_get (fd, this, &pfd);
+ if (op_ret < 0) {
gf_log (this->name, GF_LOG_WARNING,
"failed to get pfd from fd=%p",
fd);
- op_ret = -1;
op_errno = EBADFD;
goto out;
}
@@ -3160,136 +3836,21 @@ do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
MAKE_INODE_HANDLE (real_path, this, loc, NULL);
if (real_path) {
- path = gf_strdup (real_path);
inode = loc->inode;
} else if (fd) {
inode = fd->inode;
}
- while (trav && inode) {
- count = trav->value->len;
- array = GF_CALLOC (count, sizeof (char),
- gf_posix_mt_char);
-
- LOCK (&inode->lock);
- {
- if (loc) {
- size = sys_lgetxattr (real_path, trav->key, (char *)array,
- trav->value->len);
- } else {
- size = sys_fgetxattr (_fd, trav->key, (char *)array,
- trav->value->len);
- }
-
- op_errno = errno;
- if ((size == -1) && (op_errno != ENODATA) &&
- (op_errno != ENOATTR)) {
- if (op_errno == ENOTSUP) {
- GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log,
- this->name,GF_LOG_WARNING,
- "Extended attributes not "
- "supported by filesystem");
- } else if (op_errno != ENOENT ||
- !posix_special_xattr (marker_xattrs,
- trav->key)) {
- if (loc)
- gf_log (this->name, GF_LOG_ERROR,
- "getxattr failed on %s while doing "
- "xattrop: Key:%s (%s)", path,
- trav->key, strerror (op_errno));
- else
- gf_log (this->name, GF_LOG_ERROR,
- "fgetxattr failed on fd=%d while doing "
- "xattrop: Key:%s (%s)", _fd,
- trav->key, strerror (op_errno));
- }
-
- op_ret = -1;
- goto unlock;
- }
-
- switch (optype) {
-
- case GF_XATTROP_ADD_ARRAY:
- __add_array ((int32_t *) array, (int32_t *) trav->value->data,
- trav->value->len / 4);
- break;
-
- case GF_XATTROP_ADD_ARRAY64:
- __add_long_array ((int64_t *) array, (int64_t *) trav->value->data,
- trav->value->len / 8);
- break;
-
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "Unknown xattrop type (%d) on %s. Please send "
- "a bug report to gluster-devel@nongnu.org",
- optype, path);
- op_ret = -1;
- op_errno = EINVAL;
- goto unlock;
- }
-
- if (loc) {
- size = sys_lsetxattr (real_path, trav->key, array,
- trav->value->len, 0);
- } else {
- size = sys_fsetxattr (_fd, trav->key, (char *)array,
- trav->value->len, 0);
- }
- }
- unlock:
- UNLOCK (&inode->lock);
-
- if (op_ret == -1)
- goto out;
-
- op_errno = errno;
- if (size == -1) {
- if (loc)
- gf_log (this->name, GF_LOG_ERROR,
- "setxattr failed on %s while doing xattrop: "
- "key=%s (%s)", path,
- trav->key, strerror (op_errno));
- else
- gf_log (this->name, GF_LOG_ERROR,
- "fsetxattr failed on fd=%d while doing xattrop: "
- "key=%s (%s)", _fd,
- trav->key, strerror (op_errno));
-
- op_ret = -1;
- goto out;
- } else {
- size = dict_set_bin (xattr, trav->key, array,
- trav->value->len);
-
- if (size != 0) {
- if (loc)
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_set_bin failed (path=%s): "
- "key=%s (%s)", path,
- trav->key, strerror (-size));
- else
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_set_bin failed (fd=%d): "
- "key=%s (%s)", _fd,
- trav->key, strerror (-size));
+ filler.this = this;
+ filler.fd = _fd;
+ filler.real_path = real_path;
+ filler.flags = (int)optype;
+ filler.inode = inode;
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
- array = NULL;
- }
-
- array = NULL;
- trav = trav->next;
- }
+ op_ret = dict_foreach (xattr, _posix_handle_xattr_keyvalue_pair,
+ &filler);
out:
- GF_FREE (array);
-
- GF_FREE (path);
STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr, NULL);
return 0;
@@ -3547,7 +4108,7 @@ posix_fentrylk (call_frame_t *frame, xlator_t *this,
int
posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size,
- gf_dirent_t *entries)
+ gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs)
{
off_t in_case = -1;
size_t filled = 0;
@@ -3557,6 +4118,18 @@ posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size,
int32_t this_size = -1;
gf_dirent_t *this_entry = NULL;
uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
+ struct stat stbuf = {0,};
+ char *hpath = NULL;
+ int len = 0;
+ int ret = 0;
+
+ if (skip_dirs) {
+ len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0);
+ hpath = alloca (len + 256); /* NAME_MAX */
+ posix_handle_path (this, fd->inode->gfid, NULL, hpath, len);
+ len = strlen (hpath);
+ hpath[len] = '/';
+ }
if (!off) {
rewinddir (dir);
@@ -3607,6 +4180,17 @@ posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size,
continue;
}
+ if (skip_dirs) {
+ if (DT_ISDIR (entry->d_type)) {
+ continue;
+ } else if (hpath) {
+ strcpy (&hpath[len+1],entry->d_name);
+ ret = lstat (hpath, &stbuf);
+ if (!ret && S_ISDIR (stbuf.st_mode))
+ continue;
+ }
+ }
+
this_size = max (sizeof (gf_dirent_t),
sizeof (gfs3_dirplist))
+ strlen (entry->d_name) + 1;
@@ -3671,6 +4255,8 @@ posix_readdirp_fill (xlator_t *this, fd_t *fd, gf_dirent_t *entries, dict_t *dic
struct iatt stbuf = {0, };
uuid_t gfid;
+ if (list_empty(&entries->list))
+ return 0;
itable = fd->inode->table;
@@ -3728,6 +4314,7 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this,
int32_t op_ret = -1;
int32_t op_errno = 0;
gf_dirent_t entries;
+ int32_t skip_dirs = 0;
VALIDATE_OR_GOTO (frame, out);
@@ -3753,7 +4340,28 @@ posix_do_readdir (call_frame_t *frame, xlator_t *this,
goto out;
}
- count = posix_fill_readdir (fd, dir, off, size, &entries);
+ /* When READDIR_FILTER option is set to on, we can filter out
+ * directory's entry from the entry->list.
+ */
+ ret = dict_get_int32 (dict, GF_READDIR_SKIP_DIRS, &skip_dirs);
+
+ LOCK (&fd->lock);
+ {
+ /* posix_fill_readdir performs multiple separate individual
+ readdir() calls to fill up the buffer.
+
+ In case of NFS where the same anonymous FD is shared between
+ different applications, reading a common directory can
+ result in the anonymous fd getting re-used unsafely between
+ the two readdir requests (in two different io-threads).
+
+ It would also help, in the future, to replace the loop
+ around readdir() with a single large getdents() call.
+ */
+ count = posix_fill_readdir (fd, dir, off, size, &entries, this,
+ skip_dirs);
+ }
+ UNLOCK (&fd->lock);
/* pick ENOENT to indicate EOF */
op_errno = errno;
@@ -3828,23 +4436,26 @@ int32_t
posix_rchecksum (call_frame_t *frame, xlator_t *this,
fd_t *fd, off_t offset, int32_t len, dict_t *xdata)
{
- char *buf = NULL;
- int _fd = -1;
- struct posix_fd *pfd = NULL;
- int op_ret = -1;
- int op_errno = 0;
- int ret = 0;
- int32_t weak_checksum = 0;
- unsigned char strong_checksum[MD5_DIGEST_LENGTH];
+ char *alloc_buf = NULL;
+ char *buf = NULL;
+ int _fd = -1;
+ struct posix_fd *pfd = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
+ int ret = 0;
+ int32_t weak_checksum = 0;
+ unsigned char strong_checksum[MD5_DIGEST_LENGTH] = {0};
+ struct posix_private *priv = NULL;
VALIDATE_OR_GOTO (frame, out);
VALIDATE_OR_GOTO (this, out);
VALIDATE_OR_GOTO (fd, out);
+ priv = this->private;
memset (strong_checksum, 0, MD5_DIGEST_LENGTH);
- buf = GF_CALLOC (1, len, gf_posix_mt_char);
- if (!buf) {
+ alloc_buf = _page_aligned_alloc (len, &buf);
+ if (!alloc_buf) {
op_errno = ENOMEM;
goto out;
}
@@ -3859,15 +4470,25 @@ posix_rchecksum (call_frame_t *frame, xlator_t *this,
_fd = pfd->fd;
- ret = pread (_fd, buf, len, offset);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "pread of %d bytes returned %d (%s)",
- len, ret, strerror (errno));
+ LOCK (&fd->lock);
+ {
+ if (priv->aio_capable && priv->aio_init_done)
+ __posix_fd_set_odirect (fd, pfd, 0, offset, len);
+
+ ret = pread (_fd, buf, len, offset);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "pread of %d bytes returned %d (%s)",
+ len, ret, strerror (errno));
+
+ op_errno = errno;
+ }
- op_errno = errno;
- goto out;
}
+ UNLOCK (&fd->lock);
+
+ if (ret < 0)
+ goto out;
weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf, (size_t) len);
gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) len, (unsigned char *) strong_checksum);
@@ -3877,7 +4498,7 @@ out:
STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno,
weak_checksum, strong_checksum, NULL);
- GF_FREE (buf);
+ GF_FREE (alloc_buf);
return 0;
}
@@ -3926,15 +4547,71 @@ mem_acct_init (xlator_t *this)
return ret;
}
+static int
+posix_set_owner (xlator_t *this, uid_t uid, gid_t gid)
+{
+ struct posix_private *priv = NULL;
+ int ret = -1;
+
+ priv = this->private;
+
+ ret = sys_chown (priv->base_path, uid, gid);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "Failed to set "
+ "uid/gid for brick path %s, %s",
+ priv->base_path, strerror (errno));
+
+ return ret;
+}
+
+
+static int
+set_batch_fsync_mode (struct posix_private *priv, const char *str)
+{
+ if (strcmp (str, "none") == 0)
+ priv->batch_fsync_mode = BATCH_NONE;
+ else if (strcmp (str, "syncfs") == 0)
+ priv->batch_fsync_mode = BATCH_SYNCFS;
+ else if (strcmp (str, "syncfs-single-fsync") == 0)
+ priv->batch_fsync_mode = BATCH_SYNCFS_SINGLE_FSYNC;
+ else if (strcmp (str, "syncfs-reverse-fsync") == 0)
+ priv->batch_fsync_mode = BATCH_SYNCFS_REVERSE_FSYNC;
+ else if (strcmp (str, "reverse-fsync") == 0)
+ priv->batch_fsync_mode = BATCH_REVERSE_FSYNC;
+ else
+ return -1;
+
+ return 0;
+}
+
int
reconfigure (xlator_t *this, dict_t *options)
{
int ret = -1;
struct posix_private *priv = NULL;
+ uid_t uid = -1;
+ gid_t gid = -1;
+ char *batch_fsync_mode_str = NULL;
priv = this->private;
+ GF_OPTION_RECONF ("brick-uid", uid, options, uint32, out);
+ GF_OPTION_RECONF ("brick-gid", gid, options, uint32, out);
+ posix_set_owner (this, uid, gid);
+
+ GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec,
+ options, uint32, out);
+
+ GF_OPTION_RECONF ("batch-fsync-mode", batch_fsync_mode_str,
+ options, str, out);
+
+ if (set_batch_fsync_mode (priv, batch_fsync_mode_str) != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s",
+ batch_fsync_mode_str);
+ goto out;
+ }
+
GF_OPTION_RECONF ("linux-aio", priv->aio_configured,
options, bool, out);
@@ -3943,6 +4620,20 @@ reconfigure (xlator_t *this, dict_t *options)
else
posix_aio_off (this);
+ GF_OPTION_RECONF ("node-uuid-pathinfo", priv->node_uuid_pathinfo,
+ options, bool, out);
+
+ if (priv->node_uuid_pathinfo &&
+ (uuid_is_null (priv->glusterd_uuid))) {
+ gf_log (this->name, GF_LOG_INFO,
+ "glusterd uuid is NULL, pathinfo xattr would"
+ " fallback to <hostname>:<export>");
+ }
+
+ GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval,
+ options, uint32, out);
+ posix_spawn_health_check_thread (this);
+
ret = 0;
out:
return ret;
@@ -3970,6 +4661,9 @@ init (xlator_t *this)
uuid_t gfid = {0,};
uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1};
char *guuid = NULL;
+ uid_t uid = -1;
+ gid_t gid = -1;
+ char *batch_fsync_mode_str;
dir_data = dict_get (this->options, "directory");
@@ -4062,16 +4756,13 @@ init (xlator_t *this)
goto out;
}
} else if ((size == -1) && (errno == ENODATA)) {
- /* Using the export for first time */
- size = sys_lsetxattr (dir_data->data,
- "trusted.glusterfs.volume-id",
- dict_uuid, 16, 0);
- if (size == -1) {
+
gf_log (this->name, GF_LOG_ERROR,
- "failed to set volume id on export");
+ "Extended attribute trusted.glusterfs."
+ "volume-id is absent");
ret = -1;
goto out;
- }
+
} else if ((size == -1) && (errno != ENODATA)) {
/* Wrong 'volume-id' is set, it should be error */
gf_log (this->name, GF_LOG_WARNING,
@@ -4125,7 +4816,7 @@ init (xlator_t *this)
}
}
- size = sys_lgetxattr (dir_data->data, "system.posix_acl_access",
+ size = sys_lgetxattr (dir_data->data, POSIX_ACL_ACCESS_XATTR,
NULL, 0);
if ((size < 0) && (errno == ENOTSUP))
gf_log (this->name, GF_LOG_WARNING,
@@ -4288,6 +4979,10 @@ init (xlator_t *this)
_private->aio_init_done = _gf_false;
_private->aio_capable = _gf_false;
+ GF_OPTION_INIT ("brick-uid", uid, uint32, out);
+ GF_OPTION_INIT ("brick-gid", gid, uint32, out);
+ posix_set_owner (this, uid, gid);
+
GF_OPTION_INIT ("linux-aio", _private->aio_configured, bool, out);
if (_private->aio_configured) {
@@ -4301,11 +4996,48 @@ init (xlator_t *this)
}
}
+ GF_OPTION_INIT ("node-uuid-pathinfo",
+ _private->node_uuid_pathinfo, bool, out);
+ if (_private->node_uuid_pathinfo &&
+ (uuid_is_null (_private->glusterd_uuid))) {
+ gf_log (this->name, GF_LOG_INFO,
+ "glusterd uuid is NULL, pathinfo xattr would"
+ " fallback to <hostname>:<export>");
+ }
+
+ _private->health_check_active = _gf_false;
+ GF_OPTION_INIT ("health-check-interval",
+ _private->health_check_interval, uint32, out);
+ if (_private->health_check_interval)
+ posix_spawn_health_check_thread (this);
+
pthread_mutex_init (&_private->janitor_lock, NULL);
pthread_cond_init (&_private->janitor_cond, NULL);
INIT_LIST_HEAD (&_private->janitor_fds);
posix_spawn_janitor_thread (this);
+
+ pthread_mutex_init (&_private->fsync_mutex, NULL);
+ pthread_cond_init (&_private->fsync_cond, NULL);
+ INIT_LIST_HEAD (&_private->fsyncs);
+
+ ret = gf_thread_create (&_private->fsyncer, NULL, posix_fsyncer, this);
+ if (ret) {
+ gf_log (this->name, GF_LOG_ERROR, "fsyncer thread"
+ " creation failed (%s)", strerror (errno));
+ goto out;
+ }
+
+ GF_OPTION_INIT ("batch-fsync-mode", batch_fsync_mode_str, str, out);
+
+ if (set_batch_fsync_mode (_private, batch_fsync_mode_str) != 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s",
+ batch_fsync_mode_str);
+ goto out;
+ }
+
+ GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec,
+ uint32, out);
out:
return ret;
}
@@ -4371,6 +5103,9 @@ struct xlator_fops fops = {
.fxattrop = posix_fxattrop,
.setattr = posix_setattr,
.fsetattr = posix_fsetattr,
+ .fallocate = _posix_fallocate,
+ .discard = posix_discard,
+ .zerofill = posix_zerofill,
};
struct xlator_cbks cbks = {
@@ -4399,10 +5134,59 @@ struct volume_options options[] = {
{ .key = {"glusterd-uuid"},
.type = GF_OPTION_TYPE_STR },
{
- .key = {"linux-aio"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- .description = "Support for native Linux AIO"
+ .key = {"linux-aio"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Support for native Linux AIO"
+ },
+ {
+ .key = {"brick-uid"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Support for setting uid of brick's owner"
+ },
+ {
+ .key = {"brick-gid"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Support for setting gid of brick's owner"
+ },
+ { .key = {"node-uuid-pathinfo"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "return glusterd's node-uuid in pathinfo xattr"
+ " string instead of hostname"
+ },
+ {
+ .key = {"health-check-interval"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .default_value = "30",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Interval in seconds for a filesystem health check, "
+ "set to 0 to disable"
+ },
+ { .key = {"batch-fsync-mode"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "reverse-fsync",
+ .description = "Possible values:\n"
+ "\t- syncfs: Perform one syncfs() on behalf oa batch"
+ "of fsyncs.\n"
+ "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch"
+ " of fsyncs and one fsync() per batch.\n"
+ "\t- syncfs-reverse-fsync: Preform one syncfs() on behalf of a batch"
+ " of fsyncs and fsync() each file in the batch in reverse order.\n"
+ " in reverse order.\n"
+ "\t- reverse-fsync: Perform fsync() of each file in the batch in"
+ " reverse order."
+ },
+ { .key = {"batch-fsync-delay-usec"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "0",
+ .description = "Num of usecs to wait for aggregating fsync"
+ " requests",
},
{ .key = {NULL} }
};