summaryrefslogtreecommitdiffstats
path: root/xlators/storage/posix/src/posix-inode-fd-ops.c
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/storage/posix/src/posix-inode-fd-ops.c')
-rw-r--r--xlators/storage/posix/src/posix-inode-fd-ops.c6004
1 files changed, 6004 insertions, 0 deletions
diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
new file mode 100644
index 00000000000..6d54d37e5aa
--- /dev/null
+++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
@@ -0,0 +1,6004 @@
+/*
+ Copyright (c) 2006-2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#define __XOPEN_SOURCE 500
+
+/* for SEEK_HOLE and SEEK_DATA */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <openssl/md5.h>
+#include <stdint.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <errno.h>
+#include <libgen.h>
+#include <pthread.h>
+#include <ftw.h>
+#include <sys/stat.h>
+#include <signal.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <regex.h>
+
+#ifndef GF_BSD_HOST_OS
+#include <alloca.h>
+#endif /* GF_BSD_HOST_OS */
+
+#ifdef HAVE_LINKAT
+#include <fcntl.h>
+#endif /* HAVE_LINKAT */
+
+#include <glusterfs/checksum.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include "posix-handle.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/locking.h>
+#include <glusterfs/timer.h>
+#include "glusterfs3-xdr.h"
+#include <glusterfs/glusterfs-acl.h>
+#include "posix-messages.h"
+#include "posix-metadata.h"
+#include <glusterfs/events.h>
+#include "posix-gfid-path.h"
+#include <glusterfs/compat-uuid.h>
+#include <glusterfs/common-utils.h>
+
+extern char *marker_xattrs[];
+#define ALIGN_SIZE 4096
+
+#undef HAVE_SET_FSID
+#ifdef HAVE_SET_FSID
+
+#define DECLARE_OLD_FS_ID_VAR \
+ uid_t old_fsuid; \
+ gid_t old_fsgid;
+
+#define SET_FS_ID(uid, gid) \
+ do { \
+ old_fsuid = setfsuid(uid); \
+ old_fsgid = setfsgid(gid); \
+ } while (0)
+
+#define SET_TO_OLD_FS_ID() \
+ do { \
+ setfsuid(old_fsuid); \
+ setfsgid(old_fsgid); \
+ } while (0)
+
+#else
+
+#define DECLARE_OLD_FS_ID_VAR
+#define SET_FS_ID(uid, gid)
+#define SET_TO_OLD_FS_ID()
+
+#endif
+
+/* Setting microseconds or nanoseconds depending on what's supported:
+ The passed in `tv` can be
+ struct timespec
+ if supported (better, because it supports nanosecond resolution) or
+ struct timeval
+ otherwise. */
+#if HAVE_UTIMENSAT
+#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) tv.tv_nsec = nanosecs
+#else
+#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \
+ tv.tv_usec = nanosecs / 1000
+#endif
+
+static char *disallow_removexattrs[] = {GF_XATTR_VOL_ID_KEY, GFID_XATTR_KEY,
+ NULL};
+
+void
+posix_cs_build_xattr_rsp(xlator_t *this, dict_t **rsp, dict_t *req, int fd,
+ char *loc)
+{
+ int ret = 0;
+ uuid_t uuid;
+
+ if (!dict_get_sizen(req, GF_CS_OBJECT_STATUS))
+ return;
+
+ if (!(*rsp)) {
+ *rsp = dict_new();
+ if (!(*rsp)) {
+ return;
+ }
+ }
+
+ if (fd != -1) {
+ if (dict_get_sizen(req, GF_CS_XATTR_ARCHIVE_UUID)) {
+ ret = sys_fgetxattr(fd, GF_CS_XATTR_ARCHIVE_UUID, uuid, 16);
+ if (ret > 0) {
+ ret = dict_set_gfuuid(*rsp, GF_CS_XATTR_ARCHIVE_UUID, uuid,
+ true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+ "%s: Failed to set "
+ "dictionary value for %s for fd %d",
+ uuid_utoa(uuid), GF_CS_XATTR_ARCHIVE_UUID, fd);
+ }
+ } else {
+ gf_msg_debug(this->name, 0, "getxattr failed on %s for fd %d",
+ GF_CS_XATTR_ARCHIVE_UUID, fd);
+ }
+ }
+ } else {
+ if (dict_get_sizen(req, GF_CS_XATTR_ARCHIVE_UUID)) {
+ ret = sys_lgetxattr(loc, GF_CS_XATTR_ARCHIVE_UUID, uuid, 16);
+ if (ret > 0) {
+ ret = dict_set_gfuuid(*rsp, GF_CS_XATTR_ARCHIVE_UUID, uuid,
+ true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+ "%s: Failed to set "
+ "dictionary value for %s for loc %s",
+ uuid_utoa(uuid), GF_CS_XATTR_ARCHIVE_UUID, loc);
+ }
+ } else {
+ gf_msg_debug(this->name, 0, "getxattr failed on %s for %s",
+ GF_CS_XATTR_ARCHIVE_UUID, loc);
+ }
+ }
+ }
+ return;
+}
+
+int32_t
+posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ struct iatt buf = {
+ 0,
+ };
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ struct posix_private *priv = NULL;
+ char *real_path = NULL;
+ dict_t *xattr_rsp = NULL;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(loc, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ MAKE_INODE_HANDLE(real_path, this, loc, &buf);
+
+ if (op_ret == -1) {
+ op_errno = errno;
+ if (op_errno == ENOENT) {
+ gf_msg_debug(this->name, 0,
+ "lstat on gfid-handle %s (path: %s)"
+ "failed: %s",
+ real_path ? real_path : "<null>", loc->path,
+ strerror(op_errno));
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_LSTAT_FAILED,
+ "lstat on gfid-handle %s (path: %s) failed",
+ real_path ? real_path : "<null>", loc->path);
+ }
+ goto out;
+ }
+ if (xdata) {
+ xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata,
+ &buf);
+
+ posix_cs_maintenance(this, NULL, loc, NULL, &buf, real_path, xdata,
+ &xattr_rsp, _gf_true);
+
+ posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, -1, real_path);
+ }
+
+ posix_update_iatt_buf(&buf, -1, real_path, xdata);
+ op_ret = 0;
+
+out:
+ SET_TO_OLD_FS_ID();
+ STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, &buf, xattr_rsp);
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+
+ return 0;
+}
+
+static int
+posix_do_chmod(xlator_t *this, const char *path, struct iatt *stbuf)
+{
+ int32_t ret = -1;
+ mode_t mode = 0;
+ mode_t mode_bit = 0;
+ struct posix_private *priv = NULL;
+ struct stat stat;
+ int is_symlink = 0;
+
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+ ret = sys_lstat(path, &stat);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_LSTAT_FAILED,
+ "lstat failed: %s", path);
+ goto out;
+ }
+
+ if (S_ISLNK(stat.st_mode))
+ is_symlink = 1;
+
+ if (S_ISDIR(stat.st_mode)) {
+ mode = st_mode_from_ia(stbuf->ia_prot, stbuf->ia_type);
+ mode_bit = (mode & priv->create_directory_mask) |
+ priv->force_directory_mode;
+ mode = posix_override_umask(mode, mode_bit);
+ } else {
+ mode = st_mode_from_ia(stbuf->ia_prot, stbuf->ia_type);
+ mode_bit = (mode & priv->create_mask) | priv->force_create_mode;
+ mode = posix_override_umask(mode, mode_bit);
+ }
+ ret = lchmod(path, mode);
+ if ((ret == -1) && (errno == ENOSYS)) {
+ /* in Linux symlinks are always in mode 0777 and no
+ such call as lchmod exists.
+ */
+ gf_msg_debug(this->name, 0, "%s (%s)", path, strerror(errno));
+ if (is_symlink) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = sys_chmod(path, mode);
+ }
+out:
+ return ret;
+}
+
+static int
+posix_do_chown(xlator_t *this, const char *path, struct iatt *stbuf,
+ int32_t valid)
+{
+ int32_t ret = -1;
+ uid_t uid = -1;
+ gid_t gid = -1;
+
+ if (valid & GF_SET_ATTR_UID)
+ uid = stbuf->ia_uid;
+
+ if (valid & GF_SET_ATTR_GID)
+ gid = stbuf->ia_gid;
+
+ ret = sys_lchown(path, uid, gid);
+
+ return ret;
+}
+
+static int
+posix_do_utimes(xlator_t *this, const char *path, struct iatt *stbuf, int valid)
+{
+ int32_t ret = -1;
+#if defined(HAVE_UTIMENSAT)
+ struct timespec tv[2] = {{
+ 0,
+ },
+ {
+ 0,
+ }};
+#else
+ struct timeval tv[2] = {{
+ 0,
+ },
+ {
+ 0,
+ }};
+#endif
+ struct stat stat;
+ int is_symlink = 0;
+
+ ret = sys_lstat(path, &stat);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED, "%s",
+ path);
+ goto out;
+ }
+
+ if (S_ISLNK(stat.st_mode))
+ is_symlink = 1;
+
+ if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) {
+ tv[0].tv_sec = stbuf->ia_atime;
+ SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[0], stbuf->ia_atime_nsec);
+ } else {
+ /* atime is not given, use current values */
+ tv[0].tv_sec = ST_ATIM_SEC(&stat);
+ SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[0], ST_ATIM_NSEC(&stat));
+ }
+
+ if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) {
+ tv[1].tv_sec = stbuf->ia_mtime;
+ SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[1], stbuf->ia_mtime_nsec);
+ } else {
+ /* mtime is not given, use current values */
+ tv[1].tv_sec = ST_MTIM_SEC(&stat);
+ SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[1], ST_MTIM_NSEC(&stat));
+ }
+
+ ret = PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv);
+ if ((ret == -1) && (errno == ENOSYS)) {
+ gf_msg_debug(this->name, 0, "%s (%s)", path, strerror(errno));
+ if (is_symlink) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv);
+ }
+
+out:
+ return ret;
+}
+
+int
+posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_path = 0;
+ struct iatt statpre = {
+ 0,
+ };
+ struct iatt statpost = {
+ 0,
+ };
+ dict_t *xattr_rsp = NULL;
+ struct posix_private *priv = NULL;
+
+ priv = this->private;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(loc, out);
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+ MAKE_INODE_HANDLE(real_path, this, loc, &statpre);
+
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "setattr (lstat) on gfid-handle %s (path: %s) failed",
+ real_path ? real_path : "<null>", loc->path);
+ goto out;
+ }
+
+ if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
+ op_ret = posix_do_chown(this, real_path, stbuf, valid);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CHOWN_FAILED,
+ "setattr (chown) on %s "
+ "failed",
+ real_path);
+ goto out;
+ }
+ }
+
+ if (valid & GF_SET_ATTR_MODE) {
+ op_ret = posix_do_chmod(this, real_path, stbuf);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CHMOD_FAILED,
+ "setattr (chmod) on gfid-handle %s (path: %s) "
+ "failed",
+ real_path, loc->path);
+ goto out;
+ }
+ }
+
+ if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
+ op_ret = posix_do_utimes(this, real_path, stbuf, valid);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UTIMES_FAILED,
+ "setattr (utimes) on gfid-handle %s (path: %s) "
+ "failed",
+ real_path, loc->path);
+ goto out;
+ }
+ posix_update_utime_in_mdata(this, real_path, -1, loc->inode,
+ &frame->root->ctime, stbuf, valid);
+ }
+
+ if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) {
+ posix_update_ctime_in_mdata(this, real_path, -1, loc->inode,
+ &frame->root->ctime, stbuf, valid);
+ }
+
+ if (!valid) {
+ op_ret = sys_lchown(real_path, -1, -1);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LCHOWN_FAILED,
+ "lchown (gfid-handle: %s, path: %s, -1, -1) "
+ "failed",
+ real_path, loc->path);
+
+ goto out;
+ }
+ }
+
+ op_ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &statpost,
+ _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "setattr (lstat) on gfid-handle %s (path: %s) failed", real_path,
+ loc->path);
+ goto out;
+ }
+
+ posix_set_ctime(frame, this, real_path, -1, loc->inode, &statpost);
+
+ if (xdata)
+ xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata,
+ &statpost);
+ posix_update_iatt_buf(&statpre, -1, real_path, xdata);
+ posix_update_iatt_buf(&statpost, -1, real_path, xdata);
+ op_ret = 0;
+
+out:
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, &statpre, &statpost,
+ xattr_rsp);
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+
+ return 0;
+}
+
+int32_t
+posix_do_fchown(xlator_t *this, int fd, struct iatt *stbuf, int32_t valid)
+{
+ int ret = -1;
+ uid_t uid = -1;
+ gid_t gid = -1;
+
+ if (valid & GF_SET_ATTR_UID)
+ uid = stbuf->ia_uid;
+
+ if (valid & GF_SET_ATTR_GID)
+ gid = stbuf->ia_gid;
+
+ ret = sys_fchown(fd, uid, gid);
+
+ return ret;
+}
+
+int32_t
+posix_do_fchmod(xlator_t *this, int fd, struct iatt *stbuf)
+{
+ int32_t ret = -1;
+ mode_t mode = 0;
+ mode_t mode_bit = 0;
+ struct posix_private *priv = NULL;
+
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+ mode = st_mode_from_ia(stbuf->ia_prot, stbuf->ia_type);
+ mode_bit = (mode & priv->create_mask) | priv->force_create_mode;
+ mode = posix_override_umask(mode, mode_bit);
+ ret = sys_fchmod(fd, mode);
+out:
+ return ret;
+}
+
+static int
+posix_do_futimes(xlator_t *this, int fd, struct iatt *stbuf, int valid)
+{
+ int32_t ret = -1;
+ struct timeval tv[2] = {{
+ 0,
+ },
+ {
+ 0,
+ }};
+ struct stat stat = {
+ 0,
+ };
+ gf_boolean_t fstat_executed = _gf_false;
+
+ if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) {
+ tv[0].tv_sec = stbuf->ia_atime;
+ tv[0].tv_usec = stbuf->ia_atime_nsec / 1000;
+ } else {
+ ret = sys_fstat(fd, &stat);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED,
+ "%d", fd);
+ goto out;
+ }
+ fstat_executed = _gf_true;
+ /* atime is not given, use current values */
+ tv[0].tv_sec = ST_ATIM_SEC(&stat);
+ tv[0].tv_usec = ST_ATIM_NSEC(&stat) / 1000;
+ }
+
+ if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) {
+ tv[1].tv_sec = stbuf->ia_mtime;
+ tv[1].tv_usec = stbuf->ia_mtime_nsec / 1000;
+ } else {
+ if (!fstat_executed) {
+ ret = sys_fstat(fd, &stat);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED,
+ "%d", fd);
+ goto out;
+ }
+ }
+ /* mtime is not given, use current values */
+ tv[1].tv_sec = ST_MTIM_SEC(&stat);
+ tv[1].tv_usec = ST_MTIM_NSEC(&stat) / 1000;
+ }
+
+ ret = sys_futimes(fd, tv);
+ if (ret == -1)
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FUTIMES_FAILED, "%d", fd);
+
+out:
+ return ret;
+}
+
+int
+posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ struct iatt statpre = {
+ 0,
+ };
+ struct iatt statpost = {
+ 0,
+ };
+ struct posix_private *priv = NULL;
+ struct posix_fd *pfd = NULL;
+ dict_t *xattr_rsp = NULL;
+ int32_t ret = -1;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd);
+ goto out;
+ }
+
+ op_ret = posix_fdstat(this, fd->inode, pfd->fd, &statpre);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "fsetattr (fstat) failed on fd=%p", fd);
+ goto out;
+ }
+
+ if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
+ op_ret = posix_do_fchown(this, pfd->fd, stbuf, valid);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FCHOWN_FAILED,
+ "fsetattr (fchown) failed"
+ " on fd=%p",
+ fd);
+ goto out;
+ }
+ }
+
+ if (valid & GF_SET_ATTR_MODE) {
+ op_ret = posix_do_fchmod(this, pfd->fd, stbuf);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FCHMOD_FAILED,
+ "fsetattr (fchmod) failed"
+ " on fd=%p",
+ fd);
+ goto out;
+ }
+ }
+
+ if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
+ op_ret = posix_do_futimes(this, pfd->fd, stbuf, valid);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FUTIMES_FAILED,
+ "fsetattr (futimes) on "
+ "failed fd=%p",
+ fd);
+ goto out;
+ }
+ posix_update_utime_in_mdata(this, NULL, pfd->fd, fd->inode,
+ &frame->root->ctime, stbuf, valid);
+ }
+
+ if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) {
+ posix_update_ctime_in_mdata(this, NULL, pfd->fd, fd->inode,
+ &frame->root->ctime, stbuf, valid);
+ }
+
+ if (!valid) {
+ op_ret = sys_fchown(pfd->fd, -1, -1);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FCHOWN_FAILED,
+ "fchown (%d, -1, -1) failed", pfd->fd);
+
+ goto out;
+ }
+ }
+
+ op_ret = posix_fdstat(this, fd->inode, pfd->fd, &statpost);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "fsetattr (fstat) failed on fd=%p", fd);
+ goto out;
+ }
+
+ posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &statpost);
+
+ if (xdata)
+ xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, pfd->fd, xdata,
+ &statpost);
+ op_ret = 0;
+
+out:
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(fsetattr, frame, op_ret, op_errno, &statpre, &statpost,
+ xattr_rsp);
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+
+ return 0;
+}
+
+static int32_t
+posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ off_t offset, size_t len, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata, dict_t **rsp_xdata)
+{
+ int32_t ret = -1;
+ int32_t op_errno = 0;
+ struct posix_fd *pfd = NULL;
+ gf_boolean_t locked = _gf_false;
+ posix_inode_ctx_t *ctx = NULL;
+ struct posix_private *priv = NULL;
+ gf_boolean_t check_space_error = _gf_false;
+ struct stat statbuf = {
+ 0,
+ };
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ priv = this->private;
+
+ /* fallocate case is special so call posix_disk_space_check separately
+ for every fallocate fop instead of calling posix_disk_space with
+ thread after every 5 sec sleep to working correctly storage.reserve
+ option behaviour
+ */
+ if (priv->disk_reserve)
+ posix_disk_space_check(this);
+
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, unlock);
+
+overwrite:
+ check_space_error = _gf_true;
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd);
+ goto out;
+ }
+
+ ret = posix_inode_ctx_get_all(fd->inode, this, &ctx);
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (xdata && dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) {
+ locked = _gf_true;
+ pthread_mutex_lock(&ctx->write_atomic_lock);
+ }
+
+ ret = posix_fdstat(this, fd->inode, pfd->fd, statpre);
+ if (ret == -1) {
+ ret = -errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "fallocate (fstat) failed on fd=%p", fd);
+ goto unlock;
+ }
+
+ if (xdata) {
+ ret = posix_cs_maintenance(this, fd, NULL, &pfd->fd, statpre, NULL,
+ xdata, rsp_xdata, _gf_false);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "file state check failed, fd %p", fd);
+ ret = -EIO;
+ goto unlock;
+ }
+ }
+
+ ret = sys_fallocate(pfd->fd, flags, offset, len);
+ if (ret == -1) {
+ ret = -errno;
+ gf_msg(this->name, GF_LOG_ERROR, -ret, P_MSG_FALLOCATE_FAILED,
+ "fallocate failed on %s offset: %jd, "
+ "len:%zu, flags: %d",
+ uuid_utoa(fd->inode->gfid), offset, len, flags);
+ goto unlock;
+ }
+
+ ret = posix_fdstat(this, fd->inode, pfd->fd, statpost);
+ if (ret == -1) {
+ ret = -errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "fallocate (fstat) failed on fd=%p", fd);
+ goto unlock;
+ }
+
+ posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, statpost);
+
+unlock:
+ if (locked) {
+ pthread_mutex_unlock(&ctx->write_atomic_lock);
+ locked = _gf_false;
+ }
+
+ if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) {
+#ifdef FALLOC_FL_KEEP_SIZE
+ if (flags & FALLOC_FL_KEEP_SIZE) {
+ goto overwrite;
+ }
+#endif
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd);
+ goto out;
+ }
+
+ if (sys_fstat(pfd->fd, &statbuf) < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED,
+ "%d", pfd->fd);
+ goto out;
+ }
+
+ if (offset + len <= statbuf.st_size) {
+ gf_msg_debug(this->name, 0,
+ "io vector size will not"
+ " change disk size so allow overwrite for"
+ " fd %d",
+ pfd->fd);
+ goto overwrite;
+ }
+ }
+
+out:
+ SET_TO_OLD_FS_ID();
+ if (ret == ENOSPC)
+ ret = -ENOSPC;
+
+ return ret;
+}
+
+char *
+_page_aligned_alloc(size_t size, char **aligned_buf)
+{
+ char *alloc_buf = NULL;
+ char *buf = NULL;
+
+ alloc_buf = GF_CALLOC(1, (size + ALIGN_SIZE), gf_posix_mt_char);
+ if (!alloc_buf)
+ goto out;
+ /* page aligned buffer */
+ buf = GF_ALIGN_BUF(alloc_buf, ALIGN_SIZE);
+ *aligned_buf = buf;
+out:
+ return alloc_buf;
+}
+
+static int32_t
+_posix_do_zerofill(int fd, off_t offset, off_t len, int o_direct)
+{
+ off_t num_vect = 0;
+ off_t num_loop = 1;
+ off_t idx = 0;
+ int32_t op_ret = -1;
+ int32_t vect_size = VECTOR_SIZE;
+ off_t remain = 0;
+ off_t extra = 0;
+ struct iovec *vector = NULL;
+ char *iov_base = NULL;
+ char *alloc_buf = NULL;
+
+ if (len == 0)
+ return 0;
+ if (len < VECTOR_SIZE)
+ vect_size = len;
+
+ num_vect = len / (vect_size);
+ remain = len % vect_size;
+ if (num_vect > MAX_NO_VECT) {
+ extra = num_vect % MAX_NO_VECT;
+ num_loop = num_vect / MAX_NO_VECT;
+ num_vect = MAX_NO_VECT;
+ }
+
+ vector = GF_CALLOC(num_vect, sizeof(struct iovec), gf_common_mt_iovec);
+ if (!vector)
+ return -1;
+ if (o_direct) {
+ alloc_buf = _page_aligned_alloc(vect_size, &iov_base);
+ if (!alloc_buf) {
+ GF_FREE(vector);
+ return -1;
+ }
+ } else {
+ iov_base = GF_CALLOC(vect_size, sizeof(char), gf_common_mt_char);
+ if (!iov_base) {
+ GF_FREE(vector);
+ return -1;
+ }
+ }
+
+ for (idx = 0; idx < num_vect; idx++) {
+ vector[idx].iov_base = iov_base;
+ vector[idx].iov_len = vect_size;
+ }
+ if (sys_lseek(fd, offset, SEEK_SET) < 0) {
+ op_ret = -1;
+ goto err;
+ }
+
+ for (idx = 0; idx < num_loop; idx++) {
+ op_ret = sys_writev(fd, vector, num_vect);
+ if (op_ret < 0)
+ goto err;
+ if (op_ret != (vect_size * num_vect)) {
+ op_ret = -1;
+ errno = ENOSPC;
+ goto err;
+ }
+ }
+ if (extra) {
+ op_ret = sys_writev(fd, vector, extra);
+ if (op_ret < 0)
+ goto err;
+ if (op_ret != (vect_size * extra)) {
+ op_ret = -1;
+ errno = ENOSPC;
+ goto err;
+ }
+ }
+ if (remain) {
+ vector[0].iov_len = remain;
+ op_ret = sys_writev(fd, vector, 1);
+ if (op_ret < 0)
+ goto err;
+ if (op_ret != remain) {
+ op_ret = -1;
+ errno = ENOSPC;
+ goto err;
+ }
+ }
+err:
+ if (o_direct)
+ GF_FREE(alloc_buf);
+ else
+ GF_FREE(iov_base);
+ GF_FREE(vector);
+ return op_ret;
+}
+
+static int32_t
+posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata, dict_t **rsp_xdata)
+{
+ int32_t ret = -1;
+ int32_t op_errno = 0;
+ int32_t flags = 0;
+ struct posix_fd *pfd = NULL;
+ gf_boolean_t locked = _gf_false;
+ posix_inode_ctx_t *ctx = NULL;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd);
+ goto out;
+ }
+
+ ret = posix_inode_ctx_get_all(fd->inode, this, &ctx);
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) {
+ locked = _gf_true;
+ pthread_mutex_lock(&ctx->write_atomic_lock);
+ }
+
+ ret = posix_fdstat(this, fd->inode, pfd->fd, statpre);
+ if (ret == -1) {
+ ret = -errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "pre-operation fstat failed on fd = %p", fd);
+ goto out;
+ }
+
+ if (xdata) {
+ ret = posix_cs_maintenance(this, fd, NULL, &pfd->fd, statpre, NULL,
+ xdata, rsp_xdata, _gf_false);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "file state "
+ "check failed, fd %p",
+ fd);
+ ret = -EIO;
+ goto out;
+ }
+ }
+
+ posix_update_iatt_buf(statpre, pfd->fd, NULL, xdata);
+ /* See if we can use FALLOC_FL_ZERO_RANGE to perform the zero fill.
+ * If it fails, fall back to _posix_do_zerofill() and an optional fsync.
+ */
+ flags = FALLOC_FL_ZERO_RANGE;
+ ret = sys_fallocate(pfd->fd, flags, offset, len);
+ if (ret == 0) {
+ goto fsync;
+ } else {
+ ret = -errno;
+ if ((ret != -ENOSYS) && (ret != -EOPNOTSUPP)) {
+ goto out;
+ }
+ }
+
+ ret = _posix_do_zerofill(pfd->fd, offset, len, pfd->flags & O_DIRECT);
+ if (ret < 0) {
+ ret = -errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_ZEROFILL_FAILED,
+ "zerofill failed on fd %d length %" PRId64, pfd->fd, len);
+ goto out;
+ }
+
+fsync:
+ if (pfd->flags & (O_SYNC | O_DSYNC)) {
+ ret = sys_fsync(pfd->fd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_WRITEV_FAILED,
+ "fsync() in writev on fd"
+ "%d failed",
+ pfd->fd);
+ ret = -errno;
+ goto out;
+ }
+ }
+
+ ret = posix_fdstat(this, fd->inode, pfd->fd, statpost);
+ if (ret == -1) {
+ ret = -errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "post operation fstat failed on fd=%p", fd);
+ goto out;
+ }
+
+ posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, statpost);
+
+out:
+ if (locked) {
+ pthread_mutex_unlock(&ctx->write_atomic_lock);
+ locked = _gf_false;
+ }
+ SET_TO_OLD_FS_ID();
+
+ return ret;
+}
+
+int32_t
+posix_glfallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
+{
+ int32_t ret;
+ int32_t flags = 0;
+ struct iatt statpre = {
+ 0,
+ };
+ struct iatt statpost = {
+ 0,
+ };
+ dict_t *rsp_xdata = NULL;
+
+#ifdef FALLOC_FL_KEEP_SIZE
+ if (keep_size)
+ flags = FALLOC_FL_KEEP_SIZE;
+#endif /* FALLOC_FL_KEEP_SIZE */
+
+ ret = posix_do_fallocate(frame, this, fd, flags, offset, len, &statpre,
+ &statpost, xdata, &rsp_xdata);
+ if (ret < 0)
+ goto err;
+
+ STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, rsp_xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, rsp_xdata);
+ return 0;
+}
+
+int32_t
+posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ int32_t ret;
+ dict_t *rsp_xdata = NULL;
+#ifndef FALLOC_FL_KEEP_SIZE
+ ret = EOPNOTSUPP;
+
+#else /* FALLOC_FL_KEEP_SIZE */
+ int32_t flags = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE;
+ struct iatt statpre = {
+ 0,
+ };
+ struct iatt statpost = {
+ 0,
+ };
+
+ ret = posix_do_fallocate(frame, this, fd, flags, offset, len, &statpre,
+ &statpost, xdata, &rsp_xdata);
+ if (ret < 0)
+ goto err;
+
+ STACK_UNWIND_STRICT(discard, frame, 0, 0, &statpre, &statpost, rsp_xdata);
+ return 0;
+
+err:
+#endif /* FALLOC_FL_KEEP_SIZE */
+ STACK_UNWIND_STRICT(discard, frame, -1, -ret, NULL, NULL, rsp_xdata);
+ return 0;
+}
+
+int32_t
+posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ int32_t ret = 0;
+ struct iatt statpre = {
+ 0,
+ };
+ struct iatt statpost = {
+ 0,
+ };
+ struct posix_private *priv = NULL;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ dict_t *rsp_xdata = NULL;
+ gf_boolean_t check_space_error = _gf_false;
+ struct posix_fd *pfd = NULL;
+ struct stat statbuf = {
+ 0,
+ };
+
+ VALIDATE_OR_GOTO(frame, unwind);
+ VALIDATE_OR_GOTO(this, unwind);
+
+ priv = this->private;
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+overwrite:
+ check_space_error = _gf_true;
+ ret = posix_do_zerofill(frame, this, fd, offset, len, &statpre, &statpost,
+ xdata, &rsp_xdata);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = -ret;
+ goto unwind;
+ }
+
+ STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, rsp_xdata);
+ return 0;
+
+out:
+ if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) {
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd);
+ goto out;
+ }
+
+ if (sys_fstat(pfd->fd, &statbuf) < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED,
+ "%d", pfd->fd);
+ goto out;
+ }
+
+ if (offset + len <= statbuf.st_size) {
+ gf_msg_debug(this->name, 0,
+ "io vector size will not"
+ " change disk size so allow overwrite for"
+ " fd %d",
+ pfd->fd);
+ goto overwrite;
+ }
+ }
+
+unwind:
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL,
+ rsp_xdata);
+ return 0;
+}
+
+int32_t
+posix_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
+{
+ /*
+ * IPC is for inter-translator communication. If one gets here, it
+ * means somebody sent one that nobody else recognized, which is an
+ * error much like an uncaught exception.
+ */
+ gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_IPC_NOT_HANDLE,
+ "GF_LOG_IPC(%d) not handled", op);
+ STACK_UNWIND_STRICT(ipc, frame, -1, EOPNOTSUPP, NULL);
+ return 0;
+}
+
+int32_t
+posix_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+#ifdef HAVE_SEEK_HOLE
+ struct posix_fd *pfd = NULL;
+ off_t ret = -1;
+ int err = 0;
+ int whence = 0;
+ struct iatt preop = {
+ 0,
+ };
+ dict_t *rsp_xdata = NULL;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ switch (what) {
+ case GF_SEEK_DATA:
+ whence = SEEK_DATA;
+ break;
+ case GF_SEEK_HOLE:
+ whence = SEEK_HOLE;
+ break;
+ default:
+ err = ENOTSUP;
+ gf_msg(this->name, GF_LOG_ERROR, ENOTSUP, P_MSG_SEEK_UNKOWN,
+ "don't know what to seek");
+ goto out;
+ }
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &err);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd);
+ goto out;
+ }
+
+ if (xdata) {
+ ret = posix_fdstat(this, fd->inode, pfd->fd, &preop);
+ if (ret == -1) {
+ ret = -errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "pre-operation fstat failed on fd=%p", fd);
+ goto out;
+ }
+
+ ret = posix_cs_maintenance(this, fd, NULL, &pfd->fd, &preop, NULL,
+ xdata, &rsp_xdata, _gf_false);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "file state check failed, fd %p", fd);
+ ret = -EIO;
+ goto out;
+ }
+ }
+
+ ret = sys_lseek(pfd->fd, offset, whence);
+ if (ret == -1) {
+ err = errno;
+ gf_msg(this->name, fop_log_level(GF_FOP_SEEK, err), err,
+ P_MSG_SEEK_FAILED, "seek failed on fd %d length %" PRId64,
+ pfd->fd, offset);
+ goto out;
+ }
+
+out:
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(seek, frame, (ret == -1 ? -1 : 0), err,
+ (ret == -1 ? -1 : ret), rsp_xdata);
+#else
+ STACK_UNWIND_STRICT(seek, frame, -1, EINVAL, 0, NULL);
+#endif
+ return 0;
+}
+
+int32_t
+posix_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ char *real_path = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ DIR *dir = NULL;
+ struct posix_fd *pfd = NULL;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(loc, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+ MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+ if (!real_path) {
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ op_ret = -1;
+ dir = sys_opendir(real_path);
+
+ if (dir == NULL) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_OPENDIR_FAILED,
+ "opendir failed on gfid-handle: %s (path: %s)", real_path,
+ loc->path);
+ goto out;
+ }
+
+ op_ret = dirfd(dir);
+ if (op_ret < 0) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DIRFD_FAILED,
+ "dirfd() failed (path: %s, gfid-handle: %s", loc->path,
+ real_path);
+ goto out;
+ }
+
+ pfd = GF_CALLOC(1, sizeof(*pfd), gf_posix_mt_posix_fd);
+ if (!pfd) {
+ op_errno = errno;
+ goto out;
+ }
+
+ pfd->dir = dir;
+ pfd->dir_eof = -1;
+ pfd->fd = op_ret;
+
+ op_ret = fd_ctx_set(fd, this, (uint64_t)(long)pfd);
+ if (op_ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED,
+ "failed to set the fd"
+ "context path=%s "
+ "gfid-handle= %s,fd=%p",
+ loc->path, real_path, fd);
+
+ posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, NULL);
+
+ op_ret = 0;
+
+out:
+ if (op_ret == -1) {
+ if (dir) {
+ (void)sys_closedir(dir);
+ dir = NULL;
+ }
+ if (pfd) {
+ GF_FREE(pfd);
+ pfd = NULL;
+ }
+ }
+
+ SET_TO_OLD_FS_ID();
+ STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, NULL);
+ return 0;
+}
+
+static void
+posix_add_fd_to_cleanup(xlator_t *this, struct posix_fd *pfd)
+{
+ glusterfs_ctx_t *ctx = this->ctx;
+ struct posix_private *priv = this->private;
+
+ pfd->xl = this;
+ pthread_mutex_lock(&ctx->fd_lock);
+ {
+ list_add_tail(&pfd->list, &ctx->janitor_fds);
+ priv->rel_fdcount++;
+ pthread_cond_signal(&ctx->fd_cond);
+ }
+ pthread_mutex_unlock(&ctx->fd_lock);
+}
+
+int32_t
+posix_releasedir(xlator_t *this, fd_t *fd)
+{
+ struct posix_fd *pfd = NULL;
+ uint64_t tmp_pfd = 0;
+ int ret = 0;
+
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ ret = fd_ctx_del(fd, this, &tmp_pfd);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0, "pfd from fd=%p is NULL", fd);
+ goto out;
+ }
+
+ pfd = (struct posix_fd *)(long)tmp_pfd;
+ if (!pfd->dir) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL,
+ "pfd->dir is NULL for fd=%p", fd);
+ goto out;
+ }
+ posix_add_fd_to_cleanup(this, pfd);
+
+out:
+ return 0;
+}
+
+int32_t
+posix_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ char *dest = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ char *real_path = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(loc, out);
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ dest = alloca(size + 1);
+
+ MAKE_INODE_HANDLE(real_path, this, loc, &stbuf);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat on %s failed", loc->path ? loc->path : "<null>");
+ goto out;
+ }
+
+ op_ret = sys_readlink(real_path, dest, size);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READYLINK_FAILED,
+ "readlink on gfid-handle: %s (path: %s) failed", real_path,
+ loc->path);
+ goto out;
+ }
+
+ dest[op_ret] = 0;
+out:
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(readlink, frame, op_ret, op_errno, dest, &stbuf, NULL);
+
+ return 0;
+}
+
+int32_t
+posix_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_path = 0;
+ struct posix_private *priv = NULL;
+ struct iatt prebuf = {
+ 0,
+ };
+ struct iatt postbuf = {
+ 0,
+ };
+ dict_t *rsp_xdata = NULL;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(loc, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ MAKE_INODE_HANDLE(real_path, this, loc, &prebuf);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "pre-operation lstat on (path: %s gfid-handle: %s) "
+ "failed",
+ loc->path, real_path ? real_path : "<null>");
+ goto out;
+ }
+
+ if (xdata) {
+ op_ret = posix_cs_maintenance(this, NULL, loc, NULL, &prebuf, real_path,
+ xdata, &rsp_xdata, _gf_false);
+ if (op_ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "file state check failed, path %s", loc->path);
+ op_errno = EIO;
+ goto out;
+ }
+ }
+
+ posix_update_iatt_buf(&prebuf, -1, real_path, xdata);
+ op_ret = sys_truncate(real_path, offset);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED,
+ "truncate on gfid-handle: %s (path: %s) failed", real_path,
+ loc->path);
+ goto out;
+ }
+
+ op_ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &postbuf,
+ _gf_false);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "lstat on gfid-handle %s (path: %s) failed", real_path,
+ loc->path);
+ goto out;
+ }
+
+ posix_set_ctime(frame, this, real_path, -1, loc->inode, &postbuf);
+
+ op_ret = 0;
+out:
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, &prebuf, &postbuf,
+ NULL);
+
+ return 0;
+}
+
+int32_t
+posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_path = NULL;
+ int32_t _fd = -1;
+ struct posix_fd *pfd = NULL;
+ struct posix_private *priv = NULL;
+ struct iatt preop = {
+ 0,
+ };
+ dict_t *rsp_xdata = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(this->private, out);
+ VALIDATE_OR_GOTO(loc, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+
+ if (loc->inode && ((loc->inode->ia_type == IA_IFBLK) ||
+ (loc->inode->ia_type == IA_IFCHR))) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT,
+ "open received on a block/char file (%s)",
+ uuid_utoa(loc->inode->gfid));
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ if (flags & O_CREAT)
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+ MAKE_INODE_HANDLE(real_path, this, loc, &stbuf);
+ if (!real_path) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ if (IA_ISLNK(stbuf.ia_type)) {
+ op_ret = -1;
+ op_errno = ELOOP;
+ goto out;
+ }
+
+ op_ret = -1;
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ if (priv->o_direct)
+ flags |= O_DIRECT;
+
+ _fd = sys_open(real_path, flags, priv->force_create_mode);
+ if (_fd == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FILE_OP_FAILED,
+ "open on gfid-handle %s (path: %s), flags: %d", real_path,
+ loc->path, flags);
+ goto out;
+ }
+
+ posix_set_ctime(frame, this, real_path, -1, loc->inode, &stbuf);
+
+ pfd = GF_CALLOC(1, sizeof(*pfd), gf_posix_mt_posix_fd);
+ if (!pfd) {
+ op_errno = errno;
+ goto out;
+ }
+
+ pfd->flags = flags;
+ pfd->fd = _fd;
+
+ if (xdata) {
+ op_ret = posix_fdstat(this, fd->inode, pfd->fd, &preop);
+ if (op_ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "pre-operation fstat failed on fd=%p", fd);
+ GF_FREE(pfd);
+ goto out;
+ }
+
+ posix_cs_maintenance(this, fd, NULL, &pfd->fd, &preop, NULL, xdata,
+ &rsp_xdata, _gf_true);
+ }
+
+ op_ret = fd_ctx_set(fd, this, (uint64_t)(long)pfd);
+ if (op_ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED,
+ "failed to set the fd context gfid-handle=%s path=%s fd=%p",
+ real_path, loc->path, fd);
+
+ op_ret = 0;
+
+out:
+ if (op_ret == -1) {
+ if (_fd != -1) {
+ sys_close(_fd);
+ }
+ }
+
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, rsp_xdata);
+
+ return 0;
+}
+
+int
+posix_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int _fd = -1;
+ struct posix_private *priv = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec vec = {
+ 0,
+ };
+ struct posix_fd *pfd = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ struct iatt preop = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *rsp_xdata = NULL;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+ VALIDATE_OR_GOTO(fd->inode, out);
+ VALIDATE_OR_GOTO(this->private, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+
+ if ((fd->inode->ia_type == IA_IFBLK) || (fd->inode->ia_type == IA_IFCHR)) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT,
+ "readv received on a block/char file (%s)",
+ uuid_utoa(fd->inode->gfid));
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd);
+ goto out;
+ }
+
+ if (!size) {
+ op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, P_MSG_INVALID_ARGUMENT,
+ "size=%" GF_PRI_SIZET, size);
+ goto out;
+ }
+
+ iobuf = iobuf_get_page_aligned(this->ctx->iobuf_pool, size, ALIGN_SIZE);
+ if (!iobuf) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ _fd = pfd->fd;
+
+ if (xdata) {
+ op_ret = posix_fdstat(this, fd->inode, _fd, &preop);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "pre-operation fstat failed on fd=%p", fd);
+ goto out;
+ }
+ op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata,
+ &rsp_xdata, _gf_false);
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "file state check failed, fd %p", fd);
+ op_errno = EIO;
+ goto out;
+ }
+ }
+
+ posix_update_iatt_buf(&preop, _fd, NULL, xdata);
+ op_ret = sys_pread(_fd, iobuf->ptr, size, offset);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READ_FAILED,
+ "read failed on gfid=%s, "
+ "fd=%p, offset=%" PRIu64 " size=%" GF_PRI_SIZET
+ ", "
+ "buf=%p",
+ uuid_utoa(fd->inode->gfid), fd, offset, size, iobuf->ptr);
+ goto out;
+ }
+
+ GF_ATOMIC_ADD(priv->read_value, op_ret);
+
+ vec.iov_base = iobuf->ptr;
+ vec.iov_len = op_ret;
+
+ iobref = iobref_new();
+
+ iobref_add(iobref, iobuf);
+
+ /*
+ * readv successful, and we need to get the stat of the file
+ * we read from
+ */
+
+ op_ret = posix_fdstat(this, fd->inode, _fd, &stbuf);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "fstat failed on fd=%p", fd);
+ goto out;
+ }
+
+ posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &stbuf);
+
+ /* Hack to notify higher layers of EOF. */
+ if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size)
+ op_errno = ENOENT;
+
+ op_ret = vec.iov_len;
+
+out:
+
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &vec, 1, &stbuf, iobref,
+ rsp_xdata);
+
+ if (iobref)
+ iobref_unref(iobref);
+ if (iobuf)
+ iobuf_unref(iobuf);
+
+ return 0;
+}
+
+int32_t
+__posix_pwritev(int fd, struct iovec *vector, int count, off_t offset)
+{
+ int32_t op_ret = 0;
+ int idx = 0;
+ int retval = 0;
+ off_t internal_off = 0;
+
+ if (!vector)
+ return -EFAULT;
+
+ internal_off = offset;
+ for (idx = 0; idx < count; idx++) {
+ retval = sys_pwrite(fd, vector[idx].iov_base, vector[idx].iov_len,
+ internal_off);
+ if (retval == -1) {
+ op_ret = -errno;
+ goto err;
+ }
+ op_ret += retval;
+ internal_off += retval;
+ }
+
+err:
+ return op_ret;
+}
+
+int32_t
+__posix_writev(int fd, struct iovec *vector, int count, off_t startoff,
+ int odirect)
+{
+ int32_t op_ret = 0;
+ int idx = 0;
+ int max_buf_size = 0;
+ int retval = 0;
+ char *buf = NULL;
+ char *alloc_buf = NULL;
+ off_t internal_off = 0;
+
+ /* Check for the O_DIRECT flag during open() */
+ if (!odirect)
+ return __posix_pwritev(fd, vector, count, startoff);
+
+ for (idx = 0; idx < count; idx++) {
+ if (max_buf_size < vector[idx].iov_len)
+ max_buf_size = vector[idx].iov_len;
+ }
+
+ alloc_buf = _page_aligned_alloc(max_buf_size, &buf);
+ if (!alloc_buf) {
+ op_ret = -errno;
+ goto err;
+ }
+
+ internal_off = startoff;
+ for (idx = 0; idx < count; idx++) {
+ memcpy(buf, vector[idx].iov_base, vector[idx].iov_len);
+
+ /* not sure whether writev works on O_DIRECT'd fd */
+ retval = sys_pwrite(fd, buf, vector[idx].iov_len, internal_off);
+ if (retval == -1) {
+ op_ret = -errno;
+ goto err;
+ }
+
+ op_ret += retval;
+ internal_off += retval;
+ }
+
+err:
+ GF_FREE(alloc_buf);
+
+ return op_ret;
+}
+
+dict_t *
+_fill_writev_xdata(fd_t *fd, dict_t *xdata, xlator_t *this, int is_append)
+{
+ dict_t *rsp_xdata = NULL;
+ int32_t ret = 0;
+ inode_t *inode = NULL;
+
+ if (fd)
+ inode = fd->inode;
+
+ if (!fd || !fd->inode || gf_uuid_is_null(fd->inode->gfid)) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, P_MSG_XATTR_FAILED,
+ "fd: %p inode: %p"
+ "gfid:%s",
+ fd, inode ? inode : 0,
+ inode ? uuid_utoa(inode->gfid) : "N/A");
+ goto out;
+ }
+
+ if (!xdata)
+ goto out;
+
+ rsp_xdata = dict_new();
+ if (!rsp_xdata)
+ goto out;
+
+ if (dict_get(xdata, GLUSTERFS_OPEN_FD_COUNT)) {
+ ret = dict_set_uint32(rsp_xdata, GLUSTERFS_OPEN_FD_COUNT,
+ fd->inode->fd_count);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+ "%s: Failed to set "
+ "dictionary value for %s",
+ uuid_utoa(fd->inode->gfid), GLUSTERFS_OPEN_FD_COUNT);
+ }
+ }
+
+ if (dict_get(xdata, GLUSTERFS_ACTIVE_FD_COUNT)) {
+ ret = dict_set_uint32(rsp_xdata, GLUSTERFS_ACTIVE_FD_COUNT,
+ fd->inode->active_fd_count);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+ "%s: Failed to set "
+ "dictionary value for %s",
+ uuid_utoa(fd->inode->gfid), GLUSTERFS_ACTIVE_FD_COUNT);
+ }
+ }
+
+ if (dict_get(xdata, GLUSTERFS_WRITE_IS_APPEND)) {
+ ret = dict_set_uint32(rsp_xdata, GLUSTERFS_WRITE_IS_APPEND, is_append);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+ "%s: Failed to set "
+ "dictionary value for %s",
+ uuid_utoa(fd->inode->gfid), GLUSTERFS_WRITE_IS_APPEND);
+ }
+ }
+out:
+ return rsp_xdata;
+}
+
+int32_t
+posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int _fd = -1;
+ struct posix_private *priv = NULL;
+ struct posix_fd *pfd = NULL;
+ struct iatt preop = {
+ 0,
+ };
+ struct iatt postop = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *rsp_xdata = NULL;
+ int is_append = 0;
+ gf_boolean_t locked = _gf_false;
+ gf_boolean_t write_append = _gf_false;
+ gf_boolean_t update_atomic = _gf_false;
+ posix_inode_ctx_t *ctx = NULL;
+ gf_boolean_t check_space_error = _gf_false;
+ struct stat statbuf = {
+ 0,
+ };
+ int totlen = 0;
+ int idx = 0;
+
+ VALIDATE_OR_GOTO(frame, unwind);
+ VALIDATE_OR_GOTO(this, unwind);
+ VALIDATE_OR_GOTO(fd, unwind);
+ VALIDATE_OR_GOTO(fd->inode, unwind);
+ VALIDATE_OR_GOTO(vector, unwind);
+ VALIDATE_OR_GOTO(this->private, unwind);
+
+ priv = this->private;
+
+ VALIDATE_OR_GOTO(priv, unwind);
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+overwrite:
+
+ check_space_error = _gf_true;
+ if ((fd->inode->ia_type == IA_IFBLK) || (fd->inode->ia_type == IA_IFCHR)) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT,
+ "writev received on a block/char file (%s)",
+ uuid_utoa(fd->inode->gfid));
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd);
+ goto out;
+ }
+
+ _fd = pfd->fd;
+
+ ret = posix_check_internal_writes(this, fd, _fd, xdata);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "possible overwrite from internal client, fd=%p", fd);
+ op_ret = -1;
+ op_errno = EBUSY;
+ goto out;
+ }
+
+ if (xdata) {
+ if (dict_get(xdata, GLUSTERFS_WRITE_IS_APPEND))
+ write_append = _gf_true;
+ if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC))
+ update_atomic = _gf_true;
+ }
+
+ /* The write_is_append check and write must happen
+ atomically. Else another write can overtake this
+ write after the check and get written earlier.
+
+ So lock before preop-stat and unlock after write.
+ */
+
+ /*
+ * The update_atomic option is to instruct posix to do prestat,
+ * write and poststat atomically. This is to prevent any modification to
+ * ia_size and ia_blocks until poststat and the diff in their values
+ * between pre and poststat could be of use for some translators (shard
+ * as of today).
+ */
+
+ op_ret = posix_inode_ctx_get_all(fd->inode, this, &ctx);
+ if (op_ret < 0) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (write_append || update_atomic) {
+ locked = _gf_true;
+ pthread_mutex_lock(&ctx->write_atomic_lock);
+ }
+
+ op_ret = posix_fdstat(this, fd->inode, _fd, &preop);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "pre-operation fstat failed on fd=%p", fd);
+ goto out;
+ }
+
+ if (xdata) {
+ op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata,
+ &rsp_xdata, _gf_false);
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "file state check failed, fd %p", fd);
+ op_errno = EIO;
+ goto out;
+ }
+ }
+
+ posix_update_iatt_buf(&preop, _fd, NULL, xdata);
+ if (locked && write_append) {
+ if (preop.ia_size == offset || (fd->flags & O_APPEND))
+ is_append = 1;
+ }
+
+ op_ret = __posix_writev(_fd, vector, count, offset,
+ (pfd->flags & O_DIRECT));
+
+ if (locked && (!update_atomic)) {
+ pthread_mutex_unlock(&ctx->write_atomic_lock);
+ locked = _gf_false;
+ }
+
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_WRITE_FAILED,
+ "write failed: offset %" PRIu64 ",", offset);
+ goto out;
+ }
+
+ rsp_xdata = _fill_writev_xdata(fd, xdata, this, is_append);
+ /* writev successful, we also need to get the stat of
+ * the file we wrote to
+ */
+
+ ret = posix_fdstat(this, fd->inode, _fd, &postop);
+ if (ret == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "post-operation fstat failed on fd=%p", fd);
+ goto out;
+ }
+
+ posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &postop);
+
+ if (locked) {
+ pthread_mutex_unlock(&ctx->write_atomic_lock);
+ locked = _gf_false;
+ }
+
+ if (flags & (O_SYNC | O_DSYNC)) {
+ ret = sys_fsync(_fd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_WRITEV_FAILED,
+ "fsync() in writev on fd %d failed", _fd);
+ op_ret = -1;
+ op_errno = errno;
+ goto out;
+ }
+ }
+
+ GF_ATOMIC_ADD(priv->write_value, op_ret);
+
+out:
+
+ if (locked) {
+ pthread_mutex_unlock(&ctx->write_atomic_lock);
+ locked = _gf_false;
+ }
+
+ if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) {
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd);
+ goto unwind;
+ }
+
+ if (sys_fstat(pfd->fd, &statbuf) < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED,
+ "%d", pfd->fd);
+ goto unwind;
+ }
+
+ for (idx = 0; idx < count; idx++) {
+ totlen = vector[idx].iov_len;
+ }
+
+ if ((offset + totlen <= statbuf.st_size) &&
+ !(statbuf.st_blocks * statbuf.st_blksize < statbuf.st_size)) {
+ gf_msg_debug(this->name, 0,
+ "io vector size will not"
+ " change disk size so allow overwrite for"
+ " fd %d",
+ pfd->fd);
+ goto overwrite;
+ }
+ }
+
+unwind:
+ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, &preop, &postop,
+ rsp_xdata);
+
+ if (rsp_xdata)
+ dict_unref(rsp_xdata);
+ return 0;
+}
+
+int32_t
+posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+ off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len,
+ uint32_t flags, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int _fd_in = -1;
+ int _fd_out = -1;
+ struct posix_private *priv = NULL;
+ struct posix_fd *pfd_in = NULL;
+ struct posix_fd *pfd_out = NULL;
+ struct iatt preop_dst = {
+ 0,
+ };
+ struct iatt postop_dst = {
+ 0,
+ };
+ struct iatt stbuf = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *rsp_xdata = NULL;
+ int is_append = 0;
+ gf_boolean_t locked = _gf_false;
+ gf_boolean_t update_atomic = _gf_false;
+ posix_inode_ctx_t *ctx = NULL;
+ char in_uuid_str[64] = {0}, out_uuid_str[64] = {0};
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd_in, out);
+ VALIDATE_OR_GOTO(fd_in->inode, out);
+ VALIDATE_OR_GOTO(fd_out, out);
+ VALIDATE_OR_GOTO(fd_out->inode, out);
+ VALIDATE_OR_GOTO(this->private, out);
+
+ priv = this->private;
+
+ VALIDATE_OR_GOTO(priv, out);
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+ if (posix_check_dev_file(this, fd_in->inode, "copy_file_range", &op_errno))
+ goto out;
+
+ if (posix_check_dev_file(this, fd_out->inode, "copy_file_range", &op_errno))
+ goto out;
+
+ ret = posix_fd_ctx_get(fd_in, this, &pfd_in, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd_in);
+ goto out;
+ }
+
+ _fd_in = pfd_in->fd;
+
+ ret = posix_fd_ctx_get(fd_out, this, &pfd_out, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd_out);
+ goto out;
+ }
+
+ _fd_out = pfd_out->fd;
+
+ /*
+ * Currently, the internal write is checked via xdata which
+ * is set by some xlator above. It could be due to several of
+ * the reasons such as healing or a snapshot operation happening
+ * using copy_file_range. As of now (i.e. writing the patch with
+ * this change) none of the xlators above posix are using the
+ * internal write with copy_file_range. In future it might
+ * change. Atleast as of now the hope is that, when that happens
+ * this functon or fop does not require additional changes for
+ * handling internal writes.
+ */
+ ret = posix_check_internal_writes(this, fd_out, _fd_out, xdata);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "possible overwrite from internal client, fd=%p", fd_out);
+ op_ret = -1;
+ op_errno = EBUSY;
+ goto out;
+ }
+
+ if (xdata) {
+ if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC))
+ update_atomic = _gf_true;
+ }
+
+ /*
+ * The update_atomic option is to instruct posix to do prestat,
+ * write and poststat atomically. This is to prevent any modification to
+ * ia_size and ia_blocks until poststat and the diff in their values
+ * between pre and poststat could be of use for some translators.
+ * This is similar to the atomic write operation. atmoic write is
+ * (i.e. prestat + write + poststat) used by shard as of now. In case,
+ * some xlator needs copy_file_range to be atomic from prestat and postat
+ * prespective (i.e. prestat + copy_file_range + poststat) then it has
+ * to send "GLUSTERFS_WRITE_UPDATE_ATOMIC" key in xdata.
+ */
+
+ op_ret = posix_inode_ctx_get_all(fd_out->inode, this, &ctx);
+ if (op_ret < 0) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (update_atomic) {
+ ret = pthread_mutex_lock(&ctx->write_atomic_lock);
+ if (!ret)
+ locked = _gf_true;
+ else {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_MUTEX_FAILED,
+ "failed to hold write atomic lock on %s",
+ uuid_utoa(fd_out->inode->gfid));
+ goto out;
+ }
+ }
+
+ op_ret = posix_fdstat(this, fd_out->inode, _fd_out, &preop_dst);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "pre-operation fstat failed on fd=%p", fd_out);
+ goto out;
+ }
+
+ /*
+ * Since, only the destination file (fd_out) is undergoing
+ * modification, the write related tests are done on that.
+ * i.e. this is treater similar to as if the destination file
+ * undergoing write fop from maintenance perspective.
+ */
+ if (xdata) {
+ op_ret = posix_cs_maintenance(this, fd_out, NULL, &_fd_out, &preop_dst,
+ NULL, xdata, &rsp_xdata, _gf_false);
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "file state check failed, fd %p", fd_out);
+ op_errno = EIO;
+ goto out;
+ }
+ }
+
+ /*
+ * NOTE: This is just doing a single execution of copy_file_range
+ * system call. If the returned value of this system call is less
+ * than len, then should we keep doing it in a for loop until the
+ * copy_file_range of all the len bytes is done?
+ * Check the example program provided in the man page of
+ * copy_file_range.
+ * If so, then a separate variables for both off_in and off_out
+ * should be used which are initialized to off_in and off_out
+ * that this function call receives, but then advanced by the
+ * value returned by sys_copy_file_range and then use that as
+ * off_in and off_out for next instance of copy_file_range execution.
+ */
+ op_ret = sys_copy_file_range(_fd_in, &off_in, _fd_out, &off_out, len,
+ flags);
+
+ if (op_ret < 0) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_COPY_FILE_RANGE_FAILED,
+ "copy_file_range failed: fd_in: %p (gfid: %s) ,"
+ " fd_out %p (gfid:%s)",
+ fd_in, uuid_utoa_r(fd_in->inode->gfid, in_uuid_str), fd_out,
+ uuid_utoa_r(fd_out->inode->gfid, out_uuid_str));
+ goto out;
+ }
+
+ /*
+ * Let this be as it is for now. This function collects
+ * infomration such as open fd count etc. So, even though
+ * is_append does not apply to copy_file_range, for now,
+ * allowing it to be recorded in the dict as _gf_false.
+ */
+ rsp_xdata = _fill_writev_xdata(fd_out, xdata, this, is_append);
+
+ /* copy_file_range successful, we also need to get the stat of
+ * the file we wrote to (i.e. destination file or fd_out).
+ */
+ ret = posix_fdstat(this, fd_out->inode, _fd_out, &postop_dst);
+ if (ret == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "post-operation fstat failed on fd=%p", fd_out);
+ goto out;
+ }
+
+ /*
+ * Also perform the stat on the source fd (i.e. fd_in). For now,
+ * allowing it to be done within the locked region if the request
+ * is for atomic operation (and update) of copy_file_range.
+ */
+ ret = posix_fdstat(this, fd_in->inode, _fd_in, &stbuf);
+ if (ret == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "post-operation fstat failed on fd=%p", fd_in);
+ goto out;
+ }
+
+ /*
+ * The core logic of what time attributes are to be updated
+ * on a fop is decided at client side xlator utime.
+ * All the remaining fops call posix_set_ctime function
+ * to update the {a,m,c}time. But, for all the other fops,
+ * the operation is happening on only one file (or inode).
+ * But here, there are 2 fds (source and destination). Hence
+ * the new function below to update the appropriate times for
+ * both the source and the destination file.
+ * For the source file, if at all anything has to be updated,
+ * it would be atime (as that file is only read, not updated).
+ * For the destination file, the attributes that require the
+ * modification would be mtime and ctime.
+ * What times have to be changed is actually determined by
+ * utime xlator. But, all of them would be in frame->root->flags.
+ * So, currently posix assumes that, the atime flag is for
+ * the source file and the other 2 flags are for the destination
+ * file. Since, the assumption is rigid (i.e. atime for source
+ * and {m,c}time for destination), the below function is called
+ * posix_set_ctime_cfr (cfr standing for copy_file_range).
+ * FUTURE TODO:
+ * In future, some other functionality or fop might operate
+ * simultaneously on 2 files. Then, depending upon what that new
+ * fop does or what are its requirements, the below function might
+ * require changes to become generic for consumption in case of
+ * simultaneous operations on 2 files.
+ */
+ posix_set_ctime_cfr(frame, this, NULL, pfd_in->fd, fd_in->inode, &stbuf,
+ NULL, pfd_out->fd, fd_out->inode, &postop_dst);
+
+ if (locked) {
+ pthread_mutex_unlock(&ctx->write_atomic_lock);
+ locked = _gf_false;
+ }
+
+ /*
+ * Record copy_file_range in priv->write_value for now.
+ * If not needed, remove below section of code along with
+ * this comment (or add comment to explain why it is not
+ * needed).
+ */
+ GF_ATOMIC_ADD(priv->write_value, op_ret);
+
+out:
+
+ if (locked) {
+ pthread_mutex_unlock(&ctx->write_atomic_lock);
+ locked = _gf_false;
+ }
+
+ STACK_UNWIND_STRICT(copy_file_range, frame, op_ret, op_errno, &stbuf,
+ &preop_dst, &postop_dst, rsp_xdata);
+
+ if (rsp_xdata)
+ dict_unref(rsp_xdata);
+ return 0;
+}
+
+int32_t
+posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ char *real_path = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ struct statvfs buf = {
+ 0,
+ };
+ struct posix_private *priv = NULL;
+ int shared_by = 1;
+ double percent = 0;
+ uint64_t reserved_blocks = 0;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(loc, out);
+ VALIDATE_OR_GOTO(this->private, out);
+
+ MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+ if (!real_path) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ priv = this->private;
+
+ op_ret = sys_statvfs(real_path, &buf);
+
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED,
+ "statvfs failed on gfid-handle %s (path: %s)", real_path,
+ loc->path);
+ goto out;
+ }
+
+ if (priv->disk_unit == 'p') {
+ percent = priv->disk_reserve;
+ reserved_blocks = (((buf.f_blocks * percent) / 100) + 0.5);
+ } else {
+ if (buf.f_bsize) {
+ reserved_blocks = (priv->disk_reserve + buf.f_bsize - 1) /
+ buf.f_bsize;
+ }
+ }
+
+ if (buf.f_bfree > reserved_blocks) {
+ buf.f_bfree = (buf.f_bfree - reserved_blocks);
+ if (buf.f_bavail > buf.f_bfree) {
+ buf.f_bavail = buf.f_bfree;
+ }
+ } else {
+ buf.f_bfree = 0;
+ buf.f_bavail = 0;
+ }
+
+ shared_by = priv->shared_brick_count;
+ if (shared_by > 1) {
+ buf.f_blocks /= shared_by;
+ buf.f_bfree /= shared_by;
+ buf.f_bavail /= shared_by;
+ buf.f_files /= shared_by;
+ buf.f_ffree /= shared_by;
+ buf.f_favail /= shared_by;
+ }
+
+ if (!priv->export_statfs) {
+ buf.f_blocks = 0;
+ buf.f_bfree = 0;
+ buf.f_bavail = 0;
+ buf.f_files = 0;
+ buf.f_ffree = 0;
+ buf.f_favail = 0;
+ }
+
+ op_ret = 0;
+
+out:
+ STACK_UNWIND_STRICT(statfs, frame, op_ret, op_errno, &buf, NULL);
+ return 0;
+}
+
+int32_t
+posix_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int ret = -1;
+ struct posix_fd *pfd = NULL;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL on fd=%p", fd);
+ goto out;
+ }
+
+ op_ret = 0;
+
+out:
+ STACK_UNWIND_STRICT(flush, frame, op_ret, op_errno, NULL);
+
+ return 0;
+}
+
+int32_t
+posix_release(xlator_t *this, fd_t *fd)
+{
+ struct posix_fd *pfd = NULL;
+ int ret = -1;
+ uint64_t tmp_pfd = 0;
+
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ ret = fd_ctx_del(fd, this, &tmp_pfd);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd);
+ goto out;
+ }
+
+ pfd = (struct posix_fd *)(long)tmp_pfd;
+ if (pfd->dir) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DIR_NOT_NULL,
+ "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd);
+ }
+
+ posix_add_fd_to_cleanup(this, pfd);
+
+out:
+ return 0;
+}
+
+int
+posix_batch_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ struct posix_private *priv = NULL;
+
+ priv = this->private;
+
+ stub = fop_fsync_stub(frame, default_fsync, fd, datasync, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+ }
+
+ pthread_mutex_lock(&priv->fsync_mutex);
+ {
+ list_add_tail(&stub->list, &priv->fsyncs);
+ priv->fsync_queue_count++;
+ pthread_cond_signal(&priv->fsync_cond);
+ }
+ pthread_mutex_unlock(&priv->fsync_mutex);
+
+ return 0;
+}
+
+int32_t
+posix_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int _fd = -1;
+ struct posix_fd *pfd = NULL;
+ int ret = -1;
+ struct iatt preop = {
+ 0,
+ };
+ struct iatt postop = {
+ 0,
+ };
+ struct posix_private *priv = NULL;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+#ifdef GF_DARWIN_HOST_OS
+ /* Always return success in case of fsync in MAC OS X */
+ op_ret = 0;
+ goto out;
+#endif
+
+ priv = this->private;
+
+ if (priv->batch_fsync_mode && xdata && dict_get(xdata, "batch-fsync")) {
+ posix_batch_fsync(frame, this, fd, datasync, xdata);
+ return 0;
+ }
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd not found in fd's ctx");
+ goto out;
+ }
+
+ _fd = pfd->fd;
+
+ op_ret = posix_fdstat(this, fd->inode, _fd, &preop);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FSTAT_FAILED,
+ "pre-operation fstat failed on fd=%p", fd);
+ goto out;
+ }
+
+ if (datasync) {
+ op_ret = sys_fdatasync(_fd);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSYNC_FAILED,
+ "fdatasync on fd=%p"
+ "failed:",
+ fd);
+ goto out;
+ }
+ } else {
+ op_ret = sys_fsync(_fd);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSYNC_FAILED,
+ "fsync on fd=%p "
+ "failed",
+ fd);
+ goto out;
+ }
+ }
+
+ op_ret = posix_fdstat(this, fd->inode, _fd, &postop);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FSTAT_FAILED,
+ "post-operation fstat failed on fd=%p", fd);
+ goto out;
+ }
+
+ op_ret = 0;
+
+out:
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, &preop, &postop, NULL);
+
+ return 0;
+}
+
+static int gf_posix_xattr_enotsup_log;
+static int
+_handle_setxattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
+{
+ posix_xattr_filler_t *filler = NULL;
+
+ filler = tmp;
+
+ return posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v,
+ filler->flags, filler->stbuf);
+}
+
+#ifdef GF_DARWIN_HOST_OS
+static int
+map_xattr_flags(int flags)
+{
+ /* DARWIN has different defines on XATTR_ flags.
+ There do not seem to be a POSIX standard
+ Parse any other flags over.
+ */
+ int darwinflags = flags &
+ ~(GF_XATTR_CREATE | GF_XATTR_REPLACE | XATTR_REPLACE);
+ if (GF_XATTR_CREATE & flags)
+ darwinflags |= XATTR_CREATE;
+ if (GF_XATTR_REPLACE & flags)
+ darwinflags |= XATTR_REPLACE;
+ return darwinflags;
+}
+#endif
+
+int32_t
+posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int flags, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_path = NULL;
+ char *acl_xattr = NULL;
+ struct iatt preop = {0};
+ struct iatt postop = {0};
+ int32_t ret = 0;
+ ssize_t acl_size = 0;
+ dict_t *xattr = NULL;
+ dict_t *subvol_xattrs = NULL;
+ posix_xattr_filler_t filler = {
+ 0,
+ };
+ struct posix_private *priv = NULL;
+ struct iatt tmp_stbuf = {
+ 0,
+ };
+ data_t *tdata = NULL;
+ char *cs_var = NULL;
+ gf_cs_obj_state state = -1;
+ int i = 0;
+ int len;
+ struct mdata_iatt mdata_iatt = {
+ 0,
+ };
+ int8_t sync_backend_xattrs = _gf_false;
+ data_pair_t *custom_xattrs;
+ data_t *keyval = NULL;
+ char **xattrs_to_heal = get_xattrs_to_heal();
+
+ DECLARE_OLD_FS_ID_VAR;
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(this->private, out);
+ VALIDATE_OR_GOTO(loc, out);
+ VALIDATE_OR_GOTO(dict, out);
+
+ priv = this->private;
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+ MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+ if (!real_path) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ ret = dict_get_mdata(dict, CTIME_MDATA_XDATA_KEY, &mdata_iatt);
+ if (ret == 0) {
+ /* This is initiated by lookup when ctime feature is enabled to create
+ * "trusted.glusterfs.mdata" xattr if not present. These are the files
+ * which were created when ctime feature is disabled.
+ */
+ ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path,
+ &mdata_iatt, &op_errno);
+ if (ret != 0) {
+ op_ret = -1;
+ }
+ goto out;
+ }
+
+ posix_pstat(this, loc->inode, loc->gfid, real_path, &preop, _gf_false);
+
+ op_ret = -1;
+
+ dict_del(dict, GFID_XATTR_KEY);
+ dict_del(dict, GF_XATTR_VOL_ID_KEY);
+ /* the io-stats-dump key should not reach disk */
+ dict_del(dict, GF_XATTR_IOSTATS_DUMP_KEY);
+
+ tdata = dict_get(dict, GF_CS_OBJECT_UPLOAD_COMPLETE);
+ if (tdata) {
+ /*TODO: move the following to a different function */
+ LOCK(&loc->inode->lock);
+ {
+ state = posix_cs_check_status(this, real_path, NULL, &preop);
+ if (state != GF_CS_LOCAL) {
+ op_errno = EINVAL;
+ ret = posix_cs_set_state(this, &xattr, state, real_path, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "set state failed");
+ }
+ goto unlock;
+ }
+
+ ret = posix_pstat(this, loc->inode, loc->gfid, real_path,
+ &tmp_stbuf, _gf_true);
+ if (ret) {
+ op_errno = EINVAL;
+ goto unlock;
+ }
+
+ cs_var = alloca(4096);
+ sprintf(cs_var, "%" PRId64, tmp_stbuf.ia_mtime);
+
+ /*TODO: may be should consider nano-second also */
+ if (strncmp(cs_var, tdata->data, tdata->len) > 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "mtime "
+ "passed is different from seen by file now."
+ " Will skip truncating the file");
+ ret = -1;
+ op_errno = EINVAL;
+ goto unlock;
+ }
+
+ len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_size);
+
+ ret = sys_lsetxattr(real_path, GF_CS_OBJECT_SIZE, cs_var, len,
+ flags);
+ if (ret) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "setxattr failed. key %s err %d", GF_CS_OBJECT_SIZE,
+ ret);
+ goto unlock;
+ }
+
+ len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_blocks);
+
+ ret = sys_lsetxattr(real_path, GF_CS_NUM_BLOCKS, cs_var, len,
+ flags);
+ if (ret) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "setxattr failed. key %s err %d", GF_CS_NUM_BLOCKS, ret);
+ goto unlock;
+ }
+
+ len = sprintf(cs_var, "%" PRIu32, tmp_stbuf.ia_blksize);
+
+ ret = sys_lsetxattr(real_path, GF_CS_BLOCK_SIZE, cs_var, len,
+ flags);
+ if (ret) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "setxattr failed. key %s err %d", GF_CS_BLOCK_SIZE, ret);
+ goto unlock;
+ }
+
+ memset(cs_var, 0, 4096);
+ if (loc->path[0] == '/') {
+ for (i = 1; i < strlen(loc->path); i++) {
+ cs_var[i - 1] = loc->path[i];
+ }
+
+ cs_var[i] = '\0';
+ gf_msg_debug(this->name, GF_LOG_ERROR, "remotepath %s", cs_var);
+ }
+
+ ret = sys_lsetxattr(real_path, GF_CS_OBJECT_REMOTE, cs_var,
+ strlen(cs_var), flags);
+ if (ret) {
+ op_errno = errno;
+ gf_log("POSIX", GF_LOG_ERROR,
+ "setxattr failed - %s"
+ " %d",
+ GF_CS_OBJECT_SIZE, ret);
+ goto unlock;
+ }
+
+ ret = sys_truncate(real_path, 0);
+ if (ret) {
+ op_errno = errno;
+ gf_log("POSIX", GF_LOG_ERROR,
+ "truncate failed - %s"
+ " %d",
+ GF_CS_OBJECT_SIZE, ret);
+ ret = sys_lremovexattr(real_path, GF_CS_OBJECT_REMOTE);
+ if (ret) {
+ op_errno = errno;
+ gf_log("POSIX", GF_LOG_ERROR,
+ "removexattr "
+ "failed post processing- %s"
+ " %d",
+ GF_CS_OBJECT_SIZE, ret);
+ }
+ goto unlock;
+ } else {
+ state = GF_CS_REMOTE;
+ ret = posix_cs_set_state(this, &xattr, state, real_path, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "set state failed");
+ }
+ }
+ }
+ unlock:
+ UNLOCK(&loc->inode->lock);
+ op_ret = ret;
+ goto out;
+ }
+
+ filler.real_path = real_path;
+ filler.this = this;
+ filler.stbuf = &preop;
+ filler.loc = loc;
+
+#ifdef GF_DARWIN_HOST_OS
+ filler.flags = map_xattr_flags(flags);
+#else
+ filler.flags = flags;
+#endif
+ op_ret = dict_foreach(dict, _handle_setxattr_keyvalue_pair, &filler);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_int8(xdata, "sync_backend_xattrs", &sync_backend_xattrs);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to get sync_backend_xattrs");
+ }
+
+ if (sync_backend_xattrs) {
+ /* List all custom xattrs */
+ subvol_xattrs = dict_new();
+ if (!subvol_xattrs)
+ goto out;
+
+ ret = dict_set_int32_sizen(xdata, "list-xattr", 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
+ "Unable to set list-xattr in dict ");
+ goto out;
+ }
+
+ subvol_xattrs = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata,
+ NULL);
+
+ /* Remove all user xattrs from the file */
+ dict_foreach_fnmatch(subvol_xattrs, "user.*", posix_delete_user_xattr,
+ real_path);
+
+ /* Remove all custom xattrs from the file */
+ for (i = 1; xattrs_to_heal[i]; i++) {
+ keyval = dict_get(subvol_xattrs, xattrs_to_heal[i]);
+ if (keyval) {
+ ret = sys_lremovexattr(real_path, xattrs_to_heal[i]);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED,
+ errno, "removexattr failed. key %s path %s",
+ xattrs_to_heal[i], loc->path);
+ goto out;
+ }
+
+ dict_del(subvol_xattrs, xattrs_to_heal[i]);
+ keyval = NULL;
+ }
+ }
+
+ /* Set custom xattrs based on info provided by DHT */
+ custom_xattrs = dict->members_list;
+
+ while (custom_xattrs != NULL) {
+ ret = sys_lsetxattr(real_path, custom_xattrs->key,
+ custom_xattrs->value->data,
+ custom_xattrs->value->len, flags);
+ if (ret) {
+ op_errno = errno;
+ gf_log(this->name, GF_LOG_ERROR, "setxattr failed - %s %d",
+ custom_xattrs->key, ret);
+ goto out;
+ }
+
+ custom_xattrs = custom_xattrs->next;
+ }
+ }
+
+ xattr = dict_new();
+ if (!xattr)
+ goto out;
+
+ /*
+ * FIXFIX: Send the stbuf info in the xdata for now
+ * This is used by DHT to redirect FOPs if the file is being migrated
+ * Ignore errors for now
+ */
+ ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &postop,
+ _gf_false);
+ if (ret)
+ goto out;
+
+ ret = posix_set_iatt_in_dict(xattr, &preop, &postop);
+
+ /*
+ * ACL can be set on a file/folder using GF_POSIX_ACL_*_KEY xattrs which
+ * won't aware of access-control xlator. To update its context correctly,
+ * POSIX_ACL_*_XATTR stored in xdata which is send in the call_back path.
+ */
+ if (dict_get(dict, GF_POSIX_ACL_ACCESS)) {
+ /*
+ * The size of buffer will be know after calling sys_lgetxattr,
+ * so first we allocate buffer with large size(~4k), then we
+ * reduced into required size using GF_REALLO().
+ */
+ acl_xattr = GF_CALLOC(1, ACL_BUFFER_MAX, gf_posix_mt_char);
+ if (!acl_xattr)
+ goto out;
+
+ acl_size = sys_lgetxattr(real_path, POSIX_ACL_ACCESS_XATTR, acl_xattr,
+ ACL_BUFFER_MAX);
+
+ if (acl_size < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED,
+ "Posix acl is not set "
+ "properly at the backend");
+ goto out;
+ }
+
+ /* If acl_size is more than max buffer size, just ignore it */
+ if (acl_size >= ACL_BUFFER_MAX) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, P_MSG_BUFFER_OVERFLOW,
+ "size of acl is more"
+ "than the buffer");
+ goto out;
+ }
+
+ acl_xattr = GF_REALLOC(acl_xattr, acl_size);
+ if (!acl_xattr)
+ goto out;
+
+ ret = dict_set_bin(xattr, POSIX_ACL_ACCESS_XATTR, acl_xattr, acl_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL,
+ "failed to set"
+ "xdata for acl");
+ GF_FREE(acl_xattr);
+ goto out;
+ }
+ }
+
+ if (dict_get(dict, GF_POSIX_ACL_DEFAULT)) {
+ acl_xattr = GF_CALLOC(1, ACL_BUFFER_MAX, gf_posix_mt_char);
+ if (!acl_xattr)
+ goto out;
+
+ acl_size = sys_lgetxattr(real_path, POSIX_ACL_DEFAULT_XATTR, acl_xattr,
+ ACL_BUFFER_MAX);
+
+ if (acl_size < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED,
+ "Posix acl is not set "
+ "properly at the backend");
+ goto out;
+ }
+
+ if (acl_size >= ACL_BUFFER_MAX) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, P_MSG_BUFFER_OVERFLOW,
+ "size of acl is more"
+ "than the buffer");
+ goto out;
+ }
+
+ acl_xattr = GF_REALLOC(acl_xattr, acl_size);
+ if (!acl_xattr)
+ goto out;
+
+ ret = dict_set_bin(xattr, POSIX_ACL_DEFAULT_XATTR, acl_xattr, acl_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL,
+ "failed to set"
+ "xdata for acl");
+ GF_FREE(acl_xattr);
+ goto out;
+ }
+ }
+
+out:
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xattr);
+
+ if (xattr)
+ dict_unref(xattr);
+
+ if (subvol_xattrs)
+ dict_unref(subvol_xattrs);
+
+ return 0;
+}
+
+int
+posix_xattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *key, dict_t *dict, dict_t *xdata)
+{
+ int ret = -1;
+ int op_ret = -1;
+ const char *fname = NULL;
+ char *real_path = NULL;
+ char *found = NULL;
+ DIR *fd = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+
+ MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+ if (!real_path) {
+ return -ESTALE;
+ }
+ if (op_ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+ "posix_xattr_get_real_filename (lstat) on "
+ "gfid-handle %s (path: %s) failed",
+ real_path, loc->path);
+ return -errno;
+ }
+
+ fd = sys_opendir(real_path);
+ if (!fd)
+ return -errno;
+
+ fname = key + SLEN(GF_XATTR_GET_REAL_FILENAME_KEY);
+
+ for (;;) {
+ errno = 0;
+ entry = sys_readdir(fd, scratch);
+ if (!entry || errno != 0)
+ break;
+
+ if (strcasecmp(entry->d_name, fname) == 0) {
+ found = gf_strdup(entry->d_name);
+ if (!found) {
+ (void)sys_closedir(fd);
+ return -ENOMEM;
+ }
+ break;
+ }
+ }
+
+ (void)sys_closedir(fd);
+
+ if (!found)
+ return -ENOATTR;
+
+ ret = dict_set_dynstr(dict, (char *)key, found);
+ if (ret) {
+ GF_FREE(found);
+ return -ENOMEM;
+ }
+ ret = strlen(found) + 1;
+
+ return ret;
+}
+
+int
+posix_get_ancestry_directory(xlator_t *this, inode_t *leaf_inode,
+ gf_dirent_t *head, char **path, int type,
+ int32_t *op_errno, dict_t *xdata)
+{
+ ssize_t handle_size = 0;
+ struct posix_private *priv = NULL;
+ inode_t *inode = NULL;
+ int ret = -1;
+ char dirpath[PATH_MAX] = {
+ 0,
+ };
+
+ priv = this->private;
+
+ handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length);
+
+ ret = posix_make_ancestryfromgfid(
+ this, dirpath, PATH_MAX + 1, head, type | POSIX_ANCESTRY_PATH,
+ leaf_inode->gfid, handle_size, priv->base_path, leaf_inode->table,
+ &inode, xdata, op_errno);
+ if (ret < 0)
+ goto out;
+
+ /* there is already a reference in loc->inode */
+ inode_unref(inode);
+
+ if ((type & POSIX_ANCESTRY_PATH) && (path != NULL)) {
+ if (strcmp(dirpath, "/"))
+ dirpath[strlen(dirpath) - 1] = '\0';
+
+ *path = gf_strdup(dirpath);
+ }
+
+out:
+ return ret;
+}
+
+int32_t
+posix_links_in_same_directory(char *dirpath, int count, inode_t *leaf_inode,
+ inode_t *parent, struct stat *stbuf,
+ gf_dirent_t *head, char **path, int type,
+ dict_t *xdata, int32_t *op_errno)
+{
+ int op_ret = -1;
+ gf_dirent_t *gf_entry = NULL;
+ xlator_t *this = NULL;
+ struct posix_private *priv = NULL;
+ DIR *dirp = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ char temppath[PATH_MAX] = {
+ 0,
+ };
+ char scr[PATH_MAX * 4] = {
+ 0,
+ };
+
+ this = THIS;
+
+ priv = this->private;
+
+ dirp = sys_opendir(dirpath);
+ if (!dirp) {
+ *op_errno = errno;
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_OPEN_FAILED,
+ "could not opendir %s", dirpath);
+ goto out;
+ }
+
+ while (count > 0) {
+ errno = 0;
+ entry = sys_readdir(dirp, scratch);
+ if (!entry || errno != 0)
+ break;
+
+ if (entry->d_ino != stbuf->st_ino)
+ continue;
+
+ /* Linking an inode here, can cause a race in posix_acl.
+ Parent inode gets linked here, but before
+ it reaches posix_acl_readdirp_cbk, create/lookup can
+ come on a leaf-inode, as parent-inode-ctx not yet updated
+ in posix_acl_readdirp_cbk, create and lookup can fail
+ with EACCESS. So do the inode linking in the quota xlator
+
+ linked_inode = inode_link (leaf_inode, parent,
+ entry->d_name, NULL);
+
+ GF_ASSERT (linked_inode == leaf_inode);
+ inode_unref (linked_inode);*/
+
+ if (type & POSIX_ANCESTRY_DENTRY) {
+ loc_t loc = {
+ 0,
+ };
+
+ loc.inode = inode_ref(leaf_inode);
+ gf_uuid_copy(loc.gfid, leaf_inode->gfid);
+
+ (void)snprintf(temppath, sizeof(temppath), "%s/%s", dirpath,
+ entry->d_name);
+
+ gf_entry = gf_dirent_for_name(entry->d_name);
+ if (!gf_entry) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, "gf_entry is NULL");
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ inode_unref(loc.inode);
+ goto out;
+ }
+ gf_entry->inode = inode_ref(leaf_inode);
+ gf_entry->dict = posix_xattr_fill(this, temppath, &loc, NULL, -1,
+ xdata, NULL);
+ iatt_from_stat(&(gf_entry->d_stat), stbuf);
+
+ list_add_tail(&gf_entry->list, &head->list);
+ loc_wipe(&loc);
+ }
+
+ if (type & POSIX_ANCESTRY_PATH) {
+ (void)snprintf(temppath, sizeof(temppath), "%s/%s",
+ &dirpath[priv->base_path_length], entry->d_name);
+ if (!*path) {
+ *path = gf_strdup(temppath);
+ } else {
+ /* creating a colon separated */
+ /* list of hard links */
+ (void)snprintf(scr, sizeof(scr), "%s:%s", *path, temppath);
+
+ GF_FREE(*path);
+ *path = gf_strdup(scr);
+ }
+ if (!*path) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ count--;
+ }
+
+ op_ret = 0;
+out:
+ if (dirp) {
+ op_ret = sys_closedir(dirp);
+ if (op_ret == -1) {
+ *op_errno = errno;
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_CLOSE_FAILED,
+ "closedir failed");
+ }
+ }
+
+ return op_ret;
+}
+
+int
+posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode,
+ gf_dirent_t *head, char **path, int type,
+ int32_t *op_errno, dict_t *xdata)
+{
+ size_t remaining_size = 0;
+ int op_ret = -1, pathlen = -1;
+ ssize_t handle_size = 0;
+ uuid_t pgfid = {
+ 0,
+ };
+ int nlink_samepgfid = 0;
+ struct stat stbuf = {
+ 0,
+ };
+ char *list = NULL;
+ int32_t list_offset = 0;
+ struct posix_private *priv = NULL;
+ ssize_t size = 0;
+ inode_t *parent = NULL;
+ loc_t *loc = NULL;
+ char *leaf_path = NULL;
+ char key[4096] = {
+ 0,
+ };
+ char dirpath[PATH_MAX] = {
+ 0,
+ };
+ char pgfidstr[UUID_CANONICAL_FORM_LEN + 1] = {
+ 0,
+ };
+ int len;
+
+ priv = this->private;
+
+ loc = GF_CALLOC(1, sizeof(*loc), gf_posix_mt_char);
+ if (loc == NULL) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_copy(loc->gfid, leaf_inode->gfid);
+
+ MAKE_INODE_HANDLE(leaf_path, this, loc, NULL);
+ if (!leaf_path) {
+ GF_FREE(loc);
+ *op_errno = ESTALE;
+ goto out;
+ }
+ GF_FREE(loc);
+
+ size = sys_llistxattr(leaf_path, NULL, 0);
+ if (size == -1) {
+ *op_errno = errno;
+ if ((errno == ENOTSUP) || (errno == ENOSYS)) {
+ GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name,
+ GF_LOG_WARNING,
+ "Extended attributes not "
+ "supported (try remounting brick"
+ " with 'user_xattr' flag)");
+
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED,
+ "listxattr failed on"
+ "%s",
+ leaf_path);
+ }
+
+ goto out;
+ }
+
+ if (size == 0) {
+ op_ret = 0;
+ goto out;
+ }
+
+ list = alloca(size);
+ if (!list) {
+ *op_errno = errno;
+ goto out;
+ }
+
+ size = sys_llistxattr(leaf_path, list, size);
+ if (size < 0) {
+ op_ret = -1;
+ *op_errno = errno;
+ goto out;
+ }
+ remaining_size = size;
+ list_offset = 0;
+
+ op_ret = sys_lstat(leaf_path, &stbuf);
+ if (op_ret == -1) {
+ *op_errno = errno;
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED,
+ "lstat failed on %s", leaf_path);
+ goto out;
+ }
+
+ while (remaining_size > 0) {
+ len = snprintf(key, sizeof(key), "%s", list + list_offset);
+ if (strncmp(key, PGFID_XATTR_KEY_PREFIX,
+ SLEN(PGFID_XATTR_KEY_PREFIX)) != 0)
+ goto next;
+
+ op_ret = sys_lgetxattr(leaf_path, key, &nlink_samepgfid,
+ sizeof(nlink_samepgfid));
+ if (op_ret == -1) {
+ *op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "getxattr failed on "
+ "%s: key = %s ",
+ leaf_path, key);
+ goto out;
+ }
+
+ nlink_samepgfid = ntoh32(nlink_samepgfid);
+
+ snprintf(pgfidstr, sizeof(pgfidstr), "%s",
+ key + SLEN(PGFID_XATTR_KEY_PREFIX));
+ gf_uuid_parse(pgfidstr, pgfid);
+
+ handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length);
+
+ /* constructing the absolute real path of parent dir */
+ snprintf(dirpath, sizeof(dirpath), "%s", priv->base_path);
+ pathlen = PATH_MAX + 1 - priv->base_path_length;
+
+ op_ret = posix_make_ancestryfromgfid(
+ this, dirpath + priv->base_path_length, pathlen, head,
+ type | POSIX_ANCESTRY_PATH, pgfid, handle_size, priv->base_path,
+ leaf_inode->table, &parent, xdata, op_errno);
+ if (op_ret < 0) {
+ goto next;
+ }
+
+ dirpath[strlen(dirpath) - 1] = '\0';
+
+ posix_links_in_same_directory(dirpath, nlink_samepgfid, leaf_inode,
+ parent, &stbuf, head, path, type, xdata,
+ op_errno);
+
+ if (parent != NULL) {
+ inode_unref(parent);
+ parent = NULL;
+ }
+
+ next:
+ remaining_size -= (len + 1);
+ list_offset += (len + 1);
+ } /* while (remaining_size > 0) */
+
+ op_ret = 0;
+
+out:
+ return op_ret;
+}
+
+int
+posix_get_ancestry(xlator_t *this, inode_t *leaf_inode, gf_dirent_t *head,
+ char **path, int type, int32_t *op_errno, dict_t *xdata)
+{
+ int ret = -1;
+ struct posix_private *priv = NULL;
+
+ priv = this->private;
+
+ if (IA_ISDIR(leaf_inode->ia_type)) {
+ ret = posix_get_ancestry_directory(this, leaf_inode, head, path, type,
+ op_errno, xdata);
+ } else {
+ if (!priv->update_pgfid_nlinks)
+ goto out;
+ ret = posix_get_ancestry_non_directory(this, leaf_inode, head, path,
+ type, op_errno, xdata);
+ }
+
+out:
+ if (ret && path && *path) {
+ GF_FREE(*path);
+ *path = NULL;
+ }
+
+ return ret;
+}
+
+/**
+ * posix_getxattr - this function returns a dictionary with all the
+ * key:value pair present as xattr. used for
+ * both 'listxattr' and 'getxattr'.
+ */
+int32_t
+posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ struct posix_private *priv = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *value = NULL;
+ char *real_path = NULL;
+ dict_t *dict = NULL;
+ int ret = -1;
+ char *path = NULL;
+ char *rpath = NULL;
+ ssize_t size = 0;
+ char *list = NULL;
+ int32_t list_offset = 0;
+ size_t remaining_size = 0;
+ char *host_buf = NULL;
+ char *keybuffer = NULL;
+ int keybuff_len;
+ char *value_buf = NULL;
+ gf_boolean_t have_val = _gf_false;
+ struct iatt buf = {
+ 0,
+ };
+ dict_t *xattr_rsp = NULL;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(loc, out);
+ VALIDATE_OR_GOTO(this->private, out);
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+ MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+
+ op_ret = -1;
+ priv = this->private;
+
+ ret = posix_handle_georep_xattrs(frame, name, &op_errno, _gf_true);
+ if (ret == -1) {
+ op_ret = -1;
+ /* errno should be set from the above function*/
+ goto out;
+ }
+
+ ret = posix_handle_mdata_xattr(frame, name, &op_errno);
+ if (ret == -1) {
+ op_ret = -1;
+ /* errno should be set from the above function*/
+ goto out;
+ }
+
+ if (name && posix_is_gfid2path_xattr(name)) {
+ op_ret = -1;
+ op_errno = ENOATTR;
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (loc->inode && name && GF_POSIX_ACL_REQUEST(name)) {
+ ret = posix_pacl_get(real_path, -1, name, &value);
+ if (ret || !value) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_ACL_FAILED,
+ "could not get acl (%s) for"
+ "gfid-handle %s (path: %s)",
+ name, real_path, loc->path);
+ op_ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr(dict, (char *)name, value);
+ if (ret < 0) {
+ GF_FREE(value);
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_ACL_FAILED,
+ "could not set acl (%s) for %s "
+ "(gfid-handle: %s) in dictionary",
+ name, loc->path, real_path);
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ size = ret;
+ goto done;
+ }
+
+ if (loc->inode && name &&
+ (strncmp(name, GF_XATTR_GET_REAL_FILENAME_KEY,
+ SLEN(GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)) {
+ ret = posix_xattr_get_real_filename(frame, this, loc, name, dict,
+ xdata);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = -ret;
+ if (op_errno == ENOATTR) {
+ gf_msg_debug(this->name, 0,
+ "Failed to get "
+ "real filename (%s, %s)",
+ loc->path, name);
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ P_MSG_GETTING_FILENAME_FAILED,
+ "Failed to get real filename (%s, %s):", loc->path,
+ name);
+ }
+ goto out;
+ }
+
+ size = ret;
+ goto done;
+ }
+
+ if (loc->inode && name && !strcmp(name, GLUSTERFS_OPEN_FD_COUNT)) {
+ if (!fd_list_empty(loc->inode)) {
+ ret = dict_set_uint32(dict, (char *)name, 1);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "dictionary value for %s",
+ name);
+ op_errno = ENOMEM;
+ goto out;
+ }
+ } else {
+ ret = dict_set_uint32(dict, (char *)name, 0);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "dictionary value for %s",
+ name);
+ op_errno = ENOMEM;
+ goto out;
+ }
+ }
+ goto done;
+ }
+ if (loc->inode && name && (XATTR_IS_PATHINFO(name))) {
+ VALIDATE_OR_GOTO(this->private, out);
+ if (LOC_HAS_ABSPATH(loc)) {
+ MAKE_REAL_PATH(rpath, this, loc->path);
+ } else {
+ rpath = real_path;
+ }
+ size = gf_asprintf(
+ &host_buf, "<POSIX(%s):%s:%s>", priv->base_path,
+ ((priv->node_uuid_pathinfo && !gf_uuid_is_null(priv->glusterd_uuid))
+ ? uuid_utoa(priv->glusterd_uuid)
+ : priv->hostname),
+ rpath);
+ if (size < 0) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ ret = dict_set_dynstr(dict, (char *)name, host_buf);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+ "could not set value"
+ " (%s) in dictionary",
+ host_buf);
+ GF_FREE(host_buf);
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ goto done;
+ }
+
+ if (loc->inode && name && (strcmp(name, GF_XATTR_NODE_UUID_KEY) == 0) &&
+ !gf_uuid_is_null(priv->glusterd_uuid)) {
+ size = gf_asprintf(&host_buf, "%s", uuid_utoa(priv->glusterd_uuid));
+ if (size == -1) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ ret = dict_set_dynstr(dict, GF_XATTR_NODE_UUID_KEY, host_buf);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED,
+ "could not set value"
+ "(%s) in dictionary",
+ host_buf);
+ GF_FREE(host_buf);
+ op_errno = -ret;
+ goto out;
+ }
+ goto done;
+ }
+
+ if (loc->inode && name && (strcmp(name, GFID_TO_PATH_KEY) == 0)) {
+ ret = inode_path(loc->inode, NULL, &path);
+ if (ret < 0) {
+ op_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ P_MSG_INODE_PATH_GET_FAILED,
+ "%s: could not get "
+ "inode path",
+ uuid_utoa(loc->inode->gfid));
+ goto out;
+ }
+
+ size = ret;
+ ret = dict_set_dynstr(dict, GFID_TO_PATH_KEY, path);
+ if (ret < 0) {
+ op_errno = ENOMEM;
+ GF_FREE(path);
+ goto out;
+ }
+ goto done;
+ }
+
+ if (loc->inode && name && (strcmp(name, GFID2PATH_VIRT_XATTR_KEY) == 0)) {
+ if (!priv->gfid2path) {
+ op_errno = ENOATTR;
+ op_ret = -1;
+ goto out;
+ }
+ ret = posix_get_gfid2path(this, loc->inode, real_path, &op_errno, dict);
+ if (ret < 0) {
+ op_ret = -1;
+ goto out;
+ }
+ size = ret;
+ goto done;
+ }
+
+ if (loc->inode && name && (strcmp(name, GET_ANCESTRY_PATH_KEY) == 0)) {
+ int type = POSIX_ANCESTRY_PATH;
+
+ op_ret = posix_get_ancestry(this, loc->inode, NULL, &path, type,
+ &op_errno, xdata);
+ if (op_ret < 0) {
+ op_ret = -1;
+ op_errno = ENODATA;
+ goto out;
+ }
+ size = op_ret;
+ op_ret = dict_set_dynstr(dict, GET_ANCESTRY_PATH_KEY, path);
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -op_ret,
+ P_MSG_GET_KEY_VALUE_FAILED,
+ "could not get "
+ "value for key (%s)",
+ GET_ANCESTRY_PATH_KEY);
+ GF_FREE(path);
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ goto done;
+ }
+
+ if (loc->inode && name &&
+ (strncmp(name, GLUSTERFS_GET_OBJECT_SIGNATURE,
+ SLEN(GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0)) {
+ op_ret = posix_get_objectsignature(real_path, dict);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto out;
+ }
+
+ goto done;
+ }
+
+ /* here allocate value_buf of 8192 bytes to avoid one extra getxattr
+ call,If buffer size is small to hold the xattr result then it will
+ allocate a new buffer value of required size and call getxattr again
+ */
+
+ value_buf = alloca(XATTR_VAL_BUF_SIZE);
+ if (name) {
+ char *key = (char *)name;
+
+ keybuffer = key;
+#if defined(GF_DARWIN_HOST_OS_DISABLED)
+ if (priv->xattr_user_namespace == XATTR_STRIP) {
+ if (strncmp(key, "user.", 5) == 0) {
+ key += 5;
+ gf_msg_debug(this->name, 0,
+ "getxattr for file %s (gfid-handle: %s)"
+ " stripping user key: %s -> %s",
+ loc->path, real_path, keybuffer, key);
+ }
+ }
+#endif
+ size = sys_lgetxattr(real_path, key, value_buf, XATTR_VAL_BUF_SIZE - 1);
+ if (size >= 0) {
+ have_val = _gf_true;
+ } else {
+ if (errno == ERANGE) {
+ gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED,
+ "getxattr failed due to overflow of buffer"
+ " on gfid-handle %s (path: %s) : %s ",
+ real_path, loc->path, key);
+ size = sys_lgetxattr(real_path, key, NULL, 0);
+ }
+ if (size == -1) {
+ op_errno = errno;
+ if ((op_errno == ENOTSUP) || (op_errno == ENOSYS)) {
+ GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name,
+ GF_LOG_WARNING,
+ "Extended attributes not "
+ "supported (try remounting"
+ " brick with 'user_xattr' "
+ "flag)");
+ }
+ if ((op_errno == ENOATTR) || (op_errno == ENODATA)) {
+ gf_msg_debug(this->name, 0,
+ "No such attribute:%s for file %s (path: %s)",
+ key, real_path, loc->path);
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ P_MSG_XATTR_FAILED,
+ "getxattr failed on "
+ "%s (path: %s): %s ",
+ real_path, loc->path, key);
+ }
+ goto out;
+ }
+ }
+ value = GF_MALLOC(size + 1, gf_posix_mt_char);
+ if (!value) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ if (have_val) {
+ memcpy(value, value_buf, size);
+ } else {
+ bzero(value, size + 1);
+ size = sys_lgetxattr(real_path, key, value, size);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "getxattr failed on %s (path: %s): key = %s", real_path,
+ loc->path, key);
+ GF_FREE(value);
+ goto out;
+ }
+ }
+ value[size] = '\0';
+ op_ret = dict_set_dynptr(dict, key, value, size);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_DICT_SET_FAILED,
+ "dict set operation "
+ "on %s (gfid-handle: %s) for the key %s failed.",
+ loc->path, real_path, key);
+ GF_FREE(value);
+ goto out;
+ }
+
+ goto done;
+ }
+
+ have_val = _gf_false;
+ size = sys_llistxattr(real_path, value_buf, XATTR_VAL_BUF_SIZE - 1);
+ if (size > 0) {
+ have_val = _gf_true;
+ } else {
+ if (errno == ERANGE) {
+ gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED,
+ "listxattr failed due to overflow of buffer"
+ " on %s (path: %s) ",
+ real_path, loc->path);
+ size = sys_llistxattr(real_path, NULL, 0);
+ }
+ if (size == -1) {
+ op_errno = errno;
+ if ((errno == ENOTSUP) || (errno == ENOSYS)) {
+ GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name,
+ GF_LOG_WARNING,
+ "Extended attributes not "
+ "supported (try remounting"
+ " brick with 'user_xattr' "
+ "flag)");
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "listxattr failed on %s (path: %s)", real_path,
+ loc->path);
+ }
+ goto out;
+ }
+ if (size == 0)
+ goto done;
+ }
+ list = alloca(size);
+ if (!list) {
+ op_errno = errno;
+ goto out;
+ }
+ if (have_val) {
+ memcpy(list, value_buf, size);
+ } else {
+ size = sys_llistxattr(real_path, list, size);
+ if (size < 0) {
+ op_ret = -1;
+ op_errno = errno;
+ goto out;
+ }
+ }
+ remaining_size = size;
+ list_offset = 0;
+ keybuffer = alloca(XATTR_KEY_BUF_SIZE);
+ while (remaining_size > 0) {
+ keybuff_len = snprintf(keybuffer, XATTR_KEY_BUF_SIZE, "%s",
+ list + list_offset);
+
+ ret = posix_handle_georep_xattrs(frame, keybuffer, NULL, _gf_false);
+ if (ret == -1)
+ goto ignore;
+
+ ret = posix_handle_mdata_xattr(frame, keybuffer, &op_errno);
+ if (ret == -1) {
+ goto ignore;
+ }
+
+ if (posix_is_gfid2path_xattr(keybuffer)) {
+ goto ignore;
+ }
+
+ have_val = _gf_false;
+ size = sys_lgetxattr(real_path, keybuffer, value_buf,
+ XATTR_VAL_BUF_SIZE - 1);
+ if (size >= 0) {
+ have_val = _gf_true;
+ } else {
+ if (errno == ERANGE) {
+ gf_msg(this->name, GF_LOG_INFO, op_errno, P_MSG_XATTR_FAILED,
+ "getxattr failed due to overflow of"
+ " buffer on %s (path: %s): %s ",
+ real_path, loc->path, keybuffer);
+ size = sys_lgetxattr(real_path, keybuffer, NULL, 0);
+ }
+ if (size == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "getxattr failed on"
+ " %s (path: %s): key = %s ",
+ real_path, loc->path, keybuffer);
+ goto out;
+ }
+ }
+ value = GF_MALLOC(size + 1, gf_posix_mt_char);
+ if (!value) {
+ op_errno = errno;
+ goto out;
+ }
+ if (have_val) {
+ memcpy(value, value_buf, size);
+ } else {
+ bzero(value, size + 1);
+ size = sys_lgetxattr(real_path, keybuffer, value, size);
+ if (size == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "getxattr failed on"
+ " %s (path: %s): key = %s ",
+ real_path, loc->path, keybuffer);
+ GF_FREE(value);
+ goto out;
+ }
+ }
+ value[size] = '\0';
+#ifdef GF_DARWIN_HOST_OS
+ /* The protocol expect namespace for now */
+ char *newkey = NULL;
+ gf_add_prefix(XATTR_USER_PREFIX, keybuffer, &newkey);
+ keybuff_len = snprintf(keybuffer, sizeof(keybuffer), "%s", newkey);
+ GF_FREE(newkey);
+#endif
+ op_ret = dict_set_dynptr(dict, keybuffer, value, size);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_DICT_SET_FAILED,
+ "dict set operation "
+ "on %s (gfid-handle: %s) for the key %s failed.",
+ loc->path, real_path, keybuffer);
+ GF_FREE(value);
+ goto out;
+ }
+
+ ignore:
+ remaining_size -= keybuff_len + 1;
+ list_offset += keybuff_len + 1;
+
+ } /* while (remaining_size > 0) */
+
+done:
+ op_ret = size;
+
+ if (xdata && (op_ret >= 0)) {
+ xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata,
+ &buf);
+ }
+
+ if (dict) {
+ dict_del(dict, GFID_XATTR_KEY);
+ dict_del(dict, GF_XATTR_VOL_ID_KEY);
+ }
+
+out:
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xattr_rsp);
+
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+
+ if (dict) {
+ dict_unref(dict);
+ }
+
+ return 0;
+}
+
+int32_t
+posix_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ struct posix_fd *pfd = NULL;
+ int _fd = -1;
+ int32_t list_offset = 0;
+ ssize_t size = 0;
+ size_t remaining_size = 0;
+ char *value = NULL;
+ char *list = NULL;
+ dict_t *dict = NULL;
+ int ret = -1;
+ char key[4096] = {
+ 0,
+ };
+ int key_len;
+ char *value_buf = NULL;
+ gf_boolean_t have_val = _gf_false;
+ struct iatt buf = {
+ 0,
+ };
+ dict_t *xattr_rsp = NULL;
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ op_ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd);
+ goto out;
+ }
+
+ _fd = pfd->fd;
+
+ /* Get the total size */
+ dict = dict_new();
+ if (!dict) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (name && !strcmp(name, GLUSTERFS_OPEN_FD_COUNT)) {
+ ret = dict_set_uint32(dict, (char *)name, 1);
+ if (ret < 0) {
+ op_ret = -1;
+ size = -1;
+ op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "dictionary value for %s",
+ name);
+ goto out;
+ }
+ goto done;
+ }
+
+ if (name && strncmp(name, GLUSTERFS_GET_OBJECT_SIGNATURE,
+ SLEN(GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0) {
+ op_ret = posix_fdget_objectsignature(_fd, dict);
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "posix_fdget_objectsignature failed");
+ op_errno = -op_ret;
+ op_ret = -1;
+ size = -1;
+ goto out;
+ }
+
+ goto done;
+ }
+
+ /* here allocate value_buf of 8192 bytes to avoid one extra getxattr
+ call,If buffer size is small to hold the xattr result then it will
+ allocate a new buffer value of required size and call getxattr again
+ */
+ value_buf = alloca(XATTR_VAL_BUF_SIZE);
+
+ if (name) {
+ key_len = snprintf(key, sizeof(key), "%s", name);
+#ifdef GF_DARWIN_HOST_OS
+ struct posix_private *priv = NULL;
+ priv = this->private;
+ if (priv->xattr_user_namespace == XATTR_STRIP) {
+ char *newkey = NULL;
+ gf_add_prefix(XATTR_USER_PREFIX, key, &newkey);
+ key_len = snprintf(key, sizeof(key), "%s", newkey);
+ GF_FREE(newkey);
+ }
+#endif
+ size = sys_fgetxattr(_fd, key, value_buf, XATTR_VAL_BUF_SIZE - 1);
+ if (size >= 0) {
+ have_val = _gf_true;
+ } else {
+ if (errno == ERANGE) {
+ gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED,
+ "fgetxattr failed due to overflow of"
+ "buffer on %s ",
+ key);
+ size = sys_fgetxattr(_fd, key, NULL, 0);
+ }
+ if (size == -1) {
+ op_errno = errno;
+ if (errno == ENODATA || errno == ENOATTR) {
+ gf_msg_debug(this->name, 0,
+ "fgetxattr"
+ " failed on key %s (%s)",
+ key, strerror(op_errno));
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "fgetxattr"
+ " failed on key %s",
+ key);
+ }
+ goto done;
+ }
+ }
+ value = GF_MALLOC(size + 1, gf_posix_mt_char);
+ if (!value) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ if (have_val) {
+ memcpy(value, value_buf, size);
+ } else {
+ bzero(value, size + 1);
+ size = sys_fgetxattr(_fd, key, value, size);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "fgetxattr"
+ " failed on fd %p for the key %s ",
+ fd, key);
+ GF_FREE(value);
+ goto out;
+ }
+ }
+
+ value[size] = '\0';
+ op_ret = dict_set_dynptr(dict, key, value, size);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DICT_SET_FAILED,
+ "dict set operation "
+ "on key %s failed",
+ key);
+ GF_FREE(value);
+ goto out;
+ }
+
+ goto done;
+ }
+ size = sys_flistxattr(_fd, value_buf, XATTR_VAL_BUF_SIZE - 1);
+ if (size > 0) {
+ have_val = _gf_true;
+ } else {
+ if (errno == ERANGE) {
+ gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED,
+ "listxattr failed due to overflow of buffer"
+ " on %p ",
+ fd);
+ size = sys_flistxattr(_fd, NULL, 0);
+ }
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ if ((errno == ENOTSUP) || (errno == ENOSYS)) {
+ GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name,
+ GF_LOG_WARNING,
+ "Extended attributes not "
+ "supported (try remounting "
+ "brick with 'user_xattr' flag)");
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "listxattr failed "
+ "on %p:",
+ fd);
+ }
+ goto out;
+ }
+ if (size == 0)
+ goto done;
+ }
+ list = alloca(size + 1);
+ if (!list) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ if (have_val)
+ memcpy(list, value_buf, size);
+ else
+ size = sys_flistxattr(_fd, list, size);
+
+ remaining_size = size;
+ list_offset = 0;
+ while (remaining_size > 0) {
+ if (*(list + list_offset) == '\0')
+ break;
+
+ key_len = snprintf(key, sizeof(key), "%s", list + list_offset);
+ have_val = _gf_false;
+ size = sys_fgetxattr(_fd, key, value_buf, XATTR_VAL_BUF_SIZE - 1);
+ if (size >= 0) {
+ have_val = _gf_true;
+ } else {
+ if (errno == ERANGE) {
+ gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED,
+ "fgetxattr failed due to overflow of buffer"
+ " on fd %p: for the key %s ",
+ fd, key);
+ size = sys_fgetxattr(_fd, key, NULL, 0);
+ }
+ if (size == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "fgetxattr failed "
+ "on fd %p for the key %s ",
+ fd, key);
+ break;
+ }
+ }
+ value = GF_MALLOC(size + 1, gf_posix_mt_char);
+ if (!value) {
+ op_ret = -1;
+ op_errno = errno;
+ goto out;
+ }
+ if (have_val) {
+ memcpy(value, value_buf, size);
+ } else {
+ bzero(value, size + 1);
+ size = sys_fgetxattr(_fd, key, value, size);
+ if (size == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "fgetxattr failed o"
+ "n the fd %p for the key %s ",
+ fd, key);
+ GF_FREE(value);
+ break;
+ }
+ }
+ value[size] = '\0';
+
+ op_ret = dict_set_dynptr(dict, key, value, size);
+ if (op_ret) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DICT_SET_FAILED,
+ "dict set operation "
+ "failed on key %s",
+ key);
+ GF_FREE(value);
+ goto out;
+ }
+ remaining_size -= key_len + 1;
+ list_offset += key_len + 1;
+
+ } /* while (remaining_size > 0) */
+
+done:
+ op_ret = size;
+
+ if (xdata && (op_ret >= 0)) {
+ xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, pfd->fd, xdata,
+ &buf);
+ }
+
+ if (dict) {
+ dict_del(dict, GFID_XATTR_KEY);
+ dict_del(dict, GF_XATTR_VOL_ID_KEY);
+ }
+
+out:
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, xattr_rsp);
+
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+
+ if (dict)
+ dict_unref(dict);
+
+ return 0;
+}
+
+static int
+_handle_fsetxattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
+{
+ posix_xattr_filler_t *filler = NULL;
+
+ filler = tmp;
+
+ return posix_fhandle_pair(filler->frame, filler->this, filler->fdnum, k, v,
+ filler->flags, filler->stbuf, filler->fd);
+}
+
+int32_t
+posix_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int flags, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ struct posix_fd *pfd = NULL;
+ int _fd = -1;
+ int ret = -1;
+ struct iatt preop = {
+ 0,
+ };
+ struct iatt postop = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ posix_xattr_filler_t filler = {
+ 0,
+ };
+ struct posix_private *priv = NULL;
+
+ DECLARE_OLD_FS_ID_VAR;
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+ VALIDATE_OR_GOTO(dict, out);
+
+ priv = this->private;
+ DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd);
+ goto out;
+ }
+ _fd = pfd->fd;
+
+ ret = posix_fdstat(this, fd->inode, pfd->fd, &preop);
+ if (ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_FSTAT_FAILED,
+ "fsetxattr (fstat)"
+ "failed on fd=%p",
+ fd);
+ goto out;
+ }
+
+ dict_del(dict, GFID_XATTR_KEY);
+ dict_del(dict, GF_XATTR_VOL_ID_KEY);
+
+ filler.fdnum = _fd;
+ filler.this = this;
+ filler.frame = frame;
+ filler.stbuf = &preop;
+ filler.fd = fd;
+#ifdef GF_DARWIN_HOST_OS
+ filler.flags = map_xattr_flags(flags);
+#else
+ filler.flags = flags;
+#endif
+ op_ret = dict_foreach(dict, _handle_fsetxattr_keyvalue_pair, &filler);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ }
+
+ if (!ret && xdata && dict_get(xdata, GLUSTERFS_DURABLE_OP)) {
+ op_ret = sys_fsync(_fd);
+ if (op_ret < 0) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_WARNING, errno,
+ P_MSG_DURABILITY_REQ_NOT_SATISFIED,
+ "could not satisfy durability request: "
+ "reason ");
+ }
+ }
+
+ ret = posix_fdstat(this, fd->inode, pfd->fd, &postop);
+ if (ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED,
+ "fsetxattr (fstat)"
+ "failed on fd=%p",
+ fd);
+ goto out;
+ }
+ xattr = dict_new();
+ if (!xattr)
+ goto out;
+
+ ret = posix_set_iatt_in_dict(xattr, &preop, &postop);
+
+out:
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xattr);
+
+ if (xattr)
+ dict_unref(xattr);
+
+ return 0;
+}
+
+int
+_posix_remove_xattr(dict_t *dict, char *key, data_t *value, void *data)
+{
+ int32_t op_ret = 0;
+ xlator_t *this = NULL;
+ posix_xattr_filler_t *filler = NULL;
+
+ filler = (posix_xattr_filler_t *)data;
+ this = filler->this;
+#ifdef GF_DARWIN_HOST_OS
+ struct posix_private *priv = NULL;
+ priv = (struct posix_private *)this->private;
+ char *newkey = NULL;
+ if (priv->xattr_user_namespace == XATTR_STRIP) {
+ gf_remove_prefix(XATTR_USER_PREFIX, key, &newkey);
+ gf_msg_debug("remove_xattr", 0, "key %s => %s", key, newkey);
+ key = newkey;
+ }
+#endif
+ /* Bulk remove xattr is internal fop in gluster. Some of the xattrs may
+ * have special behavior. Ex: removexattr("posix.system_acl_access"),
+ * removes more than one xattr on the file that could be present in the
+ * bulk-removal request. Removexattr of these deleted xattrs will fail
+ * with either ENODATA/ENOATTR. Since all this fop cares is removal of the
+ * xattrs in bulk-remove request and if they are already deleted, it can be
+ * treated as success.
+ */
+
+ if (filler->real_path)
+ op_ret = sys_lremovexattr(filler->real_path, key);
+ else
+ op_ret = sys_fremovexattr(filler->fdnum, key);
+
+ if (op_ret == -1) {
+ if (errno == ENODATA || errno == ENOATTR)
+ op_ret = 0;
+ }
+
+ if (op_ret == -1) {
+ filler->op_errno = errno;
+ if (errno != ENOATTR && errno != ENODATA && errno != EPERM) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED,
+ "removexattr failed on "
+ "file/dir %s with gfid: %s (for %s)",
+ filler->real_path ? filler->real_path : "",
+ uuid_utoa(filler->inode->gfid), key);
+ }
+ }
+#ifdef GF_DARWIN_HOST_OS
+ GF_FREE(newkey);
+#endif
+ return op_ret;
+}
+
+int
+posix_common_removexattr(call_frame_t *frame, loc_t *loc, fd_t *fd,
+ const char *name, dict_t *xdata, int *op_errno,
+ dict_t **xdata_rsp)
+{
+ gf_boolean_t bulk_removexattr = _gf_false;
+ gf_boolean_t disallow = _gf_false;
+ char *real_path = NULL;
+ struct posix_fd *pfd = NULL;
+ int op_ret = 0;
+ struct iatt preop = {
+ 0,
+ };
+ struct iatt postop = {
+ 0,
+ };
+ int ret = 0;
+ int _fd = -1;
+ xlator_t *this = frame->this;
+ inode_t *inode = NULL;
+ posix_xattr_filler_t filler = {0};
+
+ DECLARE_OLD_FS_ID_VAR;
+
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ if (loc) {
+ MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+ if (!real_path) {
+ op_ret = -1;
+ *op_errno = ESTALE;
+ goto out;
+ }
+ inode = loc->inode;
+ } else {
+ op_ret = posix_fd_ctx_get(fd, this, &pfd, op_errno);
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, *op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL from fd=%p", fd);
+ goto out;
+ }
+ _fd = pfd->fd;
+ inode = fd->inode;
+ }
+
+ if (posix_is_gfid2path_xattr(name)) {
+ op_ret = -1;
+ *op_errno = ENOATTR;
+ goto out;
+ }
+
+ if (loc) {
+ ret = posix_pstat(this, inode, loc->gfid, real_path, &preop, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PSTAT_FAILED,
+ "pstat operaton failed on %s", real_path);
+ }
+ } else {
+ ret = posix_fdstat(this, inode, _fd, &preop);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FDSTAT_FAILED,
+ "fdstat operaton failed on %s", real_path ? real_path : "");
+ }
+ }
+
+ if (gf_get_index_by_elem(disallow_removexattrs, (char *)name) >= 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED,
+ "Remove xattr called on %s for file/dir %s with gfid: "
+ "%s",
+ name, real_path ? real_path : "", uuid_utoa(inode->gfid));
+ op_ret = -1;
+ *op_errno = EPERM;
+ goto out;
+ } else if (posix_is_bulk_removexattr((char *)name, xdata)) {
+ bulk_removexattr = _gf_true;
+ (void)dict_has_key_from_array(xdata, disallow_removexattrs, &disallow);
+ if (disallow) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED,
+ "Bulk removexattr has keys that shouldn't be "
+ "removed for file/dir %s with gfid: %s",
+ real_path ? real_path : "", uuid_utoa(inode->gfid));
+ op_ret = -1;
+ *op_errno = EPERM;
+ goto out;
+ }
+ }
+
+ if (bulk_removexattr) {
+ filler.real_path = real_path;
+ filler.this = this;
+ filler.fdnum = _fd;
+ filler.inode = inode;
+ op_ret = dict_foreach(xdata, _posix_remove_xattr, &filler);
+ if (op_ret) {
+ *op_errno = filler.op_errno;
+ goto out;
+ }
+ } else {
+ if (loc)
+ op_ret = sys_lremovexattr(real_path, name);
+ else
+ op_ret = sys_fremovexattr(_fd, name);
+ if (op_ret == -1) {
+ *op_errno = errno;
+ if (*op_errno != ENOATTR && *op_errno != ENODATA &&
+ *op_errno != EPERM) {
+ gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_XATTR_FAILED,
+ "removexattr on %s with gfid %s "
+ "(for %s)",
+ real_path, uuid_utoa(inode->gfid), name);
+ }
+ goto out;
+ }
+ }
+
+ if (loc) {
+ posix_set_ctime(frame, this, real_path, -1, inode, NULL);
+ ret = posix_pstat(this, inode, loc->gfid, real_path, &postop,
+ _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PSTAT_FAILED,
+ "pstat operaton failed on %s", real_path);
+ }
+ } else {
+ posix_set_ctime(frame, this, NULL, _fd, inode, NULL);
+ ret = posix_fdstat(this, inode, _fd, &postop);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FDSTAT_FAILED,
+ "fdstat operaton failed on %s", real_path);
+ }
+ }
+ if (ret)
+ goto out;
+ *xdata_rsp = dict_new();
+ if (!*xdata_rsp)
+ goto out;
+
+ ret = posix_set_iatt_in_dict(*xdata_rsp, &preop, &postop);
+
+ op_ret = 0;
+out:
+ SET_TO_OLD_FS_ID();
+ return op_ret;
+}
+
+int32_t
+posix_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ dict_t *xdata_rsp = NULL;
+
+ VALIDATE_OR_GOTO(loc, out);
+
+ op_ret = posix_common_removexattr(frame, loc, NULL, name, xdata, &op_errno,
+ &xdata_rsp);
+out:
+ STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata_rsp);
+
+ if (xdata_rsp)
+ dict_unref(xdata_rsp);
+
+ return 0;
+}
+
+int32_t
+posix_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ dict_t *xdata_rsp = NULL;
+
+ VALIDATE_OR_GOTO(fd, out);
+
+ op_ret = posix_common_removexattr(frame, NULL, fd, name, xdata, &op_errno,
+ &xdata_rsp);
+out:
+ STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, xdata_rsp);
+
+ if (xdata_rsp)
+ dict_unref(xdata_rsp);
+
+ return 0;
+}
+
+int32_t
+posix_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int ret = -1;
+ struct posix_fd *pfd = NULL;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL, fd=%p", fd);
+ goto out;
+ }
+
+ op_ret = 0;
+
+out:
+ STACK_UNWIND_STRICT(fsyncdir, frame, op_ret, op_errno, NULL);
+
+ return 0;
+}
+
+void
+posix_print_xattr(dict_t *this, char *key, data_t *value, void *data)
+{
+ gf_msg_debug("posix", 0, "(key/val) = (%s/%d)", key, data_to_int32(value));
+}
+
+/**
+ * add_array - add two arrays of 32-bit numbers (stored in network byte order)
+ * dest = dest + src
+ * @count: number of 32-bit numbers
+ * FIXME: handle overflow
+ */
+
+static void
+__add_array(int32_t *dest, int32_t *src, int count)
+{
+ int i = 0;
+ int32_t destval = 0;
+ for (i = 0; i < count; i++) {
+ destval = ntoh32(dest[i]);
+ dest[i] = hton32(destval + ntoh32(src[i]));
+ }
+}
+
+static void
+__add_long_array(int64_t *dest, int64_t *src, int count)
+{
+ int i = 0;
+ for (i = 0; i < count; i++) {
+ dest[i] = hton64(ntoh64(dest[i]) + ntoh64(src[i]));
+ }
+}
+
+/* functions:
+ __add_array_with_default
+ __add_long_array_with_default
+
+ xattrop type:
+ GF_XATTROP_ADD_ARRAY_WITH_DEFAULT
+ GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT
+
+ These operations are similar to 'GF_XATTROP_ADD_ARRAY',
+ except that it adds a default value if xattr is missing
+ or its value is zero on disk.
+
+ One use-case of this operation is in inode-quota.
+ When a new directory is created, its default dir_count
+ should be set to 1. So when a xattrop performed setting
+ inode-xattrs, it should account initial dir_count
+ 1 if the xattrs are not present
+
+ Here is the usage of this operation
+
+ value required in xdata for each key
+ struct array {
+ int32_t newvalue_1;
+ int32_t newvalue_2;
+ ...
+ int32_t newvalue_n;
+ int32_t default_1;
+ int32_t default_2;
+ ...
+ int32_t default_n;
+ };
+
+ or
+
+ struct array {
+ int32_t value_1;
+ int32_t value_2;
+ ...
+ int32_t value_n;
+ } data[2];
+ fill data[0] with new value to add
+ fill data[1] with default value
+
+ xattrop GF_XATTROP_ADD_ARRAY_WITH_DEFAULT
+ for i from 1 to n
+ {
+ if (xattr (dest_i) is zero or not set in the disk)
+ dest_i = newvalue_i + default_i
+ else
+ dest_i = dest_i + newvalue_i
+ }
+
+ value in xdata after xattrop is successful
+ struct array {
+ int32_t dest_1;
+ int32_t dest_2;
+ ...
+ int32_t dest_n;
+ };
+*/
+static void
+__add_array_with_default(int32_t *dest, int32_t *src, int count)
+{
+ int i = 0;
+ int32_t destval = 0;
+
+ for (i = 0; i < count; i++) {
+ destval = ntoh32(dest[i]);
+ if (destval == 0)
+ dest[i] = hton32(ntoh32(src[i]) + ntoh32(src[count + i]));
+ else
+ dest[i] = hton32(destval + ntoh32(src[i]));
+ }
+}
+
+static void
+__add_long_array_with_default(int64_t *dest, int64_t *src, int count)
+{
+ int i = 0;
+ int64_t destval = 0;
+
+ for (i = 0; i < count; i++) {
+ destval = ntoh64(dest[i]);
+ if (destval == 0)
+ dest[i] = hton64(ntoh64(src[i]) + ntoh64(src[i + count]));
+ else
+ dest[i] = hton64(destval + ntoh64(src[i]));
+ }
+}
+
+static int
+_posix_handle_xattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp)
+{
+ int size = 0;
+ int count = 0;
+ int op_ret = 0;
+ int op_errno = 0;
+ gf_xattrop_flags_t optype = 0;
+ char *array = NULL;
+ char *dst_data = NULL;
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+ posix_xattr_filler_t *filler = NULL;
+ posix_inode_ctx_t *ctx = NULL;
+
+ filler = tmp;
+
+ optype = (gf_xattrop_flags_t)(filler->flags);
+ this = filler->this;
+ inode = filler->inode;
+ count = v->len;
+ if (optype == GF_XATTROP_ADD_ARRAY_WITH_DEFAULT ||
+ optype == GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT)
+ count = count / 2;
+
+ array = GF_CALLOC(count, sizeof(char), gf_posix_mt_char);
+
+#ifdef GF_DARWIN_HOST_OS
+ struct posix_private *priv = NULL;
+ priv = this->private;
+ if (priv->xattr_user_namespace == XATTR_STRIP) {
+ if (strncmp(k, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) == 0) {
+ k += XATTR_USER_PREFIX_LEN;
+ }
+ }
+#endif
+ op_ret = posix_inode_ctx_get_all(inode, this, &ctx);
+ if (op_ret < 0) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ pthread_mutex_lock(&ctx->xattrop_lock);
+ {
+ if (filler->real_path) {
+ size = sys_lgetxattr(filler->real_path, k, (char *)array, count);
+ } else {
+ size = sys_fgetxattr(filler->fdnum, k, (char *)array, count);
+ }
+
+ op_errno = errno;
+ if ((size == -1) && (op_errno != ENODATA) && (op_errno != ENOATTR)) {
+ if (op_errno == ENOTSUP) {
+ GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name,
+ GF_LOG_WARNING,
+ "Extended attributes not "
+ "supported by filesystem");
+ } else if (op_errno != ENOENT ||
+ !posix_special_xattr(marker_xattrs, k)) {
+ if (filler->real_path)
+ gf_msg(this->name, fop_log_level(GF_FOP_XATTROP, op_errno),
+ op_errno, P_MSG_XATTR_FAILED,
+ "getxattr failed on %s while "
+ "doing xattrop: Key:%s ",
+ filler->real_path, k);
+ else
+ gf_msg(
+ this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED,
+ "fgetxattr failed on gfid=%s "
+ "while doing xattrop: "
+ "Key:%s (%s)",
+ uuid_utoa(filler->inode->gfid), k, strerror(op_errno));
+ }
+
+ op_ret = -1;
+ goto unlock;
+ }
+
+ if (size == -1 && optype == GF_XATTROP_GET_AND_SET) {
+ GF_FREE(array);
+ array = NULL;
+ }
+
+ /* We only write back the xattr if it has been really modified
+ * (i.e. v->data is not all 0's). Otherwise we return its value
+ * but we don't update anything.
+ *
+ * If the xattr does not exist, a value of all 0's is returned
+ * without creating it. */
+ size = count;
+ if (optype != GF_XATTROP_GET_AND_SET &&
+ mem_0filled(v->data, v->len) == 0)
+ goto unlock;
+
+ dst_data = array;
+ switch (optype) {
+ case GF_XATTROP_ADD_ARRAY:
+ __add_array((int32_t *)array, (int32_t *)v->data, count / 4);
+ break;
+
+ case GF_XATTROP_ADD_ARRAY64:
+ __add_long_array((int64_t *)array, (int64_t *)v->data,
+ count / 8);
+ break;
+
+ case GF_XATTROP_ADD_ARRAY_WITH_DEFAULT:
+ __add_array_with_default((int32_t *)array, (int32_t *)v->data,
+ count / 4);
+ break;
+
+ case GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT:
+ __add_long_array_with_default((int64_t *)array,
+ (int64_t *)v->data, count / 8);
+ break;
+
+ case GF_XATTROP_GET_AND_SET:
+ dst_data = v->data;
+ break;
+
+ default:
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_UNKNOWN_OP,
+ "Unknown xattrop type (%d)"
+ " on %s. Please send a bug report to "
+ "gluster-devel@gluster.org",
+ optype, filler->real_path);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto unlock;
+ }
+
+ if (filler->real_path) {
+ size = sys_lsetxattr(filler->real_path, k, dst_data, count, 0);
+ } else {
+ size = sys_fsetxattr(filler->fdnum, k, (char *)dst_data, count, 0);
+ }
+ op_errno = errno;
+ }
+unlock:
+ pthread_mutex_unlock(&ctx->xattrop_lock);
+
+ if (op_ret == -1)
+ goto out;
+
+ if (size == -1) {
+ if (filler->real_path)
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED,
+ "setxattr failed on %s "
+ "while doing xattrop: key=%s",
+ filler->real_path, k);
+ else
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED,
+ "fsetxattr failed on gfid=%s while doing "
+ "xattrop: key=%s (%s)",
+ uuid_utoa(filler->inode->gfid), k, strerror(op_errno));
+ op_ret = -1;
+ goto out;
+ } else if (array) {
+ op_ret = dict_set_bin(filler->xattr, k, array, count);
+ if (op_ret) {
+ if (filler->real_path)
+ gf_msg_debug(this->name, 0,
+ "dict_set_bin failed (path=%s): "
+ "key=%s (%s)",
+ filler->real_path, k, strerror(-size));
+ else
+ gf_msg_debug(this->name, 0,
+ "dict_set_bin failed (gfid=%s): "
+ "key=%s (%s)",
+ uuid_utoa(filler->inode->gfid), k,
+ strerror(-size));
+
+ op_ret = -1;
+ op_errno = EINVAL;
+ GF_FREE(array);
+ array = NULL;
+ goto out;
+ }
+ array = NULL;
+ }
+
+out:
+ if (op_ret < 0)
+ filler->op_errno = op_errno;
+
+ if (array)
+ GF_FREE(array);
+
+ return op_ret;
+}
+
+/**
+ * xattrop - xattr operations - for internal use by GlusterFS
+ * @optype: ADD_ARRAY:
+ * dict should contain:
+ * "key" ==> array of 32-bit numbers
+ */
+
+int
+do_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ int op_ret = 0;
+ int op_errno = 0;
+ int _fd = -1;
+ char *real_path = NULL;
+ struct posix_fd *pfd = NULL;
+ inode_t *inode = NULL;
+ posix_xattr_filler_t filler = {
+ 0,
+ };
+ dict_t *xattr_rsp = NULL;
+ dict_t *xdata_rsp = NULL;
+ struct iatt stbuf = {0};
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(xattr, out);
+ VALIDATE_OR_GOTO(this, out);
+
+ if (fd) {
+ op_ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING,
+ fop_log_level(GF_FOP_FXATTROP, op_errno),
+ P_MSG_PFD_GET_FAILED,
+ "failed to get pfd from"
+ " fd=%p",
+ fd);
+ goto out;
+ }
+ _fd = pfd->fd;
+ }
+
+ if (loc && !gf_uuid_is_null(loc->gfid)) {
+ MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+ if (!real_path) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+ }
+
+ if (real_path) {
+ inode = loc->inode;
+ } else if (fd) {
+ inode = fd->inode;
+ }
+
+ xattr_rsp = dict_new();
+ if (xattr_rsp == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ filler.this = this;
+ filler.fdnum = _fd;
+ filler.real_path = real_path;
+ filler.flags = (int)optype;
+ filler.inode = inode;
+ filler.xattr = xattr_rsp;
+
+ op_ret = dict_foreach(xattr, _posix_handle_xattr_keyvalue_pair, &filler);
+ op_errno = filler.op_errno;
+ if (op_ret < 0)
+ goto out;
+
+ if (!xdata)
+ goto out;
+
+ if (fd) {
+ op_ret = posix_fdstat(this, inode, _fd, &stbuf);
+ } else {
+ op_ret = posix_pstat(this, inode, inode->gfid, real_path, &stbuf,
+ _gf_false);
+ }
+ if (op_ret < 0) {
+ op_errno = errno;
+ goto out;
+ }
+ xdata_rsp = posix_xattr_fill(this, real_path, loc, fd, _fd, xdata, &stbuf);
+ if (!xdata_rsp) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
+ posix_set_mode_in_dict(xdata, xdata_rsp, &stbuf);
+out:
+
+ STACK_UNWIND_STRICT(xattrop, frame, op_ret, op_errno, xattr_rsp, xdata_rsp);
+
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+
+ if (xdata_rsp)
+ dict_unref(xdata_rsp);
+ return 0;
+}
+
+int
+posix_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ do_xattrop(frame, this, loc, NULL, optype, xattr, xdata);
+ return 0;
+}
+
+int
+posix_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ do_xattrop(frame, this, NULL, fd, optype, xattr, xdata);
+ return 0;
+}
+
+int
+posix_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ char *real_path = NULL;
+
+ DECLARE_OLD_FS_ID_VAR;
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(loc, out);
+
+ MAKE_INODE_HANDLE(real_path, this, loc, NULL);
+ if (!real_path) {
+ op_ret = -1;
+ op_errno = errno;
+ goto out;
+ }
+
+ op_ret = sys_access(real_path, mask & 07);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_ACCESS_FAILED,
+ "access failed on %s", real_path);
+ goto out;
+ }
+ op_ret = 0;
+
+out:
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(access, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+int32_t
+posix_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int _fd = -1;
+ struct iatt preop = {
+ 0,
+ };
+ struct iatt postop = {
+ 0,
+ };
+ struct posix_fd *pfd = NULL;
+ int ret = -1;
+ struct posix_private *priv = NULL;
+ dict_t *rsp_xdata = NULL;
+
+ DECLARE_OLD_FS_ID_VAR;
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL, fd=%p", fd);
+ goto out;
+ }
+
+ _fd = pfd->fd;
+
+ op_ret = posix_fdstat(this, fd->inode, _fd, &preop);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "pre-operation fstat failed on fd=%p", fd);
+ goto out;
+ }
+
+ if (xdata) {
+ op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata,
+ &rsp_xdata, _gf_false);
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "file state check failed, fd %p", fd);
+ op_errno = EIO;
+ goto out;
+ }
+ }
+
+ posix_update_iatt_buf(&preop, _fd, NULL, xdata);
+ op_ret = sys_ftruncate(_fd, offset);
+
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED,
+ "ftruncate failed on fd=%p (%" PRId64 "", fd, offset);
+ goto out;
+ }
+
+ op_ret = posix_fdstat(this, fd->inode, _fd, &postop);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "post-operation fstat failed on fd=%p", fd);
+ goto out;
+ }
+
+ posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &postop);
+
+ op_ret = 0;
+
+out:
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, &preop, &postop,
+ NULL);
+
+ return 0;
+}
+
+int32_t
+posix_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int _fd = -1;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ struct iatt buf = {
+ 0,
+ };
+ struct posix_fd *pfd = NULL;
+ dict_t *xattr_rsp = NULL;
+ int ret = -1;
+ struct posix_private *priv = NULL;
+
+ DECLARE_OLD_FS_ID_VAR;
+ SET_FS_ID(frame->root->uid, frame->root->gid);
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+
+ if (!xdata)
+ gf_msg_trace(this->name, 0, "null xdata passed, fd %p", fd);
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL, fd=%p", fd);
+ goto out;
+ }
+
+ _fd = pfd->fd;
+
+ op_ret = posix_fdstat(this, fd->inode, _fd, &buf);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "fstat failed on fd=%p", fd);
+ goto out;
+ }
+
+ if (xdata) {
+ xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, _fd, xdata, &buf);
+
+ op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &buf, NULL, xdata,
+ &xattr_rsp, _gf_false);
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "file state check failed, fd %p", fd);
+ }
+ posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, _fd, NULL);
+ }
+
+ posix_update_iatt_buf(&buf, _fd, NULL, xdata);
+ op_ret = 0;
+
+out:
+ SET_TO_OLD_FS_ID();
+
+ STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, &buf, xattr_rsp);
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+ return 0;
+}
+
+int32_t
+posix_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata)
+{
+ struct gf_lease nullease = {
+ 0,
+ };
+
+ gf_msg(this->name, GF_LOG_CRITICAL, EINVAL, P_MSG_LEASE_DISABLED,
+ "\"features/leases\" translator is not loaded. You need"
+ "to use it for proper functioning of your application");
+
+ STACK_UNWIND_STRICT(lease, frame, -1, ENOSYS, &nullease, NULL);
+ return 0;
+}
+
+static int gf_posix_lk_log;
+
+int32_t
+posix_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
+{
+ struct gf_flock nullock = {
+ 0,
+ };
+
+ GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+ "\"features/locks\" translator is "
+ "not loaded. You need to use it for proper "
+ "functioning of your application.");
+
+ STACK_UNWIND_STRICT(lk, frame, -1, ENOSYS, &nullock, NULL);
+ return 0;
+}
+
+int32_t
+posix_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+ "\"features/locks\" translator is "
+ "not loaded. You need to use it for proper "
+ "functioning of your application.");
+
+ STACK_UNWIND_STRICT(inodelk, frame, -1, ENOSYS, NULL);
+ return 0;
+}
+
+int32_t
+posix_finodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+ "\"features/locks\" translator is "
+ "not loaded. You need to use it for proper "
+ "functioning of your application.");
+
+ STACK_UNWIND_STRICT(finodelk, frame, -1, ENOSYS, NULL);
+ return 0;
+}
+
+int32_t
+posix_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+ "\"features/locks\" translator is "
+ "not loaded. You need to use it for proper "
+ "functioning of your application.");
+
+ STACK_UNWIND_STRICT(entrylk, frame, -1, ENOSYS, NULL);
+ return 0;
+}
+
+int32_t
+posix_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+ "\"features/locks\" translator is "
+ "not loaded. You need to use it for proper "
+ "functioning of your application.");
+
+ STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOSYS, NULL);
+ return 0;
+}
+
+int
+posix_fill_readdir(fd_t *fd, DIR *dir, off_t off, size_t size,
+ gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs)
+{
+ off_t in_case = -1;
+ off_t last_off = 0;
+ size_t filled = 0;
+ int count = 0;
+ int32_t this_size = -1;
+ gf_dirent_t *this_entry = NULL;
+ struct posix_fd *pfd = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ char *hpath = NULL;
+ int len = 0;
+ int ret = 0;
+ int op_errno = 0;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL, fd=%p", fd);
+ count = -1;
+ errno = op_errno;
+ goto out;
+ }
+
+ if (skip_dirs) {
+ hpath = alloca(PATH_MAX);
+ len = posix_handle_path(this, fd->inode->gfid, NULL, hpath, PATH_MAX);
+ if (len <= 0) {
+ errno = ESTALE;
+ count = -1;
+ goto out;
+ }
+ len = strlen(hpath);
+ hpath[len] = '/';
+ }
+
+ if (!off) {
+ rewinddir(dir);
+ } else {
+ seekdir(dir, off);
+#ifndef GF_LINUX_HOST_OS
+ if ((u_long)telldir(dir) != off && off != pfd->dir_eof) {
+ gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, P_MSG_DIR_OPERATION_FAILED,
+ "seekdir(0x%llx) failed on dir=%p: "
+ "Invalid argument (offset reused from "
+ "another DIR * structure?)",
+ off, dir);
+ errno = EINVAL;
+ count = -1;
+ goto out;
+ }
+#endif /* GF_LINUX_HOST_OS */
+ }
+
+ while (filled <= size) {
+ in_case = (u_long)telldir(dir);
+
+ if (in_case == -1) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, P_MSG_DIR_OPERATION_FAILED,
+ "telldir failed on dir=%p", dir);
+ goto out;
+ }
+
+ errno = 0;
+
+ entry = sys_readdir(dir, scratch);
+
+ if (!entry || errno != 0) {
+ if (errno == EBADF) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno,
+ P_MSG_DIR_OPERATION_FAILED, "readdir failed on dir=%p",
+ dir);
+ goto out;
+ }
+ break;
+ }
+
+#ifdef __NetBSD__
+ /*
+ * NetBSD with UFS1 backend uses backing files for
+ * extended attributes. They can be found in a
+ * .attribute file located at the root of the filesystem
+ * We hide it to glusterfs clients, since chaos will occur
+ * when the cluster/dht xlator decides to distribute
+ * exended attribute backing file across storage servers.
+ */
+ if (__is_root_gfid(fd->inode->gfid) == 0 &&
+ (!strcmp(entry->d_name, ".attribute")))
+ continue;
+#endif /* __NetBSD__ */
+
+ if (__is_root_gfid(fd->inode->gfid) &&
+ (!strcmp(GF_HIDDEN_PATH, entry->d_name))) {
+ continue;
+ }
+
+ if (skip_dirs) {
+ if (DT_ISDIR(entry->d_type)) {
+ continue;
+ } else if (hpath) {
+ strcpy(&hpath[len + 1], entry->d_name);
+ ret = sys_lstat(hpath, &stbuf);
+ if (!ret && S_ISDIR(stbuf.st_mode))
+ continue;
+ }
+ }
+
+ this_size = max(sizeof(gf_dirent_t), sizeof(gfs3_dirplist)) +
+ strlen(entry->d_name) + 1;
+
+ if (this_size + filled > size) {
+ seekdir(dir, in_case);
+#ifndef GF_LINUX_HOST_OS
+ if ((u_long)telldir(dir) != in_case && in_case != pfd->dir_eof) {
+ gf_msg(THIS->name, GF_LOG_ERROR, EINVAL,
+ P_MSG_DIR_OPERATION_FAILED,
+ "seekdir(0x%llx) failed on dir=%p: "
+ "Invalid argument (offset reused from "
+ "another DIR * structure?)",
+ in_case, dir);
+ errno = EINVAL;
+ count = -1;
+ goto out;
+ }
+#endif /* GF_LINUX_HOST_OS */
+ break;
+ }
+
+ this_entry = gf_dirent_for_name(entry->d_name);
+
+ if (!this_entry) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno,
+ P_MSG_GF_DIRENT_CREATE_FAILED,
+ "could not create "
+ "gf_dirent for entry %s",
+ entry->d_name);
+ goto out;
+ }
+ /*
+ * we store the offset of next entry here, which is
+ * probably not intended, but code using syncop_readdir()
+ * (glfs-heal.c, afr-self-heald.c, pump.c) rely on it
+ * for directory read resumption.
+ */
+ last_off = (u_long)telldir(dir);
+ this_entry->d_off = last_off;
+ this_entry->d_ino = entry->d_ino;
+ this_entry->d_type = entry->d_type;
+
+ list_add_tail(&this_entry->list, &entries->list);
+
+ filled += this_size;
+ count++;
+ }
+
+ if ((!sys_readdir(dir, scratch) && (errno == 0))) {
+ /* Indicate EOF */
+ errno = ENOENT;
+ /* Remember EOF offset for later detection */
+ pfd->dir_eof = (u_long)last_off;
+ }
+out:
+ return count;
+}
+
+dict_t *
+posix_entry_xattr_fill(xlator_t *this, inode_t *inode, fd_t *fd,
+ char *entry_path, dict_t *dict, struct iatt *stbuf)
+{
+ loc_t tmp_loc = {
+ 0,
+ };
+
+ /* if we don't send the 'loc', open-fd-count be a problem. */
+ tmp_loc.inode = inode;
+
+ return posix_xattr_fill(this, entry_path, &tmp_loc, NULL, -1, dict, stbuf);
+}
+
+int
+posix_readdirp_fill(xlator_t *this, fd_t *fd, gf_dirent_t *entries,
+ dict_t *dict)
+{
+ gf_dirent_t *entry = NULL;
+ inode_table_t *itable = NULL;
+ inode_t *inode = NULL;
+ char *hpath = NULL;
+ int len = 0;
+ struct iatt stbuf = {
+ 0,
+ };
+ uuid_t gfid;
+ int ret = -1;
+
+ if (list_empty(&entries->list))
+ return 0;
+
+ itable = fd->inode->table;
+
+ hpath = alloca(PATH_MAX);
+ len = posix_handle_path(this, fd->inode->gfid, NULL, hpath, PATH_MAX);
+ if (len <= 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLEPATH_FAILED,
+ "Failed to create handle path, fd=%p, gfid=%s", fd,
+ uuid_utoa(fd->inode->gfid));
+ return -1;
+ }
+ len = strlen(hpath);
+ hpath[len] = '/';
+
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ inode = inode_grep(fd->inode->table, fd->inode, entry->d_name);
+ if (inode)
+ gf_uuid_copy(gfid, inode->gfid);
+ else
+ bzero(gfid, 16);
+
+ strcpy(&hpath[len + 1], entry->d_name);
+
+ ret = posix_pstat(this, inode, gfid, hpath, &stbuf, _gf_false);
+
+ if (ret == -1) {
+ if (inode)
+ inode_unref(inode);
+ continue;
+ }
+
+ posix_update_iatt_buf(&stbuf, -1, hpath, dict);
+
+ if (!inode)
+ inode = inode_find(itable, stbuf.ia_gfid);
+
+ if (!inode)
+ inode = inode_new(itable);
+
+ entry->inode = inode;
+
+ if (dict) {
+ entry->dict = posix_entry_xattr_fill(this, entry->inode, fd, hpath,
+ dict, &stbuf);
+ }
+
+ entry->d_stat = stbuf;
+ if (stbuf.ia_ino)
+ entry->d_ino = stbuf.ia_ino;
+
+ if (entry->d_type == DT_UNKNOWN && !IA_ISINVAL(stbuf.ia_type)) {
+ /* The platform supports d_type but the underlying
+ filesystem doesn't. We set d_type to the correct
+ value from ia_type */
+ entry->d_type = gf_d_type_from_ia_type(stbuf.ia_type);
+ }
+
+ inode = NULL;
+ }
+
+ return 0;
+}
+
+int32_t
+posix_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, int whichop, dict_t *dict)
+{
+ struct posix_fd *pfd = NULL;
+ DIR *dir = NULL;
+ int ret = -1;
+ int count = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ gf_dirent_t entries;
+ int32_t skip_dirs = 0;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ INIT_LIST_HEAD(&entries.list);
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+ "pfd is NULL, fd=%p", fd);
+ goto out;
+ }
+
+ dir = pfd->dir;
+
+ if (!dir) {
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, P_MSG_PFD_NULL,
+ "dir is NULL for fd=%p", fd);
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /* When READDIR_FILTER option is set to on, we can filter out
+ * directory's entry from the entry->list.
+ */
+ ret = dict_get_int32(dict, GF_READDIR_SKIP_DIRS, &skip_dirs);
+
+ LOCK(&fd->lock);
+ {
+ /* posix_fill_readdir performs multiple separate individual
+ readdir() calls to fill up the buffer.
+
+ In case of NFS where the same anonymous FD is shared between
+ different applications, reading a common directory can
+ result in the anonymous fd getting re-used unsafely between
+ the two readdir requests (in two different io-threads).
+
+ It would also help, in the future, to replace the loop
+ around readdir() with a single large getdents() call.
+ */
+ count = posix_fill_readdir(fd, dir, off, size, &entries, this,
+ skip_dirs);
+ }
+ UNLOCK(&fd->lock);
+
+ /* pick ENOENT to indicate EOF */
+ op_errno = errno;
+ op_ret = count;
+
+ if (whichop != GF_FOP_READDIRP)
+ goto out;
+
+ posix_readdirp_fill(this, fd, &entries, dict);
+
+out:
+ if (whichop == GF_FOP_READDIR)
+ STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, &entries, NULL);
+ else
+ STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, &entries, NULL);
+
+ gf_dirent_free(&entries);
+
+ return 0;
+}
+
+int32_t
+posix_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ posix_do_readdir(frame, this, fd, size, off, GF_FOP_READDIR, xdata);
+ return 0;
+}
+
+int32_t
+posix_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict)
+{
+ gf_dirent_t entries;
+ int32_t op_ret = -1, op_errno = 0;
+ gf_dirent_t *entry = NULL;
+
+ if ((dict != NULL) && (dict_get(dict, GET_ANCESTRY_DENTRY_KEY))) {
+ INIT_LIST_HEAD(&entries.list);
+
+ op_ret = posix_get_ancestry(this, fd->inode, &entries, NULL,
+ POSIX_ANCESTRY_DENTRY, &op_errno, dict);
+ if (op_ret >= 0) {
+ op_ret = 0;
+
+ list_for_each_entry(entry, &entries.list, list) { op_ret++; }
+ }
+
+ STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, &entries, NULL);
+
+ gf_dirent_free(&entries);
+ return 0;
+ }
+
+ posix_do_readdir(frame, this, fd, size, off, GF_FOP_READDIRP, dict);
+ return 0;
+}
+
+int32_t
+posix_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata)
+{
+ char *alloc_buf = NULL;
+ char *buf = NULL;
+ int _fd = -1;
+ struct posix_fd *pfd = NULL;
+ int op_ret = -1;
+ int op_errno = 0;
+ int ret = 0;
+ ssize_t bytes_read = 0;
+ int32_t weak_checksum = 0;
+ int32_t zerofillcheck = 0;
+ /* Protocol version 4 uses 32 bytes i.e SHA256_DIGEST_LENGTH,
+ so this is used. */
+ unsigned char md5_checksum[SHA256_DIGEST_LENGTH] = {0};
+ unsigned char strong_checksum[SHA256_DIGEST_LENGTH] = {0};
+ unsigned char *checksum = NULL;
+ struct posix_private *priv = NULL;
+ dict_t *rsp_xdata = NULL;
+ gf_boolean_t buf_has_zeroes = _gf_false;
+ struct iatt preop = {
+ 0,
+ };
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(this, out);
+ VALIDATE_OR_GOTO(fd, out);
+
+ priv = this->private;
+
+ alloc_buf = _page_aligned_alloc(len, &buf);
+ if (!alloc_buf) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ rsp_xdata = dict_new();
+ if (!rsp_xdata) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_PFD_NULL,
+ "pfd is NULL, fd=%p", fd);
+ goto out;
+ }
+
+ _fd = pfd->fd;
+
+ if (xdata) {
+ op_ret = posix_fdstat(this, fd->inode, _fd, &preop);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+ "pre-operation fstat failed on fd=%p", fd);
+ goto out;
+ }
+
+ op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata,
+ &rsp_xdata, _gf_false);
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "file state check failed, fd %p", fd);
+ op_errno = EIO;
+ goto out;
+ }
+ }
+
+ LOCK(&fd->lock);
+ {
+ if (priv->aio_capable && priv->aio_init_done)
+ __posix_fd_set_odirect(fd, pfd, 0, offset, len);
+
+ bytes_read = sys_pread(_fd, buf, len, offset);
+ if (bytes_read < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PREAD_FAILED,
+ "pread of %d bytes returned %zd", len, bytes_read);
+
+ op_errno = errno;
+ }
+ }
+ UNLOCK(&fd->lock);
+
+ if (bytes_read < 0)
+ goto out;
+
+ if (xdata &&
+ dict_get_int32(xdata, "check-zero-filled", &zerofillcheck) == 0) {
+ buf_has_zeroes = (mem_0filled(buf, bytes_read)) ? _gf_false : _gf_true;
+ ret = dict_set_uint32(rsp_xdata, "buf-has-zeroes", buf_has_zeroes);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED,
+ "%s: Failed to set "
+ "dictionary value for key: %s",
+ uuid_utoa(fd->inode->gfid), "buf-has-zeroes");
+ op_errno = -ret;
+ goto out;
+ }
+ }
+ weak_checksum = gf_rsync_weak_checksum((unsigned char *)buf, (size_t)ret);
+
+ if (priv->fips_mode_rchecksum) {
+ ret = dict_set_int32(rsp_xdata, "fips-mode-rchecksum", 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED,
+ "%s: Failed to set "
+ "dictionary value for key: %s",
+ uuid_utoa(fd->inode->gfid), "fips-mode-rchecksum");
+ goto out;
+ }
+ checksum = strong_checksum;
+ gf_rsync_strong_checksum((unsigned char *)buf, (size_t)bytes_read,
+ (unsigned char *)checksum);
+ } else {
+ checksum = md5_checksum;
+ gf_rsync_md5_checksum((unsigned char *)buf, (size_t)bytes_read,
+ (unsigned char *)checksum);
+ }
+ op_ret = 0;
+
+ posix_set_ctime(frame, this, NULL, _fd, fd->inode, NULL);
+
+out:
+ STACK_UNWIND_STRICT(rchecksum, frame, op_ret, op_errno, weak_checksum,
+ checksum, rsp_xdata);
+ if (rsp_xdata)
+ dict_unref(rsp_xdata);
+ GF_FREE(alloc_buf);
+
+ return 0;
+}
+
+int
+posix_forget(xlator_t *this, inode_t *inode)
+{
+ int ret = 0;
+ char *unlink_path = NULL;
+ uint64_t ctx_uint1 = 0;
+ uint64_t ctx_uint2 = 0;
+ posix_inode_ctx_t *ctx = NULL;
+ posix_mdata_t *mdata = NULL;
+ struct posix_private *priv_posix = NULL;
+
+ priv_posix = (struct posix_private *)this->private;
+ if (!priv_posix)
+ return 0;
+
+ ret = inode_ctx_del2(inode, this, &ctx_uint1, &ctx_uint2);
+ if (!ctx_uint1)
+ goto check_ctx2;
+
+ ctx = (posix_inode_ctx_t *)(uintptr_t)ctx_uint1;
+
+ if (ctx->unlink_flag == GF_UNLINK_TRUE) {
+ POSIX_GET_FILE_UNLINK_PATH(priv_posix->base_path, inode->gfid,
+ unlink_path);
+ if (!unlink_path) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED,
+ "Failed to remove gfid :%s", uuid_utoa(inode->gfid));
+ ret = -1;
+ goto ctx_free;
+ }
+ ret = sys_unlink(unlink_path);
+ }
+ctx_free:
+ pthread_mutex_destroy(&ctx->xattrop_lock);
+ pthread_mutex_destroy(&ctx->write_atomic_lock);
+ pthread_mutex_destroy(&ctx->pgfid_lock);
+ GF_FREE(ctx);
+
+check_ctx2:
+ if (ctx_uint2) {
+ mdata = (posix_mdata_t *)(uintptr_t)ctx_uint2;
+ }
+
+ GF_FREE(mdata);
+ return ret;
+}