diff options
Diffstat (limited to 'xlators/storage/posix/src/posix-inode-fd-ops.c')
| -rw-r--r-- | xlators/storage/posix/src/posix-inode-fd-ops.c | 6004 |
1 files changed, 6004 insertions, 0 deletions
diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c new file mode 100644 index 00000000000..6d54d37e5aa --- /dev/null +++ b/xlators/storage/posix/src/posix-inode-fd-ops.c @@ -0,0 +1,6004 @@ +/* + Copyright (c) 2006-2017 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#define __XOPEN_SOURCE 500 + +/* for SEEK_HOLE and SEEK_DATA */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <openssl/md5.h> +#include <stdint.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <errno.h> +#include <libgen.h> +#include <pthread.h> +#include <ftw.h> +#include <sys/stat.h> +#include <signal.h> +#include <sys/uio.h> +#include <unistd.h> +#include <regex.h> + +#ifndef GF_BSD_HOST_OS +#include <alloca.h> +#endif /* GF_BSD_HOST_OS */ + +#ifdef HAVE_LINKAT +#include <fcntl.h> +#endif /* HAVE_LINKAT */ + +#include <glusterfs/checksum.h> +#include <glusterfs/dict.h> +#include <glusterfs/logging.h> +#include "posix-handle.h" +#include <glusterfs/compat-errno.h> +#include <glusterfs/compat.h> +#include <glusterfs/byte-order.h> +#include <glusterfs/syscall.h> +#include <glusterfs/statedump.h> +#include <glusterfs/locking.h> +#include <glusterfs/timer.h> +#include "glusterfs3-xdr.h" +#include <glusterfs/glusterfs-acl.h> +#include "posix-messages.h" +#include "posix-metadata.h" +#include <glusterfs/events.h> +#include "posix-gfid-path.h" +#include <glusterfs/compat-uuid.h> +#include <glusterfs/common-utils.h> + +extern char *marker_xattrs[]; +#define ALIGN_SIZE 4096 + +#undef HAVE_SET_FSID +#ifdef HAVE_SET_FSID + +#define DECLARE_OLD_FS_ID_VAR \ + uid_t old_fsuid; \ + gid_t old_fsgid; + +#define SET_FS_ID(uid, gid) \ + do { \ + 
old_fsuid = setfsuid(uid); \ + old_fsgid = setfsgid(gid); \ + } while (0) + +#define SET_TO_OLD_FS_ID() \ + do { \ + setfsuid(old_fsuid); \ + setfsgid(old_fsgid); \ + } while (0) + +#else + +#define DECLARE_OLD_FS_ID_VAR +#define SET_FS_ID(uid, gid) +#define SET_TO_OLD_FS_ID() + +#endif + +/* Setting microseconds or nanoseconds depending on what's supported: + The passed in `tv` can be + struct timespec + if supported (better, because it supports nanosecond resolution) or + struct timeval + otherwise. */ +#if HAVE_UTIMENSAT +#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) tv.tv_nsec = nanosecs +#else +#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \ + tv.tv_usec = nanosecs / 1000 +#endif + +static char *disallow_removexattrs[] = {GF_XATTR_VOL_ID_KEY, GFID_XATTR_KEY, + NULL}; + +void +posix_cs_build_xattr_rsp(xlator_t *this, dict_t **rsp, dict_t *req, int fd, + char *loc) +{ + int ret = 0; + uuid_t uuid; + + if (!dict_get_sizen(req, GF_CS_OBJECT_STATUS)) + return; + + if (!(*rsp)) { + *rsp = dict_new(); + if (!(*rsp)) { + return; + } + } + + if (fd != -1) { + if (dict_get_sizen(req, GF_CS_XATTR_ARCHIVE_UUID)) { + ret = sys_fgetxattr(fd, GF_CS_XATTR_ARCHIVE_UUID, uuid, 16); + if (ret > 0) { + ret = dict_set_gfuuid(*rsp, GF_CS_XATTR_ARCHIVE_UUID, uuid, + true); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "%s: Failed to set " + "dictionary value for %s for fd %d", + uuid_utoa(uuid), GF_CS_XATTR_ARCHIVE_UUID, fd); + } + } else { + gf_msg_debug(this->name, 0, "getxattr failed on %s for fd %d", + GF_CS_XATTR_ARCHIVE_UUID, fd); + } + } + } else { + if (dict_get_sizen(req, GF_CS_XATTR_ARCHIVE_UUID)) { + ret = sys_lgetxattr(loc, GF_CS_XATTR_ARCHIVE_UUID, uuid, 16); + if (ret > 0) { + ret = dict_set_gfuuid(*rsp, GF_CS_XATTR_ARCHIVE_UUID, uuid, + true); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "%s: Failed to set " + "dictionary value for %s for loc %s", + uuid_utoa(uuid), 
GF_CS_XATTR_ARCHIVE_UUID, loc); + } + } else { + gf_msg_debug(this->name, 0, "getxattr failed on %s for %s", + GF_CS_XATTR_ARCHIVE_UUID, loc); + } + } + } + return; +} + +int32_t +posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + struct iatt buf = { + 0, + }; + int32_t op_ret = -1; + int32_t op_errno = 0; + struct posix_private *priv = NULL; + char *real_path = NULL; + dict_t *xattr_rsp = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(loc, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + + MAKE_INODE_HANDLE(real_path, this, loc, &buf); + + if (op_ret == -1) { + op_errno = errno; + if (op_errno == ENOENT) { + gf_msg_debug(this->name, 0, + "lstat on gfid-handle %s (path: %s)" + "failed: %s", + real_path ? real_path : "<null>", loc->path, + strerror(op_errno)); + } else { + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_LSTAT_FAILED, + "lstat on gfid-handle %s (path: %s) failed", + real_path ? 
real_path : "<null>", loc->path); + } + goto out; + } + if (xdata) { + xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, + &buf); + + posix_cs_maintenance(this, NULL, loc, NULL, &buf, real_path, xdata, + &xattr_rsp, _gf_true); + + posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, -1, real_path); + } + + posix_update_iatt_buf(&buf, -1, real_path, xdata); + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, &buf, xattr_rsp); + if (xattr_rsp) + dict_unref(xattr_rsp); + + return 0; +} + +static int +posix_do_chmod(xlator_t *this, const char *path, struct iatt *stbuf) +{ + int32_t ret = -1; + mode_t mode = 0; + mode_t mode_bit = 0; + struct posix_private *priv = NULL; + struct stat stat; + int is_symlink = 0; + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + ret = sys_lstat(path, &stat); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_LSTAT_FAILED, + "lstat failed: %s", path); + goto out; + } + + if (S_ISLNK(stat.st_mode)) + is_symlink = 1; + + if (S_ISDIR(stat.st_mode)) { + mode = st_mode_from_ia(stbuf->ia_prot, stbuf->ia_type); + mode_bit = (mode & priv->create_directory_mask) | + priv->force_directory_mode; + mode = posix_override_umask(mode, mode_bit); + } else { + mode = st_mode_from_ia(stbuf->ia_prot, stbuf->ia_type); + mode_bit = (mode & priv->create_mask) | priv->force_create_mode; + mode = posix_override_umask(mode, mode_bit); + } + ret = lchmod(path, mode); + if ((ret == -1) && (errno == ENOSYS)) { + /* in Linux symlinks are always in mode 0777 and no + such call as lchmod exists. 
+ */ + gf_msg_debug(this->name, 0, "%s (%s)", path, strerror(errno)); + if (is_symlink) { + ret = 0; + goto out; + } + + ret = sys_chmod(path, mode); + } +out: + return ret; +} + +static int +posix_do_chown(xlator_t *this, const char *path, struct iatt *stbuf, + int32_t valid) +{ + int32_t ret = -1; + uid_t uid = -1; + gid_t gid = -1; + + if (valid & GF_SET_ATTR_UID) + uid = stbuf->ia_uid; + + if (valid & GF_SET_ATTR_GID) + gid = stbuf->ia_gid; + + ret = sys_lchown(path, uid, gid); + + return ret; +} + +static int +posix_do_utimes(xlator_t *this, const char *path, struct iatt *stbuf, int valid) +{ + int32_t ret = -1; +#if defined(HAVE_UTIMENSAT) + struct timespec tv[2] = {{ + 0, + }, + { + 0, + }}; +#else + struct timeval tv[2] = {{ + 0, + }, + { + 0, + }}; +#endif + struct stat stat; + int is_symlink = 0; + + ret = sys_lstat(path, &stat); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED, "%s", + path); + goto out; + } + + if (S_ISLNK(stat.st_mode)) + is_symlink = 1; + + if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { + tv[0].tv_sec = stbuf->ia_atime; + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[0], stbuf->ia_atime_nsec); + } else { + /* atime is not given, use current values */ + tv[0].tv_sec = ST_ATIM_SEC(&stat); + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[0], ST_ATIM_NSEC(&stat)); + } + + if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) { + tv[1].tv_sec = stbuf->ia_mtime; + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[1], stbuf->ia_mtime_nsec); + } else { + /* mtime is not given, use current values */ + tv[1].tv_sec = ST_MTIM_SEC(&stat); + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[1], ST_MTIM_NSEC(&stat)); + } + + ret = PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv); + if ((ret == -1) && (errno == ENOSYS)) { + gf_msg_debug(this->name, 0, "%s (%s)", path, strerror(errno)); + if (is_symlink) { + ret = 0; + goto out; + } + + ret = PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv); + } + +out: + return ret; +} + +int +posix_setattr(call_frame_t *frame, xlator_t 
*this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = 0; + struct iatt statpre = { + 0, + }; + struct iatt statpost = { + 0, + }; + dict_t *xattr_rsp = NULL; + struct posix_private *priv = NULL; + + priv = this->private; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(loc, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + MAKE_INODE_HANDLE(real_path, this, loc, &statpre); + + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "setattr (lstat) on gfid-handle %s (path: %s) failed", + real_path ? real_path : "<null>", loc->path); + goto out; + } + + if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) { + op_ret = posix_do_chown(this, real_path, stbuf, valid); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CHOWN_FAILED, + "setattr (chown) on %s " + "failed", + real_path); + goto out; + } + } + + if (valid & GF_SET_ATTR_MODE) { + op_ret = posix_do_chmod(this, real_path, stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CHMOD_FAILED, + "setattr (chmod) on gfid-handle %s (path: %s) " + "failed", + real_path, loc->path); + goto out; + } + } + + if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) { + op_ret = posix_do_utimes(this, real_path, stbuf, valid); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UTIMES_FAILED, + "setattr (utimes) on gfid-handle %s (path: %s) " + "failed", + real_path, loc->path); + goto out; + } + posix_update_utime_in_mdata(this, real_path, -1, loc->inode, + &frame->root->ctime, stbuf, valid); + } + + if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) { + posix_update_ctime_in_mdata(this, real_path, -1, loc->inode, + &frame->root->ctime, stbuf, valid); + } + + if (!valid) { + op_ret = sys_lchown(real_path, -1, -1); 
+ if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LCHOWN_FAILED, + "lchown (gfid-handle: %s, path: %s, -1, -1) " + "failed", + real_path, loc->path); + + goto out; + } + } + + op_ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &statpost, + _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "setattr (lstat) on gfid-handle %s (path: %s) failed", real_path, + loc->path); + goto out; + } + + posix_set_ctime(frame, this, real_path, -1, loc->inode, &statpost); + + if (xdata) + xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, + &statpost); + posix_update_iatt_buf(&statpre, -1, real_path, xdata); + posix_update_iatt_buf(&statpost, -1, real_path, xdata); + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, &statpre, &statpost, + xattr_rsp); + if (xattr_rsp) + dict_unref(xattr_rsp); + + return 0; +} + +int32_t +posix_do_fchown(xlator_t *this, int fd, struct iatt *stbuf, int32_t valid) +{ + int ret = -1; + uid_t uid = -1; + gid_t gid = -1; + + if (valid & GF_SET_ATTR_UID) + uid = stbuf->ia_uid; + + if (valid & GF_SET_ATTR_GID) + gid = stbuf->ia_gid; + + ret = sys_fchown(fd, uid, gid); + + return ret; +} + +int32_t +posix_do_fchmod(xlator_t *this, int fd, struct iatt *stbuf) +{ + int32_t ret = -1; + mode_t mode = 0; + mode_t mode_bit = 0; + struct posix_private *priv = NULL; + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + mode = st_mode_from_ia(stbuf->ia_prot, stbuf->ia_type); + mode_bit = (mode & priv->create_mask) | priv->force_create_mode; + mode = posix_override_umask(mode, mode_bit); + ret = sys_fchmod(fd, mode); +out: + return ret; +} + +static int +posix_do_futimes(xlator_t *this, int fd, struct iatt *stbuf, int valid) +{ + int32_t ret = -1; + struct timeval tv[2] = {{ + 0, + }, + { + 0, + }}; + struct stat stat = { + 0, + }; + gf_boolean_t fstat_executed = _gf_false; + + if 
((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { + tv[0].tv_sec = stbuf->ia_atime; + tv[0].tv_usec = stbuf->ia_atime_nsec / 1000; + } else { + ret = sys_fstat(fd, &stat); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED, + "%d", fd); + goto out; + } + fstat_executed = _gf_true; + /* atime is not given, use current values */ + tv[0].tv_sec = ST_ATIM_SEC(&stat); + tv[0].tv_usec = ST_ATIM_NSEC(&stat) / 1000; + } + + if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) { + tv[1].tv_sec = stbuf->ia_mtime; + tv[1].tv_usec = stbuf->ia_mtime_nsec / 1000; + } else { + if (!fstat_executed) { + ret = sys_fstat(fd, &stat); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED, + "%d", fd); + goto out; + } + } + /* mtime is not given, use current values */ + tv[1].tv_sec = ST_MTIM_SEC(&stat); + tv[1].tv_usec = ST_MTIM_NSEC(&stat) / 1000; + } + + ret = sys_futimes(fd, tv); + if (ret == -1) + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FUTIMES_FAILED, "%d", fd); + +out: + return ret; +} + +int +posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + struct iatt statpre = { + 0, + }; + struct iatt statpost = { + 0, + }; + struct posix_private *priv = NULL; + struct posix_fd *pfd = NULL; + dict_t *xattr_rsp = NULL; + int32_t ret = -1; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID(frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd); + goto out; + } + + op_ret = posix_fdstat(this, fd->inode, pfd->fd, &statpre); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fsetattr (fstat) failed on 
fd=%p", fd); + goto out; + } + + if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) { + op_ret = posix_do_fchown(this, pfd->fd, stbuf, valid); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FCHOWN_FAILED, + "fsetattr (fchown) failed" + " on fd=%p", + fd); + goto out; + } + } + + if (valid & GF_SET_ATTR_MODE) { + op_ret = posix_do_fchmod(this, pfd->fd, stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FCHMOD_FAILED, + "fsetattr (fchmod) failed" + " on fd=%p", + fd); + goto out; + } + } + + if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) { + op_ret = posix_do_futimes(this, pfd->fd, stbuf, valid); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FUTIMES_FAILED, + "fsetattr (futimes) on " + "failed fd=%p", + fd); + goto out; + } + posix_update_utime_in_mdata(this, NULL, pfd->fd, fd->inode, + &frame->root->ctime, stbuf, valid); + } + + if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) { + posix_update_ctime_in_mdata(this, NULL, pfd->fd, fd->inode, + &frame->root->ctime, stbuf, valid); + } + + if (!valid) { + op_ret = sys_fchown(pfd->fd, -1, -1); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FCHOWN_FAILED, + "fchown (%d, -1, -1) failed", pfd->fd); + + goto out; + } + } + + op_ret = posix_fdstat(this, fd->inode, pfd->fd, &statpost); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fsetattr (fstat) failed on fd=%p", fd); + goto out; + } + + posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &statpost); + + if (xdata) + xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, pfd->fd, xdata, + &statpost); + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(fsetattr, frame, op_ret, op_errno, &statpre, &statpost, + xattr_rsp); + if (xattr_rsp) + dict_unref(xattr_rsp); + + return 0; +} + +static int32_t 
+posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + off_t offset, size_t len, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata, dict_t **rsp_xdata) +{ + int32_t ret = -1; + int32_t op_errno = 0; + struct posix_fd *pfd = NULL; + gf_boolean_t locked = _gf_false; + posix_inode_ctx_t *ctx = NULL; + struct posix_private *priv = NULL; + gf_boolean_t check_space_error = _gf_false; + struct stat statbuf = { + 0, + }; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID(frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + priv = this->private; + + /* fallocate case is special so call posix_disk_space_check separately + for every fallocate fop instead of calling posix_disk_space with + thread after every 5 sec sleep to working correctly storage.reserve + option behaviour + */ + if (priv->disk_reserve) + posix_disk_space_check(this); + + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, unlock); + +overwrite: + check_space_error = _gf_true; + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd); + goto out; + } + + ret = posix_inode_ctx_get_all(fd->inode, this, &ctx); + if (ret < 0) { + ret = -ENOMEM; + goto out; + } + + if (xdata && dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) { + locked = _gf_true; + pthread_mutex_lock(&ctx->write_atomic_lock); + } + + ret = posix_fdstat(this, fd->inode, pfd->fd, statpre); + if (ret == -1) { + ret = -errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fallocate (fstat) failed on fd=%p", fd); + goto unlock; + } + + if (xdata) { + ret = posix_cs_maintenance(this, fd, NULL, &pfd->fd, statpre, NULL, + xdata, rsp_xdata, _gf_false); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd); + ret = -EIO; + goto unlock; + } + } + + ret = sys_fallocate(pfd->fd, flags, offset, len); + if (ret 
== -1) { + ret = -errno; + gf_msg(this->name, GF_LOG_ERROR, -ret, P_MSG_FALLOCATE_FAILED, + "fallocate failed on %s offset: %jd, " + "len:%zu, flags: %d", + uuid_utoa(fd->inode->gfid), offset, len, flags); + goto unlock; + } + + ret = posix_fdstat(this, fd->inode, pfd->fd, statpost); + if (ret == -1) { + ret = -errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fallocate (fstat) failed on fd=%p", fd); + goto unlock; + } + + posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, statpost); + +unlock: + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + + if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) { +#ifdef FALLOC_FL_KEEP_SIZE + if (flags & FALLOC_FL_KEEP_SIZE) { + goto overwrite; + } +#endif + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + + if (sys_fstat(pfd->fd, &statbuf) < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED, + "%d", pfd->fd); + goto out; + } + + if (offset + len <= statbuf.st_size) { + gf_msg_debug(this->name, 0, + "io vector size will not" + " change disk size so allow overwrite for" + " fd %d", + pfd->fd); + goto overwrite; + } + } + +out: + SET_TO_OLD_FS_ID(); + if (ret == ENOSPC) + ret = -ENOSPC; + + return ret; +} + +char * +_page_aligned_alloc(size_t size, char **aligned_buf) +{ + char *alloc_buf = NULL; + char *buf = NULL; + + alloc_buf = GF_CALLOC(1, (size + ALIGN_SIZE), gf_posix_mt_char); + if (!alloc_buf) + goto out; + /* page aligned buffer */ + buf = GF_ALIGN_BUF(alloc_buf, ALIGN_SIZE); + *aligned_buf = buf; +out: + return alloc_buf; +} + +static int32_t +_posix_do_zerofill(int fd, off_t offset, off_t len, int o_direct) +{ + off_t num_vect = 0; + off_t num_loop = 1; + off_t idx = 0; + int32_t op_ret = -1; + int32_t vect_size = VECTOR_SIZE; + off_t remain = 0; + off_t extra = 0; + struct iovec 
*vector = NULL; + char *iov_base = NULL; + char *alloc_buf = NULL; + + if (len == 0) + return 0; + if (len < VECTOR_SIZE) + vect_size = len; + + num_vect = len / (vect_size); + remain = len % vect_size; + if (num_vect > MAX_NO_VECT) { + extra = num_vect % MAX_NO_VECT; + num_loop = num_vect / MAX_NO_VECT; + num_vect = MAX_NO_VECT; + } + + vector = GF_CALLOC(num_vect, sizeof(struct iovec), gf_common_mt_iovec); + if (!vector) + return -1; + if (o_direct) { + alloc_buf = _page_aligned_alloc(vect_size, &iov_base); + if (!alloc_buf) { + GF_FREE(vector); + return -1; + } + } else { + iov_base = GF_CALLOC(vect_size, sizeof(char), gf_common_mt_char); + if (!iov_base) { + GF_FREE(vector); + return -1; + } + } + + for (idx = 0; idx < num_vect; idx++) { + vector[idx].iov_base = iov_base; + vector[idx].iov_len = vect_size; + } + if (sys_lseek(fd, offset, SEEK_SET) < 0) { + op_ret = -1; + goto err; + } + + for (idx = 0; idx < num_loop; idx++) { + op_ret = sys_writev(fd, vector, num_vect); + if (op_ret < 0) + goto err; + if (op_ret != (vect_size * num_vect)) { + op_ret = -1; + errno = ENOSPC; + goto err; + } + } + if (extra) { + op_ret = sys_writev(fd, vector, extra); + if (op_ret < 0) + goto err; + if (op_ret != (vect_size * extra)) { + op_ret = -1; + errno = ENOSPC; + goto err; + } + } + if (remain) { + vector[0].iov_len = remain; + op_ret = sys_writev(fd, vector, 1); + if (op_ret < 0) + goto err; + if (op_ret != remain) { + op_ret = -1; + errno = ENOSPC; + goto err; + } + } +err: + if (o_direct) + GF_FREE(alloc_buf); + else + GF_FREE(iov_base); + GF_FREE(vector); + return op_ret; +} + +static int32_t +posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, struct iatt *statpre, struct iatt *statpost, + dict_t *xdata, dict_t **rsp_xdata) +{ + int32_t ret = -1; + int32_t op_errno = 0; + int32_t flags = 0; + struct posix_fd *pfd = NULL; + gf_boolean_t locked = _gf_false; + posix_inode_ctx_t *ctx = NULL; + + DECLARE_OLD_FS_ID_VAR; + + 
SET_FS_ID(frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd); + goto out; + } + + ret = posix_inode_ctx_get_all(fd->inode, this, &ctx); + if (ret < 0) { + ret = -ENOMEM; + goto out; + } + + if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) { + locked = _gf_true; + pthread_mutex_lock(&ctx->write_atomic_lock); + } + + ret = posix_fdstat(this, fd->inode, pfd->fd, statpre); + if (ret == -1) { + ret = -errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd = %p", fd); + goto out; + } + + if (xdata) { + ret = posix_cs_maintenance(this, fd, NULL, &pfd->fd, statpre, NULL, + xdata, rsp_xdata, _gf_false); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state " + "check failed, fd %p", + fd); + ret = -EIO; + goto out; + } + } + + posix_update_iatt_buf(statpre, pfd->fd, NULL, xdata); + /* See if we can use FALLOC_FL_ZERO_RANGE to perform the zero fill. + * If it fails, fall back to _posix_do_zerofill() and an optional fsync. 
+ */ + flags = FALLOC_FL_ZERO_RANGE; + ret = sys_fallocate(pfd->fd, flags, offset, len); + if (ret == 0) { + goto fsync; + } else { + ret = -errno; + if ((ret != -ENOSYS) && (ret != -EOPNOTSUPP)) { + goto out; + } + } + + ret = _posix_do_zerofill(pfd->fd, offset, len, pfd->flags & O_DIRECT); + if (ret < 0) { + ret = -errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_ZEROFILL_FAILED, + "zerofill failed on fd %d length %" PRId64, pfd->fd, len); + goto out; + } + +fsync: + if (pfd->flags & (O_SYNC | O_DSYNC)) { + ret = sys_fsync(pfd->fd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_WRITEV_FAILED, + "fsync() in writev on fd" + "%d failed", + pfd->fd); + ret = -errno; + goto out; + } + } + + ret = posix_fdstat(this, fd->inode, pfd->fd, statpost); + if (ret == -1) { + ret = -errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "post operation fstat failed on fd=%p", fd); + goto out; + } + + posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, statpost); + +out: + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + SET_TO_OLD_FS_ID(); + + return ret; +} + +int32_t +posix_glfallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t keep_size, off_t offset, size_t len, dict_t *xdata) +{ + int32_t ret; + int32_t flags = 0; + struct iatt statpre = { + 0, + }; + struct iatt statpost = { + 0, + }; + dict_t *rsp_xdata = NULL; + +#ifdef FALLOC_FL_KEEP_SIZE + if (keep_size) + flags = FALLOC_FL_KEEP_SIZE; +#endif /* FALLOC_FL_KEEP_SIZE */ + + ret = posix_do_fallocate(frame, this, fd, flags, offset, len, &statpre, + &statpost, xdata, &rsp_xdata); + if (ret < 0) + goto err; + + STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, rsp_xdata); + return 0; + +err: + STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, rsp_xdata); + return 0; +} + +int32_t +posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + int32_t ret; + dict_t 
*rsp_xdata = NULL; +#ifndef FALLOC_FL_KEEP_SIZE + ret = EOPNOTSUPP; + +#else /* FALLOC_FL_KEEP_SIZE */ + int32_t flags = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE; + struct iatt statpre = { + 0, + }; + struct iatt statpost = { + 0, + }; + + ret = posix_do_fallocate(frame, this, fd, flags, offset, len, &statpre, + &statpost, xdata, &rsp_xdata); + if (ret < 0) + goto err; + + STACK_UNWIND_STRICT(discard, frame, 0, 0, &statpre, &statpost, rsp_xdata); + return 0; + +err: +#endif /* FALLOC_FL_KEEP_SIZE */ + STACK_UNWIND_STRICT(discard, frame, -1, -ret, NULL, NULL, rsp_xdata); + return 0; +} + +int32_t +posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) +{ + int32_t ret = 0; + struct iatt statpre = { + 0, + }; + struct iatt statpost = { + 0, + }; + struct posix_private *priv = NULL; + int op_ret = -1; + int op_errno = EINVAL; + dict_t *rsp_xdata = NULL; + gf_boolean_t check_space_error = _gf_false; + struct posix_fd *pfd = NULL; + struct stat statbuf = { + 0, + }; + + VALIDATE_OR_GOTO(frame, unwind); + VALIDATE_OR_GOTO(this, unwind); + + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + +overwrite: + check_space_error = _gf_true; + ret = posix_do_zerofill(frame, this, fd, offset, len, &statpre, &statpost, + xdata, &rsp_xdata); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + goto unwind; + } + + STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, rsp_xdata); + return 0; + +out: + if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) { + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + + if (sys_fstat(pfd->fd, &statbuf) < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED, + "%d", pfd->fd); + goto out; + } + + if (offset + len <= statbuf.st_size) { + gf_msg_debug(this->name, 0, + "io vector 
size will not" + " change disk size so allow overwrite for" + " fd %d", + pfd->fd); + goto overwrite; + } + } + +unwind: + STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL, + rsp_xdata); + return 0; +} + +int32_t +posix_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +{ + /* + * IPC is for inter-translator communication. If one gets here, it + * means somebody sent one that nobody else recognized, which is an + * error much like an uncaught exception. + */ + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_IPC_NOT_HANDLE, + "GF_LOG_IPC(%d) not handled", op); + STACK_UNWIND_STRICT(ipc, frame, -1, EOPNOTSUPP, NULL); + return 0; +} + +int32_t +posix_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) +{ +#ifdef HAVE_SEEK_HOLE + struct posix_fd *pfd = NULL; + off_t ret = -1; + int err = 0; + int whence = 0; + struct iatt preop = { + 0, + }; + dict_t *rsp_xdata = NULL; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID(frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + switch (what) { + case GF_SEEK_DATA: + whence = SEEK_DATA; + break; + case GF_SEEK_HOLE: + whence = SEEK_HOLE; + break; + default: + err = ENOTSUP; + gf_msg(this->name, GF_LOG_ERROR, ENOTSUP, P_MSG_SEEK_UNKOWN, + "don't know what to seek"); + goto out; + } + + ret = posix_fd_ctx_get(fd, this, &pfd, &err); + if (ret < 0) { + gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd); + goto out; + } + + if (xdata) { + ret = posix_fdstat(this, fd->inode, pfd->fd, &preop); + if (ret == -1) { + ret = -errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); + goto out; + } + + ret = posix_cs_maintenance(this, fd, NULL, &pfd->fd, &preop, NULL, + xdata, &rsp_xdata, _gf_false); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd); + ret = -EIO; + goto out; + 
} + } + + ret = sys_lseek(pfd->fd, offset, whence); + if (ret == -1) { + err = errno; + gf_msg(this->name, fop_log_level(GF_FOP_SEEK, err), err, + P_MSG_SEEK_FAILED, "seek failed on fd %d length %" PRId64, + pfd->fd, offset); + goto out; + } + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(seek, frame, (ret == -1 ? -1 : 0), err, + (ret == -1 ? -1 : ret), rsp_xdata); +#else + STACK_UNWIND_STRICT(seek, frame, -1, EINVAL, 0, NULL); +#endif + return 0; +} + +int32_t +posix_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) +{ + char *real_path = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + DIR *dir = NULL; + struct posix_fd *pfd = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(loc, out); + VALIDATE_OR_GOTO(fd, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + MAKE_INODE_HANDLE(real_path, this, loc, NULL); + if (!real_path) { + op_errno = ESTALE; + goto out; + } + + op_ret = -1; + dir = sys_opendir(real_path); + + if (dir == NULL) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_OPENDIR_FAILED, + "opendir failed on gfid-handle: %s (path: %s)", real_path, + loc->path); + goto out; + } + + op_ret = dirfd(dir); + if (op_ret < 0) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DIRFD_FAILED, + "dirfd() failed (path: %s, gfid-handle: %s", loc->path, + real_path); + goto out; + } + + pfd = GF_CALLOC(1, sizeof(*pfd), gf_posix_mt_posix_fd); + if (!pfd) { + op_errno = errno; + goto out; + } + + pfd->dir = dir; + pfd->dir_eof = -1; + pfd->fd = op_ret; + + op_ret = fd_ctx_set(fd, this, (uint64_t)(long)pfd); + if (op_ret) + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, + "failed to set the fd" + "context path=%s " + "gfid-handle= %s,fd=%p", + loc->path, real_path, fd); + + posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, NULL); + + op_ret = 0; + +out: + if (op_ret == -1) { + if (dir) 
{ + (void)sys_closedir(dir); + dir = NULL; + } + if (pfd) { + GF_FREE(pfd); + pfd = NULL; + } + } + + SET_TO_OLD_FS_ID(); + STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, NULL); + return 0; +} + +static void +posix_add_fd_to_cleanup(xlator_t *this, struct posix_fd *pfd) +{ + glusterfs_ctx_t *ctx = this->ctx; + struct posix_private *priv = this->private; + + pfd->xl = this; + pthread_mutex_lock(&ctx->fd_lock); + { + list_add_tail(&pfd->list, &ctx->janitor_fds); + priv->rel_fdcount++; + pthread_cond_signal(&ctx->fd_cond); + } + pthread_mutex_unlock(&ctx->fd_lock); +} + +int32_t +posix_releasedir(xlator_t *this, fd_t *fd) +{ + struct posix_fd *pfd = NULL; + uint64_t tmp_pfd = 0; + int ret = 0; + + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + ret = fd_ctx_del(fd, this, &tmp_pfd); + if (ret < 0) { + gf_msg_debug(this->name, 0, "pfd from fd=%p is NULL", fd); + goto out; + } + + pfd = (struct posix_fd *)(long)tmp_pfd; + if (!pfd->dir) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL, + "pfd->dir is NULL for fd=%p", fd); + goto out; + } + posix_add_fd_to_cleanup(this, pfd); + +out: + return 0; +} + +int32_t +posix_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, + dict_t *xdata) +{ + char *dest = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + char *real_path = NULL; + struct iatt stbuf = { + 0, + }; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(loc, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + + dest = alloca(size + 1); + + MAKE_INODE_HANDLE(real_path, this, loc, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat on %s failed", loc->path ? 
loc->path : "<null>"); + goto out; + } + + op_ret = sys_readlink(real_path, dest, size); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READYLINK_FAILED, + "readlink on gfid-handle: %s (path: %s) failed", real_path, + loc->path); + goto out; + } + + dest[op_ret] = 0; +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(readlink, frame, op_ret, op_errno, dest, &stbuf, NULL); + + return 0; +} + +int32_t +posix_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = 0; + struct posix_private *priv = NULL; + struct iatt prebuf = { + 0, + }; + struct iatt postbuf = { + 0, + }; + dict_t *rsp_xdata = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(loc, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + + MAKE_INODE_HANDLE(real_path, this, loc, &prebuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on (path: %s gfid-handle: %s) " + "failed", + loc->path, real_path ? 
real_path : "<null>"); + goto out; + } + + if (xdata) { + op_ret = posix_cs_maintenance(this, NULL, loc, NULL, &prebuf, real_path, + xdata, &rsp_xdata, _gf_false); + if (op_ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, path %s", loc->path); + op_errno = EIO; + goto out; + } + } + + posix_update_iatt_buf(&prebuf, -1, real_path, xdata); + op_ret = sys_truncate(real_path, offset); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED, + "truncate on gfid-handle: %s (path: %s) failed", real_path, + loc->path); + goto out; + } + + op_ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &postbuf, + _gf_false); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat on gfid-handle %s (path: %s) failed", real_path, + loc->path); + goto out; + } + + posix_set_ctime(frame, this, real_path, -1, loc->inode, &postbuf); + + op_ret = 0; +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, &prebuf, &postbuf, + NULL); + + return 0; +} + +int32_t +posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL; + int32_t _fd = -1; + struct posix_fd *pfd = NULL; + struct posix_private *priv = NULL; + struct iatt preop = { + 0, + }; + dict_t *rsp_xdata = NULL; + struct iatt stbuf = { + 0, + }; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(this->private, out); + VALIDATE_OR_GOTO(loc, out); + VALIDATE_OR_GOTO(fd, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + + if (loc->inode && ((loc->inode->ia_type == IA_IFBLK) || + (loc->inode->ia_type == IA_IFCHR))) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT, + "open received on a block/char file (%s)", + uuid_utoa(loc->inode->gfid)); + op_errno = 
EINVAL; + goto out; + } + + if (flags & O_CREAT) + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + + MAKE_INODE_HANDLE(real_path, this, loc, &stbuf); + if (!real_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + if (IA_ISLNK(stbuf.ia_type)) { + op_ret = -1; + op_errno = ELOOP; + goto out; + } + + op_ret = -1; + SET_FS_ID(frame->root->uid, frame->root->gid); + + if (priv->o_direct) + flags |= O_DIRECT; + + _fd = sys_open(real_path, flags, priv->force_create_mode); + if (_fd == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FILE_OP_FAILED, + "open on gfid-handle %s (path: %s), flags: %d", real_path, + loc->path, flags); + goto out; + } + + posix_set_ctime(frame, this, real_path, -1, loc->inode, &stbuf); + + pfd = GF_CALLOC(1, sizeof(*pfd), gf_posix_mt_posix_fd); + if (!pfd) { + op_errno = errno; + goto out; + } + + pfd->flags = flags; + pfd->fd = _fd; + + if (xdata) { + op_ret = posix_fdstat(this, fd->inode, pfd->fd, &preop); + if (op_ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); + GF_FREE(pfd); + goto out; + } + + posix_cs_maintenance(this, fd, NULL, &pfd->fd, &preop, NULL, xdata, + &rsp_xdata, _gf_true); + } + + op_ret = fd_ctx_set(fd, this, (uint64_t)(long)pfd); + if (op_ret) + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, + "failed to set the fd context gfid-handle=%s path=%s fd=%p", + real_path, loc->path, fd); + + op_ret = 0; + +out: + if (op_ret == -1) { + if (_fd != -1) { + sys_close(_fd); + } + } + + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, rsp_xdata); + + return 0; +} + +int +posix_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct posix_private *priv = NULL; + struct iobuf *iobuf = NULL; + struct iobref *iobref 
= NULL; + struct iovec vec = { + 0, + }; + struct posix_fd *pfd = NULL; + struct iatt stbuf = { + 0, + }; + struct iatt preop = { + 0, + }; + int ret = -1; + dict_t *rsp_xdata = NULL; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + VALIDATE_OR_GOTO(fd->inode, out); + VALIDATE_OR_GOTO(this->private, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + + if ((fd->inode->ia_type == IA_IFBLK) || (fd->inode->ia_type == IA_IFCHR)) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT, + "readv received on a block/char file (%s)", + uuid_utoa(fd->inode->gfid)); + op_errno = EINVAL; + goto out; + } + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + + if (!size) { + op_errno = EINVAL; + gf_msg(this->name, GF_LOG_WARNING, EINVAL, P_MSG_INVALID_ARGUMENT, + "size=%" GF_PRI_SIZET, size); + goto out; + } + + iobuf = iobuf_get_page_aligned(this->ctx->iobuf_pool, size, ALIGN_SIZE); + if (!iobuf) { + op_errno = ENOMEM; + goto out; + } + + _fd = pfd->fd; + + if (xdata) { + op_ret = posix_fdstat(this, fd->inode, _fd, &preop); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); + goto out; + } + op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata, + &rsp_xdata, _gf_false); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd); + op_errno = EIO; + goto out; + } + } + + posix_update_iatt_buf(&preop, _fd, NULL, xdata); + op_ret = sys_pread(_fd, iobuf->ptr, size, offset); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READ_FAILED, + "read failed on gfid=%s, " + "fd=%p, offset=%" PRIu64 " size=%" GF_PRI_SIZET + ", " + "buf=%p", + uuid_utoa(fd->inode->gfid), fd, offset, size, 
iobuf->ptr); + goto out; + } + + GF_ATOMIC_ADD(priv->read_value, op_ret); + + vec.iov_base = iobuf->ptr; + vec.iov_len = op_ret; + + iobref = iobref_new(); + + iobref_add(iobref, iobuf); + + /* + * readv successful, and we need to get the stat of the file + * we read from + */ + + op_ret = posix_fdstat(this, fd->inode, _fd, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fstat failed on fd=%p", fd); + goto out; + } + + posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &stbuf); + + /* Hack to notify higher layers of EOF. */ + if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size) + op_errno = ENOENT; + + op_ret = vec.iov_len; + +out: + + STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &vec, 1, &stbuf, iobref, + rsp_xdata); + + if (iobref) + iobref_unref(iobref); + if (iobuf) + iobuf_unref(iobuf); + + return 0; +} + +int32_t +__posix_pwritev(int fd, struct iovec *vector, int count, off_t offset) +{ + int32_t op_ret = 0; + int idx = 0; + int retval = 0; + off_t internal_off = 0; + + if (!vector) + return -EFAULT; + + internal_off = offset; + for (idx = 0; idx < count; idx++) { + retval = sys_pwrite(fd, vector[idx].iov_base, vector[idx].iov_len, + internal_off); + if (retval == -1) { + op_ret = -errno; + goto err; + } + op_ret += retval; + internal_off += retval; + } + +err: + return op_ret; +} + +int32_t +__posix_writev(int fd, struct iovec *vector, int count, off_t startoff, + int odirect) +{ + int32_t op_ret = 0; + int idx = 0; + int max_buf_size = 0; + int retval = 0; + char *buf = NULL; + char *alloc_buf = NULL; + off_t internal_off = 0; + + /* Check for the O_DIRECT flag during open() */ + if (!odirect) + return __posix_pwritev(fd, vector, count, startoff); + + for (idx = 0; idx < count; idx++) { + if (max_buf_size < vector[idx].iov_len) + max_buf_size = vector[idx].iov_len; + } + + alloc_buf = _page_aligned_alloc(max_buf_size, &buf); + if (!alloc_buf) { + op_ret = -errno; + goto 
err; + } + + internal_off = startoff; + for (idx = 0; idx < count; idx++) { + memcpy(buf, vector[idx].iov_base, vector[idx].iov_len); + + /* not sure whether writev works on O_DIRECT'd fd */ + retval = sys_pwrite(fd, buf, vector[idx].iov_len, internal_off); + if (retval == -1) { + op_ret = -errno; + goto err; + } + + op_ret += retval; + internal_off += retval; + } + +err: + GF_FREE(alloc_buf); + + return op_ret; +} + +dict_t * +_fill_writev_xdata(fd_t *fd, dict_t *xdata, xlator_t *this, int is_append) +{ + dict_t *rsp_xdata = NULL; + int32_t ret = 0; + inode_t *inode = NULL; + + if (fd) + inode = fd->inode; + + if (!fd || !fd->inode || gf_uuid_is_null(fd->inode->gfid)) { + gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, P_MSG_XATTR_FAILED, + "fd: %p inode: %p" + "gfid:%s", + fd, inode ? inode : 0, + inode ? uuid_utoa(inode->gfid) : "N/A"); + goto out; + } + + if (!xdata) + goto out; + + rsp_xdata = dict_new(); + if (!rsp_xdata) + goto out; + + if (dict_get(xdata, GLUSTERFS_OPEN_FD_COUNT)) { + ret = dict_set_uint32(rsp_xdata, GLUSTERFS_OPEN_FD_COUNT, + fd->inode->fd_count); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "%s: Failed to set " + "dictionary value for %s", + uuid_utoa(fd->inode->gfid), GLUSTERFS_OPEN_FD_COUNT); + } + } + + if (dict_get(xdata, GLUSTERFS_ACTIVE_FD_COUNT)) { + ret = dict_set_uint32(rsp_xdata, GLUSTERFS_ACTIVE_FD_COUNT, + fd->inode->active_fd_count); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "%s: Failed to set " + "dictionary value for %s", + uuid_utoa(fd->inode->gfid), GLUSTERFS_ACTIVE_FD_COUNT); + } + } + + if (dict_get(xdata, GLUSTERFS_WRITE_IS_APPEND)) { + ret = dict_set_uint32(rsp_xdata, GLUSTERFS_WRITE_IS_APPEND, is_append); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "%s: Failed to set " + "dictionary value for %s", + uuid_utoa(fd->inode->gfid), GLUSTERFS_WRITE_IS_APPEND); + } + } +out: + return rsp_xdata; +} + +int32_t 
+posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, uint32_t flags, + struct iobref *iobref, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct posix_private *priv = NULL; + struct posix_fd *pfd = NULL; + struct iatt preop = { + 0, + }; + struct iatt postop = { + 0, + }; + int ret = -1; + dict_t *rsp_xdata = NULL; + int is_append = 0; + gf_boolean_t locked = _gf_false; + gf_boolean_t write_append = _gf_false; + gf_boolean_t update_atomic = _gf_false; + posix_inode_ctx_t *ctx = NULL; + gf_boolean_t check_space_error = _gf_false; + struct stat statbuf = { + 0, + }; + int totlen = 0; + int idx = 0; + + VALIDATE_OR_GOTO(frame, unwind); + VALIDATE_OR_GOTO(this, unwind); + VALIDATE_OR_GOTO(fd, unwind); + VALIDATE_OR_GOTO(fd->inode, unwind); + VALIDATE_OR_GOTO(vector, unwind); + VALIDATE_OR_GOTO(this->private, unwind); + + priv = this->private; + + VALIDATE_OR_GOTO(priv, unwind); + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + +overwrite: + + check_space_error = _gf_true; + if ((fd->inode->ia_type == IA_IFBLK) || (fd->inode->ia_type == IA_IFCHR)) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT, + "writev received on a block/char file (%s)", + uuid_utoa(fd->inode->gfid)); + op_errno = EINVAL; + goto out; + } + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + + _fd = pfd->fd; + + ret = posix_check_internal_writes(this, fd, _fd, xdata); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "possible overwrite from internal client, fd=%p", fd); + op_ret = -1; + op_errno = EBUSY; + goto out; + } + + if (xdata) { + if (dict_get(xdata, GLUSTERFS_WRITE_IS_APPEND)) + write_append = _gf_true; + if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) + update_atomic = _gf_true; + } + + /* The write_is_append 
check and write must happen + atomically. Else another write can overtake this + write after the check and get written earlier. + + So lock before preop-stat and unlock after write. + */ + + /* + * The update_atomic option is to instruct posix to do prestat, + * write and poststat atomically. This is to prevent any modification to + * ia_size and ia_blocks until poststat and the diff in their values + * between pre and poststat could be of use for some translators (shard + * as of today). + */ + + op_ret = posix_inode_ctx_get_all(fd->inode, this, &ctx); + if (op_ret < 0) { + op_errno = ENOMEM; + goto out; + } + + if (write_append || update_atomic) { + locked = _gf_true; + pthread_mutex_lock(&ctx->write_atomic_lock); + } + + op_ret = posix_fdstat(this, fd->inode, _fd, &preop); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); + goto out; + } + + if (xdata) { + op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata, + &rsp_xdata, _gf_false); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd); + op_errno = EIO; + goto out; + } + } + + posix_update_iatt_buf(&preop, _fd, NULL, xdata); + if (locked && write_append) { + if (preop.ia_size == offset || (fd->flags & O_APPEND)) + is_append = 1; + } + + op_ret = __posix_writev(_fd, vector, count, offset, + (pfd->flags & O_DIRECT)); + + if (locked && (!update_atomic)) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_WRITE_FAILED, + "write failed: offset %" PRIu64 ",", offset); + goto out; + } + + rsp_xdata = _fill_writev_xdata(fd, xdata, this, is_append); + /* writev successful, we also need to get the stat of + * the file we wrote to + */ + + ret = posix_fdstat(this, fd->inode, _fd, &postop); + if (ret == -1) { + op_ret = -1; + 
op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "post-operation fstat failed on fd=%p", fd); + goto out; + } + + posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &postop); + + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + + if (flags & (O_SYNC | O_DSYNC)) { + ret = sys_fsync(_fd); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_WRITEV_FAILED, + "fsync() in writev on fd %d failed", _fd); + op_ret = -1; + op_errno = errno; + goto out; + } + } + + GF_ATOMIC_ADD(priv->write_value, op_ret); + +out: + + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + + if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) { + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto unwind; + } + + if (sys_fstat(pfd->fd, &statbuf) < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED, + "%d", pfd->fd); + goto unwind; + } + + for (idx = 0; idx < count; idx++) { + totlen = vector[idx].iov_len; + } + + if ((offset + totlen <= statbuf.st_size) && + !(statbuf.st_blocks * statbuf.st_blksize < statbuf.st_size)) { + gf_msg_debug(this->name, 0, + "io vector size will not" + " change disk size so allow overwrite for" + " fd %d", + pfd->fd); + goto overwrite; + } + } + +unwind: + STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, &preop, &postop, + rsp_xdata); + + if (rsp_xdata) + dict_unref(rsp_xdata); + return 0; +} + +int32_t +posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, + off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len, + uint32_t flags, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd_in = -1; + int _fd_out = -1; + struct posix_private *priv = NULL; + struct posix_fd *pfd_in = NULL; + struct posix_fd *pfd_out = NULL; + struct iatt preop_dst = { + 
0, + }; + struct iatt postop_dst = { + 0, + }; + struct iatt stbuf = { + 0, + }; + int ret = -1; + dict_t *rsp_xdata = NULL; + int is_append = 0; + gf_boolean_t locked = _gf_false; + gf_boolean_t update_atomic = _gf_false; + posix_inode_ctx_t *ctx = NULL; + char in_uuid_str[64] = {0}, out_uuid_str[64] = {0}; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd_in, out); + VALIDATE_OR_GOTO(fd_in->inode, out); + VALIDATE_OR_GOTO(fd_out, out); + VALIDATE_OR_GOTO(fd_out->inode, out); + VALIDATE_OR_GOTO(this->private, out); + + priv = this->private; + + VALIDATE_OR_GOTO(priv, out); + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + + if (posix_check_dev_file(this, fd_in->inode, "copy_file_range", &op_errno)) + goto out; + + if (posix_check_dev_file(this, fd_out->inode, "copy_file_range", &op_errno)) + goto out; + + ret = posix_fd_ctx_get(fd_in, this, &pfd_in, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd_in); + goto out; + } + + _fd_in = pfd_in->fd; + + ret = posix_fd_ctx_get(fd_out, this, &pfd_out, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd_out); + goto out; + } + + _fd_out = pfd_out->fd; + + /* + * Currently, the internal write is checked via xdata which + * is set by some xlator above. It could be due to several of + * the reasons such as healing or a snapshot operation happening + * using copy_file_range. As of now (i.e. writing the patch with + * this change) none of the xlators above posix are using the + * internal write with copy_file_range. In future it might + * change. Atleast as of now the hope is that, when that happens + * this functon or fop does not require additional changes for + * handling internal writes. 
+ */ + ret = posix_check_internal_writes(this, fd_out, _fd_out, xdata); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "possible overwrite from internal client, fd=%p", fd_out); + op_ret = -1; + op_errno = EBUSY; + goto out; + } + + if (xdata) { + if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) + update_atomic = _gf_true; + } + + /* + * The update_atomic option is to instruct posix to do prestat, + * write and poststat atomically. This is to prevent any modification to + * ia_size and ia_blocks until poststat and the diff in their values + * between pre and poststat could be of use for some translators. + * This is similar to the atomic write operation. atmoic write is + * (i.e. prestat + write + poststat) used by shard as of now. In case, + * some xlator needs copy_file_range to be atomic from prestat and postat + * prespective (i.e. prestat + copy_file_range + poststat) then it has + * to send "GLUSTERFS_WRITE_UPDATE_ATOMIC" key in xdata. + */ + + op_ret = posix_inode_ctx_get_all(fd_out->inode, this, &ctx); + if (op_ret < 0) { + op_errno = ENOMEM; + goto out; + } + + if (update_atomic) { + ret = pthread_mutex_lock(&ctx->write_atomic_lock); + if (!ret) + locked = _gf_true; + else { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_MUTEX_FAILED, + "failed to hold write atomic lock on %s", + uuid_utoa(fd_out->inode->gfid)); + goto out; + } + } + + op_ret = posix_fdstat(this, fd_out->inode, _fd_out, &preop_dst); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd_out); + goto out; + } + + /* + * Since, only the destination file (fd_out) is undergoing + * modification, the write related tests are done on that. + * i.e. this is treater similar to as if the destination file + * undergoing write fop from maintenance perspective. 
+ */ + if (xdata) { + op_ret = posix_cs_maintenance(this, fd_out, NULL, &_fd_out, &preop_dst, + NULL, xdata, &rsp_xdata, _gf_false); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd_out); + op_errno = EIO; + goto out; + } + } + + /* + * NOTE: This is just doing a single execution of copy_file_range + * system call. If the returned value of this system call is less + * than len, then should we keep doing it in a for loop until the + * copy_file_range of all the len bytes is done? + * Check the example program provided in the man page of + * copy_file_range. + * If so, then a separate variables for both off_in and off_out + * should be used which are initialized to off_in and off_out + * that this function call receives, but then advanced by the + * value returned by sys_copy_file_range and then use that as + * off_in and off_out for next instance of copy_file_range execution. + */ + op_ret = sys_copy_file_range(_fd_in, &off_in, _fd_out, &off_out, len, + flags); + + if (op_ret < 0) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_COPY_FILE_RANGE_FAILED, + "copy_file_range failed: fd_in: %p (gfid: %s) ," + " fd_out %p (gfid:%s)", + fd_in, uuid_utoa_r(fd_in->inode->gfid, in_uuid_str), fd_out, + uuid_utoa_r(fd_out->inode->gfid, out_uuid_str)); + goto out; + } + + /* + * Let this be as it is for now. This function collects + * infomration such as open fd count etc. So, even though + * is_append does not apply to copy_file_range, for now, + * allowing it to be recorded in the dict as _gf_false. + */ + rsp_xdata = _fill_writev_xdata(fd_out, xdata, this, is_append); + + /* copy_file_range successful, we also need to get the stat of + * the file we wrote to (i.e. destination file or fd_out). 
+ */ + ret = posix_fdstat(this, fd_out->inode, _fd_out, &postop_dst); + if (ret == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "post-operation fstat failed on fd=%p", fd_out); + goto out; + } + + /* + * Also perform the stat on the source fd (i.e. fd_in). For now, + * allowing it to be done within the locked region if the request + * is for atomic operation (and update) of copy_file_range. + */ + ret = posix_fdstat(this, fd_in->inode, _fd_in, &stbuf); + if (ret == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "post-operation fstat failed on fd=%p", fd_in); + goto out; + } + + /* + * The core logic of what time attributes are to be updated + * on a fop is decided at client side xlator utime. + * All the remaining fops call posix_set_ctime function + * to update the {a,m,c}time. But, for all the other fops, + * the operation is happening on only one file (or inode). + * But here, there are 2 fds (source and destination). Hence + * the new function below to update the appropriate times for + * both the source and the destination file. + * For the source file, if at all anything has to be updated, + * it would be atime (as that file is only read, not updated). + * For the destination file, the attributes that require the + * modification would be mtime and ctime. + * What times have to be changed is actually determined by + * utime xlator. But, all of them would be in frame->root->flags. + * So, currently posix assumes that, the atime flag is for + * the source file and the other 2 flags are for the destination + * file. Since, the assumption is rigid (i.e. atime for source + * and {m,c}time for destination), the below function is called + * posix_set_ctime_cfr (cfr standing for copy_file_range). + * FUTURE TODO: + * In future, some other functionality or fop might operate + * simultaneously on 2 files. 
Then, depending upon what that new + * fop does or what are its requirements, the below function might + * require changes to become generic for consumption in case of + * simultaneous operations on 2 files. + */ + posix_set_ctime_cfr(frame, this, NULL, pfd_in->fd, fd_in->inode, &stbuf, + NULL, pfd_out->fd, fd_out->inode, &postop_dst); + + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + + /* + * Record copy_file_range in priv->write_value for now. + * If not needed, remove below section of code along with + * this comment (or add comment to explain why it is not + * needed). + */ + GF_ATOMIC_ADD(priv->write_value, op_ret); + +out: + + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + + STACK_UNWIND_STRICT(copy_file_range, frame, op_ret, op_errno, &stbuf, + &preop_dst, &postop_dst, rsp_xdata); + + if (rsp_xdata) + dict_unref(rsp_xdata); + return 0; +} + +int32_t +posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + char *real_path = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + struct statvfs buf = { + 0, + }; + struct posix_private *priv = NULL; + int shared_by = 1; + double percent = 0; + uint64_t reserved_blocks = 0; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(loc, out); + VALIDATE_OR_GOTO(this->private, out); + + MAKE_INODE_HANDLE(real_path, this, loc, NULL); + if (!real_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + priv = this->private; + + op_ret = sys_statvfs(real_path, &buf); + + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED, + "statvfs failed on gfid-handle %s (path: %s)", real_path, + loc->path); + goto out; + } + + if (priv->disk_unit == 'p') { + percent = priv->disk_reserve; + reserved_blocks = (((buf.f_blocks * percent) / 100) + 0.5); + } else { + if (buf.f_bsize) { + reserved_blocks = (priv->disk_reserve + buf.f_bsize - 
1) / + buf.f_bsize; + } + } + + if (buf.f_bfree > reserved_blocks) { + buf.f_bfree = (buf.f_bfree - reserved_blocks); + if (buf.f_bavail > buf.f_bfree) { + buf.f_bavail = buf.f_bfree; + } + } else { + buf.f_bfree = 0; + buf.f_bavail = 0; + } + + shared_by = priv->shared_brick_count; + if (shared_by > 1) { + buf.f_blocks /= shared_by; + buf.f_bfree /= shared_by; + buf.f_bavail /= shared_by; + buf.f_files /= shared_by; + buf.f_ffree /= shared_by; + buf.f_favail /= shared_by; + } + + if (!priv->export_statfs) { + buf.f_blocks = 0; + buf.f_bfree = 0; + buf.f_bavail = 0; + buf.f_files = 0; + buf.f_ffree = 0; + buf.f_favail = 0; + } + + op_ret = 0; + +out: + STACK_UNWIND_STRICT(statfs, frame, op_ret, op_errno, &buf, NULL); + return 0; +} + +int32_t +posix_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int ret = -1; + struct posix_fd *pfd = NULL; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL on fd=%p", fd); + goto out; + } + + op_ret = 0; + +out: + STACK_UNWIND_STRICT(flush, frame, op_ret, op_errno, NULL); + + return 0; +} + +int32_t +posix_release(xlator_t *this, fd_t *fd) +{ + struct posix_fd *pfd = NULL; + int ret = -1; + uint64_t tmp_pfd = 0; + + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + ret = fd_ctx_del(fd, this, &tmp_pfd); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + + pfd = (struct posix_fd *)(long)tmp_pfd; + if (pfd->dir) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DIR_NOT_NULL, + "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd); + } + + posix_add_fd_to_cleanup(this, pfd); + +out: + return 0; +} + +int +posix_batch_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, + 
dict_t *xdata) +{ + call_stub_t *stub = NULL; + struct posix_private *priv = NULL; + + priv = this->private; + + stub = fop_fsync_stub(frame, default_fsync, fd, datasync, xdata); + if (!stub) { + STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, 0, 0, 0); + return 0; + } + + pthread_mutex_lock(&priv->fsync_mutex); + { + list_add_tail(&stub->list, &priv->fsyncs); + priv->fsync_queue_count++; + pthread_cond_signal(&priv->fsync_cond); + } + pthread_mutex_unlock(&priv->fsync_mutex); + + return 0; +} + +int32_t +posix_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct posix_fd *pfd = NULL; + int ret = -1; + struct iatt preop = { + 0, + }; + struct iatt postop = { + 0, + }; + struct posix_private *priv = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + +#ifdef GF_DARWIN_HOST_OS + /* Always return success in case of fsync in MAC OS X */ + op_ret = 0; + goto out; +#endif + + priv = this->private; + + if (priv->batch_fsync_mode && xdata && dict_get(xdata, "batch-fsync")) { + posix_batch_fsync(frame, this, fd, datasync, xdata); + return 0; + } + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd not found in fd's ctx"); + goto out; + } + + _fd = pfd->fd; + + op_ret = posix_fdstat(this, fd->inode, _fd, &preop); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); + goto out; + } + + if (datasync) { + op_ret = sys_fdatasync(_fd); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSYNC_FAILED, + "fdatasync on fd=%p" + "failed:", + fd); + goto out; + } + } else { + op_ret = sys_fsync(_fd); + if (op_ret == -1) { + 
op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSYNC_FAILED, + "fsync on fd=%p " + "failed", + fd); + goto out; + } + } + + op_ret = posix_fdstat(this, fd->inode, _fd, &postop); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FSTAT_FAILED, + "post-operation fstat failed on fd=%p", fd); + goto out; + } + + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, &preop, &postop, NULL); + + return 0; +} + +static int gf_posix_xattr_enotsup_log; +static int +_handle_setxattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp) +{ + posix_xattr_filler_t *filler = NULL; + + filler = tmp; + + return posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v, + filler->flags, filler->stbuf); +} + +#ifdef GF_DARWIN_HOST_OS +static int +map_xattr_flags(int flags) +{ + /* DARWIN has different defines on XATTR_ flags. + There do not seem to be a POSIX standard + Parse any other flags over. 
+ */ + int darwinflags = flags & + ~(GF_XATTR_CREATE | GF_XATTR_REPLACE | XATTR_REPLACE); + if (GF_XATTR_CREATE & flags) + darwinflags |= XATTR_CREATE; + if (GF_XATTR_REPLACE & flags) + darwinflags |= XATTR_REPLACE; + return darwinflags; +} +#endif + +int32_t +posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int flags, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL; + char *acl_xattr = NULL; + struct iatt preop = {0}; + struct iatt postop = {0}; + int32_t ret = 0; + ssize_t acl_size = 0; + dict_t *xattr = NULL; + dict_t *subvol_xattrs = NULL; + posix_xattr_filler_t filler = { + 0, + }; + struct posix_private *priv = NULL; + struct iatt tmp_stbuf = { + 0, + }; + data_t *tdata = NULL; + char *cs_var = NULL; + gf_cs_obj_state state = -1; + int i = 0; + int len; + struct mdata_iatt mdata_iatt = { + 0, + }; + int8_t sync_backend_xattrs = _gf_false; + data_pair_t *custom_xattrs; + data_t *keyval = NULL; + char **xattrs_to_heal = get_xattrs_to_heal(); + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID(frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(this->private, out); + VALIDATE_OR_GOTO(loc, out); + VALIDATE_OR_GOTO(dict, out); + + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + + MAKE_INODE_HANDLE(real_path, this, loc, NULL); + if (!real_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + ret = dict_get_mdata(dict, CTIME_MDATA_XDATA_KEY, &mdata_iatt); + if (ret == 0) { + /* This is initiated by lookup when ctime feature is enabled to create + * "trusted.glusterfs.mdata" xattr if not present. These are the files + * which were created when ctime feature is disabled. 
+ */ + ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path, + &mdata_iatt, &op_errno); + if (ret != 0) { + op_ret = -1; + } + goto out; + } + + posix_pstat(this, loc->inode, loc->gfid, real_path, &preop, _gf_false); + + op_ret = -1; + + dict_del(dict, GFID_XATTR_KEY); + dict_del(dict, GF_XATTR_VOL_ID_KEY); + /* the io-stats-dump key should not reach disk */ + dict_del(dict, GF_XATTR_IOSTATS_DUMP_KEY); + + tdata = dict_get(dict, GF_CS_OBJECT_UPLOAD_COMPLETE); + if (tdata) { + /*TODO: move the following to a different function */ + LOCK(&loc->inode->lock); + { + state = posix_cs_check_status(this, real_path, NULL, &preop); + if (state != GF_CS_LOCAL) { + op_errno = EINVAL; + ret = posix_cs_set_state(this, &xattr, state, real_path, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "set state failed"); + } + goto unlock; + } + + ret = posix_pstat(this, loc->inode, loc->gfid, real_path, + &tmp_stbuf, _gf_true); + if (ret) { + op_errno = EINVAL; + goto unlock; + } + + cs_var = alloca(4096); + sprintf(cs_var, "%" PRId64, tmp_stbuf.ia_mtime); + + /*TODO: may be should consider nano-second also */ + if (strncmp(cs_var, tdata->data, tdata->len) > 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "mtime " + "passed is different from seen by file now." + " Will skip truncating the file"); + ret = -1; + op_errno = EINVAL; + goto unlock; + } + + len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_size); + + ret = sys_lsetxattr(real_path, GF_CS_OBJECT_SIZE, cs_var, len, + flags); + if (ret) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "setxattr failed. key %s err %d", GF_CS_OBJECT_SIZE, + ret); + goto unlock; + } + + len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_blocks); + + ret = sys_lsetxattr(real_path, GF_CS_NUM_BLOCKS, cs_var, len, + flags); + if (ret) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "setxattr failed. 
key %s err %d", GF_CS_NUM_BLOCKS, ret); + goto unlock; + } + + len = sprintf(cs_var, "%" PRIu32, tmp_stbuf.ia_blksize); + + ret = sys_lsetxattr(real_path, GF_CS_BLOCK_SIZE, cs_var, len, + flags); + if (ret) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "setxattr failed. key %s err %d", GF_CS_BLOCK_SIZE, ret); + goto unlock; + } + + memset(cs_var, 0, 4096); + if (loc->path[0] == '/') { + for (i = 1; i < strlen(loc->path); i++) { + cs_var[i - 1] = loc->path[i]; + } + + cs_var[i] = '\0'; + gf_msg_debug(this->name, GF_LOG_ERROR, "remotepath %s", cs_var); + } + + ret = sys_lsetxattr(real_path, GF_CS_OBJECT_REMOTE, cs_var, + strlen(cs_var), flags); + if (ret) { + op_errno = errno; + gf_log("POSIX", GF_LOG_ERROR, + "setxattr failed - %s" + " %d", + GF_CS_OBJECT_SIZE, ret); + goto unlock; + } + + ret = sys_truncate(real_path, 0); + if (ret) { + op_errno = errno; + gf_log("POSIX", GF_LOG_ERROR, + "truncate failed - %s" + " %d", + GF_CS_OBJECT_SIZE, ret); + ret = sys_lremovexattr(real_path, GF_CS_OBJECT_REMOTE); + if (ret) { + op_errno = errno; + gf_log("POSIX", GF_LOG_ERROR, + "removexattr " + "failed post processing- %s" + " %d", + GF_CS_OBJECT_SIZE, ret); + } + goto unlock; + } else { + state = GF_CS_REMOTE; + ret = posix_cs_set_state(this, &xattr, state, real_path, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, "set state failed"); + } + } + } + unlock: + UNLOCK(&loc->inode->lock); + op_ret = ret; + goto out; + } + + filler.real_path = real_path; + filler.this = this; + filler.stbuf = &preop; + filler.loc = loc; + +#ifdef GF_DARWIN_HOST_OS + filler.flags = map_xattr_flags(flags); +#else + filler.flags = flags; +#endif + op_ret = dict_foreach(dict, _handle_setxattr_keyvalue_pair, &filler); + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + goto out; + } + + ret = dict_get_int8(xdata, "sync_backend_xattrs", &sync_backend_xattrs); + if (ret) { + gf_msg_debug(this->name, -ret, "Unable to get sync_backend_xattrs"); + } + + if 
(sync_backend_xattrs) { + /* List all custom xattrs */ + subvol_xattrs = dict_new(); + if (!subvol_xattrs) + goto out; + + ret = dict_set_int32_sizen(xdata, "list-xattr", 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, + "Unable to set list-xattr in dict "); + goto out; + } + + subvol_xattrs = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, + NULL); + + /* Remove all user xattrs from the file */ + dict_foreach_fnmatch(subvol_xattrs, "user.*", posix_delete_user_xattr, + real_path); + + /* Remove all custom xattrs from the file */ + for (i = 1; xattrs_to_heal[i]; i++) { + keyval = dict_get(subvol_xattrs, xattrs_to_heal[i]); + if (keyval) { + ret = sys_lremovexattr(real_path, xattrs_to_heal[i]); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED, + errno, "removexattr failed. key %s path %s", + xattrs_to_heal[i], loc->path); + goto out; + } + + dict_del(subvol_xattrs, xattrs_to_heal[i]); + keyval = NULL; + } + } + + /* Set custom xattrs based on info provided by DHT */ + custom_xattrs = dict->members_list; + + while (custom_xattrs != NULL) { + ret = sys_lsetxattr(real_path, custom_xattrs->key, + custom_xattrs->value->data, + custom_xattrs->value->len, flags); + if (ret) { + op_errno = errno; + gf_log(this->name, GF_LOG_ERROR, "setxattr failed - %s %d", + custom_xattrs->key, ret); + goto out; + } + + custom_xattrs = custom_xattrs->next; + } + } + + xattr = dict_new(); + if (!xattr) + goto out; + + /* + * FIXFIX: Send the stbuf info in the xdata for now + * This is used by DHT to redirect FOPs if the file is being migrated + * Ignore errors for now + */ + ret = posix_pstat(this, loc->inode, loc->gfid, real_path, &postop, + _gf_false); + if (ret) + goto out; + + ret = posix_set_iatt_in_dict(xattr, &preop, &postop); + + /* + * ACL can be set on a file/folder using GF_POSIX_ACL_*_KEY xattrs which + * won't aware of access-control xlator. 
To update its context correctly, + * POSIX_ACL_*_XATTR stored in xdata which is send in the call_back path. + */ + if (dict_get(dict, GF_POSIX_ACL_ACCESS)) { + /* + * The size of buffer will be know after calling sys_lgetxattr, + * so first we allocate buffer with large size(~4k), then we + * reduced into required size using GF_REALLO(). + */ + acl_xattr = GF_CALLOC(1, ACL_BUFFER_MAX, gf_posix_mt_char); + if (!acl_xattr) + goto out; + + acl_size = sys_lgetxattr(real_path, POSIX_ACL_ACCESS_XATTR, acl_xattr, + ACL_BUFFER_MAX); + + if (acl_size < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED, + "Posix acl is not set " + "properly at the backend"); + goto out; + } + + /* If acl_size is more than max buffer size, just ignore it */ + if (acl_size >= ACL_BUFFER_MAX) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, P_MSG_BUFFER_OVERFLOW, + "size of acl is more" + "than the buffer"); + goto out; + } + + acl_xattr = GF_REALLOC(acl_xattr, acl_size); + if (!acl_xattr) + goto out; + + ret = dict_set_bin(xattr, POSIX_ACL_ACCESS_XATTR, acl_xattr, acl_size); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL, + "failed to set" + "xdata for acl"); + GF_FREE(acl_xattr); + goto out; + } + } + + if (dict_get(dict, GF_POSIX_ACL_DEFAULT)) { + acl_xattr = GF_CALLOC(1, ACL_BUFFER_MAX, gf_posix_mt_char); + if (!acl_xattr) + goto out; + + acl_size = sys_lgetxattr(real_path, POSIX_ACL_DEFAULT_XATTR, acl_xattr, + ACL_BUFFER_MAX); + + if (acl_size < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED, + "Posix acl is not set " + "properly at the backend"); + goto out; + } + + if (acl_size >= ACL_BUFFER_MAX) { + gf_msg(this->name, GF_LOG_WARNING, ENOMEM, P_MSG_BUFFER_OVERFLOW, + "size of acl is more" + "than the buffer"); + goto out; + } + + acl_xattr = GF_REALLOC(acl_xattr, acl_size); + if (!acl_xattr) + goto out; + + ret = dict_set_bin(xattr, POSIX_ACL_DEFAULT_XATTR, acl_xattr, acl_size); + if (ret) { + gf_msg(this->name, 
GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL, + "failed to set" + "xdata for acl"); + GF_FREE(acl_xattr); + goto out; + } + } + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xattr); + + if (xattr) + dict_unref(xattr); + + if (subvol_xattrs) + dict_unref(subvol_xattrs); + + return 0; +} + +int +posix_xattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *key, dict_t *dict, dict_t *xdata) +{ + int ret = -1; + int op_ret = -1; + const char *fname = NULL; + char *real_path = NULL; + char *found = NULL; + DIR *fd = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + + MAKE_INODE_HANDLE(real_path, this, loc, NULL); + if (!real_path) { + return -ESTALE; + } + if (op_ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "posix_xattr_get_real_filename (lstat) on " + "gfid-handle %s (path: %s) failed", + real_path, loc->path); + return -errno; + } + + fd = sys_opendir(real_path); + if (!fd) + return -errno; + + fname = key + SLEN(GF_XATTR_GET_REAL_FILENAME_KEY); + + for (;;) { + errno = 0; + entry = sys_readdir(fd, scratch); + if (!entry || errno != 0) + break; + + if (strcasecmp(entry->d_name, fname) == 0) { + found = gf_strdup(entry->d_name); + if (!found) { + (void)sys_closedir(fd); + return -ENOMEM; + } + break; + } + } + + (void)sys_closedir(fd); + + if (!found) + return -ENOATTR; + + ret = dict_set_dynstr(dict, (char *)key, found); + if (ret) { + GF_FREE(found); + return -ENOMEM; + } + ret = strlen(found) + 1; + + return ret; +} + +int +posix_get_ancestry_directory(xlator_t *this, inode_t *leaf_inode, + gf_dirent_t *head, char **path, int type, + int32_t *op_errno, dict_t *xdata) +{ + ssize_t handle_size = 0; + struct posix_private *priv = NULL; + inode_t *inode = NULL; + int ret = -1; + char dirpath[PATH_MAX] = { + 0, + }; + + priv = this->private; + + handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length); + + ret = 
posix_make_ancestryfromgfid( + this, dirpath, PATH_MAX + 1, head, type | POSIX_ANCESTRY_PATH, + leaf_inode->gfid, handle_size, priv->base_path, leaf_inode->table, + &inode, xdata, op_errno); + if (ret < 0) + goto out; + + /* there is already a reference in loc->inode */ + inode_unref(inode); + + if ((type & POSIX_ANCESTRY_PATH) && (path != NULL)) { + if (strcmp(dirpath, "/")) + dirpath[strlen(dirpath) - 1] = '\0'; + + *path = gf_strdup(dirpath); + } + +out: + return ret; +} + +int32_t +posix_links_in_same_directory(char *dirpath, int count, inode_t *leaf_inode, + inode_t *parent, struct stat *stbuf, + gf_dirent_t *head, char **path, int type, + dict_t *xdata, int32_t *op_errno) +{ + int op_ret = -1; + gf_dirent_t *gf_entry = NULL; + xlator_t *this = NULL; + struct posix_private *priv = NULL; + DIR *dirp = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + char temppath[PATH_MAX] = { + 0, + }; + char scr[PATH_MAX * 4] = { + 0, + }; + + this = THIS; + + priv = this->private; + + dirp = sys_opendir(dirpath); + if (!dirp) { + *op_errno = errno; + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_OPEN_FAILED, + "could not opendir %s", dirpath); + goto out; + } + + while (count > 0) { + errno = 0; + entry = sys_readdir(dirp, scratch); + if (!entry || errno != 0) + break; + + if (entry->d_ino != stbuf->st_ino) + continue; + + /* Linking an inode here, can cause a race in posix_acl. + Parent inode gets linked here, but before + it reaches posix_acl_readdirp_cbk, create/lookup can + come on a leaf-inode, as parent-inode-ctx not yet updated + in posix_acl_readdirp_cbk, create and lookup can fail + with EACCESS. 
So do the inode linking in the quota xlator + + linked_inode = inode_link (leaf_inode, parent, + entry->d_name, NULL); + + GF_ASSERT (linked_inode == leaf_inode); + inode_unref (linked_inode);*/ + + if (type & POSIX_ANCESTRY_DENTRY) { + loc_t loc = { + 0, + }; + + loc.inode = inode_ref(leaf_inode); + gf_uuid_copy(loc.gfid, leaf_inode->gfid); + + (void)snprintf(temppath, sizeof(temppath), "%s/%s", dirpath, + entry->d_name); + + gf_entry = gf_dirent_for_name(entry->d_name); + if (!gf_entry) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, "gf_entry is NULL"); + op_ret = -1; + *op_errno = ENOMEM; + inode_unref(loc.inode); + goto out; + } + gf_entry->inode = inode_ref(leaf_inode); + gf_entry->dict = posix_xattr_fill(this, temppath, &loc, NULL, -1, + xdata, NULL); + iatt_from_stat(&(gf_entry->d_stat), stbuf); + + list_add_tail(&gf_entry->list, &head->list); + loc_wipe(&loc); + } + + if (type & POSIX_ANCESTRY_PATH) { + (void)snprintf(temppath, sizeof(temppath), "%s/%s", + &dirpath[priv->base_path_length], entry->d_name); + if (!*path) { + *path = gf_strdup(temppath); + } else { + /* creating a colon separated */ + /* list of hard links */ + (void)snprintf(scr, sizeof(scr), "%s:%s", *path, temppath); + + GF_FREE(*path); + *path = gf_strdup(scr); + } + if (!*path) { + op_ret = -1; + *op_errno = ENOMEM; + goto out; + } + } + + count--; + } + + op_ret = 0; +out: + if (dirp) { + op_ret = sys_closedir(dirp); + if (op_ret == -1) { + *op_errno = errno; + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_CLOSE_FAILED, + "closedir failed"); + } + } + + return op_ret; +} + +int +posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode, + gf_dirent_t *head, char **path, int type, + int32_t *op_errno, dict_t *xdata) +{ + size_t remaining_size = 0; + int op_ret = -1, pathlen = -1; + ssize_t handle_size = 0; + uuid_t pgfid = { + 0, + }; + int nlink_samepgfid = 0; + struct stat stbuf = { + 0, + }; + char *list = NULL; + int32_t list_offset = 0; + struct posix_private *priv = 
NULL; + ssize_t size = 0; + inode_t *parent = NULL; + loc_t *loc = NULL; + char *leaf_path = NULL; + char key[4096] = { + 0, + }; + char dirpath[PATH_MAX] = { + 0, + }; + char pgfidstr[UUID_CANONICAL_FORM_LEN + 1] = { + 0, + }; + int len; + + priv = this->private; + + loc = GF_CALLOC(1, sizeof(*loc), gf_posix_mt_char); + if (loc == NULL) { + op_ret = -1; + *op_errno = ENOMEM; + goto out; + } + + gf_uuid_copy(loc->gfid, leaf_inode->gfid); + + MAKE_INODE_HANDLE(leaf_path, this, loc, NULL); + if (!leaf_path) { + GF_FREE(loc); + *op_errno = ESTALE; + goto out; + } + GF_FREE(loc); + + size = sys_llistxattr(leaf_path, NULL, 0); + if (size == -1) { + *op_errno = errno; + if ((errno == ENOTSUP) || (errno == ENOSYS)) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, + GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting brick" + " with 'user_xattr' flag)"); + + } else { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_XATTR_FAILED, + "listxattr failed on" + "%s", + leaf_path); + } + + goto out; + } + + if (size == 0) { + op_ret = 0; + goto out; + } + + list = alloca(size); + if (!list) { + *op_errno = errno; + goto out; + } + + size = sys_llistxattr(leaf_path, list, size); + if (size < 0) { + op_ret = -1; + *op_errno = errno; + goto out; + } + remaining_size = size; + list_offset = 0; + + op_ret = sys_lstat(leaf_path, &stbuf); + if (op_ret == -1) { + *op_errno = errno; + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED, + "lstat failed on %s", leaf_path); + goto out; + } + + while (remaining_size > 0) { + len = snprintf(key, sizeof(key), "%s", list + list_offset); + if (strncmp(key, PGFID_XATTR_KEY_PREFIX, + SLEN(PGFID_XATTR_KEY_PREFIX)) != 0) + goto next; + + op_ret = sys_lgetxattr(leaf_path, key, &nlink_samepgfid, + sizeof(nlink_samepgfid)); + if (op_ret == -1) { + *op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "getxattr failed on " + "%s: key = %s ", + leaf_path, key); + goto out; + } + + 
nlink_samepgfid = ntoh32(nlink_samepgfid); + + snprintf(pgfidstr, sizeof(pgfidstr), "%s", + key + SLEN(PGFID_XATTR_KEY_PREFIX)); + gf_uuid_parse(pgfidstr, pgfid); + + handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length); + + /* constructing the absolute real path of parent dir */ + snprintf(dirpath, sizeof(dirpath), "%s", priv->base_path); + pathlen = PATH_MAX + 1 - priv->base_path_length; + + op_ret = posix_make_ancestryfromgfid( + this, dirpath + priv->base_path_length, pathlen, head, + type | POSIX_ANCESTRY_PATH, pgfid, handle_size, priv->base_path, + leaf_inode->table, &parent, xdata, op_errno); + if (op_ret < 0) { + goto next; + } + + dirpath[strlen(dirpath) - 1] = '\0'; + + posix_links_in_same_directory(dirpath, nlink_samepgfid, leaf_inode, + parent, &stbuf, head, path, type, xdata, + op_errno); + + if (parent != NULL) { + inode_unref(parent); + parent = NULL; + } + + next: + remaining_size -= (len + 1); + list_offset += (len + 1); + } /* while (remaining_size > 0) */ + + op_ret = 0; + +out: + return op_ret; +} + +int +posix_get_ancestry(xlator_t *this, inode_t *leaf_inode, gf_dirent_t *head, + char **path, int type, int32_t *op_errno, dict_t *xdata) +{ + int ret = -1; + struct posix_private *priv = NULL; + + priv = this->private; + + if (IA_ISDIR(leaf_inode->ia_type)) { + ret = posix_get_ancestry_directory(this, leaf_inode, head, path, type, + op_errno, xdata); + } else { + if (!priv->update_pgfid_nlinks) + goto out; + ret = posix_get_ancestry_non_directory(this, leaf_inode, head, path, + type, op_errno, xdata); + } + +out: + if (ret && path && *path) { + GF_FREE(*path); + *path = NULL; + } + + return ret; +} + +/** + * posix_getxattr - this function returns a dictionary with all the + * key:value pair present as xattr. used for + * both 'listxattr' and 'getxattr'. 
+ */ +int32_t +posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + struct posix_private *priv = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + char *value = NULL; + char *real_path = NULL; + dict_t *dict = NULL; + int ret = -1; + char *path = NULL; + char *rpath = NULL; + ssize_t size = 0; + char *list = NULL; + int32_t list_offset = 0; + size_t remaining_size = 0; + char *host_buf = NULL; + char *keybuffer = NULL; + int keybuff_len; + char *value_buf = NULL; + gf_boolean_t have_val = _gf_false; + struct iatt buf = { + 0, + }; + dict_t *xattr_rsp = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(loc, out); + VALIDATE_OR_GOTO(this->private, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + MAKE_INODE_HANDLE(real_path, this, loc, NULL); + + op_ret = -1; + priv = this->private; + + ret = posix_handle_georep_xattrs(frame, name, &op_errno, _gf_true); + if (ret == -1) { + op_ret = -1; + /* errno should be set from the above function*/ + goto out; + } + + ret = posix_handle_mdata_xattr(frame, name, &op_errno); + if (ret == -1) { + op_ret = -1; + /* errno should be set from the above function*/ + goto out; + } + + if (name && posix_is_gfid2path_xattr(name)) { + op_ret = -1; + op_errno = ENOATTR; + goto out; + } + + dict = dict_new(); + if (!dict) { + op_errno = ENOMEM; + goto out; + } + + if (loc->inode && name && GF_POSIX_ACL_REQUEST(name)) { + ret = posix_pacl_get(real_path, -1, name, &value); + if (ret || !value) { + op_errno = errno; + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_ACL_FAILED, + "could not get acl (%s) for" + "gfid-handle %s (path: %s)", + name, real_path, loc->path); + op_ret = -1; + goto out; + } + + ret = dict_set_dynstr(dict, (char *)name, value); + if (ret < 0) { + GF_FREE(value); + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_ACL_FAILED, + "could not set acl (%s) for %s " + "(gfid-handle: %s) in 
dictionary", + name, loc->path, real_path); + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + size = ret; + goto done; + } + + if (loc->inode && name && + (strncmp(name, GF_XATTR_GET_REAL_FILENAME_KEY, + SLEN(GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)) { + ret = posix_xattr_get_real_filename(frame, this, loc, name, dict, + xdata); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + if (op_errno == ENOATTR) { + gf_msg_debug(this->name, 0, + "Failed to get " + "real filename (%s, %s)", + loc->path, name); + } else { + gf_msg(this->name, GF_LOG_WARNING, op_errno, + P_MSG_GETTING_FILENAME_FAILED, + "Failed to get real filename (%s, %s):", loc->path, + name); + } + goto out; + } + + size = ret; + goto done; + } + + if (loc->inode && name && !strcmp(name, GLUSTERFS_OPEN_FD_COUNT)) { + if (!fd_list_empty(loc->inode)) { + ret = dict_set_uint32(dict, (char *)name, 1); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "Failed to set " + "dictionary value for %s", + name); + op_errno = ENOMEM; + goto out; + } + } else { + ret = dict_set_uint32(dict, (char *)name, 0); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "Failed to set " + "dictionary value for %s", + name); + op_errno = ENOMEM; + goto out; + } + } + goto done; + } + if (loc->inode && name && (XATTR_IS_PATHINFO(name))) { + VALIDATE_OR_GOTO(this->private, out); + if (LOC_HAS_ABSPATH(loc)) { + MAKE_REAL_PATH(rpath, this, loc->path); + } else { + rpath = real_path; + } + size = gf_asprintf( + &host_buf, "<POSIX(%s):%s:%s>", priv->base_path, + ((priv->node_uuid_pathinfo && !gf_uuid_is_null(priv->glusterd_uuid)) + ? 
uuid_utoa(priv->glusterd_uuid) + : priv->hostname), + rpath); + if (size < 0) { + op_errno = ENOMEM; + goto out; + } + ret = dict_set_dynstr(dict, (char *)name, host_buf); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "could not set value" + " (%s) in dictionary", + host_buf); + GF_FREE(host_buf); + op_errno = ENOMEM; + goto out; + } + + goto done; + } + + if (loc->inode && name && (strcmp(name, GF_XATTR_NODE_UUID_KEY) == 0) && + !gf_uuid_is_null(priv->glusterd_uuid)) { + size = gf_asprintf(&host_buf, "%s", uuid_utoa(priv->glusterd_uuid)); + if (size == -1) { + op_errno = ENOMEM; + goto out; + } + ret = dict_set_dynstr(dict, GF_XATTR_NODE_UUID_KEY, host_buf); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED, + "could not set value" + "(%s) in dictionary", + host_buf); + GF_FREE(host_buf); + op_errno = -ret; + goto out; + } + goto done; + } + + if (loc->inode && name && (strcmp(name, GFID_TO_PATH_KEY) == 0)) { + ret = inode_path(loc->inode, NULL, &path); + if (ret < 0) { + op_errno = -ret; + gf_msg(this->name, GF_LOG_WARNING, op_errno, + P_MSG_INODE_PATH_GET_FAILED, + "%s: could not get " + "inode path", + uuid_utoa(loc->inode->gfid)); + goto out; + } + + size = ret; + ret = dict_set_dynstr(dict, GFID_TO_PATH_KEY, path); + if (ret < 0) { + op_errno = ENOMEM; + GF_FREE(path); + goto out; + } + goto done; + } + + if (loc->inode && name && (strcmp(name, GFID2PATH_VIRT_XATTR_KEY) == 0)) { + if (!priv->gfid2path) { + op_errno = ENOATTR; + op_ret = -1; + goto out; + } + ret = posix_get_gfid2path(this, loc->inode, real_path, &op_errno, dict); + if (ret < 0) { + op_ret = -1; + goto out; + } + size = ret; + goto done; + } + + if (loc->inode && name && (strcmp(name, GET_ANCESTRY_PATH_KEY) == 0)) { + int type = POSIX_ANCESTRY_PATH; + + op_ret = posix_get_ancestry(this, loc->inode, NULL, &path, type, + &op_errno, xdata); + if (op_ret < 0) { + op_ret = -1; + op_errno = ENODATA; + goto out; + } + size = op_ret; 
+ op_ret = dict_set_dynstr(dict, GET_ANCESTRY_PATH_KEY, path); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -op_ret, + P_MSG_GET_KEY_VALUE_FAILED, + "could not get " + "value for key (%s)", + GET_ANCESTRY_PATH_KEY); + GF_FREE(path); + op_errno = ENOMEM; + goto out; + } + + goto done; + } + + if (loc->inode && name && + (strncmp(name, GLUSTERFS_GET_OBJECT_SIGNATURE, + SLEN(GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0)) { + op_ret = posix_get_objectsignature(real_path, dict); + if (op_ret < 0) { + op_errno = -op_ret; + goto out; + } + + goto done; + } + + /* here allocate value_buf of 8192 bytes to avoid one extra getxattr + call,If buffer size is small to hold the xattr result then it will + allocate a new buffer value of required size and call getxattr again + */ + + value_buf = alloca(XATTR_VAL_BUF_SIZE); + if (name) { + char *key = (char *)name; + + keybuffer = key; +#if defined(GF_DARWIN_HOST_OS_DISABLED) + if (priv->xattr_user_namespace == XATTR_STRIP) { + if (strncmp(key, "user.", 5) == 0) { + key += 5; + gf_msg_debug(this->name, 0, + "getxattr for file %s (gfid-handle: %s)" + " stripping user key: %s -> %s", + loc->path, real_path, keybuffer, key); + } + } +#endif + size = sys_lgetxattr(real_path, key, value_buf, XATTR_VAL_BUF_SIZE - 1); + if (size >= 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED, + "getxattr failed due to overflow of buffer" + " on gfid-handle %s (path: %s) : %s ", + real_path, loc->path, key); + size = sys_lgetxattr(real_path, key, NULL, 0); + } + if (size == -1) { + op_errno = errno; + if ((op_errno == ENOTSUP) || (op_errno == ENOSYS)) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, + GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting" + " brick with 'user_xattr' " + "flag)"); + } + if ((op_errno == ENOATTR) || (op_errno == ENODATA)) { + gf_msg_debug(this->name, 0, + "No such attribute:%s for file %s (path: %s)", + key, 
real_path, loc->path); + } else { + gf_msg(this->name, GF_LOG_ERROR, op_errno, + P_MSG_XATTR_FAILED, + "getxattr failed on " + "%s (path: %s): %s ", + real_path, loc->path, key); + } + goto out; + } + } + value = GF_MALLOC(size + 1, gf_posix_mt_char); + if (!value) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + if (have_val) { + memcpy(value, value_buf, size); + } else { + bzero(value, size + 1); + size = sys_lgetxattr(real_path, key, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "getxattr failed on %s (path: %s): key = %s", real_path, + loc->path, key); + GF_FREE(value); + goto out; + } + } + value[size] = '\0'; + op_ret = dict_set_dynptr(dict, key, value, size); + if (op_ret < 0) { + op_errno = -op_ret; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_DICT_SET_FAILED, + "dict set operation " + "on %s (gfid-handle: %s) for the key %s failed.", + loc->path, real_path, key); + GF_FREE(value); + goto out; + } + + goto done; + } + + have_val = _gf_false; + size = sys_llistxattr(real_path, value_buf, XATTR_VAL_BUF_SIZE - 1); + if (size > 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED, + "listxattr failed due to overflow of buffer" + " on %s (path: %s) ", + real_path, loc->path); + size = sys_llistxattr(real_path, NULL, 0); + } + if (size == -1) { + op_errno = errno; + if ((errno == ENOTSUP) || (errno == ENOSYS)) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, + GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting" + " brick with 'user_xattr' " + "flag)"); + } else { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "listxattr failed on %s (path: %s)", real_path, + loc->path); + } + goto out; + } + if (size == 0) + goto done; + } + list = alloca(size); + if (!list) { + op_errno = errno; + goto out; + } + if (have_val) { + memcpy(list, value_buf, 
size); + } else { + size = sys_llistxattr(real_path, list, size); + if (size < 0) { + op_ret = -1; + op_errno = errno; + goto out; + } + } + remaining_size = size; + list_offset = 0; + keybuffer = alloca(XATTR_KEY_BUF_SIZE); + while (remaining_size > 0) { + keybuff_len = snprintf(keybuffer, XATTR_KEY_BUF_SIZE, "%s", + list + list_offset); + + ret = posix_handle_georep_xattrs(frame, keybuffer, NULL, _gf_false); + if (ret == -1) + goto ignore; + + ret = posix_handle_mdata_xattr(frame, keybuffer, &op_errno); + if (ret == -1) { + goto ignore; + } + + if (posix_is_gfid2path_xattr(keybuffer)) { + goto ignore; + } + + have_val = _gf_false; + size = sys_lgetxattr(real_path, keybuffer, value_buf, + XATTR_VAL_BUF_SIZE - 1); + if (size >= 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg(this->name, GF_LOG_INFO, op_errno, P_MSG_XATTR_FAILED, + "getxattr failed due to overflow of" + " buffer on %s (path: %s): %s ", + real_path, loc->path, keybuffer); + size = sys_lgetxattr(real_path, keybuffer, NULL, 0); + } + if (size == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "getxattr failed on" + " %s (path: %s): key = %s ", + real_path, loc->path, keybuffer); + goto out; + } + } + value = GF_MALLOC(size + 1, gf_posix_mt_char); + if (!value) { + op_errno = errno; + goto out; + } + if (have_val) { + memcpy(value, value_buf, size); + } else { + bzero(value, size + 1); + size = sys_lgetxattr(real_path, keybuffer, value, size); + if (size == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "getxattr failed on" + " %s (path: %s): key = %s ", + real_path, loc->path, keybuffer); + GF_FREE(value); + goto out; + } + } + value[size] = '\0'; +#ifdef GF_DARWIN_HOST_OS + /* The protocol expect namespace for now */ + char *newkey = NULL; + gf_add_prefix(XATTR_USER_PREFIX, keybuffer, &newkey); + keybuff_len = snprintf(keybuffer, sizeof(keybuffer), "%s", newkey); + GF_FREE(newkey); +#endif + 
op_ret = dict_set_dynptr(dict, keybuffer, value, size); + if (op_ret < 0) { + op_errno = -op_ret; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_DICT_SET_FAILED, + "dict set operation " + "on %s (gfid-handle: %s) for the key %s failed.", + loc->path, real_path, keybuffer); + GF_FREE(value); + goto out; + } + + ignore: + remaining_size -= keybuff_len + 1; + list_offset += keybuff_len + 1; + + } /* while (remaining_size > 0) */ + +done: + op_ret = size; + + if (xdata && (op_ret >= 0)) { + xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, + &buf); + } + + if (dict) { + dict_del(dict, GFID_XATTR_KEY); + dict_del(dict, GF_XATTR_VOL_ID_KEY); + } + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xattr_rsp); + + if (xattr_rsp) + dict_unref(xattr_rsp); + + if (dict) { + dict_unref(dict); + } + + return 0; +} + +int32_t +posix_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + struct posix_fd *pfd = NULL; + int _fd = -1; + int32_t list_offset = 0; + ssize_t size = 0; + size_t remaining_size = 0; + char *value = NULL; + char *list = NULL; + dict_t *dict = NULL; + int ret = -1; + char key[4096] = { + 0, + }; + int key_len; + char *value_buf = NULL; + gf_boolean_t have_val = _gf_false; + struct iatt buf = { + 0, + }; + dict_t *xattr_rsp = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + SET_FS_ID(frame->root->uid, frame->root->gid); + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + op_ret = -1; + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + + _fd = pfd->fd; + + /* Get the total size */ + dict = dict_new(); + if (!dict) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + if (name && !strcmp(name, GLUSTERFS_OPEN_FD_COUNT)) { + ret = 
dict_set_uint32(dict, (char *)name, 1); + if (ret < 0) { + op_ret = -1; + size = -1; + op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "Failed to set " + "dictionary value for %s", + name); + goto out; + } + goto done; + } + + if (name && strncmp(name, GLUSTERFS_GET_OBJECT_SIGNATURE, + SLEN(GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0) { + op_ret = posix_fdget_objectsignature(_fd, dict); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "posix_fdget_objectsignature failed"); + op_errno = -op_ret; + op_ret = -1; + size = -1; + goto out; + } + + goto done; + } + + /* here allocate value_buf of 8192 bytes to avoid one extra getxattr + call,If buffer size is small to hold the xattr result then it will + allocate a new buffer value of required size and call getxattr again + */ + value_buf = alloca(XATTR_VAL_BUF_SIZE); + + if (name) { + key_len = snprintf(key, sizeof(key), "%s", name); +#ifdef GF_DARWIN_HOST_OS + struct posix_private *priv = NULL; + priv = this->private; + if (priv->xattr_user_namespace == XATTR_STRIP) { + char *newkey = NULL; + gf_add_prefix(XATTR_USER_PREFIX, key, &newkey); + key_len = snprintf(key, sizeof(key), "%s", newkey); + GF_FREE(newkey); + } +#endif + size = sys_fgetxattr(_fd, key, value_buf, XATTR_VAL_BUF_SIZE - 1); + if (size >= 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED, + "fgetxattr failed due to overflow of" + "buffer on %s ", + key); + size = sys_fgetxattr(_fd, key, NULL, 0); + } + if (size == -1) { + op_errno = errno; + if (errno == ENODATA || errno == ENOATTR) { + gf_msg_debug(this->name, 0, + "fgetxattr" + " failed on key %s (%s)", + key, strerror(op_errno)); + } else { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "fgetxattr" + " failed on key %s", + key); + } + goto done; + } + } + value = GF_MALLOC(size + 1, gf_posix_mt_char); + if (!value) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } 
+ if (have_val) { + memcpy(value, value_buf, size); + } else { + bzero(value, size + 1); + size = sys_fgetxattr(_fd, key, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "fgetxattr" + " failed on fd %p for the key %s ", + fd, key); + GF_FREE(value); + goto out; + } + } + + value[size] = '\0'; + op_ret = dict_set_dynptr(dict, key, value, size); + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DICT_SET_FAILED, + "dict set operation " + "on key %s failed", + key); + GF_FREE(value); + goto out; + } + + goto done; + } + size = sys_flistxattr(_fd, value_buf, XATTR_VAL_BUF_SIZE - 1); + if (size > 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED, + "listxattr failed due to overflow of buffer" + " on %p ", + fd); + size = sys_flistxattr(_fd, NULL, 0); + } + if (size == -1) { + op_ret = -1; + op_errno = errno; + if ((errno == ENOTSUP) || (errno == ENOSYS)) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, + GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting " + "brick with 'user_xattr' flag)"); + } else { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "listxattr failed " + "on %p:", + fd); + } + goto out; + } + if (size == 0) + goto done; + } + list = alloca(size + 1); + if (!list) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + if (have_val) + memcpy(list, value_buf, size); + else + size = sys_flistxattr(_fd, list, size); + + remaining_size = size; + list_offset = 0; + while (remaining_size > 0) { + if (*(list + list_offset) == '\0') + break; + + key_len = snprintf(key, sizeof(key), "%s", list + list_offset); + have_val = _gf_false; + size = sys_fgetxattr(_fd, key, value_buf, XATTR_VAL_BUF_SIZE - 1); + if (size >= 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg(this->name, 
GF_LOG_INFO, errno, P_MSG_XATTR_FAILED, + "fgetxattr failed due to overflow of buffer" + " on fd %p: for the key %s ", + fd, key); + size = sys_fgetxattr(_fd, key, NULL, 0); + } + if (size == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "fgetxattr failed " + "on fd %p for the key %s ", + fd, key); + break; + } + } + value = GF_MALLOC(size + 1, gf_posix_mt_char); + if (!value) { + op_ret = -1; + op_errno = errno; + goto out; + } + if (have_val) { + memcpy(value, value_buf, size); + } else { + bzero(value, size + 1); + size = sys_fgetxattr(_fd, key, value, size); + if (size == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "fgetxattr failed o" + "n the fd %p for the key %s ", + fd, key); + GF_FREE(value); + break; + } + } + value[size] = '\0'; + + op_ret = dict_set_dynptr(dict, key, value, size); + if (op_ret) { + op_errno = -op_ret; + op_ret = -1; + gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DICT_SET_FAILED, + "dict set operation " + "failed on key %s", + key); + GF_FREE(value); + goto out; + } + remaining_size -= key_len + 1; + list_offset += key_len + 1; + + } /* while (remaining_size > 0) */ + +done: + op_ret = size; + + if (xdata && (op_ret >= 0)) { + xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, pfd->fd, xdata, + &buf); + } + + if (dict) { + dict_del(dict, GFID_XATTR_KEY); + dict_del(dict, GF_XATTR_VOL_ID_KEY); + } + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, xattr_rsp); + + if (xattr_rsp) + dict_unref(xattr_rsp); + + if (dict) + dict_unref(dict); + + return 0; +} + +static int +_handle_fsetxattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp) +{ + posix_xattr_filler_t *filler = NULL; + + filler = tmp; + + return posix_fhandle_pair(filler->frame, filler->this, filler->fdnum, k, v, + filler->flags, filler->stbuf, filler->fd); +} + +int32_t +posix_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t 
*dict, + int flags, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + struct posix_fd *pfd = NULL; + int _fd = -1; + int ret = -1; + struct iatt preop = { + 0, + }; + struct iatt postop = { + 0, + }; + dict_t *xattr = NULL; + posix_xattr_filler_t filler = { + 0, + }; + struct posix_private *priv = NULL; + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID(frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + VALIDATE_OR_GOTO(dict, out); + + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + _fd = pfd->fd; + + ret = posix_fdstat(this, fd->inode, pfd->fd, &preop); + if (ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_FSTAT_FAILED, + "fsetxattr (fstat)" + "failed on fd=%p", + fd); + goto out; + } + + dict_del(dict, GFID_XATTR_KEY); + dict_del(dict, GF_XATTR_VOL_ID_KEY); + + filler.fdnum = _fd; + filler.this = this; + filler.frame = frame; + filler.stbuf = &preop; + filler.fd = fd; +#ifdef GF_DARWIN_HOST_OS + filler.flags = map_xattr_flags(flags); +#else + filler.flags = flags; +#endif + op_ret = dict_foreach(dict, _handle_fsetxattr_keyvalue_pair, &filler); + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + } + + if (!ret && xdata && dict_get(xdata, GLUSTERFS_DURABLE_OP)) { + op_ret = sys_fsync(_fd); + if (op_ret < 0) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_WARNING, errno, + P_MSG_DURABILITY_REQ_NOT_SATISFIED, + "could not satisfy durability request: " + "reason "); + } + } + + ret = posix_fdstat(this, fd->inode, pfd->fd, &postop); + if (ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED, + "fsetxattr (fstat)" + "failed on fd=%p", + fd); + goto out; + 
} + xattr = dict_new(); + if (!xattr) + goto out; + + ret = posix_set_iatt_in_dict(xattr, &preop, &postop); + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xattr); + + if (xattr) + dict_unref(xattr); + + return 0; +} + +int +_posix_remove_xattr(dict_t *dict, char *key, data_t *value, void *data) +{ + int32_t op_ret = 0; + xlator_t *this = NULL; + posix_xattr_filler_t *filler = NULL; + + filler = (posix_xattr_filler_t *)data; + this = filler->this; +#ifdef GF_DARWIN_HOST_OS + struct posix_private *priv = NULL; + priv = (struct posix_private *)this->private; + char *newkey = NULL; + if (priv->xattr_user_namespace == XATTR_STRIP) { + gf_remove_prefix(XATTR_USER_PREFIX, key, &newkey); + gf_msg_debug("remove_xattr", 0, "key %s => %s", key, newkey); + key = newkey; + } +#endif + /* Bulk remove xattr is internal fop in gluster. Some of the xattrs may + * have special behavior. Ex: removexattr("posix.system_acl_access"), + * removes more than one xattr on the file that could be present in the + * bulk-removal request. Removexattr of these deleted xattrs will fail + * with either ENODATA/ENOATTR. Since all this fop cares is removal of the + * xattrs in bulk-remove request and if they are already deleted, it can be + * treated as success. + */ + + if (filler->real_path) + op_ret = sys_lremovexattr(filler->real_path, key); + else + op_ret = sys_fremovexattr(filler->fdnum, key); + + if (op_ret == -1) { + if (errno == ENODATA || errno == ENOATTR) + op_ret = 0; + } + + if (op_ret == -1) { + filler->op_errno = errno; + if (errno != ENOATTR && errno != ENODATA && errno != EPERM) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "removexattr failed on " + "file/dir %s with gfid: %s (for %s)", + filler->real_path ? 
filler->real_path : "", + uuid_utoa(filler->inode->gfid), key); + } + } +#ifdef GF_DARWIN_HOST_OS + GF_FREE(newkey); +#endif + return op_ret; +} + +int +posix_common_removexattr(call_frame_t *frame, loc_t *loc, fd_t *fd, + const char *name, dict_t *xdata, int *op_errno, + dict_t **xdata_rsp) +{ + gf_boolean_t bulk_removexattr = _gf_false; + gf_boolean_t disallow = _gf_false; + char *real_path = NULL; + struct posix_fd *pfd = NULL; + int op_ret = 0; + struct iatt preop = { + 0, + }; + struct iatt postop = { + 0, + }; + int ret = 0; + int _fd = -1; + xlator_t *this = frame->this; + inode_t *inode = NULL; + posix_xattr_filler_t filler = {0}; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID(frame->root->uid, frame->root->gid); + + if (loc) { + MAKE_INODE_HANDLE(real_path, this, loc, NULL); + if (!real_path) { + op_ret = -1; + *op_errno = ESTALE; + goto out; + } + inode = loc->inode; + } else { + op_ret = posix_fd_ctx_get(fd, this, &pfd, op_errno); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, *op_errno, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + _fd = pfd->fd; + inode = fd->inode; + } + + if (posix_is_gfid2path_xattr(name)) { + op_ret = -1; + *op_errno = ENOATTR; + goto out; + } + + if (loc) { + ret = posix_pstat(this, inode, loc->gfid, real_path, &preop, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PSTAT_FAILED, + "pstat operaton failed on %s", real_path); + } + } else { + ret = posix_fdstat(this, inode, _fd, &preop); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FDSTAT_FAILED, + "fdstat operaton failed on %s", real_path ? real_path : ""); + } + } + + if (gf_get_index_by_elem(disallow_removexattrs, (char *)name) >= 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED, + "Remove xattr called on %s for file/dir %s with gfid: " + "%s", + name, real_path ? 
real_path : "", uuid_utoa(inode->gfid)); + op_ret = -1; + *op_errno = EPERM; + goto out; + } else if (posix_is_bulk_removexattr((char *)name, xdata)) { + bulk_removexattr = _gf_true; + (void)dict_has_key_from_array(xdata, disallow_removexattrs, &disallow); + if (disallow) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED, + "Bulk removexattr has keys that shouldn't be " + "removed for file/dir %s with gfid: %s", + real_path ? real_path : "", uuid_utoa(inode->gfid)); + op_ret = -1; + *op_errno = EPERM; + goto out; + } + } + + if (bulk_removexattr) { + filler.real_path = real_path; + filler.this = this; + filler.fdnum = _fd; + filler.inode = inode; + op_ret = dict_foreach(xdata, _posix_remove_xattr, &filler); + if (op_ret) { + *op_errno = filler.op_errno; + goto out; + } + } else { + if (loc) + op_ret = sys_lremovexattr(real_path, name); + else + op_ret = sys_fremovexattr(_fd, name); + if (op_ret == -1) { + *op_errno = errno; + if (*op_errno != ENOATTR && *op_errno != ENODATA && + *op_errno != EPERM) { + gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_XATTR_FAILED, + "removexattr on %s with gfid %s " + "(for %s)", + real_path, uuid_utoa(inode->gfid), name); + } + goto out; + } + } + + if (loc) { + posix_set_ctime(frame, this, real_path, -1, inode, NULL); + ret = posix_pstat(this, inode, loc->gfid, real_path, &postop, + _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PSTAT_FAILED, + "pstat operaton failed on %s", real_path); + } + } else { + posix_set_ctime(frame, this, NULL, _fd, inode, NULL); + ret = posix_fdstat(this, inode, _fd, &postop); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FDSTAT_FAILED, + "fdstat operaton failed on %s", real_path); + } + } + if (ret) + goto out; + *xdata_rsp = dict_new(); + if (!*xdata_rsp) + goto out; + + ret = posix_set_iatt_in_dict(*xdata_rsp, &preop, &postop); + + op_ret = 0; +out: + SET_TO_OLD_FS_ID(); + return op_ret; +} + +int32_t +posix_removexattr(call_frame_t 
*frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + int op_ret = -1; + int op_errno = EINVAL; + dict_t *xdata_rsp = NULL; + + VALIDATE_OR_GOTO(loc, out); + + op_ret = posix_common_removexattr(frame, loc, NULL, name, xdata, &op_errno, + &xdata_rsp); +out: + STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata_rsp); + + if (xdata_rsp) + dict_unref(xdata_rsp); + + return 0; +} + +int32_t +posix_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + dict_t *xdata_rsp = NULL; + + VALIDATE_OR_GOTO(fd, out); + + op_ret = posix_common_removexattr(frame, NULL, fd, name, xdata, &op_errno, + &xdata_rsp); +out: + STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, xdata_rsp); + + if (xdata_rsp) + dict_unref(xdata_rsp); + + return 0; +} + +int32_t +posix_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, + dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int ret = -1; + struct posix_fd *pfd = NULL; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL, fd=%p", fd); + goto out; + } + + op_ret = 0; + +out: + STACK_UNWIND_STRICT(fsyncdir, frame, op_ret, op_errno, NULL); + + return 0; +} + +void +posix_print_xattr(dict_t *this, char *key, data_t *value, void *data) +{ + gf_msg_debug("posix", 0, "(key/val) = (%s/%d)", key, data_to_int32(value)); +} + +/** + * add_array - add two arrays of 32-bit numbers (stored in network byte order) + * dest = dest + src + * @count: number of 32-bit numbers + * FIXME: handle overflow + */ + +static void +__add_array(int32_t *dest, int32_t *src, int count) +{ + int i = 0; + int32_t destval = 0; + for (i = 0; i < count; i++) { + destval = ntoh32(dest[i]); + dest[i] = hton32(destval + 
/* 64-bit counterpart of __add_array: dest = dest + src, element by element,
 * with both arrays stored in network byte order. @count is the number of
 * 64-bit elements. */
static void
__add_long_array(int64_t *dest, int64_t *src, int count)
{
    int left = 0;

    for (left = count; left > 0; left--, dest++, src++) {
        *dest = hton64(ntoh64(*dest) + ntoh64(*src));
    }
}
+ int32_t dest_n; + }; +*/ +static void +__add_array_with_default(int32_t *dest, int32_t *src, int count) +{ + int i = 0; + int32_t destval = 0; + + for (i = 0; i < count; i++) { + destval = ntoh32(dest[i]); + if (destval == 0) + dest[i] = hton32(ntoh32(src[i]) + ntoh32(src[count + i])); + else + dest[i] = hton32(destval + ntoh32(src[i])); + } +} + +static void +__add_long_array_with_default(int64_t *dest, int64_t *src, int count) +{ + int i = 0; + int64_t destval = 0; + + for (i = 0; i < count; i++) { + destval = ntoh64(dest[i]); + if (destval == 0) + dest[i] = hton64(ntoh64(src[i]) + ntoh64(src[i + count])); + else + dest[i] = hton64(destval + ntoh64(src[i])); + } +} + +static int +_posix_handle_xattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp) +{ + int size = 0; + int count = 0; + int op_ret = 0; + int op_errno = 0; + gf_xattrop_flags_t optype = 0; + char *array = NULL; + char *dst_data = NULL; + inode_t *inode = NULL; + xlator_t *this = NULL; + posix_xattr_filler_t *filler = NULL; + posix_inode_ctx_t *ctx = NULL; + + filler = tmp; + + optype = (gf_xattrop_flags_t)(filler->flags); + this = filler->this; + inode = filler->inode; + count = v->len; + if (optype == GF_XATTROP_ADD_ARRAY_WITH_DEFAULT || + optype == GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT) + count = count / 2; + + array = GF_CALLOC(count, sizeof(char), gf_posix_mt_char); + +#ifdef GF_DARWIN_HOST_OS + struct posix_private *priv = NULL; + priv = this->private; + if (priv->xattr_user_namespace == XATTR_STRIP) { + if (strncmp(k, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) == 0) { + k += XATTR_USER_PREFIX_LEN; + } + } +#endif + op_ret = posix_inode_ctx_get_all(inode, this, &ctx); + if (op_ret < 0) { + op_errno = ENOMEM; + goto out; + } + + pthread_mutex_lock(&ctx->xattrop_lock); + { + if (filler->real_path) { + size = sys_lgetxattr(filler->real_path, k, (char *)array, count); + } else { + size = sys_fgetxattr(filler->fdnum, k, (char *)array, count); + } + + op_errno = errno; + if ((size == -1) && 
(op_errno != ENODATA) && (op_errno != ENOATTR)) { + if (op_errno == ENOTSUP) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, + GF_LOG_WARNING, + "Extended attributes not " + "supported by filesystem"); + } else if (op_errno != ENOENT || + !posix_special_xattr(marker_xattrs, k)) { + if (filler->real_path) + gf_msg(this->name, fop_log_level(GF_FOP_XATTROP, op_errno), + op_errno, P_MSG_XATTR_FAILED, + "getxattr failed on %s while " + "doing xattrop: Key:%s ", + filler->real_path, k); + else + gf_msg( + this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED, + "fgetxattr failed on gfid=%s " + "while doing xattrop: " + "Key:%s (%s)", + uuid_utoa(filler->inode->gfid), k, strerror(op_errno)); + } + + op_ret = -1; + goto unlock; + } + + if (size == -1 && optype == GF_XATTROP_GET_AND_SET) { + GF_FREE(array); + array = NULL; + } + + /* We only write back the xattr if it has been really modified + * (i.e. v->data is not all 0's). Otherwise we return its value + * but we don't update anything. + * + * If the xattr does not exist, a value of all 0's is returned + * without creating it. */ + size = count; + if (optype != GF_XATTROP_GET_AND_SET && + mem_0filled(v->data, v->len) == 0) + goto unlock; + + dst_data = array; + switch (optype) { + case GF_XATTROP_ADD_ARRAY: + __add_array((int32_t *)array, (int32_t *)v->data, count / 4); + break; + + case GF_XATTROP_ADD_ARRAY64: + __add_long_array((int64_t *)array, (int64_t *)v->data, + count / 8); + break; + + case GF_XATTROP_ADD_ARRAY_WITH_DEFAULT: + __add_array_with_default((int32_t *)array, (int32_t *)v->data, + count / 4); + break; + + case GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT: + __add_long_array_with_default((int64_t *)array, + (int64_t *)v->data, count / 8); + break; + + case GF_XATTROP_GET_AND_SET: + dst_data = v->data; + break; + + default: + gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_UNKNOWN_OP, + "Unknown xattrop type (%d)" + " on %s. 
Please send a bug report to " + "gluster-devel@gluster.org", + optype, filler->real_path); + op_ret = -1; + op_errno = EINVAL; + goto unlock; + } + + if (filler->real_path) { + size = sys_lsetxattr(filler->real_path, k, dst_data, count, 0); + } else { + size = sys_fsetxattr(filler->fdnum, k, (char *)dst_data, count, 0); + } + op_errno = errno; + } +unlock: + pthread_mutex_unlock(&ctx->xattrop_lock); + + if (op_ret == -1) + goto out; + + if (size == -1) { + if (filler->real_path) + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED, + "setxattr failed on %s " + "while doing xattrop: key=%s", + filler->real_path, k); + else + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED, + "fsetxattr failed on gfid=%s while doing " + "xattrop: key=%s (%s)", + uuid_utoa(filler->inode->gfid), k, strerror(op_errno)); + op_ret = -1; + goto out; + } else if (array) { + op_ret = dict_set_bin(filler->xattr, k, array, count); + if (op_ret) { + if (filler->real_path) + gf_msg_debug(this->name, 0, + "dict_set_bin failed (path=%s): " + "key=%s (%s)", + filler->real_path, k, strerror(-size)); + else + gf_msg_debug(this->name, 0, + "dict_set_bin failed (gfid=%s): " + "key=%s (%s)", + uuid_utoa(filler->inode->gfid), k, + strerror(-size)); + + op_ret = -1; + op_errno = EINVAL; + GF_FREE(array); + array = NULL; + goto out; + } + array = NULL; + } + +out: + if (op_ret < 0) + filler->op_errno = op_errno; + + if (array) + GF_FREE(array); + + return op_ret; +} + +/** + * xattrop - xattr operations - for internal use by GlusterFS + * @optype: ADD_ARRAY: + * dict should contain: + * "key" ==> array of 32-bit numbers + */ + +int +do_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + int op_ret = 0; + int op_errno = 0; + int _fd = -1; + char *real_path = NULL; + struct posix_fd *pfd = NULL; + inode_t *inode = NULL; + posix_xattr_filler_t filler = { + 0, + }; + dict_t *xattr_rsp = NULL; + dict_t 
*xdata_rsp = NULL; + struct iatt stbuf = {0}; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(xattr, out); + VALIDATE_OR_GOTO(this, out); + + if (fd) { + op_ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, + fop_log_level(GF_FOP_FXATTROP, op_errno), + P_MSG_PFD_GET_FAILED, + "failed to get pfd from" + " fd=%p", + fd); + goto out; + } + _fd = pfd->fd; + } + + if (loc && !gf_uuid_is_null(loc->gfid)) { + MAKE_INODE_HANDLE(real_path, this, loc, NULL); + if (!real_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + } + + if (real_path) { + inode = loc->inode; + } else if (fd) { + inode = fd->inode; + } + + xattr_rsp = dict_new(); + if (xattr_rsp == NULL) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + filler.this = this; + filler.fdnum = _fd; + filler.real_path = real_path; + filler.flags = (int)optype; + filler.inode = inode; + filler.xattr = xattr_rsp; + + op_ret = dict_foreach(xattr, _posix_handle_xattr_keyvalue_pair, &filler); + op_errno = filler.op_errno; + if (op_ret < 0) + goto out; + + if (!xdata) + goto out; + + if (fd) { + op_ret = posix_fdstat(this, inode, _fd, &stbuf); + } else { + op_ret = posix_pstat(this, inode, inode->gfid, real_path, &stbuf, + _gf_false); + } + if (op_ret < 0) { + op_errno = errno; + goto out; + } + xdata_rsp = posix_xattr_fill(this, real_path, loc, fd, _fd, xdata, &stbuf); + if (!xdata_rsp) { + op_ret = -1; + op_errno = ENOMEM; + } + posix_set_mode_in_dict(xdata, xdata_rsp, &stbuf); +out: + + STACK_UNWIND_STRICT(xattrop, frame, op_ret, op_errno, xattr_rsp, xdata_rsp); + + if (xattr_rsp) + dict_unref(xattr_rsp); + + if (xdata_rsp) + dict_unref(xdata_rsp); + return 0; +} + +int +posix_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + do_xattrop(frame, this, loc, NULL, optype, xattr, xdata); + return 0; +} + +int +posix_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd, + 
gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + do_xattrop(frame, this, NULL, fd, optype, xattr, xdata); + return 0; +} + +int +posix_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, + dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL; + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID(frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(loc, out); + + MAKE_INODE_HANDLE(real_path, this, loc, NULL); + if (!real_path) { + op_ret = -1; + op_errno = errno; + goto out; + } + + op_ret = sys_access(real_path, mask & 07); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_ACCESS_FAILED, + "access failed on %s", real_path); + goto out; + } + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(access, frame, op_ret, op_errno, NULL); + return 0; +} + +int32_t +posix_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct iatt preop = { + 0, + }; + struct iatt postop = { + 0, + }; + struct posix_fd *pfd = NULL; + int ret = -1; + struct posix_private *priv = NULL; + dict_t *rsp_xdata = NULL; + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID(frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL, fd=%p", fd); + goto out; + } + + _fd = pfd->fd; + + op_ret = posix_fdstat(this, fd->inode, _fd, &preop); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); + goto out; + } + + if (xdata) { + op_ret = posix_cs_maintenance(this, fd, NULL, 
&_fd, &preop, NULL, xdata, + &rsp_xdata, _gf_false); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd); + op_errno = EIO; + goto out; + } + } + + posix_update_iatt_buf(&preop, _fd, NULL, xdata); + op_ret = sys_ftruncate(_fd, offset); + + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED, + "ftruncate failed on fd=%p (%" PRId64 "", fd, offset); + goto out; + } + + op_ret = posix_fdstat(this, fd->inode, _fd, &postop); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "post-operation fstat failed on fd=%p", fd); + goto out; + } + + posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, &postop); + + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, &preop, &postop, + NULL); + + return 0; +} + +int32_t +posix_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + int _fd = -1; + int32_t op_ret = -1; + int32_t op_errno = 0; + struct iatt buf = { + 0, + }; + struct posix_fd *pfd = NULL; + dict_t *xattr_rsp = NULL; + int ret = -1; + struct posix_private *priv = NULL; + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID(frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + priv = this->private; + VALIDATE_OR_GOTO(priv, out); + + if (!xdata) + gf_msg_trace(this->name, 0, "null xdata passed, fd %p", fd); + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL, fd=%p", fd); + goto out; + } + + _fd = pfd->fd; + + op_ret = posix_fdstat(this, fd->inode, _fd, &buf); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fstat failed on fd=%p", fd); + goto out; + } + + if (xdata) { + xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, _fd, xdata, 
&buf); + + op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &buf, NULL, xdata, + &xattr_rsp, _gf_false); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd); + } + posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, _fd, NULL); + } + + posix_update_iatt_buf(&buf, _fd, NULL, xdata); + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + + STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, &buf, xattr_rsp); + if (xattr_rsp) + dict_unref(xattr_rsp); + return 0; +} + +int32_t +posix_lease(call_frame_t *frame, xlator_t *this, loc_t *loc, + struct gf_lease *lease, dict_t *xdata) +{ + struct gf_lease nullease = { + 0, + }; + + gf_msg(this->name, GF_LOG_CRITICAL, EINVAL, P_MSG_LEASE_DISABLED, + "\"features/leases\" translator is not loaded. You need" + "to use it for proper functioning of your application"); + + STACK_UNWIND_STRICT(lease, frame, -1, ENOSYS, &nullease, NULL); + return 0; +} + +static int gf_posix_lk_log; + +int32_t +posix_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + struct gf_flock *lock, dict_t *xdata) +{ + struct gf_flock nullock = { + 0, + }; + + GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. You need to use it for proper " + "functioning of your application."); + + STACK_UNWIND_STRICT(lk, frame, -1, ENOSYS, &nullock, NULL); + return 0; +} + +int32_t +posix_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata) +{ + GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. 
You need to use it for proper " + "functioning of your application."); + + STACK_UNWIND_STRICT(inodelk, frame, -1, ENOSYS, NULL); + return 0; +} + +int32_t +posix_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, + fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata) +{ + GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. You need to use it for proper " + "functioning of your application."); + + STACK_UNWIND_STRICT(finodelk, frame, -1, ENOSYS, NULL); + return 0; +} + +int32_t +posix_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, + loc_t *loc, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) +{ + GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. You need to use it for proper " + "functioning of your application."); + + STACK_UNWIND_STRICT(entrylk, frame, -1, ENOSYS, NULL); + return 0; +} + +int32_t +posix_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, + fd_t *fd, const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) +{ + GF_LOG_OCCASIONALLY(gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. 
You need to use it for proper " + "functioning of your application."); + + STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOSYS, NULL); + return 0; +} + +int +posix_fill_readdir(fd_t *fd, DIR *dir, off_t off, size_t size, + gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs) +{ + off_t in_case = -1; + off_t last_off = 0; + size_t filled = 0; + int count = 0; + int32_t this_size = -1; + gf_dirent_t *this_entry = NULL; + struct posix_fd *pfd = NULL; + struct stat stbuf = { + 0, + }; + char *hpath = NULL; + int len = 0; + int ret = 0; + int op_errno = 0; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL, fd=%p", fd); + count = -1; + errno = op_errno; + goto out; + } + + if (skip_dirs) { + hpath = alloca(PATH_MAX); + len = posix_handle_path(this, fd->inode->gfid, NULL, hpath, PATH_MAX); + if (len <= 0) { + errno = ESTALE; + count = -1; + goto out; + } + len = strlen(hpath); + hpath[len] = '/'; + } + + if (!off) { + rewinddir(dir); + } else { + seekdir(dir, off); +#ifndef GF_LINUX_HOST_OS + if ((u_long)telldir(dir) != off && off != pfd->dir_eof) { + gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, P_MSG_DIR_OPERATION_FAILED, + "seekdir(0x%llx) failed on dir=%p: " + "Invalid argument (offset reused from " + "another DIR * structure?)", + off, dir); + errno = EINVAL; + count = -1; + goto out; + } +#endif /* GF_LINUX_HOST_OS */ + } + + while (filled <= size) { + in_case = (u_long)telldir(dir); + + if (in_case == -1) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, P_MSG_DIR_OPERATION_FAILED, + "telldir failed on dir=%p", dir); + goto out; + } + + errno = 0; + + entry = sys_readdir(dir, scratch); + + if (!entry || errno != 0) { + if (errno == EBADF) { + gf_msg(THIS->name, GF_LOG_WARNING, errno, + P_MSG_DIR_OPERATION_FAILED, "readdir failed on dir=%p", + dir); + goto out; + } + break; + } + +#ifdef __NetBSD__ 
+ /* + * NetBSD with UFS1 backend uses backing files for + * extended attributes. They can be found in a + * .attribute file located at the root of the filesystem + * We hide it to glusterfs clients, since chaos will occur + * when the cluster/dht xlator decides to distribute + * exended attribute backing file across storage servers. + */ + if (__is_root_gfid(fd->inode->gfid) == 0 && + (!strcmp(entry->d_name, ".attribute"))) + continue; +#endif /* __NetBSD__ */ + + if (__is_root_gfid(fd->inode->gfid) && + (!strcmp(GF_HIDDEN_PATH, entry->d_name))) { + continue; + } + + if (skip_dirs) { + if (DT_ISDIR(entry->d_type)) { + continue; + } else if (hpath) { + strcpy(&hpath[len + 1], entry->d_name); + ret = sys_lstat(hpath, &stbuf); + if (!ret && S_ISDIR(stbuf.st_mode)) + continue; + } + } + + this_size = max(sizeof(gf_dirent_t), sizeof(gfs3_dirplist)) + + strlen(entry->d_name) + 1; + + if (this_size + filled > size) { + seekdir(dir, in_case); +#ifndef GF_LINUX_HOST_OS + if ((u_long)telldir(dir) != in_case && in_case != pfd->dir_eof) { + gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, + P_MSG_DIR_OPERATION_FAILED, + "seekdir(0x%llx) failed on dir=%p: " + "Invalid argument (offset reused from " + "another DIR * structure?)", + in_case, dir); + errno = EINVAL; + count = -1; + goto out; + } +#endif /* GF_LINUX_HOST_OS */ + break; + } + + this_entry = gf_dirent_for_name(entry->d_name); + + if (!this_entry) { + gf_msg(THIS->name, GF_LOG_ERROR, errno, + P_MSG_GF_DIRENT_CREATE_FAILED, + "could not create " + "gf_dirent for entry %s", + entry->d_name); + goto out; + } + /* + * we store the offset of next entry here, which is + * probably not intended, but code using syncop_readdir() + * (glfs-heal.c, afr-self-heald.c, pump.c) rely on it + * for directory read resumption. 
+ */ + last_off = (u_long)telldir(dir); + this_entry->d_off = last_off; + this_entry->d_ino = entry->d_ino; + this_entry->d_type = entry->d_type; + + list_add_tail(&this_entry->list, &entries->list); + + filled += this_size; + count++; + } + + if ((!sys_readdir(dir, scratch) && (errno == 0))) { + /* Indicate EOF */ + errno = ENOENT; + /* Remember EOF offset for later detection */ + pfd->dir_eof = (u_long)last_off; + } +out: + return count; +} + +dict_t * +posix_entry_xattr_fill(xlator_t *this, inode_t *inode, fd_t *fd, + char *entry_path, dict_t *dict, struct iatt *stbuf) +{ + loc_t tmp_loc = { + 0, + }; + + /* if we don't send the 'loc', open-fd-count be a problem. */ + tmp_loc.inode = inode; + + return posix_xattr_fill(this, entry_path, &tmp_loc, NULL, -1, dict, stbuf); +} + +int +posix_readdirp_fill(xlator_t *this, fd_t *fd, gf_dirent_t *entries, + dict_t *dict) +{ + gf_dirent_t *entry = NULL; + inode_table_t *itable = NULL; + inode_t *inode = NULL; + char *hpath = NULL; + int len = 0; + struct iatt stbuf = { + 0, + }; + uuid_t gfid; + int ret = -1; + + if (list_empty(&entries->list)) + return 0; + + itable = fd->inode->table; + + hpath = alloca(PATH_MAX); + len = posix_handle_path(this, fd->inode->gfid, NULL, hpath, PATH_MAX); + if (len <= 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLEPATH_FAILED, + "Failed to create handle path, fd=%p, gfid=%s", fd, + uuid_utoa(fd->inode->gfid)); + return -1; + } + len = strlen(hpath); + hpath[len] = '/'; + + list_for_each_entry(entry, &entries->list, list) + { + inode = inode_grep(fd->inode->table, fd->inode, entry->d_name); + if (inode) + gf_uuid_copy(gfid, inode->gfid); + else + bzero(gfid, 16); + + strcpy(&hpath[len + 1], entry->d_name); + + ret = posix_pstat(this, inode, gfid, hpath, &stbuf, _gf_false); + + if (ret == -1) { + if (inode) + inode_unref(inode); + continue; + } + + posix_update_iatt_buf(&stbuf, -1, hpath, dict); + + if (!inode) + inode = inode_find(itable, stbuf.ia_gfid); + + if (!inode) + inode = 
inode_new(itable); + + entry->inode = inode; + + if (dict) { + entry->dict = posix_entry_xattr_fill(this, entry->inode, fd, hpath, + dict, &stbuf); + } + + entry->d_stat = stbuf; + if (stbuf.ia_ino) + entry->d_ino = stbuf.ia_ino; + + if (entry->d_type == DT_UNKNOWN && !IA_ISINVAL(stbuf.ia_type)) { + /* The platform supports d_type but the underlying + filesystem doesn't. We set d_type to the correct + value from ia_type */ + entry->d_type = gf_d_type_from_ia_type(stbuf.ia_type); + } + + inode = NULL; + } + + return 0; +} + +int32_t +posix_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, int whichop, dict_t *dict) +{ + struct posix_fd *pfd = NULL; + DIR *dir = NULL; + int ret = -1; + int count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + gf_dirent_t entries; + int32_t skip_dirs = 0; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + INIT_LIST_HEAD(&entries.list); + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL, fd=%p", fd); + goto out; + } + + dir = pfd->dir; + + if (!dir) { + gf_msg(this->name, GF_LOG_WARNING, EINVAL, P_MSG_PFD_NULL, + "dir is NULL for fd=%p", fd); + op_errno = EINVAL; + goto out; + } + + /* When READDIR_FILTER option is set to on, we can filter out + * directory's entry from the entry->list. + */ + ret = dict_get_int32(dict, GF_READDIR_SKIP_DIRS, &skip_dirs); + + LOCK(&fd->lock); + { + /* posix_fill_readdir performs multiple separate individual + readdir() calls to fill up the buffer. + + In case of NFS where the same anonymous FD is shared between + different applications, reading a common directory can + result in the anonymous fd getting re-used unsafely between + the two readdir requests (in two different io-threads). + + It would also help, in the future, to replace the loop + around readdir() with a single large getdents() call. 
+ */ + count = posix_fill_readdir(fd, dir, off, size, &entries, this, + skip_dirs); + } + UNLOCK(&fd->lock); + + /* pick ENOENT to indicate EOF */ + op_errno = errno; + op_ret = count; + + if (whichop != GF_FOP_READDIRP) + goto out; + + posix_readdirp_fill(this, fd, &entries, dict); + +out: + if (whichop == GF_FOP_READDIR) + STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, &entries, NULL); + else + STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, &entries, NULL); + + gf_dirent_free(&entries); + + return 0; +} + +int32_t +posix_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + posix_do_readdir(frame, this, fd, size, off, GF_FOP_READDIR, xdata); + return 0; +} + +int32_t +posix_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *dict) +{ + gf_dirent_t entries; + int32_t op_ret = -1, op_errno = 0; + gf_dirent_t *entry = NULL; + + if ((dict != NULL) && (dict_get(dict, GET_ANCESTRY_DENTRY_KEY))) { + INIT_LIST_HEAD(&entries.list); + + op_ret = posix_get_ancestry(this, fd->inode, &entries, NULL, + POSIX_ANCESTRY_DENTRY, &op_errno, dict); + if (op_ret >= 0) { + op_ret = 0; + + list_for_each_entry(entry, &entries.list, list) { op_ret++; } + } + + STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, &entries, NULL); + + gf_dirent_free(&entries); + return 0; + } + + posix_do_readdir(frame, this, fd, size, off, GF_FOP_READDIRP, dict); + return 0; +} + +int32_t +posix_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + int32_t len, dict_t *xdata) +{ + char *alloc_buf = NULL; + char *buf = NULL; + int _fd = -1; + struct posix_fd *pfd = NULL; + int op_ret = -1; + int op_errno = 0; + int ret = 0; + ssize_t bytes_read = 0; + int32_t weak_checksum = 0; + int32_t zerofillcheck = 0; + /* Protocol version 4 uses 32 bytes i.e SHA256_DIGEST_LENGTH, + so this is used. 
*/ + unsigned char md5_checksum[SHA256_DIGEST_LENGTH] = {0}; + unsigned char strong_checksum[SHA256_DIGEST_LENGTH] = {0}; + unsigned char *checksum = NULL; + struct posix_private *priv = NULL; + dict_t *rsp_xdata = NULL; + gf_boolean_t buf_has_zeroes = _gf_false; + struct iatt preop = { + 0, + }; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd, out); + + priv = this->private; + + alloc_buf = _page_aligned_alloc(len, &buf); + if (!alloc_buf) { + op_errno = ENOMEM; + goto out; + } + + rsp_xdata = dict_new(); + if (!rsp_xdata) { + op_errno = ENOMEM; + goto out; + } + + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_PFD_NULL, + "pfd is NULL, fd=%p", fd); + goto out; + } + + _fd = pfd->fd; + + if (xdata) { + op_ret = posix_fdstat(this, fd->inode, _fd, &preop); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); + goto out; + } + + op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &preop, NULL, xdata, + &rsp_xdata, _gf_false); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd); + op_errno = EIO; + goto out; + } + } + + LOCK(&fd->lock); + { + if (priv->aio_capable && priv->aio_init_done) + __posix_fd_set_odirect(fd, pfd, 0, offset, len); + + bytes_read = sys_pread(_fd, buf, len, offset); + if (bytes_read < 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PREAD_FAILED, + "pread of %d bytes returned %zd", len, bytes_read); + + op_errno = errno; + } + } + UNLOCK(&fd->lock); + + if (bytes_read < 0) + goto out; + + if (xdata && + dict_get_int32(xdata, "check-zero-filled", &zerofillcheck) == 0) { + buf_has_zeroes = (mem_0filled(buf, bytes_read)) ? 
_gf_false : _gf_true; + ret = dict_set_uint32(rsp_xdata, "buf-has-zeroes", buf_has_zeroes); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED, + "%s: Failed to set " + "dictionary value for key: %s", + uuid_utoa(fd->inode->gfid), "buf-has-zeroes"); + op_errno = -ret; + goto out; + } + } + weak_checksum = gf_rsync_weak_checksum((unsigned char *)buf, (size_t)ret); + + if (priv->fips_mode_rchecksum) { + ret = dict_set_int32(rsp_xdata, "fips-mode-rchecksum", 1); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, -ret, P_MSG_DICT_SET_FAILED, + "%s: Failed to set " + "dictionary value for key: %s", + uuid_utoa(fd->inode->gfid), "fips-mode-rchecksum"); + goto out; + } + checksum = strong_checksum; + gf_rsync_strong_checksum((unsigned char *)buf, (size_t)bytes_read, + (unsigned char *)checksum); + } else { + checksum = md5_checksum; + gf_rsync_md5_checksum((unsigned char *)buf, (size_t)bytes_read, + (unsigned char *)checksum); + } + op_ret = 0; + + posix_set_ctime(frame, this, NULL, _fd, fd->inode, NULL); + +out: + STACK_UNWIND_STRICT(rchecksum, frame, op_ret, op_errno, weak_checksum, + checksum, rsp_xdata); + if (rsp_xdata) + dict_unref(rsp_xdata); + GF_FREE(alloc_buf); + + return 0; +} + +int +posix_forget(xlator_t *this, inode_t *inode) +{ + int ret = 0; + char *unlink_path = NULL; + uint64_t ctx_uint1 = 0; + uint64_t ctx_uint2 = 0; + posix_inode_ctx_t *ctx = NULL; + posix_mdata_t *mdata = NULL; + struct posix_private *priv_posix = NULL; + + priv_posix = (struct posix_private *)this->private; + if (!priv_posix) + return 0; + + ret = inode_ctx_del2(inode, this, &ctx_uint1, &ctx_uint2); + if (!ctx_uint1) + goto check_ctx2; + + ctx = (posix_inode_ctx_t *)(uintptr_t)ctx_uint1; + + if (ctx->unlink_flag == GF_UNLINK_TRUE) { + POSIX_GET_FILE_UNLINK_PATH(priv_posix->base_path, inode->gfid, + unlink_path); + if (!unlink_path) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED, + "Failed to remove gfid :%s", uuid_utoa(inode->gfid)); + ret 
= -1; + goto ctx_free; + } + ret = sys_unlink(unlink_path); + } +ctx_free: + pthread_mutex_destroy(&ctx->xattrop_lock); + pthread_mutex_destroy(&ctx->write_atomic_lock); + pthread_mutex_destroy(&ctx->pgfid_lock); + GF_FREE(ctx); + +check_ctx2: + if (ctx_uint2) { + mdata = (posix_mdata_t *)(uintptr_t)ctx_uint2; + } + + GF_FREE(mdata); + return ret; +} |
