diff options
author | ShyamsundarR <srangana@redhat.com> | 2017-11-28 18:51:00 -0500 |
---|---|---|
committer | ShyamsundarR <srangana@redhat.com> | 2017-12-02 13:56:19 -0500 |
commit | 8a0b115b20cfa2dd3c5a9e22a8244c9c2f03e17b (patch) | |
tree | e0dc5088e9931d4d2cff442e8a780da0ff7caa15 | |
parent | 09cb795587772b60ba102f4369ab3f4501cdc01a (diff) |
posix: Reorganize posix xlator to prepare for reuse with rio
1. Split out entry and inode/fd based FOPs into
separate files from posix.c
2. Split out common routines (init, fini, reconf,
and such) into their own file, from posix.c
3. Retain just the method assignments in posix.c
(such that posix2 for RIO can assign its own methods in
the future for entry operations and such)
4. Based on the split in (1) and (2), split out
posix-handle.h into 2 files, such that macros that are
needed for inode ops are in one and the rest are in the other
If the split is done as above, posix2 can compile with
its own entry ops, and hence need not compile the entry ops
split out in (1) above.
The split described in (4) can again help posix2 to
define its own macros to make entry and inode handles,
thus not impacting the existing POSIX xlator code.
Noted problems
- There are path references in certain cases where
quota is used (in the xattr FOPs); these will fail
on reuse in posix2, and need to be handled when we
get there.
- posix_init sets the root GFID on the brick root,
which is incorrect for posix2; this will again need
handling later when posix2 evolves based on this
code (other init checks seem fine on current inspection).
Merge of experimental branch patches with the following
gerrit change-IDs
> Change-Id: I965ce6dffe70a62c697f790f3438559520e0af20
> Change-Id: I089a4d9cf470c2f9c121611e8ef18dea92b2be70
> Change-Id: I2cec103f6ba8f3084443f3066bcc70b2f5ecb49a
Fixes gluster/glusterfs#327
Change-Id: I0ccfa78559a7c5a68f5e861e144cf856f5c9e19c
Signed-off-by: ShyamsundarR <srangana@redhat.com>
-rw-r--r-- | xlators/storage/posix/src/Makefile.am | 5 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix-common.c | 1326 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix-entry-ops.c | 2125 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix-handle.c | 3 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix-handle.h | 108 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix-helpers.c | 35 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix-inode-fd-ops.c | 4975 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix-inode-handle.h | 106 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix.c | 8242 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix.h | 243 | ||||
-rw-r--r-- | xlators/xlator.sym | 1 |
11 files changed, 8831 insertions, 8338 deletions
diff --git a/xlators/storage/posix/src/Makefile.am b/xlators/storage/posix/src/Makefile.am index 8ec460578b5..17ea98016c9 100644 --- a/xlators/storage/posix/src/Makefile.am +++ b/xlators/storage/posix/src/Makefile.am @@ -5,12 +5,13 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/storage posix_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) posix_la_SOURCES = posix.c posix-helpers.c posix-handle.c posix-aio.c \ - posix-gfid-path.c + posix-gfid-path.c posix-entry-ops.c posix-inode-fd-ops.c \ + posix-common.c posix_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIBAIO) \ $(ACL_LIBS) noinst_HEADERS = posix.h posix-mem-types.h posix-handle.h posix-aio.h \ - posix-messages.h posix-gfid-path.h + posix-messages.h posix-gfid-path.h posix-inode-handle.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c new file mode 100644 index 00000000000..011bef8a720 --- /dev/null +++ b/xlators/storage/posix/src/posix-common.c @@ -0,0 +1,1326 @@ +/* + Copyright (c) 2006-2017 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#define __XOPEN_SOURCE 500 + +/* for SEEK_HOLE and SEEK_DATA */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <openssl/md5.h> +#include <stdint.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <errno.h> +#include <libgen.h> +#include <pthread.h> +#include <ftw.h> +#include <sys/stat.h> +#include <signal.h> +#include <sys/uio.h> +#include <unistd.h> +#include <ftw.h> + +#ifndef GF_BSD_HOST_OS +#include <alloca.h> +#endif /* GF_BSD_HOST_OS */ + +#ifdef HAVE_LINKAT +#include <fcntl.h> +#endif /* HAVE_LINKAT */ + +#include "glusterfs.h" +#include "checksum.h" +#include "dict.h" +#include "logging.h" +#include "posix.h" +#include "posix-inode-handle.h" +#include "xlator.h" +#include "defaults.h" +#include "common-utils.h" +#include "compat-errno.h" +#include "compat.h" +#include "byte-order.h" +#include "syscall.h" +#include "statedump.h" +#include "locking.h" +#include "timer.h" +#include "glusterfs3-xdr.h" +#include "hashfn.h" +#include "posix-aio.h" +#include "glusterfs-acl.h" +#include "posix-messages.h" +#include "events.h" +#include "posix-gfid-path.h" +#include "compat-uuid.h" + +extern char *marker_xattrs[]; +#define ALIGN_SIZE 4096 + +#undef HAVE_SET_FSID +#ifdef HAVE_SET_FSID + +#define DECLARE_OLD_FS_ID_VAR uid_t old_fsuid; gid_t old_fsgid; + +#define SET_FS_ID(uid, gid) do { \ + old_fsuid = setfsuid (uid); \ + old_fsgid = setfsgid (gid); \ + } while (0) + +#define SET_TO_OLD_FS_ID() do { \ + setfsuid (old_fsuid); \ + setfsgid (old_fsgid); \ + } while (0) + +#else + +#define DECLARE_OLD_FS_ID_VAR +#define SET_FS_ID(uid, gid) +#define SET_TO_OLD_FS_ID() + +#endif + +/* Setting microseconds or nanoseconds depending on what's supported: + The passed in `tv` can be + struct timespec + if supported (better, because it supports nanosecond resolution) or + struct timeval + otherwise. 
*/ +#if HAVE_UTIMENSAT +#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \ + tv.tv_nsec = nanosecs +#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \ + (sys_utimensat (AT_FDCWD, path, tv, AT_SYMLINK_NOFOLLOW)) +#else +#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \ + tv.tv_usec = nanosecs / 1000 +#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \ + (lutimes (path, tv)) +#endif + +int32_t +posix_priv (xlator_t *this) +{ + struct posix_private *priv = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + + (void) snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", + this->type, this->name); + gf_proc_dump_add_section(key_prefix); + + if (!this) + return 0; + + priv = this->private; + + if (!priv) + return 0; + + gf_proc_dump_write("base_path", "%s", priv->base_path); + gf_proc_dump_write("base_path_length", "%d", priv->base_path_length); + gf_proc_dump_write("max_read", "%d", priv->read_value); + gf_proc_dump_write("max_write", "%d", priv->write_value); + gf_proc_dump_write("nr_files", "%ld", priv->nr_files); + + return 0; +} + +int32_t +posix_inode (xlator_t *this) +{ + return 0; +} + +/** + * notify - when parent sends PARENT_UP, send CHILD_UP event from here + */ +int32_t +posix_notify (xlator_t *this, + int32_t event, + void *data, + ...) 
+{ + struct posix_private *priv = NULL; + + priv = this->private; + switch (event) + { + case GF_EVENT_PARENT_UP: + { + /* Tell the parent that posix xlator is up */ + default_notify (this, GF_EVENT_CHILD_UP, data); + } + break; + case GF_EVENT_CLEANUP: + if (priv->health_check) { + priv->health_check_active = _gf_false; + pthread_cancel (priv->health_check); + priv->health_check = 0; + } + if (priv->disk_space_check) { + priv->disk_space_check_active = _gf_false; + pthread_cancel (priv->disk_space_check); + priv->disk_space_check = 0; + } + if (priv->janitor) { + (void) gf_thread_cleanup_xint (priv->janitor); + priv->janitor = 0; + } + if (priv->fsyncer) { + (void) gf_thread_cleanup_xint (priv->fsyncer); + priv->fsyncer = 0; + } + if (priv->mount_lock) { + (void) sys_closedir (priv->mount_lock); + priv->mount_lock = NULL; + } + + break; + default: + /* */ + break; + } + return 0; +} + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init (this, gf_posix_mt_end + 1); + + if (ret != 0) { + return ret; + } + + return ret; +} + +static int +posix_set_owner (xlator_t *this, uid_t uid, gid_t gid) +{ + struct posix_private *priv = NULL; + int ret = -1; + struct stat st = {0,}; + + priv = this->private; + + ret = sys_lstat (priv->base_path, &st); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_DIR_OPERATION_FAILED, "Failed to stat " + "brick path %s", + priv->base_path); + return ret; + } + + if ((uid == -1 || st.st_uid == uid) && + (gid == -1 || st.st_gid == gid)) + return 0; + + ret = sys_chown (priv->base_path, uid, gid); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_DIR_OPERATION_FAILED, "Failed to set uid/gid for" + " brick path %s", priv->base_path); + + return ret; +} +static int +set_gfid2path_separator (struct posix_private *priv, const char *str) +{ + int str_len = 0; + + str_len = strlen(str); + if (str_len > 0 && str_len < 8) { + strcpy (priv->gfid2path_sep, str); 
+ return 0; + } + + return -1; +} + +static int +set_batch_fsync_mode (struct posix_private *priv, const char *str) +{ + if (strcmp (str, "none") == 0) + priv->batch_fsync_mode = BATCH_NONE; + else if (strcmp (str, "syncfs") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS; + else if (strcmp (str, "syncfs-single-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_SINGLE_FSYNC; + else if (strcmp (str, "syncfs-reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_REVERSE_FSYNC; + else if (strcmp (str, "reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_REVERSE_FSYNC; + else + return -1; + + return 0; +} + +#ifdef GF_DARWIN_HOST_OS +static int +set_xattr_user_namespace_mode (struct posix_private *priv, const char *str) +{ + if (strcmp (str, "none") == 0) + priv->xattr_user_namespace = XATTR_NONE; + else if (strcmp (str, "strip") == 0) + priv->xattr_user_namespace = XATTR_STRIP; + else if (strcmp (str, "append") == 0) + priv->xattr_user_namespace = XATTR_APPEND; + else if (strcmp (str, "both") == 0) + priv->xattr_user_namespace = XATTR_BOTH; + else + return -1; + return 0; +} +#endif + +int +posix_reconfigure (xlator_t *this, dict_t *options) +{ + int ret = -1; + struct posix_private *priv = NULL; + int32_t uid = -1; + int32_t gid = -1; + char *batch_fsync_mode_str = NULL; + char *gfid2path_sep = NULL; + int32_t force_create_mode = -1; + int32_t force_directory_mode = -1; + int32_t create_mask = -1; + int32_t create_directory_mask = -1; + + priv = this->private; + + GF_OPTION_RECONF ("brick-uid", uid, options, int32, out); + GF_OPTION_RECONF ("brick-gid", gid, options, int32, out); + if (uid != -1 || gid != -1) + posix_set_owner (this, uid, gid); + + GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec, + options, uint32, out); + + GF_OPTION_RECONF ("batch-fsync-mode", batch_fsync_mode_str, + options, str, out); + + if (set_batch_fsync_mode (priv, batch_fsync_mode_str) != 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, 
+ "Unknown mode string: %s", batch_fsync_mode_str); + goto out; + } + + GF_OPTION_RECONF ("gfid2path-separator", gfid2path_sep, options, + str, out); + if (set_gfid2path_separator (priv, gfid2path_sep) != 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, + "Length of separator exceeds 7: %s", gfid2path_sep); + goto out; + } + +#ifdef GF_DARWIN_HOST_OS + + char *xattr_user_namespace_mode_str = NULL; + + GF_OPTION_RECONF ("xattr-user-namespace-mode", xattr_user_namespace_mode_str, + options, str, out); + + if (set_xattr_user_namespace_mode (priv, xattr_user_namespace_mode_str) != 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_UNKNOWN_ARGUMENT, + "Unknown xattr user namespace mode string: %s", + xattr_user_namespace_mode_str); + goto out; + } + +#endif + + GF_OPTION_RECONF ("linux-aio", priv->aio_configured, + options, bool, out); + + if (priv->aio_configured) + posix_aio_on (this); + else + posix_aio_off (this); + + GF_OPTION_RECONF ("update-link-count-parent", priv->update_pgfid_nlinks, + options, bool, out); + + GF_OPTION_RECONF ("gfid2path", priv->gfid2path, + options, bool, out); + + GF_OPTION_RECONF ("node-uuid-pathinfo", priv->node_uuid_pathinfo, + options, bool, out); + + if (priv->node_uuid_pathinfo && + (gf_uuid_is_null (priv->glusterd_uuid))) { + gf_msg (this->name, GF_LOG_INFO, 0, P_MSG_UUID_NULL, + "glusterd uuid is NULL, pathinfo xattr would" + " fallback to <hostname>:<export>"); + } + + GF_OPTION_RECONF ("reserve", priv->disk_reserve, + options, uint32, out); + if (priv->disk_reserve) + posix_spawn_disk_space_check_thread (this); + + GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval, + options, uint32, out); + GF_OPTION_RECONF ("health-check-timeout", priv->health_check_timeout, + options, uint32, out); + posix_spawn_health_check_thread (this); + + GF_OPTION_RECONF ("shared-brick-count", priv->shared_brick_count, + options, int32, out); + GF_OPTION_RECONF ("force-create-mode", force_create_mode, + options, 
int32, out); + priv->force_create_mode = force_create_mode; + + GF_OPTION_RECONF ("force-directory-mode", force_directory_mode, + options, int32, out); + priv->force_directory_mode = force_directory_mode; + + GF_OPTION_RECONF ("create-mask", create_mask, + options, int32, out); + priv->create_mask = create_mask; + + GF_OPTION_RECONF ("create-directory-mask", create_directory_mask, + options, int32, out); + priv->create_directory_mask = create_directory_mask; + + ret = 0; +out: + return ret; +} + +int32_t +posix_delete_unlink_entry (const char *fpath, const struct stat *sb, + int typeflag, struct FTW *ftwbuf) { + + int ret = 0; + + if (!fpath) + goto out; + + switch (typeflag) { + case FTW_SL: + case FTW_NS: + case FTW_F: + case FTW_SLN: + ret = sys_unlink(fpath); + break; + case FTW_D: + case FTW_DP: + case FTW_DNR: + if (ftwbuf->level != 0) { + ret = sys_rmdir(fpath); + } + break; + default: + break; + } + if (ret) { + gf_msg ("posix_delete_unlink_entry", GF_LOG_WARNING, errno, + P_MSG_HANDLE_CREATE, + "Deletion of entries %s failed" + "Please delete it manually", + fpath); + } +out: + return 0; +} + +int32_t +posix_delete_unlink (const char *unlink_path) { + + int ret = -1; + int flags = 0; + + flags |= (FTW_DEPTH | FTW_PHYS); + + ret = nftw(unlink_path, posix_delete_unlink_entry, 2, flags); + if (ret) { + gf_msg ("posix_delete_unlink", GF_LOG_ERROR, 0, + P_MSG_HANDLE_CREATE, + "Deleting files from %s failed", + unlink_path); + } + return ret; +} + +int32_t +posix_create_unlink_dir (xlator_t *this) { + + struct posix_private *priv = NULL; + struct stat stbuf; + int ret = -1; + uuid_t gfid = {0}; + char gfid_str[64] = {0}; + char unlink_path[PATH_MAX] = {0,}; + char landfill_path[PATH_MAX] = {0,}; + + priv = this->private; + + (void) snprintf (unlink_path, sizeof(unlink_path), "%s/%s", + priv->base_path, GF_UNLINK_PATH); + + gf_uuid_generate (gfid); + uuid_utoa_r (gfid, gfid_str); + + (void) snprintf (landfill_path, sizeof(landfill_path), "%s/%s/%s", + 
priv->base_path, GF_LANDFILL_PATH, gfid_str); + + ret = sys_stat (unlink_path, &stbuf); + switch (ret) { + case -1: + if (errno != ENOENT) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_HANDLE_CREATE, + "Checking for %s failed", + unlink_path); + return -1; + } + break; + case 0: + if (!S_ISDIR (stbuf.st_mode)) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_HANDLE_CREATE, + "Not a directory: %s", + unlink_path); + return -1; + } + ret = posix_delete_unlink (unlink_path); + return 0; + default: + break; + } + ret = sys_mkdir (unlink_path, 0600); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_HANDLE_CREATE, + "Creating directory %s failed", + unlink_path); + return -1; + } + + return 0; +} + +/** + * init - + */ +int +posix_init (xlator_t *this) +{ + struct posix_private *_private = NULL; + data_t *dir_data = NULL; + data_t *tmp_data = NULL; + struct stat buf = {0,}; + gf_boolean_t tmp_bool = 0; + int ret = 0; + int op_ret = -1; + int op_errno = 0; + ssize_t size = -1; + uuid_t old_uuid = {0,}; + uuid_t dict_uuid = {0,}; + uuid_t gfid = {0,}; + uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; + char *guuid = NULL; + int32_t uid = -1; + int32_t gid = -1; + char *batch_fsync_mode_str; + char *gfid2path_sep = NULL; + int force_create = -1; + int force_directory = -1; + int create_mask = -1; + int create_directory_mask = -1; + + dir_data = dict_get (this->options, "directory"); + + if (this->children) { + gf_msg (this->name, GF_LOG_CRITICAL, 0, P_MSG_SUBVOLUME_ERROR, + "FATAL: storage/posix cannot have subvolumes"); + ret = -1; + goto out; + } + + if (!this->parents) { + gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_VOLUME_DANGLING, + "Volume is dangling. 
Please check the volume file."); + } + + if (!dir_data) { + gf_msg (this->name, GF_LOG_CRITICAL, 0, + P_MSG_EXPORT_DIR_MISSING, + "Export directory not specified in volume file."); + ret = -1; + goto out; + } + + umask (000); // umask `masking' is done at the client side + + /* Check whether the specified directory exists, if not log it. */ + op_ret = sys_stat (dir_data->data, &buf); + if ((op_ret != 0) || !S_ISDIR (buf.st_mode)) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_DIR_OPERATION_FAILED, + "Directory '%s' doesn't exist, exiting.", + dir_data->data); + ret = -1; + goto out; + } + + _private = GF_CALLOC (1, sizeof (*_private), + gf_posix_mt_posix_private); + if (!_private) { + ret = -1; + goto out; + } + + _private->base_path = gf_strdup (dir_data->data); + _private->base_path_length = strlen (_private->base_path); + + ret = dict_get_str (this->options, "hostname", &_private->hostname); + if (ret) { + _private->hostname = GF_CALLOC (256, sizeof (char), + gf_common_mt_char); + if (!_private->hostname) { + goto out; + } + ret = gethostname (_private->hostname, 256); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_HOSTNAME_MISSING, + "could not find hostname "); + } + } + + /* Check for Extended attribute support, if not present, log it */ + op_ret = sys_lsetxattr (dir_data->data, + "trusted.glusterfs.test", "working", 8, 0); + if (op_ret != -1) { + sys_lremovexattr (dir_data->data, "trusted.glusterfs.test"); + } else { + tmp_data = dict_get (this->options, + "mandate-attribute"); + if (tmp_data) { + if (gf_string2boolean (tmp_data->data, + &tmp_bool) == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_OPTION, + "wrong option provided for key " + "\"mandate-attribute\""); + ret = -1; + goto out; + } + if (!tmp_bool) { + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_XATTR_NOTSUP, + "Extended attribute not supported, " + "starting as per option"); + } else { + gf_msg (this->name, GF_LOG_CRITICAL, 0, + P_MSG_XATTR_NOTSUP, + 
"Extended attribute not supported, " + "exiting."); + ret = -1; + goto out; + } + } else { + gf_msg (this->name, GF_LOG_CRITICAL, 0, + P_MSG_XATTR_NOTSUP, + "Extended attribute not supported, exiting."); + ret = -1; + goto out; + } + } + + tmp_data = dict_get (this->options, "volume-id"); + if (tmp_data) { + op_ret = gf_uuid_parse (tmp_data->data, dict_uuid); + if (op_ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_VOLUME_ID, + "wrong volume-id (%s) set" + " in volume file", tmp_data->data); + ret = -1; + goto out; + } + size = sys_lgetxattr (dir_data->data, + "trusted.glusterfs.volume-id", old_uuid, 16); + if (size == 16) { + if (gf_uuid_compare (old_uuid, dict_uuid)) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_VOLUME_ID, + "mismatching volume-id (%s) received. " + "already is a part of volume %s ", + tmp_data->data, uuid_utoa (old_uuid)); + gf_event (EVENT_POSIX_ALREADY_PART_OF_VOLUME, + "volume-id=%s;brick=%s:%s", + uuid_utoa (old_uuid), + _private->hostname, _private->base_path); + ret = -1; + goto out; + } + } else if ((size == -1) && + (errno == ENODATA || errno == ENOATTR)) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_VOLUME_ID_ABSENT, + "Extended attribute trusted.glusterfs." 
+ "volume-id is absent"); + gf_event (EVENT_POSIX_BRICK_NOT_IN_VOLUME, + "brick=%s:%s", + _private->hostname, _private->base_path); + ret = -1; + goto out; + + } else if ((size == -1) && (errno != ENODATA) && + (errno != ENOATTR)) { + /* Wrong 'volume-id' is set, it should be error */ + gf_event (EVENT_POSIX_BRICK_VERIFICATION_FAILED, + "brick=%s:%s", + _private->hostname, _private->base_path); + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_VOLUME_ID_FETCH_FAILED, + "%s: failed to fetch volume-id", + dir_data->data); + ret = -1; + goto out; + } else { + ret = -1; + gf_event (EVENT_POSIX_BRICK_VERIFICATION_FAILED, + "brick=%s:%s", + _private->hostname, _private->base_path); + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_VOLUME_ID_FETCH_FAILED, + "failed to fetch proper volume id from export"); + goto out; + } + } + + /* Now check if the export directory has some other 'gfid', + other than that of root '/' */ + size = sys_lgetxattr (dir_data->data, "trusted.gfid", gfid, 16); + if (size == 16) { + if (!__is_root_gfid (gfid)) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_GFID_SET_FAILED, + "%s: gfid (%s) is not that of glusterfs '/' ", + dir_data->data, uuid_utoa (gfid)); + ret = -1; + goto out; + } + } else if (size != -1) { + /* Wrong 'gfid' is set, it should be error */ + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_GFID_SET_FAILED, + "%s: wrong value set as gfid", + dir_data->data); + ret = -1; + goto out; + } else if ((size == -1) && (errno != ENODATA) && + (errno != ENOATTR)) { + /* Wrong 'gfid' is set, it should be error */ + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_GFID_SET_FAILED, + "%s: failed to fetch gfid", + dir_data->data); + ret = -1; + goto out; + } else { + /* First time volume, set the GFID */ + size = sys_lsetxattr (dir_data->data, "trusted.gfid", rootgfid, + 16, XATTR_CREATE); + if (size == -1) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_GFID_SET_FAILED, + "%s: failed to set gfid", + dir_data->data); + ret = 
-1; + goto out; + } + } + + ret = 0; + + size = sys_lgetxattr (dir_data->data, POSIX_ACL_ACCESS_XATTR, + NULL, 0); + if ((size < 0) && (errno == ENOTSUP)) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_ACL_NOTSUP, + "Posix access control list is not supported."); + gf_event (EVENT_POSIX_ACL_NOT_SUPPORTED, + "brick=%s:%s", _private->hostname, _private->base_path); + } + + /* + * _XOPEN_PATH_MAX is the longest file path len we MUST + * support according to POSIX standard. When prepended + * by the brick base path it may exceed backed filesystem + * capacity (which MAY be bigger than _XOPEN_PATH_MAX). If + * this is the case, chdir() to the brick base path and + * use relative paths when they are too long. See also + * MAKE_REAL_PATH in posix-handle.h + */ + _private->path_max = pathconf(_private->base_path, _PC_PATH_MAX); + if (_private->path_max != -1 && + _XOPEN_PATH_MAX + _private->base_path_length > _private->path_max) { + ret = chdir(_private->base_path); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_BASEPATH_CHDIR_FAILED, + "chdir() to \"%s\" failed", + _private->base_path); + goto out; + } +#ifdef __NetBSD__ + /* + * At least on NetBSD, the chdir() above uncovers a + * race condition which cause file lookup to fail + * with ENODATA for a few seconds. The volume quickly + * reaches a sane state, but regression tests are fast + * enough to choke on it. The reason is obscure (as + * often with race conditions), but sleeping here for + * a second seems to workaround the problem. 
+ */ + sleep(1); +#endif + } + + + LOCK_INIT (&_private->lock); + + _private->export_statfs = 1; + tmp_data = dict_get (this->options, "export-statfs-size"); + if (tmp_data) { + if (gf_string2boolean (tmp_data->data, + &_private->export_statfs) == -1) { + ret = -1; + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_OPTION_VAL, + "'export-statfs-size' takes only boolean " + "options"); + goto out; + } + if (!_private->export_statfs) + gf_msg_debug (this->name, 0, + "'statfs()' returns dummy size"); + } + + _private->background_unlink = 0; + tmp_data = dict_get (this->options, "background-unlink"); + if (tmp_data) { + if (gf_string2boolean (tmp_data->data, + &_private->background_unlink) == -1) { + ret = -1; + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_OPTION_VAL, "'background-unlink'" + " takes only boolean options"); + goto out; + } + + if (_private->background_unlink) + gf_msg_debug (this->name, 0, + "unlinks will be performed in background"); + } + + tmp_data = dict_get (this->options, "o-direct"); + if (tmp_data) { + if (gf_string2boolean (tmp_data->data, + &_private->o_direct) == -1) { + ret = -1; + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_OPTION_VAL, + "wrong option provided for 'o-direct'"); + goto out; + } + if (_private->o_direct) + gf_msg_debug (this->name, 0, "o-direct mode is enabled" + " (O_DIRECT for every open)"); + } + + tmp_data = dict_get (this->options, "update-link-count-parent"); + if (tmp_data) { + if (gf_string2boolean (tmp_data->data, + &_private->update_pgfid_nlinks) == -1) { + ret = -1; + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_OPTION, "wrong value provided " + "for 'update-link-count-parent'"); + goto out; + } + if (_private->update_pgfid_nlinks) + gf_msg_debug (this->name, 0, "update-link-count-parent" + " is enabled. 
Thus for each file an " + "extended attribute representing the " + "number of hardlinks for that file " + "within the same parent directory is" + " set."); + } + + ret = dict_get_str (this->options, "glusterd-uuid", &guuid); + if (!ret) { + if (gf_uuid_parse (guuid, _private->glusterd_uuid)) + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_INVALID_NODE_UUID, "Cannot parse " + "glusterd (node) UUID, node-uuid xattr " + "request would return - \"No such attribute\""); + } else { + gf_msg_debug (this->name, 0, "No glusterd (node) UUID passed -" + " node-uuid xattr request will return \"No such" + " attribute\""); + } + ret = 0; + + GF_OPTION_INIT ("janitor-sleep-duration", + _private->janitor_sleep_duration, int32, out); + + /* performing open dir on brick dir locks the brick dir + * and prevents it from being unmounted + */ + _private->mount_lock = sys_opendir (dir_data->data); + if (!_private->mount_lock) { + ret = -1; + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_DIR_OPERATION_FAILED, + "Could not lock brick directory (%s)", + strerror (op_errno)); + goto out; + } +#ifndef GF_DARWIN_HOST_OS + { + struct rlimit lim; + lim.rlim_cur = 1048576; + lim.rlim_max = 1048576; + + if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_SET_ULIMIT_FAILED, + "Failed to set 'ulimit -n " + " 1048576'"); + lim.rlim_cur = 65536; + lim.rlim_max = 65536; + + if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_SET_FILE_MAX_FAILED, + "Failed to set maximum allowed open " + "file descriptors to 64k"); + } + else { + gf_msg (this->name, GF_LOG_INFO, 0, + P_MSG_MAX_FILE_OPEN, "Maximum allowed " + "open file descriptors set to 65536"); + } + } + } +#endif + _private->shared_brick_count = 1; + ret = dict_get_int32 (this->options, "shared-brick-count", + &_private->shared_brick_count); + if (ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_INVALID_OPTION_VAL, + 
"'shared-brick-count' takes only integer " + "values"); + goto out; + } + + this->private = (void *)_private; + + op_ret = posix_handle_init (this); + if (op_ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE, + "Posix handle setup failed"); + ret = -1; + goto out; + } + + op_ret = posix_handle_trash_init (this); + if (op_ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE_TRASH, + "Posix landfill setup failed"); + ret = -1; + goto out; + } + + op_ret = posix_create_unlink_dir (this); + if (op_ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_HANDLE_CREATE, + "Creation of unlink directory failed"); + ret = -1; + goto out; + } + + _private->aio_init_done = _gf_false; + _private->aio_capable = _gf_false; + + GF_OPTION_INIT ("brick-uid", uid, int32, out); + GF_OPTION_INIT ("brick-gid", gid, int32, out); + if (uid != -1 || gid != -1) + posix_set_owner (this, uid, gid); + + GF_OPTION_INIT ("linux-aio", _private->aio_configured, bool, out); + + if (_private->aio_configured) { + op_ret = posix_aio_on (this); + + if (op_ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_POSIX_AIO, + "Posix AIO init failed"); + ret = -1; + goto out; + } + } + + GF_OPTION_INIT ("node-uuid-pathinfo", + _private->node_uuid_pathinfo, bool, out); + if (_private->node_uuid_pathinfo && + (gf_uuid_is_null (_private->glusterd_uuid))) { + gf_msg (this->name, GF_LOG_INFO, 0, P_MSG_UUID_NULL, + "glusterd uuid is NULL, pathinfo xattr would" + " fallback to <hostname>:<export>"); + } + + _private->disk_space_check_active = _gf_false; + _private->disk_space_full = 0; + GF_OPTION_INIT ("reserve", + _private->disk_reserve, uint32, out); + if (_private->disk_reserve) + posix_spawn_disk_space_check_thread (this); + + _private->health_check_active = _gf_false; + GF_OPTION_INIT ("health-check-interval", + _private->health_check_interval, uint32, out); + GF_OPTION_INIT ("health-check-timeout", + _private->health_check_timeout, uint32, out); + if 
(_private->health_check_interval) + posix_spawn_health_check_thread (this); + + pthread_mutex_init (&_private->janitor_lock, NULL); + pthread_cond_init (&_private->janitor_cond, NULL); + INIT_LIST_HEAD (&_private->janitor_fds); + + posix_spawn_janitor_thread (this); + + pthread_mutex_init (&_private->fsync_mutex, NULL); + pthread_cond_init (&_private->fsync_cond, NULL); + INIT_LIST_HEAD (&_private->fsyncs); + + ret = gf_thread_create (&_private->fsyncer, NULL, posix_fsyncer, this, + "posixfsy"); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_FSYNCER_THREAD_CREATE_FAILED, + "fsyncer thread creation failed"); + goto out; + } + + GF_OPTION_INIT ("batch-fsync-mode", batch_fsync_mode_str, str, out); + + if (set_batch_fsync_mode (_private, batch_fsync_mode_str) != 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, + "Unknown mode string: %s", batch_fsync_mode_str); + goto out; + } + + GF_OPTION_INIT ("gfid2path", _private->gfid2path, bool, out); + + GF_OPTION_INIT ("gfid2path-separator", gfid2path_sep, str, out); + if (set_gfid2path_separator (_private, gfid2path_sep) != 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, + "Length of separator exceeds 7: %s", gfid2path_sep); + goto out; + } + +#ifdef GF_DARWIN_HOST_OS + + char *xattr_user_namespace_mode_str = NULL; + + GF_OPTION_INIT ("xattr-user-namespace-mode", + xattr_user_namespace_mode_str, str, out); + + if (set_xattr_user_namespace_mode (_private, + xattr_user_namespace_mode_str) != 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, + "Unknown xattr user namespace mode string: %s", + xattr_user_namespace_mode_str); + goto out; + } +#endif + + GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec, + uint32, out); + GF_OPTION_INIT ("force-create-mode", force_create, int32, out); + _private->force_create_mode = force_create; + + GF_OPTION_INIT ("force-directory-mode", force_directory, int32, out); + _private->force_directory_mode 
= force_directory; + + GF_OPTION_INIT ("create-mask", + create_mask, int32, out); + _private->create_mask = create_mask; + + GF_OPTION_INIT ("create-directory-mask", + create_directory_mask, int32, out); + _private->create_directory_mask = create_directory_mask; +out: + if (ret) { + if (_private) { + GF_FREE (_private->base_path); + + GF_FREE (_private->hostname); + + GF_FREE (_private->trash_path); + + GF_FREE (_private); + } + + this->private = NULL; + } + return ret; +} + +void +posix_fini (xlator_t *this) +{ + struct posix_private *priv = this->private; + if (!priv) + return; + this->private = NULL; + /*unlock brick dir*/ + if (priv->mount_lock) + (void) sys_closedir (priv->mount_lock); + + GF_FREE (priv->base_path); + GF_FREE (priv->hostname); + GF_FREE (priv->trash_path); + GF_FREE (priv); + + return; +} + +struct volume_options options[] = { + { .key = {"o-direct"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"directory"}, + .type = GF_OPTION_TYPE_PATH, + .default_value = "{{brick.path}}" + }, + { .key = {"hostname"}, + .type = GF_OPTION_TYPE_ANY }, + { .key = {"export-statfs-size"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"mandate-attribute"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"background-unlink"}, + .type = GF_OPTION_TYPE_BOOL }, + { .key = {"janitor-sleep-duration"}, + .type = GF_OPTION_TYPE_INT, + .min = 1, + .validate = GF_OPT_VALIDATE_MIN, + .default_value = "10", + .description = "Interval (in seconds) between times the internal " + "'landfill' directory is emptied." 
+ }, + { .key = {"volume-id"}, + .type = GF_OPTION_TYPE_ANY, + .default_value = "{{brick.volumeid}}" + }, + { .key = {"glusterd-uuid"}, + .type = GF_OPTION_TYPE_STR }, + { + .key = {"linux-aio"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Support for native Linux AIO", + .op_version = {1}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { + .key = {"brick-uid"}, + .type = GF_OPTION_TYPE_INT, + .min = -1, + .validate = GF_OPT_VALIDATE_MIN, + .default_value = "-1", + .description = "Support for setting uid of brick's owner", + .op_version = {1}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { + .key = {"brick-gid"}, + .type = GF_OPTION_TYPE_INT, + .min = -1, + .validate = GF_OPT_VALIDATE_MIN, + .default_value = "-1", + .description = "Support for setting gid of brick's owner", + .op_version = {1}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { .key = {"node-uuid-pathinfo"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "return glusterd's node-uuid in pathinfo xattr" + " string instead of hostname", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { + .key = {"health-check-interval"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "30", + .validate = GF_OPT_VALIDATE_MIN, + .description = "Interval in seconds for a filesystem health check, " + "set to 0 to disable", + .op_version = {3}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { + .key = {"health-check-timeout"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "10", + .validate = GF_OPT_VALIDATE_MIN, + .description = "Interval in seconds to wait aio_write finish for health check, " + "set to 0 to disable", + .op_version = {GD_OP_VERSION_4_0_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC + }, + { + .key = {"reserve"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "1", + .validate = GF_OPT_VALIDATE_MIN, + .description = "Percentage of disk space to be reserved." 
+                         " Set to 0 to disable",
+          .op_version = {GD_OP_VERSION_3_13_0},
+          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC
+        },
+        /* Help text lists the batch modes; adjacent string literals are
+           concatenated, so each fragment must carry its own spacing. */
+        { .key = {"batch-fsync-mode"},
+          .type = GF_OPTION_TYPE_STR,
+          .default_value = "reverse-fsync",
+          .description = "Possible values:\n"
+          "\t- syncfs: Perform one syncfs() on behalf of a batch"
+          " of fsyncs.\n"
+          "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch"
+          " of fsyncs and one fsync() per batch.\n"
+          "\t- syncfs-reverse-fsync: Perform one syncfs() on behalf of a batch"
+          " of fsyncs and fsync() each file in the batch in reverse order.\n"
+          "\t- reverse-fsync: Perform fsync() of each file in the batch in"
+          " reverse order.",
+          .op_version = {3},
+          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC
+        },
+        { .key = {"batch-fsync-delay-usec"},
+          .type = GF_OPTION_TYPE_INT,
+          .default_value = "0",
+          .description = "Num of usecs to wait for aggregating fsync"
+          " requests",
+          .op_version = {3},
+          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC
+        },
+        { .key = {"update-link-count-parent"},
+          .type = GF_OPTION_TYPE_BOOL,
+          .default_value = "off",
+          .description = "Enable placeholders for gfid to path conversion",
+          .op_version = {GD_OP_VERSION_3_6_0},
+          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC
+        },
+        { .key = {"gfid2path"},
+          .type = GF_OPTION_TYPE_BOOL,
+#ifdef __NetBSD__
+          /*
+           * NetBSD storage of extended attributes for UFS1 badly
+           * scales when the list of extended attributes names rises.
+           * This option can add as many extended attributes names
+           * as we have files, hence we keep it disabled for performance
+           * sake.
+           */
+          .default_value = "off",
+#else
+          .default_value = "on",
+#endif
+          .description = "Enable logging metadata for gfid to path conversion",
+          .op_version = {GD_OP_VERSION_3_12_0},
+          .flags = OPT_FLAG_SETTABLE
+        },
+        { .key = {"gfid2path-separator"},
+          .type = GF_OPTION_TYPE_STR,
+          .default_value = ":",
+          .description = "Path separator for glusterfs.gfidtopath virt xattr",
+          .op_version = {GD_OP_VERSION_3_12_0},
+          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC
+        },
+#if GF_DARWIN_HOST_OS
+        { .key = {"xattr-user-namespace-mode"},
+          .type = GF_OPTION_TYPE_STR,
+          .default_value = "none",
+          .description = "Option to control XATTR user namespace on the raw filesystem: "
+          "\t- None: Will use the user namespace, so files will be exchangable with Linux.\n"
+          " The raw filesystem will not be compatible with OS X Finder.\n"
+          "\t- Strip: Will strip the user namespace before setting. The raw filesystem will work in OS X.\n",
+          .op_version = {GD_OP_VERSION_3_6_0},
+          .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC
+        },
+#endif
+        { .key = {"shared-brick-count"},
+          .type = GF_OPTION_TYPE_INT,
+          .default_value = "1",
+          .description = "Number of bricks sharing the same backend export."
+          " Useful for displaying the proper usable size through statvfs() "
+          "call (df command)",
+        },
+        /*
+         * The four mode/mask options below carry both .min and .max.
+         * A designated initializer that names .validate twice keeps only
+         * the last value, which left .min unchecked; request validation
+         * of both bounds with a single designator instead.
+         */
+        { .key = {"force-create-mode"},
+          .type = GF_OPTION_TYPE_INT,
+          .min = 0000,
+          .max = 0777,
+          .default_value = "0000",
+          .validate = GF_OPT_VALIDATE_BOTH,
+          .description = "Mode bit permission that will always be set on a file."
+        },
+        { .key = {"force-directory-mode"},
+          .type = GF_OPTION_TYPE_INT,
+          .min = 0000,
+          .max = 0777,
+          .default_value = "0000",
+          .validate = GF_OPT_VALIDATE_BOTH,
+          .description = "Mode bit permission that will be always set on directory"
+        },
+        { .key = {"create-mask"},
+          .type = GF_OPTION_TYPE_INT,
+          .min = 0000,
+          .max = 0777,
+          .default_value = "0777",
+          .validate = GF_OPT_VALIDATE_BOTH,
+          .description = "Any bit not set here will be removed from the"
+          " modes set on a file when it is created"
+        },
+        { .key = {"create-directory-mask"},
+          .type = GF_OPTION_TYPE_INT,
+          .min = 0000,
+          .max = 0777,
+          .default_value = "0777",
+          .validate = GF_OPT_VALIDATE_BOTH,
+          .description = "Any bit not set here will be removed from the"
+          " modes set on a directory when it is created"
+        },
+        { .key = {NULL} }
+};
diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
new file mode 100644
index 00000000000..7a83eb3dfba
--- /dev/null
+++ b/xlators/storage/posix/src/posix-entry-ops.c
@@ -0,0 +1,2125 @@
+/*
+   Copyright (c) 2006-2017 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/ +#define __XOPEN_SOURCE 500 + +/* for SEEK_HOLE and SEEK_DATA */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <openssl/md5.h> +#include <stdint.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <errno.h> +#include <libgen.h> +#include <pthread.h> +#include <ftw.h> +#include <sys/stat.h> +#include <signal.h> +#include <sys/uio.h> +#include <unistd.h> +#include <ftw.h> + +#ifndef GF_BSD_HOST_OS +#include <alloca.h> +#endif /* GF_BSD_HOST_OS */ + +#ifdef HAVE_LINKAT +#include <fcntl.h> +#endif /* HAVE_LINKAT */ + +#include "glusterfs.h" +#include "checksum.h" +#include "dict.h" +#include "logging.h" +#include "posix.h" +#include "posix-handle.h" +#include "xlator.h" +#include "defaults.h" +#include "common-utils.h" +#include "compat-errno.h" +#include "compat.h" +#include "byte-order.h" +#include "syscall.h" +#include "statedump.h" +#include "locking.h" +#include "timer.h" +#include "glusterfs3-xdr.h" +#include "hashfn.h" +#include "posix-aio.h" +#include "glusterfs-acl.h" +#include "posix-messages.h" +#include "events.h" +#include "posix-gfid-path.h" +#include "compat-uuid.h" + +extern char *marker_xattrs[]; +#define ALIGN_SIZE 4096 + +#undef HAVE_SET_FSID +#ifdef HAVE_SET_FSID + +#define DECLARE_OLD_FS_ID_VAR uid_t old_fsuid; gid_t old_fsgid; + +#define SET_FS_ID(uid, gid) do { \ + old_fsuid = setfsuid (uid); \ + old_fsgid = setfsgid (gid); \ + } while (0) + +#define SET_TO_OLD_FS_ID() do { \ + setfsuid (old_fsuid); \ + setfsgid (old_fsgid); \ + } while (0) + +#else + +#define DECLARE_OLD_FS_ID_VAR +#define SET_FS_ID(uid, gid) +#define SET_TO_OLD_FS_ID() + +#endif + +/* Setting microseconds or nanoseconds depending on what's supported: + The passed in `tv` can be + struct timespec + if supported (better, because it supports nanosecond resolution) or + struct timeval + otherwise. 
*/
+#if HAVE_UTIMENSAT
+#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \
+        tv.tv_nsec = nanosecs
+#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \
+        (sys_utimensat (AT_FDCWD, path, tv, AT_SYMLINK_NOFOLLOW))
+#else
+#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \
+        tv.tv_usec = nanosecs / 1000
+#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \
+        (lutimes (path, tv))
+#endif
+
+/*
+ * Compare the target of the handle symlink for @gfid with the target
+ * expected for the entry described by @loc, i.e.
+ * "../../<pargfid[0]>/<pargfid[1]>/<pargfid>/<name>".
+ *
+ * Returns _gf_true only when the link could be read completely and its
+ * target is exactly the expected string; _gf_false otherwise (including
+ * when readlink fails or the target does not fit in PATH_MAX - 1 bytes).
+ */
+gf_boolean_t
+posix_symlinks_match (xlator_t *this, loc_t *loc, uuid_t gfid)
+{
+        struct posix_private *priv = NULL;
+        char linkname_actual[PATH_MAX] = {0,};
+        char linkname_expected[PATH_MAX] = {0};
+        char *dir_handle = NULL;
+        ssize_t len = 0;
+        size_t handle_size = 0;
+        gf_boolean_t ret = _gf_false;
+
+        priv = this->private;
+        handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length);
+        dir_handle = alloca0 (handle_size);
+
+        /* Bound the write by the destination buffer. handle_size is the
+           size of the handle path, not of the link target, and using it
+           here truncated targets with long entry names. */
+        snprintf (linkname_expected, sizeof (linkname_expected),
+                  "../../%02x/%02x/%s/%s",
+                  loc->pargfid[0], loc->pargfid[1], uuid_utoa (loc->pargfid),
+                  loc->name);
+
+        MAKE_HANDLE_GFID_PATH (dir_handle, this, gfid, NULL);
+        len = sys_readlink (dir_handle, linkname_actual, PATH_MAX);
+        /* readlink does not NUL-terminate. A result that fills the whole
+           buffer leaves no room for the terminator and may be truncated. */
+        if (len < 0 || len >= PATH_MAX)
+                goto out;
+        linkname_actual[len] = '\0';
+
+        /* Both strings are NUL-terminated now, compare them in full. */
+        if (!strcmp (linkname_actual, linkname_expected))
+                ret = _gf_true;
+
+out:
+        return ret;
+}
+
+/*
+ * If @req carries GF_REQUEST_LINK_COUNT_XDATA, store @nlink in @res under
+ * GF_RESPONSE_LINK_COUNT_XDATA, allocating @res when it is NULL.
+ * Returns @res (possibly newly allocated, or NULL if allocation failed);
+ * a failure to set the key is only logged.
+ */
+dict_t*
+posix_dict_set_nlink (dict_t *req, dict_t *res, int32_t nlink)
+{
+        int ret = -1;
+
+        if (req == NULL || !dict_get (req, GF_REQUEST_LINK_COUNT_XDATA))
+                goto out;
+
+        if (res == NULL)
+                res = dict_new ();
+        if (res == NULL)
+                goto out;
+
+        ret = dict_set_uint32 (res, GF_RESPONSE_LINK_COUNT_XDATA, nlink);
+        if (ret == -1)
+                gf_msg ("posix", GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL,
+                        "Failed to set GF_RESPONSE_LINK_COUNT_XDATA");
+out:
+        return res;
+}
+
+/* Regular fops */
+
+int32_t
+posix_lookup (call_frame_t *frame, xlator_t *this,
+              loc_t *loc, dict_t *xdata)
+{
+        struct iatt buf = {0, };
+        int32_t op_ret = -1;
+        int32_t entry_ret = 0;
+        int32_t op_errno = 0;
+        dict_t * xattr = NULL;
+        char * real_path = NULL;
+ char * par_path = NULL; + struct iatt postparent = {0,}; + int32_t gfidless = 0; + char *pgfid_xattr_key = NULL; + int32_t nlink_samepgfid = 0; + struct posix_private *priv = NULL; + posix_inode_ctx_t *ctx = NULL; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + priv = this->private; + + /* The Hidden directory should be for housekeeping purpose and it + should not get any gfid on it */ + if (__is_root_gfid (loc->pargfid) && loc->name + && (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) { + gf_msg (this->name, GF_LOG_WARNING, EPERM, + P_MSG_LOOKUP_NOT_PERMITTED, "Lookup issued on %s," + " which is not permitted", GF_HIDDEN_PATH); + op_errno = EPERM; + op_ret = -1; + goto out; + } + + op_ret = dict_get_int32 (xdata, GF_GFIDLESS_LOOKUP, &gfidless); + op_ret = -1; + if (gf_uuid_is_null (loc->pargfid) || (loc->name == NULL)) { + /* nameless lookup */ + MAKE_INODE_HANDLE (real_path, this, loc, &buf); + } else { + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &buf); + + if (gf_uuid_is_null (loc->inode->gfid)) { + op_ret = posix_gfid_heal (this, real_path, loc, xdata); + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + goto out; + } + MAKE_ENTRY_HANDLE (real_path, par_path, this, + loc, &buf); + } + } + + op_errno = errno; + + if (op_ret == -1) { + if (op_errno != ENOENT) { + gf_msg (this->name, GF_LOG_WARNING, op_errno, + P_MSG_LSTAT_FAILED, + "lstat on %s failed", + real_path ? 
real_path : "null"); + } + + entry_ret = -1; + goto parent; + } + + if (xdata && (op_ret == 0)) { + xattr = posix_xattr_fill (this, real_path, loc, NULL, -1, xdata, + &buf); + } + + if (priv->update_pgfid_nlinks) { + if (!gf_uuid_is_null (loc->pargfid) && !IA_ISDIR (buf.ia_type)) { + MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, + PGFID_XATTR_KEY_PREFIX, + loc->pargfid); + + op_ret = posix_inode_ctx_get_all (loc->inode, this, + &ctx); + if (op_ret < 0) { + op_errno = ENOMEM; + goto out; + } + + pthread_mutex_lock (&ctx->pgfid_lock); + { + SET_PGFID_XATTR_IF_ABSENT (real_path, + pgfid_xattr_key, + nlink_samepgfid, + XATTR_CREATE, op_ret, + this, unlock); + } +unlock: + pthread_mutex_unlock (&ctx->pgfid_lock); + } + } + +parent: + if (par_path) { + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_LSTAT_FAILED, "post-operation lstat on" + " parent %s failed", par_path); + if (op_errno == ENOENT) + /* If parent directory is missing in a lookup, + errno should be ESTALE (bad handle) and not + ENOENT (missing entry) + */ + op_errno = ESTALE; + goto out; + } + } + + op_ret = entry_ret; +out: + if (!op_ret && !gfidless && gf_uuid_is_null (buf.ia_gfid)) { + gf_msg (this->name, GF_LOG_ERROR, ENODATA, P_MSG_NULL_GFID, + "buf->ia_gfid is null for " + "%s", (real_path) ? 
real_path: ""); + op_ret = -1; + op_errno = ENODATA; + } + + if (op_ret == 0) + op_errno = 0; + STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, + (loc)?loc->inode:NULL, &buf, xattr, &postparent); + + if (xattr) + dict_unref (xattr); + + return 0; +} + +int +posix_mknod (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata) +{ + int tmp_fd = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = 0; + char *par_path = 0; + struct iatt stbuf = { 0, }; + struct posix_private *priv = NULL; + gid_t gid = 0; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; + void * uuid_req = NULL; + int32_t nlink_samepgfid = 0; + char *pgfid_xattr_key = NULL; + gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; + gf_boolean_t linked = _gf_false; + gf_loglevel_t level = GF_LOG_NONE; + mode_t mode_bit = 0; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno, + out); + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL); + + mode_bit = (priv->create_mask & mode) | priv->force_create_mode; + mode = posix_override_umask (mode, mode_bit); + + gid = frame->root->gid; + + SET_FS_ID (frame->root->uid, gid); + DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); + if (!real_path || !par_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on parent of %s failed", + real_path); + goto out; + } + + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + } + + /* Check if the 'gfid' already exists, because this mknod may be an + internal call from distribute for creating 
'linkfile', and that + linkfile may be for a hardlinked file */ + if (dict_get (xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { + dict_del (xdata, GLUSTERFS_INTERNAL_FOP_KEY); + op_ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); + if (op_ret) { + gf_msg_debug (this->name, 0, "failed to get the gfid from " + "dict for %s", loc->path); + goto real_op; + } + op_ret = posix_create_link_if_gfid_exists (this, uuid_req, + real_path, + loc->inode->table); + if (!op_ret) { + linked = _gf_true; + goto post_op; + } + } + +real_op: +#ifdef __NetBSD__ + if (S_ISFIFO(mode)) + op_ret = mkfifo (real_path, mode); + else +#endif /* __NetBSD__ */ + op_ret = sys_mknod (real_path, mode, dev); + + if (op_ret == -1) { + op_errno = errno; + if ((op_errno == EINVAL) && S_ISREG (mode)) { + /* Over Darwin, mknod with (S_IFREG|mode) + doesn't work */ + tmp_fd = sys_creat (real_path, mode); + if (tmp_fd == -1) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_CREATE_FAILED, "create failed on" + "%s", real_path); + goto out; + } + sys_close (tmp_fd); + } else { + if (op_errno == EEXIST) + level = GF_LOG_DEBUG; + else + level = GF_LOG_ERROR; + gf_msg (this->name, level, errno, P_MSG_MKNOD_FAILED, + "mknod on %s failed", real_path); + goto out; + } + } + + entry_created = _gf_true; + +#ifndef HAVE_SET_FSID + op_ret = sys_lchown (real_path, frame->root->uid, gid); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LCHOWN_FAILED, + "lchown on %s failed", real_path); + goto out; + } +#endif + +post_op: + op_ret = posix_acl_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_ACL_FAILED, + "setting ACLs on %s failed", real_path); + } + + if (priv->update_pgfid_nlinks) { + MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, + loc->pargfid); + nlink_samepgfid = 1; + + SET_PGFID_XATTR (real_path, pgfid_xattr_key, nlink_samepgfid, + XATTR_CREATE, op_ret, this, ignore); + } + + if (priv->gfid2path) { + 
posix_set_gfid2path_xattr (this, real_path, loc->pargfid,
+                                           loc->name);
+        }
+
+ignore:
+        op_ret = posix_entry_create_xattr_set (this, real_path, xdata);
+        if (op_ret) {
+                if (errno != EEXIST)
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                P_MSG_XATTR_FAILED,
+                                "setting xattrs on %s failed", real_path);
+                else
+                        gf_msg_debug (this->name, 0,
+                                      "setting xattrs on %s failed", real_path);
+        }
+
+        if (!linked) {
+                op_ret = posix_gfid_set (this, real_path, loc, xdata);
+                if (op_ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_GFID_FAILED,
+                                "setting gfid on %s failed", real_path);
+                } else {
+                        gfid_set = _gf_true;
+                }
+        }
+
+        op_ret = posix_pstat (this, NULL, real_path, &stbuf);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_MKNOD_FAILED,
+                        "mknod on %s failed", real_path);
+                goto out;
+        }
+
+        op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED,
+                        "post-operation lstat on parent %s failed",
+                        par_path);
+                goto out;
+        }
+
+        op_ret = 0;
+
+out:
+        SET_TO_OLD_FS_ID ();
+
+        STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno,
+                             (loc)?loc->inode:NULL, &stbuf, &preparent,
+                             &postparent, NULL);
+
+        /* Roll back on failure: remove the node this call created and
+           drop the gfid that was set for it. */
+        if (op_ret < 0) {
+                if (entry_created) {
+                        /* The entry was made with mknod/mkfifo/creat and
+                           is never a directory, so unlink() is the right
+                           removal call for every node type; rmdir() would
+                           fail on a FIFO, device or socket node. */
+                        sys_unlink (real_path);
+                }
+
+                if (gfid_set)
+                        posix_gfid_unset (this, xdata);
+        }
+
+        return 0;
+}
+
+int
+posix_mkdir (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
+{
+        int32_t op_ret = -1;
+        int32_t op_errno = 0;
+        char *real_path = NULL, *gfid_path = NULL;
+        char *par_path = NULL, *xattr_name = NULL;
+        struct iatt stbuf = {0, };
+        struct posix_private *priv = NULL;
+        gid_t gid = 0;
+        struct iatt preparent = {0,};
+        struct iatt postparent = {0,};
+        gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false;
+        void *uuid_req = NULL;
+        ssize_t size = 0;
+        dict_t *xdata_rsp =
NULL; + void *disk_xattr = NULL; + data_t *arg_data = NULL; + char pgfid[GF_UUID_BUF_SIZE] = {0}; + char value_buf[4096] = {0,}; + gf_boolean_t have_val = _gf_false; + mode_t mode_bit = 0; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + /* The Hidden directory should be for housekeeping purpose and it + should not get created from a user request */ + if (__is_root_gfid (loc->pargfid) && + (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) { + gf_msg (this->name, GF_LOG_WARNING, EPERM, + P_MSG_MKDIR_NOT_PERMITTED, "mkdir issued on %s, which" + "is not permitted", GF_HIDDEN_PATH); + op_errno = EPERM; + op_ret = -1; + goto out; + } + + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno, + out); + DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); + + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL); + if (!real_path || !par_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + if (loc->parent) + gf_uuid_unparse (loc->parent->gfid, pgfid); + else + gf_uuid_unparse (loc->pargfid, pgfid); + + gid = frame->root->gid; + + op_ret = posix_pstat (this, NULL, real_path, &stbuf); + + SET_FS_ID (frame->root->uid, gid); + + mode_bit = (priv->create_directory_mask & mode) + | priv->force_directory_mode; + mode = posix_override_umask (mode, mode_bit); + + if (xdata) { + op_ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); + if (!op_ret && !gf_uuid_compare (stbuf.ia_gfid, uuid_req)) { + op_ret = -1; + op_errno = EEXIST; + goto out; + } + } + + if (uuid_req && !gf_uuid_is_null (uuid_req)) { + op_ret = posix_istat (this, uuid_req, NULL, &stbuf); + if ((op_ret == 0) && IA_ISDIR (stbuf.ia_type)) { + size = posix_handle_path (this, uuid_req, NULL, NULL, + 0); + if (size > 0) + gfid_path = alloca (size); + + if (gfid_path) + posix_handle_path (this, uuid_req, NULL, + gfid_path, size); + + if 
(frame->root->pid != GF_CLIENT_PID_SELF_HEALD) { + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_DIR_OF_SAME_ID, "mkdir (%s): " + "gfid (%s) is already associated with " + "directory (%s). Hence, both " + "directories will share same gfid and " + "this can lead to inconsistencies.", + loc->path, uuid_utoa (uuid_req), + gfid_path ? gfid_path : "<NULL>"); + + gf_event (EVENT_POSIX_SAME_GFID, "gfid=%s;" + "path=%s;newpath=%s;brick=%s:%s", + uuid_utoa (uuid_req), + gfid_path ? gfid_path : "<NULL>", + loc->path, priv->hostname, + priv->base_path); + } + if (!posix_symlinks_match (this, loc, uuid_req)) + /* For afr selfheal of dir renames, we need to + * remove the old symlink in order for + * posix_gfid_set to set the symlink to the + * new dir.*/ + posix_handle_unset (this, stbuf.ia_gfid, NULL); + } + } else if (!uuid_req && frame->root->pid != GF_SERVER_PID_TRASH) { + op_ret = -1; + op_errno = EPERM; + gf_msg_callingfn (this->name, GF_LOG_WARNING, op_errno, + P_MSG_NULL_GFID, "mkdir (%s): is issued without " + "gfid-req %p", loc->path, xdata); + goto out; + } + + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on parent %s failed", + par_path); + goto out; + } + + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + mode |= S_ISGID; + } + + op_ret = dict_get_str (xdata, GF_PREOP_PARENT_KEY, &xattr_name); + if (xattr_name != NULL) { + arg_data = dict_get (xdata, xattr_name); + if (arg_data) { + size = sys_lgetxattr (par_path, xattr_name, value_buf, + sizeof(value_buf) - 1); + if (size >= 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg (this->name, GF_LOG_INFO, errno, + P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): getxattr on key " + "(%s) path (%s) failed due to " + " buffer overflow", pgfid, + loc->name, xattr_name, + par_path); + size = sys_lgetxattr (par_path, + xattr_name, NULL, + 0); + } + if 
(size < 0) { + op_ret = -1; + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): getxattr on key (%s)" + " path (%s) failed ", pgfid, + loc->name, xattr_name, + par_path); + goto out; + } + } + disk_xattr = alloca (size); + if (disk_xattr == NULL) { + op_ret = -1; + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): alloca failed during" + " preop of mkdir (%s)", pgfid, + loc->name, real_path); + goto out; + } + if (have_val) { + memcpy (disk_xattr, value_buf, size); + } else { + size = sys_lgetxattr (par_path, xattr_name, + disk_xattr, size); + if (size < 0) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): getxattr on " + " key (%s) path (%s) failed " + "(%s)", pgfid, loc->name, + xattr_name, par_path, + strerror (errno)); + goto out; + } + } + if ((arg_data->len != size) + || (memcmp (arg_data->data, disk_xattr, size))) { + gf_msg (this->name, GF_LOG_INFO, EIO, + P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): failing preop of " + "mkdir (%s) as on-disk" + " xattr value differs from argument " + "value for key %s", pgfid, loc->name, + real_path, xattr_name); + op_ret = -1; + op_errno = EIO; + + xdata_rsp = dict_new (); + if (xdata_rsp == NULL) { + gf_msg (this->name, GF_LOG_ERROR, + ENOMEM, + P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): " + "dict allocation failed", pgfid, + loc->name); + op_errno = ENOMEM; + goto out; + } + + op_errno = dict_set_int8 (xdata_rsp, + GF_PREOP_CHECK_FAILED, 1); + goto out; + } + + dict_del (xdata, xattr_name); + } + + dict_del (xdata, GF_PREOP_PARENT_KEY); + } + + op_ret = sys_mkdir (real_path, mode); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_MKDIR_FAILED, + "mkdir of %s failed", real_path); + goto out; + } + + entry_created = _gf_true; + +#ifndef HAVE_SET_FSID + op_ret = sys_chown (real_path, frame->root->uid, gid); + 
if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_CHOWN_FAILED, + "chown on %s failed", real_path); + goto out; + } +#endif + op_ret = posix_acl_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_ACL_FAILED, + "setting ACLs on %s failed ", real_path); + } + + op_ret = posix_entry_create_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "setting xattrs on %s failed", real_path); + } + + op_ret = posix_gfid_set (this, real_path, loc, xdata); + if (op_ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_GFID_FAILED, + "setting gfid on %s failed", real_path); + } else { + gfid_set = _gf_true; + } + + op_ret = posix_pstat (this, NULL, real_path, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat on %s failed", real_path); + goto out; + } + + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "post-operation lstat on parent of %s failed", + real_path); + goto out; + } + + op_ret = 0; + +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, + (loc)?loc->inode:NULL, &stbuf, &preparent, + &postparent, xdata_rsp); + + if (op_ret < 0) { + if (entry_created) + sys_rmdir (real_path); + + if (gfid_set) + posix_gfid_unset (this, xdata); + } + + if (xdata_rsp) + dict_unref (xdata_rsp); + + return 0; +} + +int +posix_add_unlink_to_ctx (inode_t *inode, xlator_t *this, char *unlink_path) +{ + uint64_t ctx = GF_UNLINK_FALSE; + int ret = 0; + + if (!unlink_path) { + gf_msg (this->name, GF_LOG_ERROR, ENOMEM, + P_MSG_UNLINK_FAILED, + "Creation of unlink entry failed for gfid: %s", + unlink_path); + ret = -1; + goto out; + } + + ctx = GF_UNLINK_TRUE; + ret = posix_inode_ctx_set_unlink_flag (inode, this, ctx); + if 
(ret < 0) { + goto out; + } + +out: + return ret; +} + +int32_t +posix_move_gfid_to_unlink (xlator_t *this, uuid_t gfid, loc_t *loc) +{ + char *unlink_path = NULL; + char *gfid_path = NULL; + int ret = 0; + struct posix_private *priv_posix = NULL; + + priv_posix = (struct posix_private *) this->private; + + MAKE_HANDLE_GFID_PATH (gfid_path, this, gfid, NULL); + + POSIX_GET_FILE_UNLINK_PATH (priv_posix->base_path, + loc->inode->gfid, unlink_path); + if (!unlink_path) { + ret = -1; + goto out; + } + gf_msg_debug (this->name, 0, + "Moving gfid: %s to unlink_path : %s", + gfid_path, unlink_path); + ret = sys_rename (gfid_path, unlink_path); + if (ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_UNLINK_FAILED, + "Creation of unlink entry failed for gfid: %s", + unlink_path); + goto out; + } + ret = posix_add_unlink_to_ctx (loc->inode, this, unlink_path); + if (ret < 0) + goto out; + +out: + return ret; +} + +int32_t +posix_unlink_gfid_handle_and_entry (xlator_t *this, const char *real_path, + struct iatt *stbuf, int32_t *op_errno, + loc_t *loc, gf_boolean_t get_link_count, + dict_t *rsp_dict) +{ + int32_t ret = 0; + struct iatt prebuf = {0,}; + gf_boolean_t locked = _gf_false; + + /* Unlink the gfid_handle_first */ + if (stbuf && stbuf->ia_nlink == 1) { + + LOCK (&loc->inode->lock); + + if (loc->inode->fd_count == 0) { + UNLOCK (&loc->inode->lock); + ret = posix_handle_unset (this, stbuf->ia_gfid, NULL); + } else { + UNLOCK (&loc->inode->lock); + ret = posix_move_gfid_to_unlink (this, stbuf->ia_gfid, + loc); + } + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_UNLINK_FAILED, "unlink of gfid handle " + "failed for path:%s with gfid %s", + real_path, uuid_utoa (stbuf->ia_gfid)); + } + } + + if (get_link_count) { + LOCK (&loc->inode->lock); + locked = _gf_true; + ret = posix_pstat (this, loc->gfid, real_path, &prebuf); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_LSTAT_FAILED, "lstat on %s failed", + real_path); + goto err; + 
} + } + + /* Unlink the actual file */ + ret = sys_unlink (real_path); + if (ret == -1) { + if (op_errno) + *op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_UNLINK_FAILED, + "unlink of %s failed", real_path); + goto err; + } + + if (locked) { + UNLOCK (&loc->inode->lock); + locked = _gf_false; + } + + ret = dict_set_uint32 (rsp_dict, GET_LINK_COUNT, prebuf.ia_nlink); + if (ret) + gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL, + "failed to set "GET_LINK_COUNT" for %s", real_path); + + return 0; + +err: + if (locked) { + UNLOCK (&loc->inode->lock); + locked = _gf_false; + } + return -1; +} + +gf_boolean_t +posix_skip_non_linkto_unlink (dict_t *xdata, loc_t *loc, char *key, + const char *linkto_xattr, struct iatt *stbuf, + const char *real_path) +{ + gf_boolean_t skip_unlink = _gf_false; + gf_boolean_t is_dht_linkto_file = _gf_false; + int unlink_if_linkto = 0; + ssize_t xattr_size = -1; + int op_ret = -1; + + op_ret = dict_get_int32 (xdata, key, + &unlink_if_linkto); + + if (!op_ret && unlink_if_linkto) { + + is_dht_linkto_file = IS_DHT_LINKFILE_MODE (stbuf); + if (!is_dht_linkto_file) + return _gf_true; + + LOCK (&loc->inode->lock); + + xattr_size = sys_lgetxattr (real_path, linkto_xattr, NULL, 0); + + if (xattr_size <= 0) + skip_unlink = _gf_true; + + UNLOCK (&loc->inode->lock); + + gf_msg ("posix", GF_LOG_INFO, 0, P_MSG_XATTR_STATUS, + "linkto_xattr status: %"PRIu32" for %s", skip_unlink, + real_path); + } + return skip_unlink; + +} + +int32_t +posix_unlink (call_frame_t *frame, xlator_t *this, + loc_t *loc, int xflag, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL; + char *par_path = NULL; + int32_t fd = -1; + struct iatt stbuf = {0,}; + struct iatt postbuf = {0,}; + struct posix_private *priv = NULL; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; + char *pgfid_xattr_key = NULL; + int32_t nlink_samepgfid = 0; + int32_t check_open_fd = 0; + int32_t skip_unlink = 0; + 
int32_t fdstat_requested = 0; + dict_t *unwind_dict = NULL; + void *uuid = NULL; + char uuid_str[GF_UUID_BUF_SIZE] = {0}; + char gfid_str[GF_UUID_BUF_SIZE] = {0}; + gf_boolean_t get_link_count = _gf_false; + posix_inode_ctx_t *ctx = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); + if (!real_path || !par_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on parent %s failed", + par_path); + goto out; + } + + priv = this->private; + + op_ret = dict_get_ptr (xdata, TIER_LINKFILE_GFID, &uuid); + + if (!op_ret && gf_uuid_compare (uuid, stbuf.ia_gfid)) { + op_errno = ENOENT; + op_ret = -1; + gf_uuid_unparse (uuid, uuid_str); + gf_uuid_unparse (stbuf.ia_gfid, gfid_str); + gf_msg_debug (this->name, op_errno, "Mismatch in gfid for path " + "%s. Aborting the unlink. loc->gfid = %s, " + "stbuf->ia_gfid = %s", real_path, + uuid_str, gfid_str); + goto out; + } + + op_ret = dict_get_int32 (xdata, DHT_SKIP_OPEN_FD_UNLINK, + &check_open_fd); + + if (!op_ret && check_open_fd) { + + LOCK (&loc->inode->lock); + + if (loc->inode->fd_count) { + skip_unlink = 1; + } + + UNLOCK (&loc->inode->lock); + + gf_msg (this->name, GF_LOG_INFO, 0, P_MSG_KEY_STATUS_INFO, + "open-fd-key-status: %"PRIu32" for %s", skip_unlink, + real_path); + + if (skip_unlink) { + op_ret = -1; + op_errno = EBUSY; + goto out; + } + } + /* + * If either of the function return true, skip_unlink. + * If first first function itself return true, + * we don't need to call second function, skip unlink. 
+ */ + skip_unlink = posix_skip_non_linkto_unlink (xdata, loc, + DHT_SKIP_NON_LINKTO_UNLINK, + DHT_LINKTO, &stbuf, + real_path); + skip_unlink = skip_unlink || posix_skip_non_linkto_unlink (xdata, loc, + TIER_SKIP_NON_LINKTO_UNLINK, + TIER_LINKTO, &stbuf, + real_path); + if (skip_unlink) { + op_ret = -1; + op_errno = EBUSY; + goto out; + } + + if (IA_ISREG (loc->inode->ia_type) && + xdata && dict_get (xdata, DHT_IATT_IN_XDATA_KEY)) { + fdstat_requested = 1; + } + + if (fdstat_requested || + (priv->background_unlink && IA_ISREG (loc->inode->ia_type))) { + fd = sys_open (real_path, O_RDONLY, 0); + if (fd == -1) { + op_ret = -1; + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_OPEN_FAILED, + "open of %s failed", real_path); + goto out; + } + } + + if (priv->update_pgfid_nlinks && (stbuf.ia_nlink > 1)) { + MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, + loc->pargfid); + op_ret = posix_inode_ctx_get_all (loc->inode, this, &ctx); + if (op_ret < 0) { + op_errno = ENOMEM; + goto out; + } + pthread_mutex_lock (&ctx->pgfid_lock); + { + UNLINK_MODIFY_PGFID_XATTR (real_path, pgfid_xattr_key, + nlink_samepgfid, 0, op_ret, + this, unlock); + } + unlock: + pthread_mutex_unlock (&ctx->pgfid_lock); + + if (op_ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_XATTR_FAILED, "modification of " + "parent gfid xattr failed (path:%s gfid:%s)", + real_path, uuid_utoa (loc->inode->gfid)); + if (op_errno != ENOATTR) + /* Allow unlink if pgfid xattr is not set. */ + goto out; + } + } + + if (priv->gfid2path && (stbuf.ia_nlink > 1)) { + op_ret = posix_remove_gfid2path_xattr (this, real_path, + loc->pargfid, + loc->name); + if (op_ret < 0) { + /* Allow unlink if pgfid xattr is not set. 
*/ + if (errno != ENOATTR) + goto out; + } + } + + unwind_dict = dict_new (); + if (!unwind_dict) { + op_errno = -ENOMEM; + op_ret = -1; + goto out; + } + + if (xdata && dict_get (xdata, GET_LINK_COUNT)) + get_link_count = _gf_true; + op_ret = posix_unlink_gfid_handle_and_entry (this, real_path, &stbuf, + &op_errno, loc, + get_link_count, + unwind_dict); + if (op_ret == -1) { + goto out; + } + + if (fdstat_requested) { + op_ret = posix_fdstat (this, fd, &postbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_FSTAT_FAILED, "post operation " + "fstat failed on fd=%d", fd); + goto out; + } + op_ret = posix_set_iatt_in_dict (unwind_dict, &postbuf); + } + + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "post-operation lstat on parent %s failed", + par_path); + goto out; + } + + unwind_dict = posix_dict_set_nlink (xdata, unwind_dict, stbuf.ia_nlink); + op_ret = 0; +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, + &preparent, &postparent, unwind_dict); + + if (fd != -1) { + sys_close (fd); + } + + /* unref unwind_dict*/ + if (unwind_dict) { + dict_unref (unwind_dict); + } + + return 0; +} + + +int +posix_rmdir (call_frame_t *frame, xlator_t *this, + loc_t *loc, int flags, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL; + char *par_path = NULL; + char *gfid_str = NULL; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; + struct iatt stbuf = {0,}; + struct posix_private *priv = NULL; + char tmp_path[PATH_MAX] = {0,}; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + + /* The Hidden directory should be for housekeeping purpose and it + should not get deleted from inside process */ + 
if (__is_root_gfid (loc->pargfid) && + (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) { + gf_msg (this->name, GF_LOG_WARNING, EPERM, + P_MSG_RMDIR_NOT_PERMITTED, "rmdir issued on %s, which" + "is not permitted", GF_HIDDEN_PATH); + op_errno = EPERM; + op_ret = -1; + goto out; + } + + priv = this->private; + + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); + if (!real_path || !par_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on parent %s failed", + par_path); + goto out; + } + + if (flags) { + gfid_str = uuid_utoa (stbuf.ia_gfid); + + op_ret = sys_mkdir (priv->trash_path, 0755); + if (errno != EEXIST && op_ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_MKDIR_FAILED, + "mkdir of %s failed", priv->trash_path); + } else { + (void) snprintf (tmp_path, sizeof(tmp_path), "%s/%s", + priv->trash_path, gfid_str); + op_ret = sys_rename (real_path, tmp_path); + pthread_cond_signal (&priv->janitor_cond); + } + } else { + op_ret = sys_rmdir (real_path); + } + op_errno = errno; + + if (op_ret == 0) { + if (posix_symlinks_match (this, loc, stbuf.ia_gfid)) + posix_handle_unset (this, stbuf.ia_gfid, NULL); + } + + if (op_errno == EEXIST) + /* Solaris sets errno = EEXIST instead of ENOTEMPTY */ + op_errno = ENOTEMPTY; + + /* No need to log a common error as ENOTEMPTY */ + if (op_ret == -1 && op_errno != ENOTEMPTY) { + gf_msg (this->name, GF_LOG_ERROR, op_errno, P_MSG_RMDIR_FAILED, + "rmdir of %s failed", real_path); + } + + if (op_ret == -1) { + if (op_errno == ENOTEMPTY) { + gf_msg_debug (this->name, 0, "%s on %s failed", (flags) + ? "rename" : "rmdir", real_path); + } else { + gf_msg (this->name, GF_LOG_ERROR, op_errno, + P_MSG_DIR_OPERATION_FAILED, "%s on %s failed", + (flags) ? 
"rename" : "rmdir", real_path); + } + goto out; + } + + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "post-operation lstat on parent of %s failed", + par_path); + goto out; + } + +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, + &preparent, &postparent, NULL); + + return 0; +} + + +int +posix_symlink (call_frame_t *frame, xlator_t *this, + const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = 0; + char * par_path = 0; + struct iatt stbuf = { 0, }; + struct posix_private *priv = NULL; + gid_t gid = 0; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; + char *pgfid_xattr_key = NULL; + int32_t nlink_samepgfid = 0; + gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (linkname, out); + VALIDATE_OR_GOTO (loc, out); + + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno, + out); + DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); + + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); + + gid = frame->root->gid; + if (!real_path || !par_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + SET_FS_ID (frame->root->uid, gid); + + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on parent %s failed", + par_path); + goto out; + } + + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + } + + op_ret = sys_symlink (linkname, real_path); + + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_SYMLINK_FAILED, 
+ "symlink of %s --> %s failed", + real_path, linkname); + goto out; + } + + entry_created = _gf_true; + +#ifndef HAVE_SET_FSID + op_ret = sys_lchown (real_path, frame->root->uid, gid); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LCHOWN_FAILED, + "lchown failed on %s", real_path); + goto out; + } +#endif + op_ret = posix_acl_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_ACL_FAILED, + "setting ACLs on %s failed", real_path); + } + + if (priv->update_pgfid_nlinks) { + MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, + loc->pargfid); + nlink_samepgfid = 1; + SET_PGFID_XATTR (real_path, pgfid_xattr_key, nlink_samepgfid, + XATTR_CREATE, op_ret, this, ignore); + } + + if (priv->gfid2path) { + posix_set_gfid2path_xattr (this, real_path, loc->pargfid, + loc->name); + } + +ignore: + op_ret = posix_entry_create_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "setting xattrs on %s failed ", real_path); + } + + op_ret = posix_gfid_set (this, real_path, loc, xdata); + if (op_ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_GFID_FAILED, + "setting gfid on %s failed", real_path); + } else { + gfid_set = _gf_true; + } + + op_ret = posix_pstat (this, NULL, real_path, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat failed on %s", real_path); + goto out; + } + + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "post-operation lstat on parent %s failed", + par_path); + goto out; + } + + op_ret = 0; + +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, + (loc)?loc->inode:NULL, &stbuf, &preparent, + &postparent, NULL); + + if (op_ret < 0) { + if (entry_created) + 
sys_unlink (real_path); + + if (gfid_set) + posix_gfid_unset (this, xdata); + } + + return 0; +} + + +int +posix_rename (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_oldpath = NULL; + char *real_newpath = NULL; + char *par_oldpath = NULL; + char *par_newpath = NULL; + struct iatt stbuf = {0, }; + struct posix_private *priv = NULL; + char was_present = 1; + struct iatt preoldparent = {0, }; + struct iatt postoldparent = {0, }; + struct iatt prenewparent = {0, }; + struct iatt postnewparent = {0, }; + char olddirid[64]; + char newdirid[64]; + uuid_t victim = {0}; + int was_dir = 0; + int nlink = 0; + char *pgfid_xattr_key = NULL; + int32_t nlink_samepgfid = 0; + char *gfid_path = NULL; + dict_t *unwind_dict = NULL; + gf_boolean_t locked = _gf_false; + gf_boolean_t get_link_count = _gf_false; + posix_inode_ctx_t *ctx_old = NULL; + posix_inode_ctx_t *ctx_new = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (oldloc, out); + VALIDATE_OR_GOTO (newloc, out); + + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_ENTRY_HANDLE (real_oldpath, par_oldpath, this, oldloc, NULL); + if (!real_oldpath || !par_oldpath) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + MAKE_ENTRY_HANDLE (real_newpath, par_newpath, this, newloc, &stbuf); + if (!real_newpath || !par_newpath) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + unwind_dict = dict_new (); + if (!unwind_dict) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + op_ret = posix_pstat (this, oldloc->pargfid, par_oldpath, &preoldparent); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on parent %s failed", + par_oldpath); + 
goto out; + } + + op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &prenewparent); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on parent of %s failed", + par_newpath); + goto out; + } + + op_ret = posix_pstat (this, NULL, real_newpath, &stbuf); + if ((op_ret == -1) && (errno == ENOENT)){ + was_present = 0; + } else { + gf_uuid_copy (victim, stbuf.ia_gfid); + if (IA_ISDIR (stbuf.ia_type)) + was_dir = 1; + nlink = stbuf.ia_nlink; + } + + if (was_present && IA_ISDIR(stbuf.ia_type) && !newloc->inode) { + gf_msg (this->name, GF_LOG_WARNING, EEXIST, P_MSG_DIR_FOUND, + "found directory at %s while expecting ENOENT", + real_newpath); + op_ret = -1; + op_errno = EEXIST; + goto out; + } + + if (was_present && IA_ISDIR(stbuf.ia_type) && + gf_uuid_compare (newloc->inode->gfid, stbuf.ia_gfid)) { + gf_msg (this->name, GF_LOG_WARNING, EEXIST, P_MSG_DIR_FOUND, + "found directory %s at %s while renaming %s", + uuid_utoa_r (newloc->inode->gfid, olddirid), + real_newpath, + uuid_utoa_r (stbuf.ia_gfid, newdirid)); + op_ret = -1; + op_errno = EEXIST; + goto out; + } + + op_ret = posix_inode_ctx_get_all (oldloc->inode, this, &ctx_old); + if (op_ret < 0) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + if (newloc->inode) { + op_ret = posix_inode_ctx_get_all (newloc->inode, this, &ctx_new); + if (op_ret < 0) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + } + + if (IA_ISDIR (oldloc->inode->ia_type)) + posix_handle_unset (this, oldloc->inode->gfid, NULL); + + pthread_mutex_lock (&ctx_old->pgfid_lock); + { + if (!IA_ISDIR (oldloc->inode->ia_type) + && priv->update_pgfid_nlinks) { + MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, + PGFID_XATTR_KEY_PREFIX, + oldloc->pargfid); + UNLINK_MODIFY_PGFID_XATTR (real_oldpath, + pgfid_xattr_key, + nlink_samepgfid, 0, + op_ret, + this, unlock); + } + + if ((xdata) && (dict_get (xdata, GET_LINK_COUNT)) + && (real_newpath) && (was_present)) { + 
pthread_mutex_lock (&ctx_new->pgfid_lock); + locked = _gf_true; + get_link_count = _gf_true; + op_ret = posix_pstat (this, newloc->gfid, real_newpath, + &stbuf); + if ((op_ret == -1) && (errno != ENOENT)) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_LSTAT_FAILED, + "lstat on %s failed", real_newpath); + goto unlock; + } + } + + op_ret = sys_rename (real_oldpath, real_newpath); + if (op_ret == -1) { + op_errno = errno; + if (op_errno == ENOTEMPTY) { + gf_msg_debug (this->name, 0, "rename of %s to" + " %s failed: %s", real_oldpath, + real_newpath, + strerror (op_errno)); + } else { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_RENAME_FAILED, + "rename of %s to %s failed", + real_oldpath, real_newpath); + } + + if (priv->update_pgfid_nlinks + && !IA_ISDIR (oldloc->inode->ia_type)) { + LINK_MODIFY_PGFID_XATTR (real_oldpath, + pgfid_xattr_key, + nlink_samepgfid, 0, + op_ret, + this, unlock); + } + + goto unlock; + } + + if (locked) { + pthread_mutex_unlock (&ctx_new->pgfid_lock); + locked = _gf_false; + } + + if ((get_link_count) && + (dict_set_uint32 (unwind_dict, GET_LINK_COUNT, + stbuf.ia_nlink))) + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_SET_XDATA_FAIL, "failed to set " + GET_LINK_COUNT" for %s", real_newpath); + + if (!IA_ISDIR (oldloc->inode->ia_type) + && priv->update_pgfid_nlinks) { + MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, + PGFID_XATTR_KEY_PREFIX, + newloc->pargfid); + LINK_MODIFY_PGFID_XATTR (real_newpath, + pgfid_xattr_key, + nlink_samepgfid, 0, + op_ret, + this, unlock); + } + + if (!IA_ISDIR (oldloc->inode->ia_type) && priv->gfid2path) { + MAKE_HANDLE_ABSPATH (gfid_path, this, + oldloc->inode->gfid); + + posix_remove_gfid2path_xattr (this, gfid_path, + oldloc->pargfid, + oldloc->name); + posix_set_gfid2path_xattr (this, gfid_path, + newloc->pargfid, + newloc->name); + } + } + +unlock: + if (locked) { + pthread_mutex_unlock (&ctx_new->pgfid_lock); + locked = _gf_false; + } + pthread_mutex_unlock (&ctx_old->pgfid_lock); 
+ + if (op_ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_FAILED, + "modification of " + "parent gfid xattr failed (gfid:%s)", + uuid_utoa (oldloc->inode->gfid)); + goto out; + } + + if (was_dir) + posix_handle_unset (this, victim, NULL); + + if (was_present && !was_dir && nlink == 1) + posix_handle_unset (this, victim, NULL); + + if (IA_ISDIR (oldloc->inode->ia_type)) { + posix_handle_soft (this, real_newpath, newloc, + oldloc->inode->gfid, NULL); + } + + op_ret = posix_pstat (this, NULL, real_newpath, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat on %s failed", real_newpath); + goto out; + } + + op_ret = posix_pstat (this, oldloc->pargfid, par_oldpath, &postoldparent); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "post-operation lstat on parent %s failed", + par_oldpath); + goto out; + } + + op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &postnewparent); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "post-operation lstat on parent %s failed", + par_newpath); + goto out; + } + + if (was_present) + unwind_dict = posix_dict_set_nlink (xdata, unwind_dict, nlink); + op_ret = 0; +out: + + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, &stbuf, + &preoldparent, &postoldparent, + &prenewparent, &postnewparent, unwind_dict); + + if (unwind_dict) + dict_unref (unwind_dict); + + return 0; +} + + +int +posix_link (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_oldpath = 0; + char *real_newpath = 0; + char *par_newpath = 0; + struct iatt stbuf = {0, }; + struct posix_private *priv = NULL; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; + int32_t nlink_samepgfid = 0; + char *pgfid_xattr_key = NULL; + 
gf_boolean_t entry_created = _gf_false; + posix_inode_ctx_t *ctx = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (oldloc, out); + VALIDATE_OR_GOTO (newloc, out); + + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_INODE_HANDLE (real_oldpath, this, oldloc, &stbuf); + if (!real_oldpath) { + op_errno = errno; + goto out; + } + + MAKE_ENTRY_HANDLE (real_newpath, par_newpath, this, newloc, &stbuf); + if (!real_newpath || !par_newpath) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &preparent); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat failed: %s", par_newpath); + goto out; + } + + + op_ret = sys_link (real_oldpath, real_newpath); + + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LINK_FAILED, + "link %s to %s failed", + real_oldpath, real_newpath); + goto out; + } + + entry_created = _gf_true; + + op_ret = posix_pstat (this, NULL, real_newpath, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat on %s failed", real_newpath); + goto out; + } + + op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &postparent); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat failed: %s", par_newpath); + goto out; + } + + if (priv->update_pgfid_nlinks) { + MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, + newloc->pargfid); + + op_ret = posix_inode_ctx_get_all (newloc->inode, this, &ctx); + if (op_ret < 0) { + op_errno = ENOMEM; + goto out; + } + + pthread_mutex_lock (&ctx->pgfid_lock); + { + LINK_MODIFY_PGFID_XATTR (real_newpath, pgfid_xattr_key, + 
nlink_samepgfid, 0, op_ret, + this, unlock); + } + unlock: + pthread_mutex_unlock (&ctx->pgfid_lock); + + if (op_ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_XATTR_FAILED, "modification of " + "parent gfid xattr failed (path:%s gfid:%s)", + real_newpath, uuid_utoa (newloc->inode->gfid)); + goto out; + } + } + + if (priv->gfid2path) { + if (stbuf.ia_nlink <= MAX_GFID2PATH_LINK_SUP) { + op_ret = posix_set_gfid2path_xattr (this, real_newpath, + newloc->pargfid, + newloc->name); + if (op_ret) { + op_errno = errno; + goto out; + } + } else { + gf_msg (this->name, GF_LOG_INFO, 0, + P_MSG_XATTR_NOTSUP, "Link count exceeded. " + "gfid2path xattr not set (path:%s gfid:%s)", + real_newpath, uuid_utoa (newloc->inode->gfid)); + } + } + + op_ret = 0; + +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, + (oldloc)?oldloc->inode:NULL, &stbuf, &preparent, + &postparent, NULL); + + if (op_ret < 0) { + if (entry_created) + sys_unlink (real_newpath); + } + + return 0; +} + +int +posix_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, mode_t mode, + mode_t umask, fd_t *fd, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int32_t _fd = -1; + int _flags = 0; + char * real_path = NULL; + char * par_path = NULL; + struct iatt stbuf = {0, }; + struct posix_fd * pfd = NULL; + struct posix_private * priv = NULL; + char was_present = 1; + + gid_t gid = 0; + struct iatt preparent = {0,}; + struct iatt postparent = {0,}; + + int nlink_samepgfid = 0; + char * pgfid_xattr_key = NULL; + gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; + mode_t mode_bit = 0; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + VALIDATE_OR_GOTO (loc, out); + VALIDATE_OR_GOTO (fd, out); + + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno, + out); + 
DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); + + MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); + + gid = frame->root->gid; + + SET_FS_ID (frame->root->uid, gid); + if (!real_path || !par_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on parent %s failed", + par_path); + goto out; + } + + if (preparent.ia_prot.sgid) { + gid = preparent.ia_gid; + } + + if (!flags) { + _flags = O_CREAT | O_RDWR | O_EXCL; + } + else { + _flags = flags | O_CREAT; + } + + op_ret = posix_pstat (this, NULL, real_path, &stbuf); + if ((op_ret == -1) && (errno == ENOENT)) { + was_present = 0; + } + + if (priv->o_direct) + _flags |= O_DIRECT; + + mode_bit = (priv->create_mask & mode) | priv->force_create_mode; + mode = posix_override_umask (mode, mode_bit); + _fd = sys_open (real_path, _flags, mode); + + if (_fd == -1) { + op_errno = errno; + op_ret = -1; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_OPEN_FAILED, + "open on %s failed", real_path); + goto out; + } + + if ((_flags & O_CREAT) && (_flags & O_EXCL)) { + entry_created = _gf_true; + } + + + if (was_present) + goto fill_stat; + +#ifndef HAVE_SET_FSID + op_ret = sys_chown (real_path, frame->root->uid, gid); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_CHOWN_FAILED, + "chown on %s failed", real_path); + } +#endif + op_ret = posix_acl_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_ACL_FAILED, + "setting ACLs on %s failed", real_path); + } + + if (priv->update_pgfid_nlinks) { + MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, + loc->pargfid); + nlink_samepgfid = 1; + SET_PGFID_XATTR (real_path, pgfid_xattr_key, nlink_samepgfid, + XATTR_CREATE, op_ret, this, ignore); + } + + if 
(priv->gfid2path) { + posix_set_gfid2path_xattr (this, real_path, loc->pargfid, + loc->name); + } +ignore: + op_ret = posix_entry_create_xattr_set (this, real_path, xdata); + if (op_ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, + "setting xattrs on %s failed ", real_path); + } + +fill_stat: + op_ret = posix_gfid_set (this, real_path, loc, xdata); + if (op_ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_GFID_FAILED, + "setting gfid on %s failed", real_path); + } else { + gfid_set = _gf_true; + } + + op_ret = posix_fdstat (this, _fd, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fstat on %d failed", _fd); + goto out; + } + + op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "post-operation lstat on parent %s failed", + par_path); + goto out; + } + + op_ret = -1; + pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd); + if (!pfd) { + op_errno = errno; + goto out; + } + + pfd->flags = flags; + pfd->fd = _fd; + + op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd); + if (op_ret) + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_FD_PATH_SETTING_FAILED, + "failed to set the fd context path=%s fd=%p", + real_path, fd); + + LOCK (&priv->lock); + { + priv->nr_files++; + } + UNLOCK (&priv->lock); + + op_ret = 0; + +out: + SET_TO_OLD_FS_ID (); + + if ((-1 == op_ret) && (_fd != -1)) { + sys_close (_fd); + } + + STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, + fd, (loc)?loc->inode:NULL, &stbuf, &preparent, + &postparent, xdata); + + if (op_ret < 0) { + if (entry_created) + sys_unlink (real_path); + + if (gfid_set) + posix_gfid_unset (this, xdata); + } + + return 0; +} diff --git a/xlators/storage/posix/src/posix-handle.c b/xlators/storage/posix/src/posix-handle.c index 38e3198d812..99501177138 100644 --- a/xlators/storage/posix/src/posix-handle.c +++ 
b/xlators/storage/posix/src/posix-handle.c @@ -26,6 +26,9 @@ #include "compat-errno.h" +int +posix_handle_mkdir_hashes (xlator_t *this, const char *newpath); + inode_t * posix_resolve (xlator_t *this, inode_table_t *itable, inode_t *parent, char *bname, struct iatt *iabuf) diff --git a/xlators/storage/posix/src/posix-handle.h b/xlators/storage/posix/src/posix-handle.h index c76e618546f..97186f91e64 100644 --- a/xlators/storage/posix/src/posix-handle.h +++ b/xlators/storage/posix/src/posix-handle.h @@ -1,5 +1,5 @@ /* - Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com> + Copyright (c) 2011-2017 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. This file is licensed to you under your choice of the GNU Lesser @@ -10,29 +10,12 @@ #ifndef _POSIX_HANDLE_H #define _POSIX_HANDLE_H -#include <limits.h> -#include <sys/types.h> -#include "xlator.h" -#include "posix-messages.h" - -/* From Open Group Base Specifications Issue 6 */ -#ifndef _XOPEN_PATH_MAX -#define _XOPEN_PATH_MAX 1024 -#endif - -#define TRASH_DIR "landfill" - -#define UUID0_STR "00000000-0000-0000-0000-000000000000" -#define SLEN(str) (sizeof(str) - 1) +#include "posix-inode-handle.h" #define HANDLE_ABSPATH_LEN(this) (POSIX_BASE_PATH_LEN(this) + \ SLEN("/" GF_HIDDEN_PATH "/00/00/" \ UUID0_STR) + 1) -#define LOC_HAS_ABSPATH(loc) (loc && (loc->path) && (loc->path[0] == '/')) -#define LOC_IS_DIR(loc) (loc && (loc->inode) && \ - (loc->inode->ia_type == IA_IFDIR)) - #define MAKE_PGFID_XATTR_KEY(var, prefix, pgfid) do { \ var = alloca (strlen (prefix) + UUID_CANONICAL_FORM_LEN + 1); \ strcpy (var, prefix); \ @@ -123,32 +106,6 @@ } \ } while (0) -#define MAKE_REAL_PATH(var, this, path) do { \ - size_t path_len = strlen(path); \ - size_t var_len = path_len + POSIX_BASE_PATH_LEN(this) + 1; \ - if (POSIX_PATH_MAX(this) != -1 && \ - var_len >= POSIX_PATH_MAX(this)) { \ - var = alloca (path_len + 1); \ - strcpy (var, (path[0] == '/') ? 
path + 1 : path); \ - } else { \ - var = alloca (var_len); \ - strcpy (var, POSIX_BASE_PATH(this)); \ - strcpy (&var[POSIX_BASE_PATH_LEN(this)], path); \ - } \ - } while (0) - -#define MAKE_HANDLE_PATH(var, this, gfid, base) do { \ - int __len; \ - __len = posix_handle_path (this, gfid, base, NULL, 0); \ - if (__len <= 0) \ - break; \ - var = alloca (__len); \ - __len = posix_handle_path (this, gfid, base, var, __len); \ - if (__len <= 0) \ - var = NULL; \ - } while (0) - - #define MAKE_HANDLE_GFID_PATH(var, this, gfid, base) do { \ int __len = 0; \ __len = posix_handle_gfid_path (this, gfid, base, NULL, 0); \ @@ -168,7 +125,6 @@ __len = posix_handle_relpath (this, gfid, base, var, __len); \ } while (0) - #define MAKE_HANDLE_ABSPATH(var, this, gfid) do { \ struct posix_private * __priv = this->private; \ int __len = HANDLE_ABSPATH_LEN(this); \ @@ -177,36 +133,6 @@ __priv->base_path, gfid[0], gfid[1], uuid_utoa(gfid)); \ } while (0) - -#define MAKE_INODE_HANDLE(rpath, this, loc, iatt_p) do { \ - if (gf_uuid_is_null (loc->gfid)) { \ - gf_msg (this->name, GF_LOG_ERROR, 0, \ - P_MSG_INODE_HANDLE_CREATE, \ - "null gfid for path %s", (loc)->path); \ - break; \ - } \ - if (LOC_IS_DIR (loc) && LOC_HAS_ABSPATH (loc)) { \ - MAKE_REAL_PATH (rpath, this, (loc)->path); \ - op_ret = posix_pstat (this, (loc)->gfid, rpath, iatt_p); \ - break; \ - } \ - errno = 0; \ - op_ret = posix_istat (this, loc->gfid, NULL, iatt_p); \ - if (errno != ELOOP) { \ - MAKE_HANDLE_PATH (rpath, this, (loc)->gfid, NULL); \ - if (!rpath) { \ - op_ret = -1; \ - gf_msg (this->name, GF_LOG_ERROR, errno, \ - P_MSG_INODE_HANDLE_CREATE, \ - "Failed to create inode handle " \ - "for path %s", (loc)->path); \ - } \ - break; \ - } \ - /* __ret == -1 && errno == ELOOP */ \ - } while (0) - - #define MAKE_ENTRY_HANDLE(entp, parp, this, loc, ent_p) do { \ char *__parp; \ \ @@ -240,25 +166,6 @@ /* expand ELOOP */ \ } while (0) - -#define POSIX_ANCESTRY_PATH (1 << 0) -#define POSIX_ANCESTRY_DENTRY (1 << 1) - -int 
-posix_handle_path (xlator_t *this, uuid_t gfid, const char *basename, char *buf, - size_t len); - -int -posix_make_ancestryfromgfid (xlator_t *this, char *path, int pathsize, - gf_dirent_t *head, int type, uuid_t gfid, - const size_t handle_size, - const char *priv_base_path, - inode_table_t *table, inode_t **parent, - dict_t *xdata, int32_t *op_errno); -int -posix_handle_path_safe (xlator_t *this, uuid_t gfid, const char *basename, - char *buf, size_t len); - int posix_handle_gfid_path (xlator_t *this, uuid_t gfid, const char *basename, char *buf, size_t len); @@ -275,13 +182,8 @@ posix_handle_soft (xlator_t *this, const char *real_path, loc_t *loc, int posix_handle_unset (xlator_t *this, uuid_t gfid, const char *basename); -int posix_handle_mkdir_hashes (xlator_t *this, const char *newpath); - -int posix_handle_init (xlator_t *this); - -int posix_create_link_if_gfid_exists (xlator_t *this, uuid_t gfid, - char *real_path, inode_table_t *itable); - int -posix_handle_trash_init (xlator_t *this); +posix_create_link_if_gfid_exists (xlator_t *this, uuid_t gfid, + char *real_path, inode_table_t *itable); + #endif /* !_POSIX_HANDLE_H */ diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index 5a3f4b129fb..8f550f9fe0d 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -38,6 +38,8 @@ #include "dict.h" #include "logging.h" #include "posix.h" +#include "posix-messages.h" +#include "posix-handle.h" #include "xlator.h" #include "defaults.h" #include "common-utils.h" @@ -2643,3 +2645,36 @@ posix_is_bulk_removexattr (char *name, dict_t *xdata) return _gf_true; return _gf_false; } + +int32_t +posix_set_iatt_in_dict (dict_t *dict, struct iatt *in_stbuf) +{ + int ret = -1; + struct iatt *stbuf = NULL; + int32_t len = sizeof(struct iatt); + + if (!dict || !in_stbuf) + return ret; + + stbuf = GF_CALLOC (1, len, gf_common_mt_char); + if (!stbuf) + return ret; + + memcpy (stbuf, 
in_stbuf, len); + + ret = dict_set_bin (dict, DHT_IATT_IN_XDATA_KEY, stbuf, len); + if (ret) + GF_FREE (stbuf); + + return ret; +} + +mode_t +posix_override_umask (mode_t mode, mode_t mode_bit) +{ + gf_msg_debug ("posix", 0, "The value of mode is %u", mode); + mode = mode >> 9; /* 3x3 (bits for each octal digit)*/ + mode = (mode << 9) | mode_bit; + gf_msg_debug ("posix", 0, "The value of mode is %u", mode); + return mode; +} diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c new file mode 100644 index 00000000000..9d1b19ac9a0 --- /dev/null +++ b/xlators/storage/posix/src/posix-inode-fd-ops.c @@ -0,0 +1,4975 @@ +/* + Copyright (c) 2006-2017 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ +#define __XOPEN_SOURCE 500 + +/* for SEEK_HOLE and SEEK_DATA */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include <openssl/md5.h> +#include <stdint.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <errno.h> +#include <libgen.h> +#include <pthread.h> +#include <ftw.h> +#include <sys/stat.h> +#include <signal.h> +#include <sys/uio.h> +#include <unistd.h> +#include <ftw.h> + +#ifndef GF_BSD_HOST_OS +#include <alloca.h> +#endif /* GF_BSD_HOST_OS */ + +#ifdef HAVE_LINKAT +#include <fcntl.h> +#endif /* HAVE_LINKAT */ + +#include "glusterfs.h" +#include "checksum.h" +#include "dict.h" +#include "logging.h" +#include "posix.h" +#include "xlator.h" +#include "defaults.h" +#include "common-utils.h" +#include "compat-errno.h" +#include "compat.h" +#include "byte-order.h" +#include "syscall.h" +#include "statedump.h" +#include "locking.h" +#include "timer.h" +#include "glusterfs3-xdr.h" +#include "hashfn.h" +#include "posix-aio.h" +#include "glusterfs-acl.h" +#include "posix-messages.h" +#include "events.h" +#include "posix-gfid-path.h" +#include "compat-uuid.h" + +extern char *marker_xattrs[]; +#define ALIGN_SIZE 4096 + +#undef HAVE_SET_FSID +#ifdef HAVE_SET_FSID + +#define DECLARE_OLD_FS_ID_VAR uid_t old_fsuid; gid_t old_fsgid; + +#define SET_FS_ID(uid, gid) do { \ + old_fsuid = setfsuid (uid); \ + old_fsgid = setfsgid (gid); \ + } while (0) + +#define SET_TO_OLD_FS_ID() do { \ + setfsuid (old_fsuid); \ + setfsgid (old_fsgid); \ + } while (0) + +#else + +#define DECLARE_OLD_FS_ID_VAR +#define SET_FS_ID(uid, gid) +#define SET_TO_OLD_FS_ID() + +#endif + +/* Setting microseconds or nanoseconds depending on what's supported: + The passed in `tv` can be + struct timespec + if supported (better, because it supports nanosecond resolution) or + struct timeval + otherwise. 
*/ +#if HAVE_UTIMENSAT +#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \ + tv.tv_nsec = nanosecs +#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \ + (sys_utimensat (AT_FDCWD, path, tv, AT_SYMLINK_NOFOLLOW)) +#else +#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \ + tv.tv_usec = nanosecs / 1000 +#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \ + (lutimes (path, tv)) +#endif + +static char *disallow_removexattrs[] = { + GF_XATTR_VOL_ID_KEY, + GFID_XATTR_KEY, + NULL +}; + +int32_t +posix_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + struct iatt buf = {0,}; + int32_t op_ret = -1; + int32_t op_errno = 0; + struct posix_private *priv = NULL; + char *real_path = NULL; + dict_t *xattr_rsp = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + + MAKE_INODE_HANDLE (real_path, this, loc, &buf); + + if (op_ret == -1) { + op_errno = errno; + if (op_errno == ENOENT) { + gf_msg_debug(this->name, 0, "lstat on %s failed: %s", + real_path ? real_path : "<null>", + strerror (op_errno)); + } else { + gf_msg (this->name, GF_LOG_ERROR, op_errno, + P_MSG_LSTAT_FAILED, "lstat on %s failed", + real_path ? 
real_path : "<null>"); + } + goto out; + } + if (xdata) + xattr_rsp = posix_xattr_fill (this, real_path, loc, NULL, -1, + xdata, &buf); + + op_ret = 0; + +out: + SET_TO_OLD_FS_ID(); + STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf, xattr_rsp); + if (xattr_rsp) + dict_unref (xattr_rsp); + + return 0; +} + +static int +posix_do_chmod (xlator_t *this, const char *path, struct iatt *stbuf) +{ + int32_t ret = -1; + mode_t mode = 0; + mode_t mode_bit = 0; + struct posix_private *priv = NULL; + struct stat stat; + int is_symlink = 0; + + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + ret = sys_lstat (path, &stat); + if (ret != 0) { + gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_LSTAT_FAILED, + "lstat failed: %s", path); + goto out; + } + + if (S_ISLNK (stat.st_mode)) + is_symlink = 1; + + if (S_ISDIR (stat.st_mode)) { + mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type); + mode_bit = (mode & priv->create_directory_mask) + | priv->force_directory_mode; + mode = posix_override_umask(mode, mode_bit); + } else { + mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type); + mode_bit = (mode & priv->create_mask) + | priv->force_create_mode; + mode = posix_override_umask(mode, mode_bit); + } + ret = lchmod (path, mode); + if ((ret == -1) && (errno == ENOSYS)) { + /* in Linux symlinks are always in mode 0777 and no + such call as lchmod exists. 
+ */ + gf_msg_debug (this->name, 0, "%s (%s)", path, strerror (errno)); + if (is_symlink) { + ret = 0; + goto out; + } + + ret = sys_chmod (path, mode); + } +out: + return ret; +} + +static int +posix_do_chown (xlator_t *this, + const char *path, + struct iatt *stbuf, + int32_t valid) +{ + int32_t ret = -1; + uid_t uid = -1; + gid_t gid = -1; + + if (valid & GF_SET_ATTR_UID) + uid = stbuf->ia_uid; + + if (valid & GF_SET_ATTR_GID) + gid = stbuf->ia_gid; + + ret = sys_lchown (path, uid, gid); + + return ret; +} + +static int +posix_do_utimes (xlator_t *this, + const char *path, + struct iatt *stbuf, + int valid) +{ + int32_t ret = -1; +#if defined(HAVE_UTIMENSAT) + struct timespec tv[2] = { {0,}, {0,} }; +#else + struct timeval tv[2] = { {0,}, {0,} }; +#endif + struct stat stat; + int is_symlink = 0; + + ret = sys_lstat (path, &stat); + if (ret != 0) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_FILE_OP_FAILED, "%s", path); + goto out; + } + + if (S_ISLNK (stat.st_mode)) + is_symlink = 1; + + if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { + tv[0].tv_sec = stbuf->ia_atime; + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[0], stbuf->ia_atime_nsec); + } else { + /* atime is not given, use current values */ + tv[0].tv_sec = ST_ATIM_SEC (&stat); + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[0], ST_ATIM_NSEC (&stat)); + } + + if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) { + tv[1].tv_sec = stbuf->ia_mtime; + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[1], stbuf->ia_mtime_nsec); + } else { + /* mtime is not given, use current values */ + tv[1].tv_sec = ST_MTIM_SEC (&stat); + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[1], ST_MTIM_NSEC (&stat)); + } + + ret = PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv); + if ((ret == -1) && (errno == ENOSYS)) { + gf_msg_debug (this->name, 0, "%s (%s)", + path, strerror (errno)); + if (is_symlink) { + ret = 0; + goto out; + } + + ret = PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv); + } + +out: + return ret; +} + +int +posix_setattr (call_frame_t *frame, 
xlator_t *this, + loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = 0; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; + dict_t *xattr_rsp = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_INODE_HANDLE (real_path, this, loc, &statpre); + + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "setattr (lstat) on %s failed", + real_path ? real_path : "<null>"); + goto out; + } + + if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)){ + op_ret = posix_do_chown (this, real_path, stbuf, valid); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_CHOWN_FAILED, "setattr (chown) on %s " + "failed", real_path); + goto out; + } + } + + if (valid & GF_SET_ATTR_MODE) { + op_ret = posix_do_chmod (this, real_path, stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_CHMOD_FAILED, "setattr (chmod) on %s " + "failed", real_path); + goto out; + } + } + + if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) { + op_ret = posix_do_utimes (this, real_path, stbuf, valid); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_UTIMES_FAILED, "setattr (utimes) on %s " + "failed", real_path); + goto out; + } + } + + if (!valid) { + op_ret = sys_lchown (real_path, -1, -1); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_LCHOWN_FAILED, "lchown (%s, -1, -1) " + "failed", real_path); + + goto out; + } + } + + op_ret = posix_pstat (this, loc->gfid, real_path, &statpost); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "setattr (lstat) on %s failed", real_path); + goto out; + } + + if 
(xdata) + xattr_rsp = posix_xattr_fill (this, real_path, loc, NULL, -1, + xdata, &statpost); + op_ret = 0; + +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, + &statpre, &statpost, xattr_rsp); + if (xattr_rsp) + dict_unref (xattr_rsp); + + return 0; +} + +int32_t +posix_do_fchown (xlator_t *this, + int fd, + struct iatt *stbuf, + int32_t valid) +{ + int ret = -1; + uid_t uid = -1; + gid_t gid = -1; + + if (valid & GF_SET_ATTR_UID) + uid = stbuf->ia_uid; + + if (valid & GF_SET_ATTR_GID) + gid = stbuf->ia_gid; + + ret = sys_fchown (fd, uid, gid); + + return ret; +} + + +int32_t +posix_do_fchmod (xlator_t *this, + int fd, struct iatt *stbuf) +{ + int32_t ret = -1; + mode_t mode = 0; + mode_t mode_bit = 0; + struct posix_private *priv = NULL; + + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type); + mode_bit = (mode & priv->create_mask) + | priv->force_create_mode; + mode = posix_override_umask (mode, mode_bit); + ret = sys_fchmod (fd, mode); +out: + return ret; +} + +static int +posix_do_futimes (xlator_t *this, int fd, struct iatt *stbuf, int valid) +{ + int32_t ret = -1; + struct timeval tv[2] = { {0,}, {0,} }; + struct stat stat = {0,}; + + ret = sys_fstat (fd, &stat); + if (ret != 0) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_FILE_OP_FAILED, "%d", fd); + goto out; + } + + if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { + tv[0].tv_sec = stbuf->ia_atime; + tv[0].tv_usec = stbuf->ia_atime_nsec / 1000; + } else { + /* atime is not given, use current values */ + tv[0].tv_sec = ST_ATIM_SEC (&stat); + tv[0].tv_usec = ST_ATIM_NSEC (&stat) / 1000; + } + + if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) { + tv[1].tv_sec = stbuf->ia_mtime; + tv[1].tv_usec = stbuf->ia_mtime_nsec / 1000; + } else { + /* mtime is not given, use current values */ + tv[1].tv_sec = ST_MTIM_SEC (&stat); + tv[1].tv_usec = ST_MTIM_NSEC (&stat) / 1000; + } + + ret = sys_futimes 
(fd, tv); + if (ret == -1) + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FUTIMES_FAILED, + "%d", fd); + +out: + return ret; +} + +int +posix_fsetattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; + struct posix_fd *pfd = NULL; + dict_t *xattr_rsp = NULL; + int32_t ret = -1; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg_debug (this->name, 0, "pfd is NULL from fd=%p", fd); + goto out; + } + + op_ret = posix_fdstat (this, pfd->fd, &statpre); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fsetattr (fstat) failed on fd=%p", fd); + goto out; + } + + if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) { + op_ret = posix_do_fchown (this, pfd->fd, stbuf, valid); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_FCHOWN_FAILED, "fsetattr (fchown) failed" + " on fd=%p", fd); + goto out; + } + + } + + if (valid & GF_SET_ATTR_MODE) { + op_ret = posix_do_fchmod (this, pfd->fd, stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_FCHMOD_FAILED, "fsetattr (fchmod) failed" + " on fd=%p", fd); + goto out; + } + } + + if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) { + op_ret = posix_do_futimes (this, pfd->fd, stbuf, valid); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_FUTIMES_FAILED, "fsetattr (futimes) on " + "failed fd=%p", fd); + goto out; + } + } + + if (!valid) { + op_ret = sys_fchown (pfd->fd, -1, -1); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + 
P_MSG_FCHOWN_FAILED, + "fchown (%d, -1, -1) failed", + pfd->fd); + + goto out; + } + } + + op_ret = posix_fdstat (this, pfd->fd, &statpost); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fsetattr (fstat) failed on fd=%p", fd); + goto out; + } + + if (xdata) + xattr_rsp = posix_xattr_fill (this, NULL, NULL, fd, pfd->fd, + xdata, &statpost); + op_ret = 0; + +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, + &statpre, &statpost, xattr_rsp); + if (xattr_rsp) + dict_unref (xattr_rsp); + + return 0; +} + +static int32_t +posix_do_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t flags, off_t offset, size_t len, + struct iatt *statpre, struct iatt *statpost, dict_t *xdata) +{ + int32_t ret = -1; + int32_t op_errno = 0; + struct posix_fd *pfd = NULL; + gf_boolean_t locked = _gf_false; + posix_inode_ctx_t *ctx = NULL; + struct posix_private *priv = NULL; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, ret, ret, out); + + ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg_debug (this->name, 0, "pfd is NULL from fd=%p", fd); + goto out; + } + + ret = posix_inode_ctx_get_all (fd->inode, this, &ctx); + if (ret < 0) { + ret = -ENOMEM; + goto out; + } + + if (xdata && dict_get (xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) { + locked = _gf_true; + pthread_mutex_lock (&ctx->write_atomic_lock); + } + + ret = posix_fdstat (this, pfd->fd, statpre); + if (ret == -1) { + ret = -errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fallocate (fstat) failed on fd=%p", fd); + goto out; + } + + ret = sys_fallocate (pfd->fd, flags, offset, len); + if (ret == -1) { + ret = -errno; + gf_msg (this->name, GF_LOG_ERROR, -ret, 
P_MSG_FALLOCATE_FAILED, + "fallocate failed on %s offset: %jd, " + "len:%zu, flags: %d", uuid_utoa (fd->inode->gfid), + offset, len, flags); + goto out; + } + + ret = posix_fdstat (this, pfd->fd, statpost); + if (ret == -1) { + ret = -errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fallocate (fstat) failed on fd=%p", fd); + goto out; + } + +out: + if (locked) { + pthread_mutex_unlock (&ctx->write_atomic_lock); + locked = _gf_false; + } + SET_TO_OLD_FS_ID (); + if (ret == ENOSPC) + ret = -ENOSPC; + + return ret; +} + +char* +_page_aligned_alloc (size_t size, char **aligned_buf) +{ + char *alloc_buf = NULL; + char *buf = NULL; + + alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char); + if (!alloc_buf) + goto out; + /* page aligned buffer */ + buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE); + *aligned_buf = buf; +out: + return alloc_buf; +} + +static int32_t +_posix_do_zerofill(int fd, off_t offset, off_t len, int o_direct) +{ + off_t num_vect = 0; + off_t num_loop = 1; + off_t idx = 0; + int32_t op_ret = -1; + int32_t vect_size = VECTOR_SIZE; + off_t remain = 0; + off_t extra = 0; + struct iovec *vector = NULL; + char *iov_base = NULL; + char *alloc_buf = NULL; + + if (len == 0) + return 0; + if (len < VECTOR_SIZE) + vect_size = len; + + num_vect = len / (vect_size); + remain = len % vect_size ; + if (num_vect > MAX_NO_VECT) { + extra = num_vect % MAX_NO_VECT; + num_loop = num_vect / MAX_NO_VECT; + num_vect = MAX_NO_VECT; + } + + vector = GF_CALLOC (num_vect, sizeof(struct iovec), + gf_common_mt_iovec); + if (!vector) + return -1; + if (o_direct) { + alloc_buf = _page_aligned_alloc(vect_size, &iov_base); + if (!alloc_buf) { + GF_FREE(vector); + return -1; + } + } else { + iov_base = GF_CALLOC (vect_size, sizeof(char), + gf_common_mt_char); + if (!iov_base) { + GF_FREE(vector); + return -1; + } + } + + for (idx = 0; idx < num_vect; idx++) { + vector[idx].iov_base = iov_base; + vector[idx].iov_len = vect_size; + } + if (sys_lseek (fd, 
offset, SEEK_SET) < 0) { + op_ret = -1; + goto err; + } + + for (idx = 0; idx < num_loop; idx++) { + op_ret = sys_writev (fd, vector, num_vect); + if (op_ret < 0) + goto err; + } + if (extra) { + op_ret = sys_writev (fd, vector, extra); + if (op_ret < 0) + goto err; + } + if (remain) { + vector[0].iov_len = remain; + op_ret = sys_writev (fd, vector , 1); + if (op_ret < 0) + goto err; + } +err: + if (o_direct) + GF_FREE(alloc_buf); + else + GF_FREE(iov_base); + GF_FREE(vector); + return op_ret; +} + +static int32_t +posix_do_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, struct iatt *statpre, struct iatt *statpost, + dict_t *xdata) +{ + int32_t ret = -1; + int32_t op_errno = 0; + int32_t flags = 0; + struct posix_fd *pfd = NULL; + gf_boolean_t locked = _gf_false; + posix_inode_ctx_t *ctx = NULL; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg_debug (this->name, 0, "pfd is NULL from fd=%p", fd); + goto out; + } + + ret = posix_inode_ctx_get_all (fd->inode, this, &ctx); + if (ret < 0) { + ret = -ENOMEM; + goto out; + } + + if (dict_get (xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) { + locked = _gf_true; + pthread_mutex_lock (&ctx->write_atomic_lock); + } + + ret = posix_fdstat (this, pfd->fd, statpre); + if (ret == -1) { + ret = -errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd = %p", fd); + goto out; + } + + /* See if we can use FALLOC_FL_ZERO_RANGE to perform the zero fill. + * If it fails, fall back to _posix_do_zerofill() and an optional fsync. 
+ */ + flags = FALLOC_FL_ZERO_RANGE; + ret = sys_fallocate (pfd->fd, flags, offset, len); + if (ret == 0) + goto fsync; + + ret = _posix_do_zerofill (pfd->fd, offset, len, pfd->flags & O_DIRECT); + if (ret < 0) { + ret = -errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_ZEROFILL_FAILED, + "zerofill failed on fd %d length %" PRId64 , + pfd->fd, len); + goto out; + } + +fsync: + if (pfd->flags & (O_SYNC|O_DSYNC)) { + ret = sys_fsync (pfd->fd); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_WRITEV_FAILED, "fsync() in writev on fd" + "%d failed", pfd->fd); + ret = -errno; + goto out; + } + } + + ret = posix_fdstat (this, pfd->fd, statpost); + if (ret == -1) { + ret = -errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "post operation fstat failed on fd=%p", fd); + goto out; + } + +out: + if (locked) { + pthread_mutex_unlock (&ctx->write_atomic_lock); + locked = _gf_false; + } + SET_TO_OLD_FS_ID (); + + return ret; +} + +int32_t +posix_glfallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size, + off_t offset, size_t len, dict_t *xdata) +{ + int32_t ret; + int32_t flags = 0; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; + +#ifdef FALLOC_FL_KEEP_SIZE + if (keep_size) + flags = FALLOC_FL_KEEP_SIZE; +#endif /* FALLOC_FL_KEEP_SIZE */ + + ret = posix_do_fallocate (frame, this, fd, flags, offset, len, + &statpre, &statpost, xdata); + if (ret < 0) + goto err; + + STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, NULL); + return 0; + +err: + STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, NULL); + return 0; +} + +int32_t +posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + int32_t ret; +#ifndef FALLOC_FL_KEEP_SIZE + ret = EOPNOTSUPP; + +#else /* FALLOC_FL_KEEP_SIZE */ + int32_t flags = FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; + + ret = posix_do_fallocate (frame, 
this, fd, flags, offset, len, + &statpre, &statpost, xdata); + if (ret < 0) + goto err; + + STACK_UNWIND_STRICT(discard, frame, 0, 0, &statpre, &statpost, NULL); + return 0; + +err: +#endif /* FALLOC_FL_KEEP_SIZE */ + STACK_UNWIND_STRICT(discard, frame, -1, -ret, NULL, NULL, NULL); + return 0; +} + +int32_t +posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) +{ + int32_t ret = 0; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; + struct posix_private *priv = NULL; + int op_ret = -1; + int op_errno = -EINVAL; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); + + ret = posix_do_zerofill (frame, this, fd, offset, len, + &statpre, &statpost, xdata); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + goto out; + } + + STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL); + return 0; + +out: + STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL, NULL); + return 0; +} + +int32_t +posix_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +{ + /* + * IPC is for inter-translator communication. If one gets here, it + * means somebody sent one that nobody else recognized, which is an + * error much like an uncaught exception. 
+ */ + gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_IPC_NOT_HANDLE, + "GF_LOG_IPC(%d) not handled", op); + STACK_UNWIND_STRICT (ipc, frame, -1, -EOPNOTSUPP, NULL); + return 0; + +} + +#ifdef HAVE_SEEK_HOLE +int32_t +posix_seek (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata) +{ + struct posix_fd *pfd = NULL; + off_t ret = -1; + int err = 0; + int whence = 0; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + switch (what) { + case GF_SEEK_DATA: + whence = SEEK_DATA; + break; + case GF_SEEK_HOLE: + whence = SEEK_HOLE; + break; + default: + err = ENOTSUP; + gf_msg (this->name, GF_LOG_ERROR, ENOTSUP, + P_MSG_SEEK_UNKOWN, "don't know what to seek"); + goto out; + } + + ret = posix_fd_ctx_get (fd, this, &pfd, &err); + if (ret < 0) { + gf_msg_debug (this->name, 0, "pfd is NULL from fd=%p", fd); + goto out; + } + + ret = sys_lseek (pfd->fd, offset, whence); + if (ret == -1) { + err = errno; + gf_msg (this->name, GF_LOG_ERROR, err, P_MSG_SEEK_FAILED, + "seek failed on fd %d length %" PRId64 , pfd->fd, + offset); + goto out; + } + +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (seek, frame, (ret == -1 ? -1 : 0), err, + (ret == -1 ? 
-1 : ret), xdata); + return 0; +} +#endif + +int32_t +posix_opendir (call_frame_t *frame, xlator_t *this, + loc_t *loc, fd_t *fd, dict_t *xdata) +{ + char * real_path = NULL; + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + DIR * dir = NULL; + struct posix_fd * pfd = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + VALIDATE_OR_GOTO (fd, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + if (!real_path) { + op_errno = ESTALE; + goto out; + } + + op_ret = -1; + dir = sys_opendir (real_path); + + if (dir == NULL) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_OPENDIR_FAILED, + "opendir failed on %s", real_path); + goto out; + } + + op_ret = dirfd (dir); + if (op_ret < 0) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_DIRFD_FAILED, + "dirfd() failed on %s", real_path); + goto out; + } + + pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd); + if (!pfd) { + op_errno = errno; + goto out; + } + + pfd->dir = dir; + pfd->dir_eof = -1; + pfd->fd = op_ret; + + op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd); + if (op_ret) + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_FD_PATH_SETTING_FAILED, "failed to set the fd" + "context path=%s fd=%p", real_path, fd); + + op_ret = 0; + +out: + if (op_ret == -1) { + if (dir) { + (void) sys_closedir (dir); + dir = NULL; + } + if (pfd) { + GF_FREE (pfd); + pfd = NULL; + } + } + + SET_TO_OLD_FS_ID (); + STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, NULL); + return 0; +} + +int32_t +posix_releasedir (xlator_t *this, + fd_t *fd) +{ + struct posix_fd * pfd = NULL; + uint64_t tmp_pfd = 0; + int ret = 0; + + struct posix_private *priv = NULL; + + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = fd_ctx_del (fd, this, &tmp_pfd); + if (ret < 0) { + gf_msg_debug (this->name, 0, "pfd from fd=%p is NULL", fd); + 
goto out; + } + + pfd = (struct posix_fd *)(long)tmp_pfd; + if (!pfd->dir) { + gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL, + "pfd->dir is NULL for fd=%p", fd); + goto out; + } + + priv = this->private; + + pthread_mutex_lock (&priv->janitor_lock); + { + INIT_LIST_HEAD (&pfd->list); + list_add_tail (&pfd->list, &priv->janitor_fds); + pthread_cond_signal (&priv->janitor_cond); + } + pthread_mutex_unlock (&priv->janitor_lock); + +out: + return 0; +} + + +int32_t +posix_readlink (call_frame_t *frame, xlator_t *this, + loc_t *loc, size_t size, dict_t *xdata) +{ + char * dest = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = NULL; + struct iatt stbuf = {0,}; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + + dest = alloca (size + 1); + + MAKE_INODE_HANDLE (real_path, this, loc, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat on %s failed", + loc->path ? 
loc->path : "<null>"); + goto out; + } + + op_ret = sys_readlink (real_path, dest, size); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_READYLINK_FAILED, + "readlink on %s failed", real_path); + goto out; + } + + dest[op_ret] = 0; +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, dest, &stbuf, NULL); + + return 0; +} + +int32_t +posix_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = 0; + struct posix_private *priv = NULL; + struct iatt prebuf = {0,}; + struct iatt postbuf = {0,}; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + + MAKE_INODE_HANDLE (real_path, this, loc, &prebuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "pre-operation lstat on %s failed", + real_path ? 
real_path : "<null>"); + goto out; + } + + op_ret = sys_truncate (real_path, offset); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED, + "truncate on %s failed", real_path); + goto out; + } + + op_ret = posix_pstat (this, loc->gfid, real_path, &postbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "lstat on %s failed", real_path); + goto out; + } + + op_ret = 0; +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, + &prebuf, &postbuf, NULL); + + return 0; +} + +int32_t +posix_open (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL; + int32_t _fd = -1; + struct posix_fd *pfd = NULL; + struct posix_private *priv = NULL; + struct iatt stbuf = {0, }; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + VALIDATE_OR_GOTO (loc, out); + VALIDATE_OR_GOTO (fd, out); + + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + + if (flags & O_CREAT) + DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); + + MAKE_INODE_HANDLE (real_path, this, loc, &stbuf); + if (!real_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + if (IA_ISLNK (stbuf.ia_type)) { + op_ret = -1; + op_errno = ELOOP; + goto out; + } + + op_ret = -1; + SET_FS_ID (frame->root->uid, frame->root->gid); + + if (priv->o_direct) + flags |= O_DIRECT; + + _fd = sys_open (real_path, flags, priv->force_create_mode); + if (_fd == -1) { + op_ret = -1; + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FILE_OP_FAILED, + "open on %s, flags: %d", real_path, flags); + goto out; + } + + pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd); + if (!pfd) { + op_errno = errno; + goto out; + } + + pfd->flags = flags; + pfd->fd = 
_fd; + + op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd); + if (op_ret) + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_FD_PATH_SETTING_FAILED, + "failed to set the fd context path=%s fd=%p", + real_path, fd); + + LOCK (&priv->lock); + { + priv->nr_files++; + } + UNLOCK (&priv->lock); + + op_ret = 0; + +out: + if (op_ret == -1) { + if (_fd != -1) { + sys_close (_fd); + } + } + + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, NULL); + + return 0; +} + +int +posix_readv (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct posix_private * priv = NULL; + struct iobuf * iobuf = NULL; + struct iobref * iobref = NULL; + struct iovec vec = {0,}; + struct posix_fd * pfd = NULL; + struct iatt stbuf = {0,}; + int ret = -1; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + + ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + + if (!size) { + op_errno = EINVAL; + gf_msg (this->name, GF_LOG_WARNING, EINVAL, + P_MSG_INVALID_ARGUMENT, "size=%"GF_PRI_SIZET, size); + goto out; + } + + iobuf = iobuf_get_page_aligned (this->ctx->iobuf_pool, size, + ALIGN_SIZE); + if (!iobuf) { + op_errno = ENOMEM; + goto out; + } + + _fd = pfd->fd; + op_ret = sys_pread (_fd, iobuf->ptr, size, offset); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_READ_FAILED, "read failed on gfid=%s, " + "fd=%p, offset=%"PRIu64" size=%"GF_PRI_SIZET", " + "buf=%p", uuid_utoa (fd->inode->gfid), fd, + offset, size, iobuf->ptr); + goto out; + } + + LOCK (&priv->lock); + { + priv->read_value += op_ret; + } + UNLOCK (&priv->lock); + + 
vec.iov_base = iobuf->ptr; + vec.iov_len = op_ret; + + iobref = iobref_new (); + + iobref_add (iobref, iobuf); + + /* + * readv successful, and we need to get the stat of the file + * we read from + */ + + op_ret = posix_fdstat (this, _fd, &stbuf); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "fstat failed on fd=%p", fd); + goto out; + } + + /* Hack to notify higher layers of EOF. */ + if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size) + op_errno = ENOENT; + + op_ret = vec.iov_len; +out: + + STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, + &vec, 1, &stbuf, iobref, NULL); + + if (iobref) + iobref_unref (iobref); + if (iobuf) + iobuf_unref (iobuf); + + return 0; +} + + +int32_t +__posix_pwritev (int fd, struct iovec *vector, int count, off_t offset) +{ + int32_t op_ret = 0; + int idx = 0; + int retval = 0; + off_t internal_off = 0; + + if (!vector) + return -EFAULT; + + internal_off = offset; + for (idx = 0; idx < count; idx++) { + retval = sys_pwrite (fd, vector[idx].iov_base, vector[idx].iov_len, + internal_off); + if (retval == -1) { + op_ret = -errno; + goto err; + } + op_ret += retval; + internal_off += retval; + } + +err: + return op_ret; +} + +int32_t +__posix_writev (int fd, struct iovec *vector, int count, off_t startoff, + int odirect) +{ + int32_t op_ret = 0; + int idx = 0; + int max_buf_size = 0; + int retval = 0; + char *buf = NULL; + char *alloc_buf = NULL; + off_t internal_off = 0; + + /* Check for the O_DIRECT flag during open() */ + if (!odirect) + return __posix_pwritev (fd, vector, count, startoff); + + for (idx = 0; idx < count; idx++) { + if (max_buf_size < vector[idx].iov_len) + max_buf_size = vector[idx].iov_len; + } + + alloc_buf = _page_aligned_alloc (max_buf_size, &buf); + if (!alloc_buf) { + op_ret = -errno; + goto err; + } + + internal_off = startoff; + for (idx = 0; idx < count; idx++) { + memcpy (buf, vector[idx].iov_base, vector[idx].iov_len); + + /* not 
sure whether writev works on O_DIRECT'd fd */ + retval = sys_pwrite (fd, buf, vector[idx].iov_len, internal_off); + if (retval == -1) { + op_ret = -errno; + goto err; + } + + op_ret += retval; + internal_off += retval; + } + +err: + GF_FREE (alloc_buf); + + return op_ret; +} + +dict_t* +_fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append) +{ + dict_t *rsp_xdata = NULL; + int32_t ret = 0; + inode_t *inode = NULL; + + if (fd) + inode = fd->inode; + + if (!fd || !fd->inode || gf_uuid_is_null (fd->inode->gfid)) { + gf_msg_callingfn (this->name, GF_LOG_ERROR, EINVAL, + P_MSG_XATTR_FAILED, "fd: %p inode: %p" + "gfid:%s", fd, inode?inode:0, + inode?uuid_utoa(inode->gfid):"N/A"); + goto out; + } + + if (!xdata) + goto out; + + rsp_xdata = dict_new(); + if (!rsp_xdata) + goto out; + + if (dict_get (xdata, GLUSTERFS_OPEN_FD_COUNT)) { + ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_OPEN_FD_COUNT, + fd->inode->fd_count); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_DICT_SET_FAILED, "%s: Failed to set " + "dictionary value for %s", + uuid_utoa (fd->inode->gfid), + GLUSTERFS_OPEN_FD_COUNT); + } + } + + if (dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) { + ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND, + is_append); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_DICT_SET_FAILED, "%s: Failed to set " + "dictionary value for %s", + uuid_utoa (fd->inode->gfid), + GLUSTERFS_WRITE_IS_APPEND); + } + } +out: + return rsp_xdata; +} + +int32_t +posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, + uint32_t flags, struct iobref *iobref, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct posix_private * priv = NULL; + struct posix_fd * pfd = NULL; + struct iatt preop = {0,}; + struct iatt postop = {0,}; + int ret = -1; + dict_t *rsp_xdata = NULL; + int is_append = 0; + gf_boolean_t locked = _gf_false; + gf_boolean_t 
write_append = _gf_false; + gf_boolean_t update_atomic = _gf_false; + posix_inode_ctx_t *ctx = NULL; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + VALIDATE_OR_GOTO (vector, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + VALIDATE_OR_GOTO (priv, out); + DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); + + ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + + _fd = pfd->fd; + + if (xdata) { + if (dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) + write_append = _gf_true; + if (dict_get (xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) + update_atomic = _gf_true; + } + + /* The write_is_append check and write must happen + atomically. Else another write can overtake this + write after the check and get written earlier. + + So lock before preop-stat and unlock after write. + */ + + /* + * The update_atomic option is to instruct posix to do prestat, + * write and poststat atomically. This is to prevent any modification to + * ia_size and ia_blocks until poststat and the diff in their values + * between pre and poststat could be of use for some translators (shard + * as of today). 
+ */ + + op_ret = posix_inode_ctx_get_all (fd->inode, this, &ctx); + if (op_ret < 0) { + op_errno = ENOMEM; + goto out; + } + + if (write_append || update_atomic) { + locked = _gf_true; + pthread_mutex_lock (&ctx->write_atomic_lock); + } + + op_ret = posix_fdstat (this, _fd, &preop); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); + goto out; + } + + if (locked && write_append) { + if (preop.ia_size == offset || (fd->flags & O_APPEND)) + is_append = 1; + } + + op_ret = __posix_writev (_fd, vector, count, offset, + (pfd->flags & O_DIRECT)); + + if (locked && (!update_atomic)) { + pthread_mutex_unlock (&ctx->write_atomic_lock); + locked = _gf_false; + } + + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + gf_msg (this->name, GF_LOG_ERROR, op_errno, P_MSG_WRITE_FAILED, + "write failed: offset %"PRIu64 + ",", offset); + goto out; + } + + rsp_xdata = _fill_writev_xdata (fd, xdata, this, is_append); + /* writev successful, we also need to get the stat of + * the file we wrote to + */ + + ret = posix_fdstat (this, _fd, &postop); + if (ret == -1) { + op_ret = -1; + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_FSTAT_FAILED, + "post-operation fstat failed on fd=%p", + fd); + goto out; + } + + if (locked) { + pthread_mutex_unlock (&ctx->write_atomic_lock); + locked = _gf_false; + } + + if (flags & (O_SYNC|O_DSYNC)) { + ret = sys_fsync (_fd); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_WRITEV_FAILED, + "fsync() in writev on fd %d failed", + _fd); + op_ret = -1; + op_errno = errno; + goto out; + } + } + + LOCK (&priv->lock); + { + priv->write_value += op_ret; + } + UNLOCK (&priv->lock); + +out: + + if (locked) { + pthread_mutex_unlock (&ctx->write_atomic_lock); + locked = _gf_false; + } + + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &preop, &postop, + rsp_xdata); + + if (rsp_xdata) + dict_unref (rsp_xdata); + 
return 0; +} + + +int32_t +posix_statfs (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *xdata) +{ + char * real_path = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + struct statvfs buf = {0, }; + struct posix_private * priv = NULL; + int shared_by = 1; + int percent = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + VALIDATE_OR_GOTO (this->private, out); + + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + if (!real_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + priv = this->private; + + op_ret = sys_statvfs (real_path, &buf); + + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED, + "statvfs failed on %s", real_path); + goto out; + } + + percent = priv->disk_reserve; + buf.f_bfree = (buf.f_bfree - ((buf.f_blocks * percent) / 100)); + + shared_by = priv->shared_brick_count; + if (shared_by > 1) { + buf.f_blocks /= shared_by; + buf.f_bfree /= shared_by; + buf.f_bavail /= shared_by; + buf.f_files /= shared_by; + buf.f_ffree /= shared_by; + buf.f_favail /= shared_by; + } + + if (!priv->export_statfs) { + buf.f_blocks = 0; + buf.f_bfree = 0; + buf.f_bavail = 0; + buf.f_files = 0; + buf.f_ffree = 0; + buf.f_favail = 0; + } + + op_ret = 0; + +out: + STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf, NULL); + return 0; +} + + +int32_t +posix_flush (call_frame_t *frame, xlator_t *this, + fd_t *fd, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int ret = -1; + struct posix_fd *pfd = NULL; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL on fd=%p", fd); + goto out; + } + + op_ret = 0; + +out: + STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, NULL); + + return 0; +} + + +int32_t +posix_release (xlator_t 
*this, fd_t *fd) +{ + struct posix_private * priv = NULL; + struct posix_fd * pfd = NULL; + int ret = -1; + uint64_t tmp_pfd = 0; + + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + priv = this->private; + + ret = fd_ctx_del (fd, this, &tmp_pfd); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + pfd = (struct posix_fd *)(long)tmp_pfd; + + if (pfd->dir) { + gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_DIR_NOT_NULL, + "pfd->dir is %p (not NULL) for file fd=%p", + pfd->dir, fd); + } + + pthread_mutex_lock (&priv->janitor_lock); + { + INIT_LIST_HEAD (&pfd->list); + list_add_tail (&pfd->list, &priv->janitor_fds); + pthread_cond_signal (&priv->janitor_cond); + } + pthread_mutex_unlock (&priv->janitor_lock); + + LOCK (&priv->lock); + { + priv->nr_files--; + } + UNLOCK (&priv->lock); + +out: + return 0; +} + + +int +posix_batch_fsync (call_frame_t *frame, xlator_t *this, + fd_t *fd, int datasync, dict_t *xdata) +{ + call_stub_t *stub = NULL; + struct posix_private *priv = NULL; + + priv = this->private; + + stub = fop_fsync_stub (frame, default_fsync, fd, datasync, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0); + return 0; + } + + pthread_mutex_lock (&priv->fsync_mutex); + { + list_add_tail (&stub->list, &priv->fsyncs); + priv->fsync_queue_count++; + pthread_cond_signal (&priv->fsync_cond); + } + pthread_mutex_unlock (&priv->fsync_mutex); + + return 0; +} + + +int32_t +posix_fsync (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t datasync, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct posix_fd * pfd = NULL; + int ret = -1; + struct iatt preop = {0,}; + struct iatt postop = {0,}; + struct posix_private *priv = NULL; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + +#ifdef 
GF_DARWIN_HOST_OS + /* Always return success in case of fsync in MAC OS X */ + op_ret = 0; + goto out; +#endif + + priv = this->private; + + if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) { + posix_batch_fsync (frame, this, fd, datasync, xdata); + return 0; + } + + ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd not found in fd's ctx"); + goto out; + } + + _fd = pfd->fd; + + op_ret = posix_fdstat (this, _fd, &preop); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_WARNING, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); + goto out; + } + + if (datasync) { + op_ret = sys_fdatasync (_fd); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_FSYNC_FAILED, "fdatasync on fd=%p" + "failed:", fd); + goto out; + } + } else { + op_ret = sys_fsync (_fd); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_FSYNC_FAILED, "fsync on fd=%p " + "failed", fd); + goto out; + } + } + + op_ret = posix_fdstat (this, _fd, &postop); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_WARNING, errno, P_MSG_FSTAT_FAILED, + "post-operation fstat failed on fd=%p", fd); + goto out; + } + + op_ret = 0; + +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, &preop, &postop, + NULL); + + return 0; +} + +static int gf_posix_xattr_enotsup_log; +static int +_handle_setxattr_keyvalue_pair (dict_t *d, char *k, data_t *v, + void *tmp) +{ + posix_xattr_filler_t *filler = NULL; + + filler = tmp; + + return posix_handle_pair (filler->this, filler->real_path, k, v, + filler->flags, filler->stbuf); +} + +#ifdef GF_DARWIN_HOST_OS +static int +map_xattr_flags(int flags) +{ + /* DARWIN has different defines on XATTR_ flags. + There do not seem to be a POSIX standard + Parse any other flags over. 
+ */ + int darwinflags = flags & ~(GF_XATTR_CREATE | GF_XATTR_REPLACE | XATTR_REPLACE); + if (GF_XATTR_CREATE & flags) + darwinflags |= XATTR_CREATE; + if (GF_XATTR_REPLACE & flags) + darwinflags |= XATTR_REPLACE; + return darwinflags; +} +#endif + +int32_t +posix_setxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *dict, int flags, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char * real_path = NULL; + char *acl_xattr = NULL; + struct iatt stbuf = {0}; + int32_t ret = 0; + ssize_t acl_size = 0; + dict_t *xattr = NULL; + posix_xattr_filler_t filler = {0,}; + struct posix_private *priv = NULL; + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + VALIDATE_OR_GOTO (dict, out); + + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); + + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + if (!real_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + + posix_pstat(this, loc->gfid, real_path, &stbuf); + + op_ret = -1; + + dict_del (dict, GFID_XATTR_KEY); + dict_del (dict, GF_XATTR_VOL_ID_KEY); + /* the io-stats-dump key should not reach disk */ + dict_del (dict, GF_XATTR_IOSTATS_DUMP_KEY); + + filler.real_path = real_path; + filler.this = this; + filler.stbuf = &stbuf; + +#ifdef GF_DARWIN_HOST_OS + filler.flags = map_xattr_flags(flags); +#else + filler.flags = flags; +#endif + op_ret = dict_foreach (dict, _handle_setxattr_keyvalue_pair, + &filler); + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + goto out; + } + + xattr = dict_new(); + if (!xattr) + goto out; + +/* + * FIXFIX: Send the stbuf info in the xdata for now + * This is used by DHT to redirect FOPs if the file is being migrated + * Ignore errors for now + */ + if (xdata && dict_get (xdata, DHT_IATT_IN_XDATA_KEY)) { + ret = posix_pstat(this, loc->gfid, real_path, &stbuf); + if (ret) + goto out; 
+ + ret = posix_set_iatt_in_dict (xattr, &stbuf); + } + +/* + * ACL can be set on a file/folder using GF_POSIX_ACL_*_KEY xattrs which + * won't aware of access-control xlator. To update its context correctly, + * POSIX_ACL_*_XATTR stored in xdata which is send in the call_back path. + */ + if (dict_get (dict, GF_POSIX_ACL_ACCESS)) { + + /* + * The size of buffer will be know after calling sys_lgetxattr, + * so first we allocate buffer with large size(~4k), then we + * reduced into required size using GF_REALLO(). + */ + acl_xattr = GF_CALLOC (1, ACL_BUFFER_MAX, gf_posix_mt_char); + if (!acl_xattr) + goto out; + + acl_size = sys_lgetxattr (real_path, POSIX_ACL_ACCESS_XATTR, + acl_xattr, ACL_BUFFER_MAX); + + if (acl_size < 0) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_XATTR_FAILED, "Posix acl is not set " + "properly at the backend"); + goto out; + } + + /* If acl_size is more than max buffer size, just ignore it */ + if (acl_size >= ACL_BUFFER_MAX) { + gf_msg (this->name, GF_LOG_WARNING, ENOMEM, + P_MSG_BUFFER_OVERFLOW, "size of acl is more" + "than the buffer"); + goto out; + } + + acl_xattr = GF_REALLOC (acl_xattr, acl_size); + if (!acl_xattr) + goto out; + + ret = dict_set_bin (xattr, POSIX_ACL_ACCESS_XATTR, + acl_xattr, acl_size); + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_SET_XDATA_FAIL, "failed to set" + "xdata for acl"); + GF_FREE (acl_xattr); + goto out; + } + } + + if (dict_get (dict, GF_POSIX_ACL_DEFAULT)) { + + acl_xattr = GF_CALLOC (1, ACL_BUFFER_MAX, gf_posix_mt_char); + if (!acl_xattr) + goto out; + + acl_size = sys_lgetxattr (real_path, POSIX_ACL_DEFAULT_XATTR, + acl_xattr, ACL_BUFFER_MAX); + + if (acl_size < 0) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_XATTR_FAILED, "Posix acl is not set " + "properly at the backend"); + goto out; + } + + if (acl_size >= ACL_BUFFER_MAX) { + gf_msg (this->name, GF_LOG_WARNING, ENOMEM, + P_MSG_BUFFER_OVERFLOW, "size of acl is more" + "than the buffer"); + goto out; + } + + 
acl_xattr = GF_REALLOC (acl_xattr, acl_size); + if (!acl_xattr) + goto out; + + ret = dict_set_bin (xattr, POSIX_ACL_DEFAULT_XATTR, + acl_xattr, acl_size); + if (ret) { + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_SET_XDATA_FAIL, "failed to set" + "xdata for acl"); + GF_FREE (acl_xattr); + goto out; + } + } +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xattr); + + if (xattr) + dict_unref (xattr); + + return 0; +} + + +int +posix_xattr_get_real_filename (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *key, dict_t *dict, dict_t *xdata) +{ + int ret = -1; + int op_ret = -1; + const char *fname = NULL; + char *real_path = NULL; + char *found = NULL; + DIR *fd = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = {{0,},}; + + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + if (!real_path) { + return -ESTALE; + } + if (op_ret == -1) { + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, + "posix_xattr_get_real_filename (lstat) on %s failed", + real_path); + return -errno; + } + + fd = sys_opendir (real_path); + if (!fd) + return -errno; + + fname = key + strlen (GF_XATTR_GET_REAL_FILENAME_KEY); + + for (;;) { + errno = 0; + entry = sys_readdir (fd, scratch); + if (!entry || errno != 0) + break; + + if (strcasecmp (entry->d_name, fname) == 0) { + found = gf_strdup (entry->d_name); + if (!found) { + (void) sys_closedir (fd); + return -ENOMEM; + } + break; + } + } + + (void) sys_closedir (fd); + + if (!found) + return -ENOENT; + + ret = dict_set_dynstr (dict, (char *)key, found); + if (ret) { + GF_FREE (found); + return -ENOMEM; + } + ret = strlen (found) + 1; + + return ret; +} + +int +posix_get_ancestry_directory (xlator_t *this, inode_t *leaf_inode, + gf_dirent_t *head, char **path, int type, + int32_t *op_errno, dict_t *xdata) +{ + ssize_t handle_size = 0; + struct posix_private *priv = NULL; + inode_t *inode = NULL; + int ret = -1; + char dirpath[PATH_MAX] = {0,}; + + priv = 
this->private; + + handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length); + + ret = posix_make_ancestryfromgfid (this, dirpath, PATH_MAX + 1, head, + type | POSIX_ANCESTRY_PATH, + leaf_inode->gfid, + handle_size, priv->base_path, + leaf_inode->table, &inode, xdata, + op_errno); + if (ret < 0) + goto out; + + + /* there is already a reference in loc->inode */ + inode_unref (inode); + + if ((type & POSIX_ANCESTRY_PATH) && (path != NULL)) { + if (strcmp (dirpath, "/")) + dirpath[strlen (dirpath) - 1] = '\0'; + + *path = gf_strdup (dirpath); + } + +out: + return ret; +} + +int32_t +posix_links_in_same_directory (char *dirpath, int count, inode_t *leaf_inode, + inode_t *parent, struct stat *stbuf, + gf_dirent_t *head, char **path, + int type, dict_t *xdata, int32_t *op_errno) +{ + int op_ret = -1; + gf_dirent_t *gf_entry = NULL; + xlator_t *this = NULL; + struct posix_private *priv = NULL; + DIR *dirp = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = {{0,},}; + char temppath[PATH_MAX] = {0,}; + char scr[PATH_MAX * 4] = {0,}; + + this = THIS; + + priv = this->private; + + dirp = sys_opendir (dirpath); + if (!dirp) { + *op_errno = errno; + gf_msg (this->name, GF_LOG_WARNING, errno, P_MSG_OPEN_FAILED, + "could not opendir %s", dirpath); + goto out; + } + + while (count > 0) { + errno = 0; + entry = sys_readdir (dirp, scratch); + if (!entry || errno != 0) + break; + + if (entry->d_ino != stbuf->st_ino) + continue; + + /* Linking an inode here, can cause a race in posix_acl. + Parent inode gets linked here, but before + it reaches posix_acl_readdirp_cbk, create/lookup can + come on a leaf-inode, as parent-inode-ctx not yet updated + in posix_acl_readdirp_cbk, create and lookup can fail + with EACCESS. 
So do the inode linking in the quota xlator + + linked_inode = inode_link (leaf_inode, parent, + entry->d_name, NULL); + + GF_ASSERT (linked_inode == leaf_inode); + inode_unref (linked_inode);*/ + + if (type & POSIX_ANCESTRY_DENTRY) { + loc_t loc = {0, }; + + loc.inode = inode_ref (leaf_inode); + gf_uuid_copy (loc.gfid, leaf_inode->gfid); + + (void) snprintf (temppath, sizeof(temppath), "%s/%s", + dirpath, entry->d_name); + + gf_entry = gf_dirent_for_name (entry->d_name); + gf_entry->inode = inode_ref (leaf_inode); + gf_entry->dict + = posix_xattr_fill (this, temppath, &loc, NULL, + -1, xdata, NULL); + iatt_from_stat (&(gf_entry->d_stat), stbuf); + + list_add_tail (&gf_entry->list, &head->list); + loc_wipe (&loc); + } + + if (type & POSIX_ANCESTRY_PATH) { + (void) snprintf (temppath, sizeof(temppath), "%s/%s", + &dirpath[priv->base_path_length], + entry->d_name); + if (!*path) { + *path = gf_strdup (temppath); + } else { + /* creating a colon separated */ + /* list of hard links */ + (void) snprintf (scr, sizeof(scr), "%s:%s", + *path, temppath); + + GF_FREE (*path); + *path = gf_strdup (scr); + } + if (!*path) { + op_ret = -1; + *op_errno = ENOMEM; + goto out; + } + } + + count--; + } + + op_ret = 0; +out: + if (dirp) { + op_ret = sys_closedir (dirp); + if (op_ret == -1) { + *op_errno = errno; + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_CLOSE_FAILED, "closedir failed"); + } + } + + return op_ret; +} + +int +posix_get_ancestry_non_directory (xlator_t *this, inode_t *leaf_inode, + gf_dirent_t *head, char **path, int type, + int32_t *op_errno, dict_t *xdata) +{ + size_t remaining_size = 0; + int op_ret = -1, pathlen = -1; + ssize_t handle_size = 0; + uuid_t pgfid = {0,}; + int nlink_samepgfid = 0; + struct stat stbuf = {0,}; + char *list = NULL; + int32_t list_offset = 0; + struct posix_private *priv = NULL; + ssize_t size = 0; + inode_t *parent = NULL; + loc_t *loc = NULL; + char *leaf_path = NULL; + char key[4096] = {0,}; + char dirpath[PATH_MAX] = {0,}; + 
char pgfidstr[UUID_CANONICAL_FORM_LEN+1] = {0,}; + + priv = this->private; + + loc = GF_CALLOC (1, sizeof (*loc), gf_posix_mt_char); + if (loc == NULL) { + op_ret = -1; + *op_errno = ENOMEM; + goto out; + } + + gf_uuid_copy (loc->gfid, leaf_inode->gfid); + + MAKE_INODE_HANDLE (leaf_path, this, loc, NULL); + if (!leaf_path) { + GF_FREE (loc); + *op_errno = ESTALE; + goto out; + } + GF_FREE (loc); + + size = sys_llistxattr (leaf_path, NULL, 0); + if (size == -1) { + *op_errno = errno; + if ((errno == ENOTSUP) || (errno == ENOSYS)) { + GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, + this->name, GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting brick" + " with 'user_xattr' flag)"); + + } else { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_XATTR_FAILED, "listxattr failed on" + "%s", leaf_path); + + } + + goto out; + } + + if (size == 0) { + op_ret = 0; + goto out; + } + + list = alloca (size); + if (!list) { + *op_errno = errno; + goto out; + } + + size = sys_llistxattr (leaf_path, list, size); + if (size < 0) { + op_ret = -1; + *op_errno = errno; + goto out; + } + remaining_size = size; + list_offset = 0; + + op_ret = sys_lstat (leaf_path, &stbuf); + if (op_ret == -1) { + *op_errno = errno; + gf_msg (this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED, + "lstat failed on %s", leaf_path); + goto out; + } + + while (remaining_size > 0) { + strncpy (key, list + list_offset, sizeof(key)-1); + key[sizeof(key)-1] = '\0'; + if (strncmp (key, PGFID_XATTR_KEY_PREFIX, + strlen (PGFID_XATTR_KEY_PREFIX)) != 0) + goto next; + + op_ret = sys_lgetxattr (leaf_path, key, + &nlink_samepgfid, + sizeof(nlink_samepgfid)); + if (op_ret == -1) { + *op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_XATTR_FAILED, "getxattr failed on " + "%s: key = %s ", leaf_path, key); + goto out; + } + + nlink_samepgfid = ntoh32 (nlink_samepgfid); + + strncpy (pgfidstr, key + strlen(PGFID_XATTR_KEY_PREFIX), + sizeof(pgfidstr)-1); + 
pgfidstr[sizeof(pgfidstr)-1] = '\0'; + gf_uuid_parse (pgfidstr, pgfid); + + handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length); + + /* constructing the absolute real path of parent dir */ + strncpy (dirpath, priv->base_path, sizeof(dirpath)-1); + dirpath[sizeof(dirpath)-1] = '\0'; + pathlen = PATH_MAX + 1 - priv->base_path_length; + + op_ret = posix_make_ancestryfromgfid (this, + dirpath + priv->base_path_length, + pathlen, + head, + type | POSIX_ANCESTRY_PATH, + pgfid, + handle_size, + priv->base_path, + leaf_inode->table, + &parent, xdata, op_errno); + if (op_ret < 0) { + goto next; + } + + dirpath[strlen (dirpath) - 1] = '\0'; + + posix_links_in_same_directory (dirpath, nlink_samepgfid, + leaf_inode, parent, &stbuf, head, + path, type, xdata, op_errno); + + if (parent != NULL) { + inode_unref (parent); + parent = NULL; + } + + next: + remaining_size -= strlen (key) + 1; + list_offset += strlen (key) + 1; + } /* while (remaining_size > 0) */ + + op_ret = 0; + +out: + return op_ret; +} + +int +posix_get_ancestry (xlator_t *this, inode_t *leaf_inode, + gf_dirent_t *head, char **path, int type, int32_t *op_errno, + dict_t *xdata) +{ + int ret = -1; + struct posix_private *priv = NULL; + + priv = this->private; + + if (IA_ISDIR (leaf_inode->ia_type)) { + ret = posix_get_ancestry_directory (this, leaf_inode, + head, path, type, op_errno, + xdata); + } else { + + if (!priv->update_pgfid_nlinks) + goto out; + ret = posix_get_ancestry_non_directory (this, leaf_inode, + head, path, type, + op_errno, xdata); + } + +out: + if (ret && path && *path) { + GF_FREE (*path); + *path = NULL; + } + + return ret; +} + +/** + * posix_getxattr - this function returns a dictionary with all the + * key:value pair present as xattr. used for + * both 'listxattr' and 'getxattr'. 
+ */ +int32_t +posix_getxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name, dict_t *xdata) +{ + struct posix_private *priv = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + char *value = NULL; + char *real_path = NULL; + dict_t *dict = NULL; + char *file_contents = NULL; + int ret = -1; + char *path = NULL; + char *rpath = NULL; + ssize_t size = 0; + char *list = NULL; + int32_t list_offset = 0; + size_t remaining_size = 0; + char *host_buf = NULL; + char *keybuffer = NULL; + char *value_buf = NULL; + gf_boolean_t have_val = _gf_false; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + + op_ret = -1; + priv = this->private; + + ret = posix_handle_georep_xattrs (frame, name, &op_errno, _gf_true); + if (ret == -1) { + op_ret = -1; + /* errno should be set from the above function*/ + goto out; + } + + if (name && posix_is_gfid2path_xattr (name)) { + op_ret = -1; + op_errno = ENOATTR; + goto out; + } + + if (loc->inode && IA_ISDIR(loc->inode->ia_type) && name && + ZR_FILE_CONTENT_REQUEST(name)) { + ret = posix_get_file_contents (this, loc->gfid, &name[15], + &file_contents); + if (ret < 0) { + op_errno = -ret; + gf_msg (this->name, GF_LOG_ERROR, op_errno, + P_MSG_FILE_FAILED, "getting file contents" + "failed"); + goto out; + } + } + + dict = dict_new (); + if (!dict) { + op_errno = ENOMEM; + goto out; + } + + if (loc->inode && name && GF_POSIX_ACL_REQUEST (name)) { + ret = posix_pacl_get (real_path, name, &value); + if (ret || !value) { + op_errno = errno; + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_ACL_FAILED, "could not get acl (%s) for" + "%s", name, real_path); + op_ret = -1; + goto out; + } + + ret = dict_set_dynstr (dict, (char *)name, value); + if (ret < 0) { + GF_FREE (value); + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_ACL_FAILED, 
"could not set acl (%s) for" + "%s in dictionary", name, real_path); + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + size = ret; + goto done; + } + + if (loc->inode && name && + (strncmp (name, GF_XATTR_GET_REAL_FILENAME_KEY, + strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)) { + ret = posix_xattr_get_real_filename (frame, this, loc, + name, dict, xdata); + if (ret < 0) { + op_ret = -1; + op_errno = -ret; + if (op_errno == ENOENT) { + gf_msg_debug (this->name, 0, "Failed to get " + "real filename (%s, %s)", + loc->path, name); + } else { + gf_msg (this->name, GF_LOG_WARNING, op_errno, + P_MSG_GETTING_FILENAME_FAILED, + "Failed to get real filename (%s, %s):" + , loc->path, name); + } + goto out; + } + + size = ret; + goto done; + } + + if (loc->inode && name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) { + if (!fd_list_empty (loc->inode)) { + ret = dict_set_uint32 (dict, (char *)name, 1); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_DICT_SET_FAILED, "Failed to set " + "dictionary value for %s", name); + op_errno = ENOMEM; + goto out; + } + } else { + ret = dict_set_uint32 (dict, (char *)name, 0); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_DICT_SET_FAILED, "Failed to set " + "dictionary value for %s", name); + op_errno = ENOMEM; + goto out; + } + } + goto done; + } + if (loc->inode && name && (XATTR_IS_PATHINFO (name))) { + if (LOC_HAS_ABSPATH (loc)) + MAKE_REAL_PATH (rpath, this, loc->path); + else + rpath = real_path; + + size = gf_asprintf (&host_buf, "<POSIX(%s):%s:%s>", + priv->base_path, + ((priv->node_uuid_pathinfo && + !gf_uuid_is_null(priv->glusterd_uuid)) + ? 
uuid_utoa (priv->glusterd_uuid) + : priv->hostname), rpath); + if (size < 0) { + op_errno = ENOMEM; + goto out; + } + ret = dict_set_dynstr (dict, (char *)name, host_buf); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_DICT_SET_FAILED, "could not set value" + " (%s) in dictionary", host_buf); + GF_FREE (host_buf); + op_errno = ENOMEM; + goto out; + } + + goto done; + } + + if (loc->inode && name && + (strcmp (name, GF_XATTR_NODE_UUID_KEY) == 0) + && !gf_uuid_is_null (priv->glusterd_uuid)) { + size = gf_asprintf (&host_buf, "%s", + uuid_utoa (priv->glusterd_uuid)); + if (size == -1) { + op_errno = ENOMEM; + goto out; + } + ret = dict_set_dynstr (dict, GF_XATTR_NODE_UUID_KEY, + host_buf); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, -ret, + P_MSG_DICT_SET_FAILED, "could not set value" + "(%s) in dictionary", host_buf); + GF_FREE (host_buf); + op_errno = -ret; + goto out; + } + goto done; + } + + if (loc->inode && name && + (strcmp (name, GFID_TO_PATH_KEY) == 0)) { + ret = inode_path (loc->inode, NULL, &path); + if (ret < 0) { + op_errno = -ret; + gf_msg (this->name, GF_LOG_WARNING, op_errno, + P_MSG_INODE_PATH_GET_FAILED, + "%s: could not get " + "inode path", uuid_utoa (loc->inode->gfid)); + goto out; + } + + size = ret; + ret = dict_set_dynstr (dict, GFID_TO_PATH_KEY, path); + if (ret < 0) { + op_errno = ENOMEM; + GF_FREE (path); + goto out; + } + goto done; + } + + if (loc->inode && name && + (strcmp (name, GFID2PATH_VIRT_XATTR_KEY) == 0)) { + if (!priv->gfid2path) { + op_errno = ENOATTR; + op_ret = -1; + goto out; + } + ret = posix_get_gfid2path (this, loc->inode, real_path, + &op_errno, dict); + if (ret < 0) { + op_ret = -1; + goto out; + } + size = ret; + goto done; + } + + if (loc->inode && name + && (strcmp (name, GET_ANCESTRY_PATH_KEY) == 0)) { + int type = POSIX_ANCESTRY_PATH; + + op_ret = posix_get_ancestry (this, loc->inode, NULL, + &path, type, &op_errno, + xdata); + if (op_ret < 0) { + op_ret = -1; + op_errno = ENODATA; + 
goto out; + } + size = op_ret; + op_ret = dict_set_dynstr (dict, GET_ANCESTRY_PATH_KEY, path); + if (op_ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, -op_ret, + P_MSG_GET_KEY_VALUE_FAILED, "could not get " + "value for key (%s)", GET_ANCESTRY_PATH_KEY); + GF_FREE (path); + op_errno = ENOMEM; + goto out; + } + + goto done; + } + + if (loc->inode && name + && (strncmp (name, GLUSTERFS_GET_OBJECT_SIGNATURE, + strlen (GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0)) { + op_ret = posix_get_objectsignature (real_path, dict); + if (op_ret < 0) { + op_errno = -op_ret; + goto out; + } + + goto done; + } + + /* here allocate value_buf of 8192 bytes to avoid one extra getxattr + call,If buffer size is small to hold the xattr result then it will + allocate a new buffer value of required size and call getxattr again + */ + + value_buf = alloca (XATTR_VAL_BUF_SIZE); + if (name) { + char *key = (char *)name; + + keybuffer = key; +#if defined(GF_DARWIN_HOST_OS_DISABLED) + if (priv->xattr_user_namespace == XATTR_STRIP) { + if (strncmp(key, "user.",5) == 0) { + key += 5; + gf_msg_debug (this->name, 0, "getxattr for file %s" + " stripping user key: %s -> %s", + real_path, keybuffer, key); + } + } +#endif + memset (value_buf, '\0', XATTR_VAL_BUF_SIZE); + size = sys_lgetxattr (real_path, key, value_buf, + XATTR_VAL_BUF_SIZE-1); + if (size >= 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg (this->name, GF_LOG_INFO, errno, + P_MSG_XATTR_FAILED, + "getxattr failed due to overflow of buffer" + " on %s: %s ", real_path, key); + size = sys_lgetxattr (real_path, key, NULL, 0); + } + if (size == -1) { + op_errno = errno; + if ((op_errno == ENOTSUP) || + (op_errno == ENOSYS)) { + GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, + this->name, + GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting" + " brick with 'user_xattr' " + "flag)"); + } + if ((op_errno == ENOATTR) || + (op_errno == ENODATA)) { + gf_msg_debug (this->name, 0, + "No such attribute:%s 
for file %s", + key, real_path); + } else { + gf_msg (this->name, GF_LOG_ERROR, + op_errno, P_MSG_XATTR_FAILED, + "getxattr failed on %s: %s ", + real_path, key); + } + goto out; + } + } + value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); + if (!value) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + if (have_val) { + memcpy (value, value_buf, size); + } else { + size = sys_lgetxattr (real_path, key, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_XATTR_FAILED, + "getxattr failed on %s: key = %s", + real_path, key); + GF_FREE (value); + goto out; + } + } + value [size] = '\0'; + op_ret = dict_set_dynptr (dict, key, value, size); + if (op_ret < 0) { + op_errno = -op_ret; + gf_msg (this->name, GF_LOG_ERROR, op_errno, + P_MSG_DICT_SET_FAILED, "dict set operation " + "on %s for the key %s failed.", real_path, key); + GF_FREE (value); + goto out; + } + + goto done; + } + + have_val = _gf_false; + memset (value_buf, '\0', XATTR_VAL_BUF_SIZE); + size = sys_llistxattr (real_path, value_buf, XATTR_VAL_BUF_SIZE-1); + if (size > 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg (this->name, GF_LOG_INFO, errno, + P_MSG_XATTR_FAILED, + "listxattr failed due to overflow of buffer" + " on %s ", real_path); + size = sys_llistxattr (real_path, NULL, 0); + } + if (size == -1) { + op_errno = errno; + if ((errno == ENOTSUP) || (errno == ENOSYS)) { + GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, + this->name, GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting" + " brick with 'user_xattr' " + "flag)"); + } else { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_XATTR_FAILED, + "listxattr failed on %s", real_path); + } + goto out; + } + if (size == 0) + goto done; + } + list = alloca (size); + if (!list) { + op_errno = errno; + goto out; + } + if (have_val) { + memcpy (list, value_buf, size); + } else { + size = sys_llistxattr (real_path, list, 
size); + if (size < 0) { + op_ret = -1; + op_errno = errno; + goto out; + } + } + remaining_size = size; + list_offset = 0; + keybuffer = alloca (XATTR_KEY_BUF_SIZE); + while (remaining_size > 0) { + strncpy (keybuffer, list + list_offset, XATTR_KEY_BUF_SIZE-1); + keybuffer[XATTR_KEY_BUF_SIZE-1] = '\0'; + + ret = posix_handle_georep_xattrs (frame, keybuffer, NULL, + _gf_false); + if (ret == -1) + goto ignore; + + if (posix_is_gfid2path_xattr (keybuffer)) { + goto ignore; + } + + memset (value_buf, '\0', XATTR_VAL_BUF_SIZE); + have_val = _gf_false; + size = sys_lgetxattr (real_path, keybuffer, value_buf, + XATTR_VAL_BUF_SIZE-1); + if (size >= 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg (this->name, GF_LOG_INFO, op_errno, + P_MSG_XATTR_FAILED, + "getxattr failed due to overflow of" + " buffer on %s: %s ", real_path, + keybuffer); + size = sys_lgetxattr (real_path, keybuffer, + NULL, 0); + } + if (size == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_XATTR_FAILED, "getxattr failed on" + " %s: key = %s ", real_path, keybuffer); + goto out; + } + } + value = GF_CALLOC (size + 1, sizeof(char), + gf_posix_mt_char); + if (!value) { + op_errno = errno; + goto out; + } + if (have_val) { + memcpy (value, value_buf, size); + } else { + size = sys_lgetxattr (real_path, keybuffer, value, size); + if (size == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_XATTR_FAILED, "getxattr failed on" + " %s: key = %s ", real_path, keybuffer); + GF_FREE (value); + goto out; + } + } + value [size] = '\0'; +#ifdef GF_DARWIN_HOST_OS + /* The protocol expect namespace for now */ + char *newkey = NULL; + gf_add_prefix (XATTR_USER_PREFIX, keybuffer, &newkey); + strncpy (keybuffer, newkey, sizeof(keybuffer)); + GF_FREE (newkey); +#endif + op_ret = dict_set_dynptr (dict, keybuffer, value, size); + if (op_ret < 0) { + op_errno = -op_ret; + gf_msg (this->name, GF_LOG_ERROR, op_errno, + P_MSG_DICT_SET_FAILED, 
"dict set operation " + "on %s for the key %s failed.", real_path, + keybuffer); + GF_FREE (value); + goto out; + } + +ignore: + remaining_size -= strlen (keybuffer) + 1; + list_offset += strlen (keybuffer) + 1; + + } /* while (remaining_size > 0) */ + +done: + op_ret = size; + + if (dict) { + dict_del (dict, GFID_XATTR_KEY); + dict_del (dict, GF_XATTR_VOL_ID_KEY); + } + +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, NULL); + + if (dict) { + dict_unref (dict); + } + + return 0; +} + + +int32_t +posix_fgetxattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *name, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + struct posix_fd * pfd = NULL; + int _fd = -1; + int32_t list_offset = 0; + ssize_t size = 0; + size_t remaining_size = 0; + char * value = NULL; + char * list = NULL; + dict_t * dict = NULL; + int ret = -1; + char key[4096] = {0,}; + char *value_buf = NULL; + gf_boolean_t have_val = _gf_false; + + DECLARE_OLD_FS_ID_VAR; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + SET_FS_ID (frame->root->uid, frame->root->gid); + + ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); + if (ret < 0) { + op_ret = -1; + gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + + _fd = pfd->fd; + + /* Get the total size */ + dict = dict_new (); + if (!dict) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + if (name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) { + ret = dict_set_uint32 (dict, (char *)name, 1); + if (ret < 0) { + op_ret = -1; + size = -1; + op_errno = ENOMEM; + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_DICT_SET_FAILED, "Failed to set " + "dictionary value for %s", name); + goto out; + } + goto done; + } + + if (name && strncmp (name, GLUSTERFS_GET_OBJECT_SIGNATURE, + strlen (GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0) { + op_ret = posix_fdget_objectsignature (_fd, 
dict); + if (op_ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, 0, 0, + "posix_fdget_objectsignature failed"); + op_errno = -op_ret; + op_ret = -1; + size = -1; + goto out; + } + + goto done; + } + + /* here allocate value_buf of 8192 bytes to avoid one extra getxattr + call,If buffer size is small to hold the xattr result then it will + allocate a new buffer value of required size and call getxattr again + */ + value_buf = alloca (XATTR_VAL_BUF_SIZE); + memset (value_buf, '\0', XATTR_VAL_BUF_SIZE); + + if (name) { + strncpy (key, name, sizeof(key)); +#ifdef GF_DARWIN_HOST_OS + struct posix_private *priv = NULL; + priv = this->private; + if (priv->xattr_user_namespace == XATTR_STRIP) { + char *newkey = NULL; + gf_add_prefix (XATTR_USER_PREFIX, key, &newkey); + strncpy (key, newkey, sizeof(key)); + GF_FREE (newkey); + } +#endif + size = sys_fgetxattr (_fd, key, value_buf, + XATTR_VAL_BUF_SIZE-1); + if (size >= 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg (this->name, GF_LOG_INFO, errno, + P_MSG_XATTR_FAILED, + "fgetxattr failed due to overflow of" + "buffer on %s ", key); + size = sys_fgetxattr (_fd, key, NULL, 0); + } + if (size == -1) { + op_errno = errno; + if (errno == ENODATA || errno == ENOATTR) { + gf_msg_debug (this->name, 0, "fgetxattr" + " failed on key %s (%s)", + key, strerror (op_errno)); + } else { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_XATTR_FAILED, "fgetxattr" + " failed on key %s", key); + } + goto done; + } + } + value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); + if (!value) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + if (have_val) { + memcpy (value, value_buf, size); + } else { + size = sys_fgetxattr (_fd, key, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_XATTR_FAILED, "fgetxattr" + " failed on fd %p for the key %s ", + fd, key); + GF_FREE (value); + goto out; + } + } + + value [size] = '\0'; + op_ret = 
dict_set_dynptr (dict, key, value, size); + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_DICT_SET_FAILED, "dict set operation " + "on key %s failed", key); + GF_FREE (value); + goto out; + } + + goto done; + } + memset (value_buf, '\0', XATTR_VAL_BUF_SIZE); + size = sys_flistxattr (_fd, value_buf, XATTR_VAL_BUF_SIZE-1); + if (size > 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg (this->name, GF_LOG_INFO, errno, + P_MSG_XATTR_FAILED, + "listxattr failed due to overflow of buffer" + " on %p ", fd); + size = sys_flistxattr (_fd, NULL, 0); + } + if (size == -1) { + op_ret = -1; + op_errno = errno; + if ((errno == ENOTSUP) || (errno == ENOSYS)) { + GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, + this->name, GF_LOG_WARNING, + "Extended attributes not " + "supported (try remounting " + "brick with 'user_xattr' flag)"); + } else { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_XATTR_FAILED, "listxattr failed " + "on %p:", fd); + } + goto out; + } + if (size == 0) + goto done; + } + list = alloca (size + 1); + if (!list) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + if (have_val) + memcpy (list, value_buf, size); + else + size = sys_flistxattr (_fd, list, size); + + remaining_size = size; + list_offset = 0; + while (remaining_size > 0) { + if(*(list + list_offset) == '\0') + break; + + strncpy (key, list + list_offset, sizeof(key)); + memset (value_buf, '\0', XATTR_VAL_BUF_SIZE); + have_val = _gf_false; + size = sys_fgetxattr (_fd, key, value_buf, + XATTR_VAL_BUF_SIZE-1); + if (size >= 0) { + have_val = _gf_true; + } else { + if (errno == ERANGE) { + gf_msg (this->name, GF_LOG_INFO, errno, + P_MSG_XATTR_FAILED, + "fgetxattr failed due to overflow of buffer" + " on fd %p: for the key %s ", fd, key); + size = sys_fgetxattr (_fd, key, NULL, 0); + } + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_XATTR_FAILED, "fgetxattr 
failed " + "on fd %p for the key %s ", fd, key); + break; + } + } + value = GF_CALLOC (size + 1, sizeof(char), + gf_posix_mt_char); + if (!value) { + op_ret = -1; + op_errno = errno; + goto out; + } + if (have_val) { + memcpy (value, value_buf, size); + } else { + size = sys_fgetxattr (_fd, key, value, size); + if (size == -1) { + op_ret = -1; + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_XATTR_FAILED, "fgetxattr failed o" + "n the fd %p for the key %s ", fd, key); + GF_FREE (value); + break; + } + } + value [size] = '\0'; + + op_ret = dict_set_dynptr (dict, key, value, size); + if (op_ret) { + op_errno = -op_ret; + op_ret = -1; + gf_msg (this->name, GF_LOG_ERROR, 0, + P_MSG_DICT_SET_FAILED, "dict set operation " + "failed on key %s", key); + GF_FREE (value); + goto out; + } + remaining_size -= strlen (key) + 1; + list_offset += strlen (key) + 1; + + } /* while (remaining_size > 0) */ + +done: + op_ret = size; + + if (dict) { + dict_del (dict, GFID_XATTR_KEY); + dict_del (dict, GF_XATTR_VOL_ID_KEY); + } + +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL); + + if (dict) + dict_unref (dict); + + return 0; +} + +static int +_handle_fsetxattr_keyvalue_pair (dict_t *d, char *k, data_t *v, + void *tmp) +{ + posix_xattr_filler_t *filler = NULL; + + filler = tmp; + + return posix_fhandle_pair (filler->this, filler->fdnum, k, v, + filler->flags, filler->stbuf); +} + +int32_t +posix_fsetxattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, dict_t *dict, int flags, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + struct posix_fd *pfd = NULL; + int _fd = -1; + int ret = -1; + struct iatt stbuf = {0,}; + dict_t *xattr = NULL; + posix_xattr_filler_t filler = {0,}; + struct posix_private *priv = NULL; + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + 
VALIDATE_OR_GOTO (dict, out); + + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); + + ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + _fd = pfd->fd; + + ret = posix_fdstat (this, pfd->fd, &stbuf); + if (ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, op_errno, + P_MSG_FSTAT_FAILED, "fsetxattr (fstat)" + "failed on fd=%p", fd); + goto out; + } + + dict_del (dict, GFID_XATTR_KEY); + dict_del (dict, GF_XATTR_VOL_ID_KEY); + + filler.fdnum = _fd; + filler.this = this; + filler.stbuf = &stbuf; +#ifdef GF_DARWIN_HOST_OS + filler.flags = map_xattr_flags(flags); +#else + filler.flags = flags; +#endif + op_ret = dict_foreach (dict, _handle_fsetxattr_keyvalue_pair, + &filler); + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + } + + if (!ret && xdata && dict_get (xdata, GLUSTERFS_DURABLE_OP)) { + op_ret = sys_fsync (_fd); + if (op_ret < 0) { + op_ret = -1; + op_errno = errno; + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_DURABILITY_REQ_NOT_SATISFIED, + "could not satisfy durability request: " + "reason "); + } + } + + if (xdata && dict_get (xdata, DHT_IATT_IN_XDATA_KEY)) { + ret = posix_fdstat (this, pfd->fd, &stbuf); + if (ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, op_errno, + P_MSG_XATTR_FAILED, "fsetxattr (fstat)" + "failed on fd=%p", fd); + goto out; + } + + xattr = dict_new (); + if (!xattr) + goto out; + ret = posix_set_iatt_in_dict (xattr, &stbuf); + } + +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xattr); + + if (xattr) + dict_unref (xattr); + + return 0; +} + +int +_posix_remove_xattr (dict_t *dict, char *key, data_t *value, void *data) +{ + int32_t op_ret = 0; + xlator_t *this = NULL; + posix_xattr_filler_t *filler = NULL; + + filler = (posix_xattr_filler_t *) data; + this = 
filler->this; +#ifdef GF_DARWIN_HOST_OS + struct posix_private *priv = NULL; + priv = (struct posix_private *) this->private; + char *newkey = NULL; + if (priv->xattr_user_namespace == XATTR_STRIP) { + gf_remove_prefix (XATTR_USER_PREFIX, key, &newkey); + gf_msg_debug ("remove_xattr", 0, "key %s => %s" , key, + newkey); + key = newkey; + } +#endif + /* Bulk remove xattr is internal fop in gluster. Some of the xattrs may + * have special behavior. Ex: removexattr("posix.system_acl_access"), + * removes more than one xattr on the file that could be present in the + * bulk-removal request. Removexattr of these deleted xattrs will fail + * with either ENODATA/ENOATTR. Since all this fop cares is removal of the + * xattrs in bulk-remove request and if they are already deleted, it can be + * treated as success. + */ + + if (filler->real_path) + op_ret = sys_lremovexattr (filler->real_path, key); + else + op_ret = sys_fremovexattr (filler->fdnum, key); + + if (op_ret == -1) { + if (errno == ENODATA || errno == ENOATTR) + op_ret = 0; + } + + if (op_ret == -1) { + filler->op_errno = errno; + if (errno != ENOATTR && errno != ENODATA && errno != EPERM) { + gf_msg (this->name, GF_LOG_ERROR, errno, + P_MSG_XATTR_FAILED, "removexattr failed on " + "file/dir %s with gfid: %s (for %s)", + filler->real_path?filler->real_path:"", + uuid_utoa (filler->inode->gfid), key); + } + } +#ifdef GF_DARWIN_HOST_OS + GF_FREE(newkey); +#endif + return op_ret; +} + +int +posix_common_removexattr (call_frame_t *frame, loc_t *loc, fd_t *fd, + const char *name, dict_t *xdata, int *op_errno, + dict_t **xdata_rsp) +{ + gf_boolean_t bulk_removexattr = _gf_false; + gf_boolean_t disallow = _gf_false; + char *real_path = NULL; + struct posix_fd *pfd = NULL; + int op_ret = 0; + struct iatt stbuf = {0}; + int ret = 0; + int _fd = -1; + xlator_t *this = frame->this; + inode_t *inode = NULL; + posix_xattr_filler_t filler = {0}; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID (frame->root->uid, frame->root->gid); + + 
if (loc) { + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + if (!real_path) { + op_ret = -1; + *op_errno = ESTALE; + goto out; + } + inode = loc->inode; + } else { + op_ret = posix_fd_ctx_get (fd, this, &pfd, op_errno); + if (op_ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, *op_errno, + P_MSG_PFD_NULL, "pfd is NULL from fd=%p", fd); + goto out; + } + _fd = pfd->fd; + inode = fd->inode; + } + + if (posix_is_gfid2path_xattr (name)) { + op_ret = -1; + *op_errno = ENOATTR; + goto out; + } + + if (gf_get_index_by_elem (disallow_removexattrs, (char *)name) >= 0) { + gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED, + "Remove xattr called on %s for file/dir %s with gfid: " + "%s", name, real_path?real_path:"", + uuid_utoa(inode->gfid)); + op_ret = -1; + *op_errno = EPERM; + goto out; + } else if (posix_is_bulk_removexattr ((char *)name, xdata)) { + bulk_removexattr = _gf_true; + (void) dict_has_key_from_array (xdata, disallow_removexattrs, + &disallow); + if (disallow) { + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_XATTR_NOT_REMOVED, + "Bulk removexattr has keys that shouldn't be " + "removed for file/dir %s with gfid: %s", + real_path?real_path:"", uuid_utoa(inode->gfid)); + op_ret = -1; + *op_errno = EPERM; + goto out; + } + } + + if (bulk_removexattr) { + filler.real_path = real_path; + filler.this = this; + filler.fdnum = _fd; + filler.inode = inode; + op_ret = dict_foreach (xdata, _posix_remove_xattr, &filler); + if (op_ret) { + *op_errno = filler.op_errno; + goto out; + } + } else { + if (loc) + op_ret = sys_lremovexattr (real_path, name); + else + op_ret = sys_fremovexattr (_fd, name); + if (op_ret == -1) { + *op_errno = errno; + if (*op_errno != ENOATTR && *op_errno != ENODATA && + *op_errno != EPERM) { + gf_msg (this->name, GF_LOG_ERROR, *op_errno, + P_MSG_XATTR_FAILED, + "removexattr on %s with gfid %s " + "(for %s)", real_path, + uuid_utoa (inode->gfid), name); + } + goto out; + } + } + + if (xdata && dict_get (xdata, 
DHT_IATT_IN_XDATA_KEY)) { + if (loc) + ret = posix_pstat(this, loc->gfid, real_path, &stbuf); + else + ret = posix_fdstat (this, _fd, &stbuf); + if (ret) + goto out; + *xdata_rsp = dict_new(); + if (!*xdata_rsp) + goto out; + + ret = posix_set_iatt_in_dict (*xdata_rsp, &stbuf); + } + op_ret = 0; +out: + SET_TO_OLD_FS_ID (); + return op_ret; +} + +int32_t +posix_removexattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name, dict_t *xdata) +{ + int op_ret = 0; + int op_errno = 0; + dict_t *xdata_rsp = NULL; + + op_ret = posix_common_removexattr (frame, loc, NULL, name, xdata, + &op_errno, &xdata_rsp); + STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata_rsp); + + if (xdata_rsp) + dict_unref (xdata_rsp); + + return 0; +} + +int32_t +posix_fremovexattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *name, dict_t *xdata) +{ + int32_t op_ret = 0; + int32_t op_errno = 0; + dict_t *xdata_rsp = NULL; + + op_ret = posix_common_removexattr (frame, NULL, fd, name, xdata, + &op_errno, &xdata_rsp); + STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata_rsp); + + if (xdata_rsp) + dict_unref (xdata_rsp); + + return 0; +} + + +int32_t +posix_fsyncdir (call_frame_t *frame, xlator_t *this, + fd_t *fd, int datasync, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int ret = -1; + struct posix_fd *pfd = NULL; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL, fd=%p", fd); + goto out; + } + + op_ret = 0; + +out: + STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, NULL); + + return 0; +} + + +void +posix_print_xattr (dict_t *this, + char *key, + data_t *value, + void *data) +{ + gf_msg_debug ("posix", 0, + "(key/val) = (%s/%d)", key, data_to_int32 (value)); +} + + +/** + * add_array - add two arrays of 32-bit 
numbers (stored in network byte order) + * dest = dest + src + * @count: number of 32-bit numbers + * FIXME: handle overflow + */ + +static void +__add_array (int32_t *dest, int32_t *src, int count) +{ + int i = 0; + int32_t destval = 0; + for (i = 0; i < count; i++) { + destval = ntoh32 (dest[i]); + dest[i] = hton32 (destval + ntoh32 (src[i])); + } +} + +static void +__add_long_array (int64_t *dest, int64_t *src, int count) +{ + int i = 0; + for (i = 0; i < count; i++) { + dest[i] = hton64 (ntoh64 (dest[i]) + ntoh64 (src[i])); + } +} + + +/* functions: + __add_array_with_default + __add_long_array_with_default + + xattrop type: + GF_XATTROP_ADD_ARRAY_WITH_DEFAULT + GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT + + These operations are similar to 'GF_XATTROP_ADD_ARRAY', + except that it adds a default value if xattr is missing + or its value is zero on disk. + + One use-case of this operation is in inode-quota. + When a new directory is created, its default dir_count + should be set to 1. So when a xattrop performed setting + inode-xattrs, it should account initial dir_count + 1 if the xattrs are not present + + Here is the usage of this operation + + value required in xdata for each key + struct array { + int32_t newvalue_1; + int32_t newvalue_2; + ... + int32_t newvalue_n; + int32_t default_1; + int32_t default_2; + ... + int32_t default_n; + }; + + or + + struct array { + int32_t value_1; + int32_t value_2; + ... + int32_t value_n; + } data[2]; + fill data[0] with new value to add + fill data[1] with default value + + xattrop GF_XATTROP_ADD_ARRAY_WITH_DEFAULT + for i from 1 to n + { + if (xattr (dest_i) is zero or not set in the disk) + dest_i = newvalue_i + default_i + else + dest_i = dest_i + newvalue_i + } + + value in xdata after xattrop is successful + struct array { + int32_t dest_1; + int32_t dest_2; + ... 
+ int32_t dest_n; + }; +*/ +static void +__add_array_with_default (int32_t *dest, int32_t *src, int count) +{ + int i = 0; + int32_t destval = 0; + + for (i = 0; i < count; i++) { + destval = ntoh32 (dest[i]); + if (destval == 0) + dest[i] = hton32 (ntoh32 (src[i]) + + ntoh32 (src[count + i])); + else + dest[i] = hton32 (destval + ntoh32 (src[i])); + } +} + +static void +__add_long_array_with_default (int64_t *dest, int64_t *src, int count) +{ + int i = 0; + int64_t destval = 0; + + for (i = 0; i < count; i++) { + destval = ntoh64 (dest[i]); + if (destval == 0) + dest[i] = hton64 (ntoh64 (src[i]) + + ntoh64 (src[i + count])); + else + dest[i] = hton64 (destval + ntoh64 (src[i])); + } +} + +static int +_posix_handle_xattr_keyvalue_pair (dict_t *d, char *k, data_t *v, + void *tmp) +{ + int size = 0; + int count = 0; + int op_ret = 0; + int op_errno = 0; + gf_xattrop_flags_t optype = 0; + char *array = NULL; + char *dst_data = NULL; + inode_t *inode = NULL; + xlator_t *this = NULL; + posix_xattr_filler_t *filler = NULL; + posix_inode_ctx_t *ctx = NULL; + + filler = tmp; + + optype = (gf_xattrop_flags_t)(filler->flags); + this = filler->this; + inode = filler->inode; + count = v->len; + if (optype == GF_XATTROP_ADD_ARRAY_WITH_DEFAULT || + optype == GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT) + count = count / 2; + + array = GF_CALLOC (count, sizeof (char), gf_posix_mt_char); + +#ifdef GF_DARWIN_HOST_OS + struct posix_private *priv = NULL; + priv = this->private; + if (priv->xattr_user_namespace == XATTR_STRIP) { + if (strncmp(k, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) == 0) { + k += XATTR_USER_PREFIX_LEN; + } + } +#endif + op_ret = posix_inode_ctx_get_all (inode, this, &ctx); + if (op_ret < 0) { + op_errno = ENOMEM; + goto out; + } + + pthread_mutex_lock (&ctx->xattrop_lock); + { + if (filler->real_path) { + size = sys_lgetxattr (filler->real_path, k, + (char *)array, count); + } else { + size = sys_fgetxattr (filler->fdnum, k, (char *)array, + count); + } + + op_errno = 
errno; + if ((size == -1) && (op_errno != ENODATA) && + (op_errno != ENOATTR)) { + if (op_errno == ENOTSUP) { + GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, + this->name, GF_LOG_WARNING, + "Extended attributes not " + "supported by filesystem"); + } else if (op_errno != ENOENT || + !posix_special_xattr (marker_xattrs, + k)) { + if (filler->real_path) + gf_msg (this->name, fop_log_level (GF_FOP_XATTROP, + op_errno), op_errno, P_MSG_XATTR_FAILED, + "getxattr failed on %s while " + "doing xattrop: Key:%s ", + filler->real_path, k); + else + gf_msg (this->name, GF_LOG_ERROR, + op_errno, P_MSG_XATTR_FAILED, + "fgetxattr failed on gfid=%s " + "while doing xattrop: " + "Key:%s (%s)", + uuid_utoa (filler->inode->gfid), + k, strerror (op_errno)); + } + + op_ret = -1; + goto unlock; + } + + if (size == -1 && optype == GF_XATTROP_GET_AND_SET) { + GF_FREE (array); + array = NULL; + } + + /* We only write back the xattr if it has been really modified + * (i.e. v->data is not all 0's). Otherwise we return its value + * but we don't update anything. + * + * If the xattr does not exist, a value of all 0's is returned + * without creating it. */ + size = count; + if (optype != GF_XATTROP_GET_AND_SET && + mem_0filled(v->data, v->len) == 0) + goto unlock; + + dst_data = array; + switch (optype) { + + case GF_XATTROP_ADD_ARRAY: + __add_array ((int32_t *) array, + (int32_t *) v->data, count / 4); + break; + + case GF_XATTROP_ADD_ARRAY64: + __add_long_array ((int64_t *) array, + (int64_t *) v->data, + count / 8); + break; + + case GF_XATTROP_ADD_ARRAY_WITH_DEFAULT: + __add_array_with_default ((int32_t *) array, + (int32_t *) v->data, + count / 4); + break; + + case GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT: + __add_long_array_with_default ((int64_t *) array, + (int64_t *) v->data, + count / 8); + break; + + case GF_XATTROP_GET_AND_SET: + dst_data = v->data; + break; + + default: + gf_msg (this->name, GF_LOG_ERROR, EINVAL, + P_MSG_UNKNOWN_OP, "Unknown xattrop type (%d)" + " on %s. 
Please send a bug report to " + "gluster-devel@gluster.org", optype, + filler->real_path); + op_ret = -1; + op_errno = EINVAL; + goto unlock; + } + + if (filler->real_path) { + size = sys_lsetxattr (filler->real_path, k, + dst_data, count, 0); + } else { + size = sys_fsetxattr (filler->fdnum, k, + (char *)dst_data, + count, 0); + } + op_errno = errno; + } +unlock: + pthread_mutex_unlock (&ctx->xattrop_lock); + + if (op_ret == -1) + goto out; + + if (size == -1) { + if (filler->real_path) + gf_msg (this->name, GF_LOG_ERROR, op_errno, + P_MSG_XATTR_FAILED, "setxattr failed on %s " + "while doing xattrop: key=%s", + filler->real_path, k); + else + gf_msg (this->name, GF_LOG_ERROR, op_errno, + P_MSG_XATTR_FAILED, + "fsetxattr failed on gfid=%s while doing " + "xattrop: key=%s (%s)", + uuid_utoa (filler->inode->gfid), + k, strerror (op_errno)); + op_ret = -1; + goto out; + } else if (array) { + op_ret = dict_set_bin (filler->xattr, k, array, count); + if (op_ret) { + if (filler->real_path) + gf_msg_debug (this->name, 0, + "dict_set_bin failed (path=%s): " + "key=%s (%s)", filler->real_path, + k, strerror (-size)); + else + gf_msg_debug (this->name, 0, + "dict_set_bin failed (gfid=%s): " + "key=%s (%s)", + uuid_utoa (filler->inode->gfid), + k, strerror (-size)); + + op_ret = -1; + op_errno = EINVAL; + GF_FREE (array); + goto out; + } + array = NULL; + } + +out: + if (op_ret < 0) + filler->op_errno = op_errno; + + if (array) + GF_FREE (array); + + return op_ret; +} + +/** + * xattrop - xattr operations - for internal use by GlusterFS + * @optype: ADD_ARRAY: + * dict should contain: + * "key" ==> array of 32-bit numbers + */ + +int +do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + int op_ret = 0; + int op_errno = 0; + int _fd = -1; + char *real_path = NULL; + struct posix_fd *pfd = NULL; + inode_t *inode = NULL; + posix_xattr_filler_t filler = {0,}; + dict_t *xattr_rsp = NULL; + dict_t 
*xdata_rsp = NULL; + struct iatt stbuf = {0}; + struct posix_private *priv = NULL; + + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (xattr, out); + VALIDATE_OR_GOTO (this, out); + + priv = this->private; + DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); + + if (fd) { + op_ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); + if (op_ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, + fop_log_level(GF_FOP_FXATTROP, op_errno), + P_MSG_PFD_GET_FAILED, "failed to get pfd from" + " fd=%p", fd); + goto out; + } + _fd = pfd->fd; + } + + if (loc && !gf_uuid_is_null (loc->gfid)) { + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + if (!real_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + } + + if (real_path) { + inode = loc->inode; + } else if (fd) { + inode = fd->inode; + } + + xattr_rsp = dict_new (); + if (xattr_rsp == NULL) { + op_ret = -1; + op_errno = ENOMEM; + goto out; + } + + filler.this = this; + filler.fdnum = _fd; + filler.real_path = real_path; + filler.flags = (int)optype; + filler.inode = inode; + filler.xattr = xattr_rsp; + + op_ret = dict_foreach (xattr, _posix_handle_xattr_keyvalue_pair, + &filler); + op_errno = filler.op_errno; + if (op_ret < 0) + goto out; + + if (!xdata) + goto out; + + if (fd) { + op_ret = posix_fdstat (this, _fd, &stbuf); + } else { + op_ret = posix_pstat (this, inode->gfid, real_path, + &stbuf); + } + if (op_ret < 0) { + op_errno = errno; + goto out; + } + xdata_rsp = posix_xattr_fill (this, real_path, loc, fd, _fd, + xdata, &stbuf); + if (!xdata_rsp) { + op_ret = -1; + op_errno = ENOMEM; + } +out: + + STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr_rsp, + xdata_rsp); + + if (xattr_rsp) + dict_unref (xattr_rsp); + + if (xdata_rsp) + dict_unref (xdata_rsp); + return 0; +} + + +int +posix_xattrop (call_frame_t *frame, xlator_t *this, + loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + do_xattrop (frame, this, loc, NULL, optype, xattr, xdata); + return 0; +} 
+ + +int +posix_fxattrop (call_frame_t *frame, xlator_t *this, + fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + do_xattrop (frame, this, NULL, fd, optype, xattr, xdata); + return 0; +} + +int +posix_access (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t mask, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + char *real_path = NULL; + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (loc, out); + + MAKE_INODE_HANDLE (real_path, this, loc, NULL); + if (!real_path) { + op_ret = -1; + op_errno = errno; + goto out; + } + + op_ret = sys_access (real_path, mask & 07); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_ACCESS_FAILED, + "access failed on %s", real_path); + goto out; + } + op_ret = 0; + +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, NULL); + return 0; +} + + +int32_t +posix_ftruncate (call_frame_t *frame, xlator_t *this, + fd_t *fd, off_t offset, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd = -1; + struct iatt preop = {0,}; + struct iatt postop = {0,}; + struct posix_fd *pfd = NULL; + int ret = -1; + struct posix_private *priv = NULL; + + DECLARE_OLD_FS_ID_VAR; + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + priv = this->private; + VALIDATE_OR_GOTO (priv, out); + + ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL, fd=%p", fd); + goto out; + } + + _fd = pfd->fd; + + op_ret = posix_fdstat (this, _fd, &preop); + if (op_ret == -1) { + op_errno = errno; + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); + goto out; + } + + op_ret = 
sys_ftruncate (_fd, offset);
+
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED,
+                        "ftruncate failed on fd=%p (%"PRId64"", fd, offset);
+                goto out;
+        }
+
+        op_ret = posix_fdstat (this, _fd, &postop);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+                        "post-operation fstat failed on fd=%p", fd);
+                goto out;
+        }
+
+        op_ret = 0;
+
+out:
+        SET_TO_OLD_FS_ID ();
+
+        STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, &preop,
+                             &postop, NULL);
+
+        return 0;
+}
+
+
+/*
+ * fstat FOP: stat the backend descriptor stored in the fd context and
+ * unwind with the resulting iatt.  When the caller passes xdata, the
+ * reply also carries the dict built by posix_xattr_fill().
+ *
+ * Always returns 0; the outcome is reported through STACK_UNWIND_STRICT
+ * (op_ret 0 on success, -1 with op_errno on failure).
+ */
+int32_t
+posix_fstat (call_frame_t *frame, xlator_t *this,
+             fd_t *fd, dict_t *xdata)
+{
+        int                   _fd       = -1;
+        int32_t               op_ret    = -1;
+        int32_t               op_errno  = 0;
+        struct iatt           buf       = {0,};
+        struct posix_fd      *pfd       = NULL;
+        dict_t               *xattr_rsp = NULL;
+        int                   ret       = -1;
+        struct posix_private *priv      = NULL;
+
+        DECLARE_OLD_FS_ID_VAR;
+        /* NOTE(review): SET_FS_ID is handed frame->root before frame is
+         * validated below; harmless only while the macro expands to
+         * nothing - confirm against its definition. */
+        SET_FS_ID (frame->root->uid, frame->root->gid);
+
+        VALIDATE_OR_GOTO (frame, out);
+        VALIDATE_OR_GOTO (this, out);
+        VALIDATE_OR_GOTO (fd, out);
+
+        priv = this->private;
+        VALIDATE_OR_GOTO (priv, out);
+
+        ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno);
+        if (ret < 0) {
+                gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL,
+                        "pfd is NULL, fd=%p", fd);
+                goto out;
+        }
+
+        _fd = pfd->fd;
+
+        op_ret = posix_fdstat (this, _fd, &buf);
+        if (op_ret == -1) {
+                op_errno = errno;
+                gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+                        "fstat failed on fd=%p", fd);
+                goto out;
+        }
+
+        if (xdata)
+                xattr_rsp = posix_xattr_fill (this, NULL, NULL, fd, _fd, xdata,
+                                              &buf);
+
+        op_ret = 0;
+
+out:
+        SET_TO_OLD_FS_ID ();
+
+        STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, &buf, xattr_rsp);
+        if (xattr_rsp)
+                dict_unref (xattr_rsp);
+        return 0;
+}
+
+/*
+ * lease FOP stub: this translator does not implement leases itself.
+ * Logs that "features/leases" is not loaded and unwinds with -1/ENOSYS
+ * and a zeroed gf_lease.
+ */
+int32_t
+posix_lease (call_frame_t *frame, xlator_t *this,
+             loc_t *loc, struct gf_lease *lease, dict_t *xdata)
+{
+        struct gf_lease nullease = {0, };
+
+        /* Adjacent string literals are concatenated verbatim, so the
+         * separating space has to live inside one of them. */
+        gf_msg (this->name, GF_LOG_CRITICAL, EINVAL, P_MSG_LEASE_DISABLED,
+                "\"features/leases\" translator is not loaded. You need "
+                "to use it for proper functioning of your application");
+
+        STACK_UNWIND_STRICT (lease, frame, -1, ENOSYS, &nullease, NULL);
+        return 0;
+}
+
+/* Counter handed to GF_LOG_OCCASIONALLY by all the lock stubs below. */
+static int gf_posix_lk_log;
+
+/*
+ * lk FOP stub: logs that "features/locks" is not loaded and unwinds
+ * with -1/ENOSYS and a zeroed gf_flock.
+ */
+int32_t
+posix_lk (call_frame_t *frame, xlator_t *this,
+          fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+        struct gf_flock nullock = {0, };
+
+        GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+                             "\"features/locks\" translator is "
+                             "not loaded. You need to use it for proper "
+                             "functioning of your application.");
+
+        STACK_UNWIND_STRICT (lk, frame, -1, ENOSYS, &nullock, NULL);
+        return 0;
+}
+
+/* inodelk FOP stub: same log message as posix_lk, unwinds -1/ENOSYS. */
+int32_t
+posix_inodelk (call_frame_t *frame, xlator_t *this,
+               const char *volume, loc_t *loc, int32_t cmd,
+               struct gf_flock *lock, dict_t *xdata)
+{
+        GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+                             "\"features/locks\" translator is "
+                             "not loaded. You need to use it for proper "
+                             "functioning of your application.");
+
+        STACK_UNWIND_STRICT (inodelk, frame, -1, ENOSYS, NULL);
+        return 0;
+}
+
+/* finodelk FOP stub: same log message as posix_lk, unwinds -1/ENOSYS. */
+int32_t
+posix_finodelk (call_frame_t *frame, xlator_t *this,
+                const char *volume, fd_t *fd, int32_t cmd,
+                struct gf_flock *lock, dict_t *xdata)
+{
+        GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+                             "\"features/locks\" translator is "
+                             "not loaded. You need to use it for proper "
+                             "functioning of your application.");
+
+        STACK_UNWIND_STRICT (finodelk, frame, -1, ENOSYS, NULL);
+        return 0;
+}
+
+
+int32_t
+posix_entrylk (call_frame_t *frame, xlator_t *this,
+               const char *volume, loc_t *loc, const char *basename,
+               entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+        GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL,
+                             "\"features/locks\" translator is "
+                             "not loaded.
You need to use it for proper " + "functioning of your application."); + + STACK_UNWIND_STRICT (entrylk, frame, -1, ENOSYS, NULL); + return 0; +} + +int32_t +posix_fentrylk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) +{ + GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, + "\"features/locks\" translator is " + "not loaded. You need to use it for proper " + "functioning of your application."); + + STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOSYS, NULL); + return 0; +} + + +int +posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, + gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs) +{ + off_t in_case = -1; + off_t last_off = 0; + size_t filled = 0; + int count = 0; + int32_t this_size = -1; + gf_dirent_t *this_entry = NULL; + struct posix_fd *pfd = NULL; + struct stat stbuf = {0,}; + char *hpath = NULL; + int len = 0; + int ret = 0; + int op_errno = 0; + struct dirent *entry = NULL; + struct dirent scratch[2] = {{0,},}; + + ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL, fd=%p", fd); + count = -1; + errno = op_errno; + goto out; + } + + if (skip_dirs) { + len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0); + if (len <= 0) { + errno = ESTALE; + count = -1; + goto out; + } + hpath = alloca (len + 256); /* NAME_MAX */ + + if (posix_handle_path (this, fd->inode->gfid, NULL, hpath, + len) <= 0) { + errno = ESTALE; + count = -1; + goto out; + } + + len = strlen (hpath); + hpath[len] = '/'; + } + + if (!off) { + rewinddir (dir); + } else { + seekdir (dir, off); +#ifndef GF_LINUX_HOST_OS + if ((u_long)telldir(dir) != off && off != pfd->dir_eof) { + gf_msg (THIS->name, GF_LOG_ERROR, EINVAL, + P_MSG_DIR_OPERATION_FAILED, + "seekdir(0x%llx) failed on dir=%p: " + "Invalid argument (offset reused from " + "another DIR * structure?)", 
off, dir); + errno = EINVAL; + count = -1; + goto out; + } +#endif /* GF_LINUX_HOST_OS */ + } + + while (filled <= size) { + in_case = (u_long)telldir (dir); + + if (in_case == -1) { + gf_msg (THIS->name, GF_LOG_ERROR, errno, + P_MSG_DIR_OPERATION_FAILED, + "telldir failed on dir=%p", dir); + goto out; + } + + errno = 0; + + entry = sys_readdir (dir, scratch); + + if (!entry || errno != 0) { + if (errno == EBADF) { + gf_msg (THIS->name, GF_LOG_WARNING, errno, + P_MSG_DIR_OPERATION_FAILED, + "readdir failed on dir=%p", + dir); + goto out; + } + break; + } + +#ifdef __NetBSD__ + /* + * NetBSD with UFS1 backend uses backing files for + * extended attributes. They can be found in a + * .attribute file located at the root of the filesystem + * We hide it to glusterfs clients, since chaos will occur + * when the cluster/dht xlator decides to distribute + * exended attribute backing file across storage servers. + */ + if (__is_root_gfid (fd->inode->gfid) == 0 + && (!strcmp(entry->d_name, ".attribute"))) + continue; +#endif /* __NetBSD__ */ + + if (__is_root_gfid (fd->inode->gfid) + && (!strcmp (GF_HIDDEN_PATH, entry->d_name))) { + continue; + } + + if (skip_dirs) { + if (DT_ISDIR (entry->d_type)) { + continue; + } else if (hpath) { + strcpy (&hpath[len+1], entry->d_name); + ret = sys_lstat (hpath, &stbuf); + if (!ret && S_ISDIR (stbuf.st_mode)) + continue; + } + } + + this_size = max (sizeof (gf_dirent_t), + sizeof (gfs3_dirplist)) + + strlen (entry->d_name) + 1; + + if (this_size + filled > size) { + seekdir (dir, in_case); +#ifndef GF_LINUX_HOST_OS + if ((u_long)telldir(dir) != in_case && + in_case != pfd->dir_eof) { + gf_msg (THIS->name, GF_LOG_ERROR, EINVAL, + P_MSG_DIR_OPERATION_FAILED, + "seekdir(0x%llx) failed on dir=%p: " + "Invalid argument (offset reused from " + "another DIR * structure?)", + in_case, dir); + errno = EINVAL; + count = -1; + goto out; + } +#endif /* GF_LINUX_HOST_OS */ + break; + } + + this_entry = gf_dirent_for_name (entry->d_name); + + if 
(!this_entry) { + gf_msg (THIS->name, GF_LOG_ERROR, errno, + P_MSG_GF_DIRENT_CREATE_FAILED, + "could not create " + "gf_dirent for entry %s", entry->d_name); + goto out; + } + /* + * we store the offset of next entry here, which is + * probably not intended, but code using syncop_readdir() + * (glfs-heal.c, afr-self-heald.c, pump.c) rely on it + * for directory read resumption. + */ + last_off = (u_long)telldir(dir); + this_entry->d_off = last_off; + this_entry->d_ino = entry->d_ino; + this_entry->d_type = entry->d_type; + + list_add_tail (&this_entry->list, &entries->list); + + filled += this_size; + count ++; + } + + if ((!sys_readdir (dir, scratch) && (errno == 0))) { + /* Indicate EOF */ + errno = ENOENT; + /* Remember EOF offset for later detection */ + pfd->dir_eof = (u_long)last_off; + } +out: + return count; +} + +dict_t * +posix_entry_xattr_fill (xlator_t *this, inode_t *inode, + fd_t *fd, char *entry_path, dict_t *dict, + struct iatt *stbuf) +{ + loc_t tmp_loc = {0,}; + + /* if we don't send the 'loc', open-fd-count be a problem. 
*/ + tmp_loc.inode = inode; + + return posix_xattr_fill (this, entry_path, &tmp_loc, NULL, -1, dict, + stbuf); + +} + + +#ifdef _DIRENT_HAVE_D_TYPE +static int +posix_d_type_from_ia_type (ia_type_t type) +{ + switch (type) { + case IA_IFDIR: return DT_DIR; + case IA_IFCHR: return DT_CHR; + case IA_IFBLK: return DT_BLK; + case IA_IFIFO: return DT_FIFO; + case IA_IFLNK: return DT_LNK; + case IA_IFREG: return DT_REG; + case IA_IFSOCK: return DT_SOCK; + default: return DT_UNKNOWN; + } +} +#endif + + +int +posix_readdirp_fill (xlator_t *this, fd_t *fd, gf_dirent_t *entries, dict_t *dict) +{ + gf_dirent_t *entry = NULL; + inode_table_t *itable = NULL; + inode_t *inode = NULL; + char *hpath = NULL; + int len = 0; + struct iatt stbuf = {0, }; + uuid_t gfid; + int ret = -1; + + if (list_empty(&entries->list)) + return 0; + + itable = fd->inode->table; + + len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0); + if (len <= 0) + return -1; + hpath = alloca (len + 256); /* NAME_MAX */ + if (posix_handle_path (this, fd->inode->gfid, NULL, hpath, len) <= 0) + return -1; + len = strlen (hpath); + hpath[len] = '/'; + + list_for_each_entry (entry, &entries->list, list) { + memset (gfid, 0, 16); + inode = inode_grep (fd->inode->table, fd->inode, + entry->d_name); + if (inode) + gf_uuid_copy (gfid, inode->gfid); + + strcpy (&hpath[len+1], entry->d_name); + + ret = posix_pstat (this, gfid, hpath, &stbuf); + + if (ret == -1) { + if (inode) + inode_unref (inode); + continue; + } + + if (!inode) + inode = inode_find (itable, stbuf.ia_gfid); + + if (!inode) + inode = inode_new (itable); + + entry->inode = inode; + + if (dict) { + entry->dict = + posix_entry_xattr_fill (this, entry->inode, + fd, hpath, + dict, &stbuf); + } + + entry->d_stat = stbuf; + if (stbuf.ia_ino) + entry->d_ino = stbuf.ia_ino; + +#ifdef _DIRENT_HAVE_D_TYPE + if (entry->d_type == DT_UNKNOWN && !IA_ISINVAL(stbuf.ia_type)) { + /* The platform supports d_type but the underlying + filesystem doesn't. 
We set d_type to the correct + value from ia_type */ + entry->d_type = + posix_d_type_from_ia_type (stbuf.ia_type); + } +#endif + + inode = NULL; + } + + return 0; +} + + +int32_t +posix_do_readdir (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off, int whichop, dict_t *dict) +{ + struct posix_fd *pfd = NULL; + DIR *dir = NULL; + int ret = -1; + int count = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + gf_dirent_t entries; + int32_t skip_dirs = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + INIT_LIST_HEAD (&entries.list); + + ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, + "pfd is NULL, fd=%p", fd); + goto out; + } + + dir = pfd->dir; + + if (!dir) { + gf_msg (this->name, GF_LOG_WARNING, EINVAL, P_MSG_PFD_NULL, + "dir is NULL for fd=%p", fd); + op_errno = EINVAL; + goto out; + } + + /* When READDIR_FILTER option is set to on, we can filter out + * directory's entry from the entry->list. + */ + ret = dict_get_int32 (dict, GF_READDIR_SKIP_DIRS, &skip_dirs); + + LOCK (&fd->lock); + { + /* posix_fill_readdir performs multiple separate individual + readdir() calls to fill up the buffer. + + In case of NFS where the same anonymous FD is shared between + different applications, reading a common directory can + result in the anonymous fd getting re-used unsafely between + the two readdir requests (in two different io-threads). + + It would also help, in the future, to replace the loop + around readdir() with a single large getdents() call. 
+ */ + count = posix_fill_readdir (fd, dir, off, size, &entries, this, + skip_dirs); + } + UNLOCK (&fd->lock); + + /* pick ENOENT to indicate EOF */ + op_errno = errno; + op_ret = count; + + if (whichop != GF_FOP_READDIRP) + goto out; + + posix_readdirp_fill (this, fd, &entries, dict); + +out: + STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, NULL); + + gf_dirent_free (&entries); + + return 0; +} + + +int32_t +posix_readdir (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off, dict_t *xdata) +{ + posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR, xdata); + return 0; +} + + +int32_t +posix_readdirp (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off, dict_t *dict) +{ + gf_dirent_t entries; + int32_t op_ret = -1, op_errno = 0; + gf_dirent_t *entry = NULL; + + + if ((dict != NULL) && (dict_get (dict, GET_ANCESTRY_DENTRY_KEY))) { + INIT_LIST_HEAD (&entries.list); + + op_ret = posix_get_ancestry (this, fd->inode, &entries, NULL, + POSIX_ANCESTRY_DENTRY, + &op_errno, dict); + if (op_ret >= 0) { + op_ret = 0; + + list_for_each_entry (entry, &entries.list, list) { + op_ret++; + } + } + + STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, + NULL); + + gf_dirent_free (&entries); + return 0; + } + + posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP, dict); + return 0; +} + +int32_t +posix_rchecksum (call_frame_t *frame, xlator_t *this, + fd_t *fd, off_t offset, int32_t len, dict_t *xdata) +{ + char *alloc_buf = NULL; + char *buf = NULL; + int _fd = -1; + struct posix_fd *pfd = NULL; + int op_ret = -1; + int op_errno = 0; + int ret = 0; + ssize_t bytes_read = 0; + int32_t weak_checksum = 0; + int32_t zerofillcheck = 0; + unsigned char strong_checksum[MD5_DIGEST_LENGTH] = {0}; + struct posix_private *priv = NULL; + dict_t *rsp_xdata = NULL; + gf_boolean_t buf_has_zeroes = _gf_false; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + 
priv = this->private; + memset (strong_checksum, 0, MD5_DIGEST_LENGTH); + + alloc_buf = _page_aligned_alloc (len, &buf); + if (!alloc_buf) { + op_errno = ENOMEM; + goto out; + } + + rsp_xdata = dict_new(); + if (!rsp_xdata) { + op_errno = ENOMEM; + goto out; + } + + ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg (this->name, GF_LOG_WARNING, -ret, P_MSG_PFD_NULL, + "pfd is NULL, fd=%p", fd); + goto out; + } + + _fd = pfd->fd; + + LOCK (&fd->lock); + { + if (priv->aio_capable && priv->aio_init_done) + __posix_fd_set_odirect (fd, pfd, 0, offset, len); + + bytes_read = sys_pread (_fd, buf, len, offset); + if (bytes_read < 0) { + gf_msg (this->name, GF_LOG_WARNING, errno, + P_MSG_PREAD_FAILED, + "pread of %d bytes returned %zd", len, + bytes_read); + + op_errno = errno; + } + + } + UNLOCK (&fd->lock); + + if (bytes_read < 0) + goto out; + + if (xdata && dict_get_int32 (xdata, "check-zero-filled", + &zerofillcheck) == 0) { + buf_has_zeroes = (mem_0filled (buf, bytes_read)) ? 
_gf_false :
+                                                              _gf_true;
+                ret = dict_set_uint32 (rsp_xdata, "buf-has-zeroes",
+                                       buf_has_zeroes);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_WARNING, -ret,
+                                P_MSG_DICT_SET_FAILED, "%s: Failed to set "
+                                "dictionary value for key: %s",
+                                uuid_utoa (fd->inode->gfid), "buf-has-zeroes");
+                        op_errno = -ret;
+                        goto out;
+                }
+        }
+        /* Both checksums must cover exactly the bytes pread() returned.
+         * 'ret' only holds the status of the last helper call (0 after a
+         * successful dict_set_uint32), so it is not a usable length. */
+        weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf,
+                                                (size_t) bytes_read);
+        gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) bytes_read,
+                                  (unsigned char *) strong_checksum);
+
+        op_ret = 0;
+out:
+        STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno,
+                             weak_checksum, strong_checksum, rsp_xdata);
+        if (rsp_xdata)
+                dict_unref (rsp_xdata);
+        GF_FREE (alloc_buf);
+
+        return 0;
+}
+
+/*
+ * forget callback: drop this translator's context from the inode.
+ *
+ * Removes the context with inode_ctx_del(); if none was stored, returns 0.
+ * When the context's unlink_flag is GF_UNLINK_TRUE, the path produced by
+ * POSIX_GET_FILE_UNLINK_PATH for the inode's gfid is unlinked first.
+ * The three context mutexes are then destroyed and the context is freed.
+ *
+ * Returns 0, -1 if the unlink path could not be built, or the result of
+ * sys_unlink().
+ */
+int
+posix_forget (xlator_t *this, inode_t *inode)
+{
+        int                   ret         = 0;
+        char                 *unlink_path = NULL;
+        uint64_t              ctx_uint    = 0;
+        posix_inode_ctx_t    *ctx         = NULL;
+        struct posix_private *priv_posix  = NULL;
+
+        priv_posix = (struct posix_private *) this->private;
+
+        ret = inode_ctx_del (inode, this, &ctx_uint);
+        if (!ctx_uint)
+                return 0;
+
+        ctx = (posix_inode_ctx_t *)ctx_uint;
+
+        if (ctx->unlink_flag == GF_UNLINK_TRUE) {
+                POSIX_GET_FILE_UNLINK_PATH(priv_posix->base_path,
+                                           inode->gfid, unlink_path);
+                if (!unlink_path) {
+                        gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
+                                P_MSG_UNLINK_FAILED,
+                                "Failed to remove gfid :%s",
+                                uuid_utoa (inode->gfid));
+                        ret = -1;
+                        goto out;
+                }
+                ret = sys_unlink(unlink_path);
+        }
+out:
+        /* Reached on both the success and the error path: the context was
+         * already detached from the inode above, so it is released here. */
+        pthread_mutex_destroy (&ctx->xattrop_lock);
+        pthread_mutex_destroy (&ctx->write_atomic_lock);
+        pthread_mutex_destroy (&ctx->pgfid_lock);
+        GF_FREE (ctx);
+        return ret;
+}
diff --git a/xlators/storage/posix/src/posix-inode-handle.h b/xlators/storage/posix/src/posix-inode-handle.h
new file mode 100644
index 00000000000..6849276d3db
--- /dev/null
+++ b/xlators/storage/posix/src/posix-inode-handle.h
@@ -0,0 +1,106 @@
+/*
+   Copyright (c) 2011-2017 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ +#ifndef _POSIX_INODE_HANDLE_H +#define _POSIX_INODE_HANDLE_H + +#include <limits.h> +#include <sys/types.h> +#include "xlator.h" +#include "gf-dirent.h" +#include "posix.h" + +/* From Open Group Base Specifications Issue 6 */ +#ifndef _XOPEN_PATH_MAX +#define _XOPEN_PATH_MAX 1024 +#endif + +#define TRASH_DIR "landfill" + +#define UUID0_STR "00000000-0000-0000-0000-000000000000" +#define SLEN(str) (sizeof(str) - 1) + +#define LOC_HAS_ABSPATH(loc) (loc && (loc->path) && (loc->path[0] == '/')) +#define LOC_IS_DIR(loc) (loc && (loc->inode) && \ + (loc->inode->ia_type == IA_IFDIR)) +#define MAKE_REAL_PATH(var, this, path) do { \ + size_t path_len = strlen(path); \ + size_t var_len = path_len + POSIX_BASE_PATH_LEN(this) + 1; \ + if (POSIX_PATH_MAX(this) != -1 && \ + var_len >= POSIX_PATH_MAX(this)) { \ + var = alloca (path_len + 1); \ + strcpy (var, (path[0] == '/') ? 
path + 1 : path); \ + } else { \ + var = alloca (var_len); \ + strcpy (var, POSIX_BASE_PATH(this)); \ + strcpy (&var[POSIX_BASE_PATH_LEN(this)], path); \ + } \ + } while (0) + +#define MAKE_HANDLE_PATH(var, this, gfid, base) do { \ + int __len; \ + __len = posix_handle_path (this, gfid, base, NULL, 0); \ + if (__len <= 0) \ + break; \ + var = alloca (__len); \ + __len = posix_handle_path (this, gfid, base, var, __len); \ + if (__len <= 0) \ + var = NULL; \ + } while (0) + +#define MAKE_INODE_HANDLE(rpath, this, loc, iatt_p) do { \ + if (gf_uuid_is_null (loc->gfid)) { \ + gf_msg (this->name, GF_LOG_ERROR, 0, \ + P_MSG_INODE_HANDLE_CREATE, \ + "null gfid for path %s", (loc)->path); \ + break; \ + } \ + if (LOC_IS_DIR (loc) && LOC_HAS_ABSPATH (loc)) { \ + MAKE_REAL_PATH (rpath, this, (loc)->path); \ + op_ret = posix_pstat (this, (loc)->gfid, rpath, iatt_p); \ + break; \ + } \ + errno = 0; \ + op_ret = posix_istat (this, loc->gfid, NULL, iatt_p); \ + if (errno != ELOOP) { \ + MAKE_HANDLE_PATH (rpath, this, (loc)->gfid, NULL); \ + if (!rpath) { \ + op_ret = -1; \ + gf_msg (this->name, GF_LOG_ERROR, errno, \ + P_MSG_INODE_HANDLE_CREATE, \ + "Failed to create inode handle " \ + "for path %s", (loc)->path); \ + } \ + break; \ + } \ + /* __ret == -1 && errno == ELOOP */ \ + } while (0) + +#define POSIX_ANCESTRY_PATH (1 << 0) +#define POSIX_ANCESTRY_DENTRY (1 << 1) + +int +posix_handle_path (xlator_t *this, uuid_t gfid, const char *basename, char *buf, + size_t len); + +int +posix_make_ancestryfromgfid (xlator_t *this, char *path, int pathsize, + gf_dirent_t *head, int type, uuid_t gfid, + const size_t handle_size, + const char *priv_base_path, + inode_table_t *table, inode_t **parent, + dict_t *xdata, int32_t *op_errno); + +int +posix_handle_init (xlator_t *this); + +int +posix_handle_trash_init (xlator_t *this); + +#endif /* !_POSIX_INODE_HANDLE_H */ diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 3b03779f305..d8d908f83cd 100644 --- 
a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -1,5 +1,5 @@ /* - Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com> + Copyright (c) 2006-2017 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. This file is licensed to you under your choice of the GNU Lesser @@ -14,8026 +14,16 @@ #define _GNU_SOURCE #endif -#include <openssl/md5.h> -#include <stdint.h> -#include <sys/time.h> -#include <sys/resource.h> -#include <errno.h> -#include <libgen.h> -#include <pthread.h> -#include <ftw.h> -#include <sys/stat.h> -#include <signal.h> -#include <sys/uio.h> -#include <unistd.h> -#include <ftw.h> - -#ifndef GF_BSD_HOST_OS -#include <alloca.h> -#endif /* GF_BSD_HOST_OS */ - -#ifdef HAVE_LINKAT -#include <fcntl.h> -#endif /* HAVE_LINKAT */ - -#include "glusterfs.h" -#include "checksum.h" -#include "dict.h" -#include "logging.h" -#include "posix.h" #include "xlator.h" -#include "defaults.h" -#include "common-utils.h" -#include "compat-errno.h" -#include "compat.h" -#include "byte-order.h" -#include "syscall.h" -#include "statedump.h" -#include "locking.h" -#include "timer.h" -#include "glusterfs3-xdr.h" -#include "hashfn.h" -#include "posix-aio.h" -#include "glusterfs-acl.h" -#include "posix-messages.h" -#include "events.h" -#include "posix-gfid-path.h" - -extern char *marker_xattrs[]; -#define ALIGN_SIZE 4096 - -#undef HAVE_SET_FSID -#ifdef HAVE_SET_FSID - -#define DECLARE_OLD_FS_ID_VAR uid_t old_fsuid; gid_t old_fsgid; - -#define SET_FS_ID(uid, gid) do { \ - old_fsuid = setfsuid (uid); \ - old_fsgid = setfsgid (gid); \ - } while (0) - -#define SET_TO_OLD_FS_ID() do { \ - setfsuid (old_fsuid); \ - setfsgid (old_fsgid); \ - } while (0) - -#else - -#define DECLARE_OLD_FS_ID_VAR -#define SET_FS_ID(uid, gid) -#define SET_TO_OLD_FS_ID() - -#endif - -/* Setting microseconds or nanoseconds depending on what's supported: - The passed in `tv` can be - struct timespec - if supported (better, because it supports nanosecond 
resolution) or - struct timeval - otherwise. */ -#if HAVE_UTIMENSAT -#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \ - tv.tv_nsec = nanosecs -#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \ - (sys_utimensat (AT_FDCWD, path, tv, AT_SYMLINK_NOFOLLOW)) -#else -#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \ - tv.tv_usec = nanosecs / 1000 -#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \ - (lutimes (path, tv)) -#endif +#include "posix.h" -static char *disallow_removexattrs[] = { - GF_XATTR_VOL_ID_KEY, - GFID_XATTR_KEY, - NULL +class_methods_t class_methods = { + .init = posix_init, + .fini = posix_fini, + .reconfigure = posix_reconfigure, + .notify = posix_notify }; -gf_boolean_t -posix_symlinks_match (xlator_t *this, loc_t *loc, uuid_t gfid) -{ - struct posix_private *priv = NULL; - char linkname_actual[PATH_MAX] = {0,}; - char linkname_expected[PATH_MAX] = {0}; - char *dir_handle = NULL; - ssize_t len = 0; - size_t handle_size = 0; - gf_boolean_t ret = _gf_false; - - priv = this->private; - handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length); - dir_handle = alloca0 (handle_size); - - snprintf (linkname_expected, handle_size, "../../%02x/%02x/%s/%s", - loc->pargfid[0], loc->pargfid[1], uuid_utoa (loc->pargfid), - loc->name); - - MAKE_HANDLE_GFID_PATH (dir_handle, this, gfid, NULL); - len = sys_readlink (dir_handle, linkname_actual, PATH_MAX); - if (len < 0) - goto out; - linkname_actual[len] = '\0'; - - if (!strncmp (linkname_actual, linkname_expected, handle_size)) - ret = _gf_true; - -out: - return ret; -} - -dict_t* -posix_dict_set_nlink (dict_t *req, dict_t *res, int32_t nlink) -{ - int ret = -1; - - if (req == NULL || !dict_get (req, GF_REQUEST_LINK_COUNT_XDATA)) - goto out; - - if (res == NULL) - res = dict_new (); - if (res == NULL) - goto out; - - ret = dict_set_uint32 (res, GF_RESPONSE_LINK_COUNT_XDATA, nlink); - if (ret == -1) - gf_msg ("posix", GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL, - "Failed to set 
GF_RESPONSE_LINK_COUNT_XDATA"); -out: - return res; -} - -int -posix_forget (xlator_t *this, inode_t *inode) -{ - int ret = 0; - char *unlink_path = NULL; - uint64_t ctx_uint = 0; - posix_inode_ctx_t *ctx = NULL; - struct posix_private *priv_posix = NULL; - - priv_posix = (struct posix_private *) this->private; - - ret = inode_ctx_del (inode, this, &ctx_uint); - if (!ctx_uint) - return 0; - - ctx = (posix_inode_ctx_t *)ctx_uint; - - if (ctx->unlink_flag == GF_UNLINK_TRUE) { - POSIX_GET_FILE_UNLINK_PATH(priv_posix->base_path, - inode->gfid, unlink_path); - if (!unlink_path) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - P_MSG_UNLINK_FAILED, - "Failed to remove gfid :%s", - uuid_utoa (inode->gfid)); - ret = -1; - goto out; - } - ret = sys_unlink(unlink_path); - } -out: - pthread_mutex_destroy (&ctx->xattrop_lock); - pthread_mutex_destroy (&ctx->write_atomic_lock); - pthread_mutex_destroy (&ctx->pgfid_lock); - GF_FREE (ctx); - return ret; -} - -/* Regular fops */ - -int32_t -posix_lookup (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xdata) -{ - struct iatt buf = {0, }; - int32_t op_ret = -1; - int32_t entry_ret = 0; - int32_t op_errno = 0; - dict_t * xattr = NULL; - char * real_path = NULL; - char * par_path = NULL; - struct iatt postparent = {0,}; - int32_t gfidless = 0; - char *pgfid_xattr_key = NULL; - int32_t nlink_samepgfid = 0; - struct posix_private *priv = NULL; - posix_inode_ctx_t *ctx = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - priv = this->private; - - /* The Hidden directory should be for housekeeping purpose and it - should not get any gfid on it */ - if (__is_root_gfid (loc->pargfid) && loc->name - && (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) { - gf_msg (this->name, GF_LOG_WARNING, EPERM, - P_MSG_LOOKUP_NOT_PERMITTED, "Lookup issued on %s," - " which is not permitted", GF_HIDDEN_PATH); - op_errno = EPERM; - op_ret = -1; - goto out; - } - - op_ret = dict_get_int32 (xdata, 
GF_GFIDLESS_LOOKUP, &gfidless); - op_ret = -1; - if (gf_uuid_is_null (loc->pargfid) || (loc->name == NULL)) { - /* nameless lookup */ - MAKE_INODE_HANDLE (real_path, this, loc, &buf); - } else { - MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &buf); - - if (gf_uuid_is_null (loc->inode->gfid)) { - op_ret = posix_gfid_heal (this, real_path, loc, xdata); - if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; - goto out; - } - MAKE_ENTRY_HANDLE (real_path, par_path, this, - loc, &buf); - } - } - - op_errno = errno; - - if (op_ret == -1) { - if (op_errno != ENOENT) { - gf_msg (this->name, GF_LOG_WARNING, op_errno, - P_MSG_LSTAT_FAILED, - "lstat on %s failed", - real_path ? real_path : "null"); - } - - entry_ret = -1; - goto parent; - } - - if (xdata && (op_ret == 0)) { - xattr = posix_xattr_fill (this, real_path, loc, NULL, -1, xdata, - &buf); - } - - if (priv->update_pgfid_nlinks) { - if (!gf_uuid_is_null (loc->pargfid) && !IA_ISDIR (buf.ia_type)) { - MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, - PGFID_XATTR_KEY_PREFIX, - loc->pargfid); - - op_ret = posix_inode_ctx_get_all (loc->inode, this, - &ctx); - if (op_ret < 0) { - op_errno = ENOMEM; - goto out; - } - - pthread_mutex_lock (&ctx->pgfid_lock); - { - SET_PGFID_XATTR_IF_ABSENT (real_path, - pgfid_xattr_key, - nlink_samepgfid, - XATTR_CREATE, op_ret, - this, unlock); - } -unlock: - pthread_mutex_unlock (&ctx->pgfid_lock); - } - } - -parent: - if (par_path) { - op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_LSTAT_FAILED, "post-operation lstat on" - " parent %s failed", par_path); - if (op_errno == ENOENT) - /* If parent directory is missing in a lookup, - errno should be ESTALE (bad handle) and not - ENOENT (missing entry) - */ - op_errno = ESTALE; - goto out; - } - } - - op_ret = entry_ret; -out: - if (!op_ret && !gfidless && gf_uuid_is_null (buf.ia_gfid)) { - gf_msg (this->name, GF_LOG_ERROR, ENODATA, 
P_MSG_NULL_GFID, - "buf->ia_gfid is null for " - "%s", (real_path) ? real_path: ""); - op_ret = -1; - op_errno = ENODATA; - } - - if (op_ret == 0) - op_errno = 0; - STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, - (loc)?loc->inode:NULL, &buf, xattr, &postparent); - - if (xattr) - dict_unref (xattr); - - return 0; -} - - -int32_t -posix_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) -{ - struct iatt buf = {0,}; - int32_t op_ret = -1; - int32_t op_errno = 0; - struct posix_private *priv = NULL; - char *real_path = NULL; - dict_t *xattr_rsp = NULL; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - - MAKE_INODE_HANDLE (real_path, this, loc, &buf); - - if (op_ret == -1) { - op_errno = errno; - if (op_errno == ENOENT) { - gf_msg_debug(this->name, 0, "lstat on %s failed: %s", - real_path ? real_path : "<null>", - strerror (op_errno)); - } else { - gf_msg (this->name, GF_LOG_ERROR, op_errno, - P_MSG_LSTAT_FAILED, "lstat on %s failed", - real_path ? 
real_path : "<null>"); - } - goto out; - } - if (xdata) - xattr_rsp = posix_xattr_fill (this, real_path, loc, NULL, -1, - xdata, &buf); - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID(); - STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, &buf, xattr_rsp); - if (xattr_rsp) - dict_unref (xattr_rsp); - - return 0; -} - -static inline mode_t override_umask (mode_t mode, mode_t mode_bit) -{ - gf_msg_debug ("posix", 0, "The value of mode is %u", mode); - mode = mode >> 9; /* 3x3 (bits for each octal digit)*/ - mode = (mode << 9) | mode_bit; - gf_msg_debug ("posix", 0, "The value of mode is %u", mode); - return mode; -} - -static int -posix_do_chmod (xlator_t *this, const char *path, struct iatt *stbuf) -{ - int32_t ret = -1; - mode_t mode = 0; - mode_t mode_bit = 0; - struct posix_private *priv = NULL; - struct stat stat; - int is_symlink = 0; - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - ret = sys_lstat (path, &stat); - if (ret != 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_LSTAT_FAILED, - "lstat failed: %s", path); - goto out; - } - - if (S_ISLNK (stat.st_mode)) - is_symlink = 1; - - if (S_ISDIR (stat.st_mode)) { - mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type); - mode_bit = (mode & priv->create_directory_mask) - | priv->force_directory_mode; - mode = override_umask(mode, mode_bit); - } else { - mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type); - mode_bit = (mode & priv->create_mask) - | priv->force_create_mode; - mode = override_umask(mode, mode_bit); - } - ret = lchmod (path, mode); - if ((ret == -1) && (errno == ENOSYS)) { - /* in Linux symlinks are always in mode 0777 and no - such call as lchmod exists. 
- */ - gf_msg_debug (this->name, 0, "%s (%s)", path, strerror (errno)); - if (is_symlink) { - ret = 0; - goto out; - } - - ret = sys_chmod (path, mode); - } -out: - return ret; -} - -static int -posix_do_chown (xlator_t *this, - const char *path, - struct iatt *stbuf, - int32_t valid) -{ - int32_t ret = -1; - uid_t uid = -1; - gid_t gid = -1; - - if (valid & GF_SET_ATTR_UID) - uid = stbuf->ia_uid; - - if (valid & GF_SET_ATTR_GID) - gid = stbuf->ia_gid; - - ret = sys_lchown (path, uid, gid); - - return ret; -} - -static int -posix_do_utimes (xlator_t *this, - const char *path, - struct iatt *stbuf, - int valid) -{ - int32_t ret = -1; -#if defined(HAVE_UTIMENSAT) - struct timespec tv[2] = { {0,}, {0,} }; -#else - struct timeval tv[2] = { {0,}, {0,} }; -#endif - struct stat stat; - int is_symlink = 0; - - ret = sys_lstat (path, &stat); - if (ret != 0) { - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_FILE_OP_FAILED, "%s", path); - goto out; - } - - if (S_ISLNK (stat.st_mode)) - is_symlink = 1; - - if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { - tv[0].tv_sec = stbuf->ia_atime; - SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[0], stbuf->ia_atime_nsec); - } else { - /* atime is not given, use current values */ - tv[0].tv_sec = ST_ATIM_SEC (&stat); - SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[0], ST_ATIM_NSEC (&stat)); - } - - if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) { - tv[1].tv_sec = stbuf->ia_mtime; - SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[1], stbuf->ia_mtime_nsec); - } else { - /* mtime is not given, use current values */ - tv[1].tv_sec = ST_MTIM_SEC (&stat); - SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv[1], ST_MTIM_NSEC (&stat)); - } - - ret = PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv); - if ((ret == -1) && (errno == ENOSYS)) { - gf_msg_debug (this->name, 0, "%s (%s)", - path, strerror (errno)); - if (is_symlink) { - ret = 0; - goto out; - } - - ret = PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv); - } - -out: - return ret; -} - -int -posix_setattr (call_frame_t *frame, 
xlator_t *this, - loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = 0; - struct iatt statpre = {0,}; - struct iatt statpost = {0,}; - dict_t *xattr_rsp = NULL; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_INODE_HANDLE (real_path, this, loc, &statpre); - - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "setattr (lstat) on %s failed", - real_path ? real_path : "<null>"); - goto out; - } - - if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)){ - op_ret = posix_do_chown (this, real_path, stbuf, valid); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_CHOWN_FAILED, "setattr (chown) on %s " - "failed", real_path); - goto out; - } - } - - if (valid & GF_SET_ATTR_MODE) { - op_ret = posix_do_chmod (this, real_path, stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_CHMOD_FAILED, "setattr (chmod) on %s " - "failed", real_path); - goto out; - } - } - - if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) { - op_ret = posix_do_utimes (this, real_path, stbuf, valid); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_UTIMES_FAILED, "setattr (utimes) on %s " - "failed", real_path); - goto out; - } - } - - if (!valid) { - op_ret = sys_lchown (real_path, -1, -1); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_LCHOWN_FAILED, "lchown (%s, -1, -1) " - "failed", real_path); - - goto out; - } - } - - op_ret = posix_pstat (this, loc->gfid, real_path, &statpost); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "setattr (lstat) on %s failed", real_path); - goto out; - } - - if 
(xdata) - xattr_rsp = posix_xattr_fill (this, real_path, loc, NULL, -1, - xdata, &statpost); - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, - &statpre, &statpost, xattr_rsp); - if (xattr_rsp) - dict_unref (xattr_rsp); - - return 0; -} - -int32_t -posix_do_fchown (xlator_t *this, - int fd, - struct iatt *stbuf, - int32_t valid) -{ - int ret = -1; - uid_t uid = -1; - gid_t gid = -1; - - if (valid & GF_SET_ATTR_UID) - uid = stbuf->ia_uid; - - if (valid & GF_SET_ATTR_GID) - gid = stbuf->ia_gid; - - ret = sys_fchown (fd, uid, gid); - - return ret; -} - - -int32_t -posix_do_fchmod (xlator_t *this, - int fd, struct iatt *stbuf) -{ - int32_t ret = -1; - mode_t mode = 0; - mode_t mode_bit = 0; - struct posix_private *priv = NULL; - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - mode = st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type); - mode_bit = (mode & priv->create_mask) - | priv->force_create_mode; - mode = override_umask (mode, mode_bit); - ret = sys_fchmod (fd, mode); -out: - return ret; -} - -static int -posix_do_futimes (xlator_t *this, int fd, struct iatt *stbuf, int valid) -{ - int32_t ret = -1; - struct timeval tv[2] = { {0,}, {0,} }; - struct stat stat = {0,}; - - ret = sys_fstat (fd, &stat); - if (ret != 0) { - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_FILE_OP_FAILED, "%d", fd); - goto out; - } - - if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { - tv[0].tv_sec = stbuf->ia_atime; - tv[0].tv_usec = stbuf->ia_atime_nsec / 1000; - } else { - /* atime is not given, use current values */ - tv[0].tv_sec = ST_ATIM_SEC (&stat); - tv[0].tv_usec = ST_ATIM_NSEC (&stat) / 1000; - } - - if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) { - tv[1].tv_sec = stbuf->ia_mtime; - tv[1].tv_usec = stbuf->ia_mtime_nsec / 1000; - } else { - /* mtime is not given, use current values */ - tv[1].tv_sec = ST_MTIM_SEC (&stat); - tv[1].tv_usec = ST_MTIM_NSEC (&stat) / 1000; - } - - ret = sys_futimes (fd, 
tv); - if (ret == -1) - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FUTIMES_FAILED, - "%d", fd); - -out: - return ret; -} - -int -posix_fsetattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - struct iatt statpre = {0,}; - struct iatt statpost = {0,}; - struct posix_fd *pfd = NULL; - dict_t *xattr_rsp = NULL; - int32_t ret = -1; - - DECLARE_OLD_FS_ID_VAR; - - SET_FS_ID (frame->root->uid, frame->root->gid); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); - if (ret < 0) { - gf_msg_debug (this->name, 0, "pfd is NULL from fd=%p", fd); - goto out; - } - - op_ret = posix_fdstat (this, pfd->fd, &statpre); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, - "fsetattr (fstat) failed on fd=%p", fd); - goto out; - } - - if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) { - op_ret = posix_do_fchown (this, pfd->fd, stbuf, valid); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_FCHOWN_FAILED, "fsetattr (fchown) failed" - " on fd=%p", fd); - goto out; - } - - } - - if (valid & GF_SET_ATTR_MODE) { - op_ret = posix_do_fchmod (this, pfd->fd, stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_FCHMOD_FAILED, "fsetattr (fchmod) failed" - " on fd=%p", fd); - goto out; - } - } - - if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) { - op_ret = posix_do_futimes (this, pfd->fd, stbuf, valid); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_FUTIMES_FAILED, "fsetattr (futimes) on " - "failed fd=%p", fd); - goto out; - } - } - - if (!valid) { - op_ret = sys_fchown (pfd->fd, -1, -1); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_FCHOWN_FAILED, - 
"fchown (%d, -1, -1) failed", - pfd->fd); - - goto out; - } - } - - op_ret = posix_fdstat (this, pfd->fd, &statpost); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, - "fsetattr (fstat) failed on fd=%p", fd); - goto out; - } - - if (xdata) - xattr_rsp = posix_xattr_fill (this, NULL, NULL, fd, pfd->fd, - xdata, &statpost); - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, - &statpre, &statpost, xattr_rsp); - if (xattr_rsp) - dict_unref (xattr_rsp); - - return 0; -} - -static int32_t -posix_do_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, - int32_t flags, off_t offset, size_t len, - struct iatt *statpre, struct iatt *statpost, dict_t *xdata) -{ - int32_t ret = -1; - int32_t op_errno = 0; - struct posix_fd *pfd = NULL; - gf_boolean_t locked = _gf_false; - posix_inode_ctx_t *ctx = NULL; - struct posix_private *priv = NULL; - - DECLARE_OLD_FS_ID_VAR; - - SET_FS_ID (frame->root->uid, frame->root->gid); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - priv = this->private; - DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, ret, ret, out); - - ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); - if (ret < 0) { - gf_msg_debug (this->name, 0, "pfd is NULL from fd=%p", fd); - goto out; - } - - ret = posix_inode_ctx_get_all (fd->inode, this, &ctx); - if (ret < 0) { - ret = -ENOMEM; - goto out; - } - - if (xdata && dict_get (xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) { - locked = _gf_true; - pthread_mutex_lock (&ctx->write_atomic_lock); - } - - ret = posix_fdstat (this, pfd->fd, statpre); - if (ret == -1) { - ret = -errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, - "fallocate (fstat) failed on fd=%p", fd); - goto out; - } - - ret = sys_fallocate (pfd->fd, flags, offset, len); - if (ret == -1) { - ret = -errno; - gf_msg (this->name, GF_LOG_ERROR, -ret, P_MSG_FALLOCATE_FAILED, - "fallocate 
failed on %s offset: %jd, " - "len:%zu, flags: %d", uuid_utoa (fd->inode->gfid), - offset, len, flags); - goto out; - } - - ret = posix_fdstat (this, pfd->fd, statpost); - if (ret == -1) { - ret = -errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, - "fallocate (fstat) failed on fd=%p", fd); - goto out; - } - -out: - if (locked) { - pthread_mutex_unlock (&ctx->write_atomic_lock); - locked = _gf_false; - } - SET_TO_OLD_FS_ID (); - if (ret == ENOSPC) - ret = -ENOSPC; - - return ret; -} - -char* -_page_aligned_alloc (size_t size, char **aligned_buf) -{ - char *alloc_buf = NULL; - char *buf = NULL; - - alloc_buf = GF_CALLOC (1, (size + ALIGN_SIZE), gf_posix_mt_char); - if (!alloc_buf) - goto out; - /* page aligned buffer */ - buf = GF_ALIGN_BUF (alloc_buf, ALIGN_SIZE); - *aligned_buf = buf; -out: - return alloc_buf; -} - -static int32_t -_posix_do_zerofill(int fd, off_t offset, off_t len, int o_direct) -{ - off_t num_vect = 0; - off_t num_loop = 1; - off_t idx = 0; - int32_t op_ret = -1; - int32_t vect_size = VECTOR_SIZE; - off_t remain = 0; - off_t extra = 0; - struct iovec *vector = NULL; - char *iov_base = NULL; - char *alloc_buf = NULL; - - if (len == 0) - return 0; - if (len < VECTOR_SIZE) - vect_size = len; - - num_vect = len / (vect_size); - remain = len % vect_size ; - if (num_vect > MAX_NO_VECT) { - extra = num_vect % MAX_NO_VECT; - num_loop = num_vect / MAX_NO_VECT; - num_vect = MAX_NO_VECT; - } - - vector = GF_CALLOC (num_vect, sizeof(struct iovec), - gf_common_mt_iovec); - if (!vector) - return -1; - if (o_direct) { - alloc_buf = _page_aligned_alloc(vect_size, &iov_base); - if (!alloc_buf) { - GF_FREE(vector); - return -1; - } - } else { - iov_base = GF_CALLOC (vect_size, sizeof(char), - gf_common_mt_char); - if (!iov_base) { - GF_FREE(vector); - return -1; - } - } - - for (idx = 0; idx < num_vect; idx++) { - vector[idx].iov_base = iov_base; - vector[idx].iov_len = vect_size; - } - if (sys_lseek (fd, offset, SEEK_SET) < 0) { - op_ret = 
-1; - goto err; - } - - for (idx = 0; idx < num_loop; idx++) { - op_ret = sys_writev (fd, vector, num_vect); - if (op_ret < 0) - goto err; - } - if (extra) { - op_ret = sys_writev (fd, vector, extra); - if (op_ret < 0) - goto err; - } - if (remain) { - vector[0].iov_len = remain; - op_ret = sys_writev (fd, vector , 1); - if (op_ret < 0) - goto err; - } -err: - if (o_direct) - GF_FREE(alloc_buf); - else - GF_FREE(iov_base); - GF_FREE(vector); - return op_ret; -} - -static int32_t -posix_do_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - off_t len, struct iatt *statpre, struct iatt *statpost, - dict_t *xdata) -{ - int32_t ret = -1; - int32_t op_errno = 0; - int32_t flags = 0; - struct posix_fd *pfd = NULL; - gf_boolean_t locked = _gf_false; - posix_inode_ctx_t *ctx = NULL; - - DECLARE_OLD_FS_ID_VAR; - - SET_FS_ID (frame->root->uid, frame->root->gid); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); - if (ret < 0) { - gf_msg_debug (this->name, 0, "pfd is NULL from fd=%p", fd); - goto out; - } - - ret = posix_inode_ctx_get_all (fd->inode, this, &ctx); - if (ret < 0) { - ret = -ENOMEM; - goto out; - } - - if (dict_get (xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) { - locked = _gf_true; - pthread_mutex_lock (&ctx->write_atomic_lock); - } - - ret = posix_fdstat (this, pfd->fd, statpre); - if (ret == -1) { - ret = -errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, - "pre-operation fstat failed on fd = %p", fd); - goto out; - } - - /* See if we can use FALLOC_FL_ZERO_RANGE to perform the zero fill. - * If it fails, fall back to _posix_do_zerofill() and an optional fsync. 
- */ - flags = FALLOC_FL_ZERO_RANGE; - ret = sys_fallocate (pfd->fd, flags, offset, len); - if (ret == 0) - goto fsync; - - ret = _posix_do_zerofill (pfd->fd, offset, len, pfd->flags & O_DIRECT); - if (ret < 0) { - ret = -errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_ZEROFILL_FAILED, - "zerofill failed on fd %d length %" PRId64 , - pfd->fd, len); - goto out; - } - -fsync: - if (pfd->flags & (O_SYNC|O_DSYNC)) { - ret = sys_fsync (pfd->fd); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - P_MSG_WRITEV_FAILED, "fsync() in writev on fd" - "%d failed", pfd->fd); - ret = -errno; - goto out; - } - } - - ret = posix_fdstat (this, pfd->fd, statpost); - if (ret == -1) { - ret = -errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, - "post operation fstat failed on fd=%p", fd); - goto out; - } - -out: - if (locked) { - pthread_mutex_unlock (&ctx->write_atomic_lock); - locked = _gf_false; - } - SET_TO_OLD_FS_ID (); - - return ret; -} - -static int32_t -_posix_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size, - off_t offset, size_t len, dict_t *xdata) -{ - int32_t ret; - int32_t flags = 0; - struct iatt statpre = {0,}; - struct iatt statpost = {0,}; - -#ifdef FALLOC_FL_KEEP_SIZE - if (keep_size) - flags = FALLOC_FL_KEEP_SIZE; -#endif /* FALLOC_FL_KEEP_SIZE */ - - ret = posix_do_fallocate (frame, this, fd, flags, offset, len, - &statpre, &statpost, xdata); - if (ret < 0) - goto err; - - STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, NULL); - return 0; - -err: - STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, NULL); - return 0; -} - -static int32_t -posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - size_t len, dict_t *xdata) -{ - int32_t ret; -#ifndef FALLOC_FL_KEEP_SIZE - ret = EOPNOTSUPP; - -#else /* FALLOC_FL_KEEP_SIZE */ - int32_t flags = FALLOC_FL_KEEP_SIZE|FALLOC_FL_PUNCH_HOLE; - struct iatt statpre = {0,}; - struct iatt statpost = {0,}; - - ret = 
posix_do_fallocate (frame, this, fd, flags, offset, len, - &statpre, &statpost, xdata); - if (ret < 0) - goto err; - - STACK_UNWIND_STRICT(discard, frame, 0, 0, &statpre, &statpost, NULL); - return 0; - -err: -#endif /* FALLOC_FL_KEEP_SIZE */ - STACK_UNWIND_STRICT(discard, frame, -1, -ret, NULL, NULL, NULL); - return 0; -} - -static int32_t -posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - off_t len, dict_t *xdata) -{ - int32_t ret = 0; - struct iatt statpre = {0,}; - struct iatt statpost = {0,}; - struct posix_private *priv = NULL; - int op_ret = -1; - int op_errno = -EINVAL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - - priv = this->private; - DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); - - ret = posix_do_zerofill (frame, this, fd, offset, len, - &statpre, &statpost, xdata); - if (ret < 0) { - op_ret = -1; - op_errno = -ret; - goto out; - } - - STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, NULL); - return 0; - -out: - STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL, NULL); - return 0; -} - -static int32_t -posix_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) -{ - /* - * IPC is for inter-translator communication. If one gets here, it - * means somebody sent one that nobody else recognized, which is an - * error much like an uncaught exception. 
- */ - gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_IPC_NOT_HANDLE, - "GF_LOG_IPC(%d) not handled", op); - STACK_UNWIND_STRICT (ipc, frame, -1, -EOPNOTSUPP, NULL); - return 0; - -} - -#ifdef HAVE_SEEK_HOLE -static int32_t -posix_seek (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - gf_seek_what_t what, dict_t *xdata) -{ - struct posix_fd *pfd = NULL; - off_t ret = -1; - int err = 0; - int whence = 0; - - DECLARE_OLD_FS_ID_VAR; - - SET_FS_ID (frame->root->uid, frame->root->gid); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - switch (what) { - case GF_SEEK_DATA: - whence = SEEK_DATA; - break; - case GF_SEEK_HOLE: - whence = SEEK_HOLE; - break; - default: - err = ENOTSUP; - gf_msg (this->name, GF_LOG_ERROR, ENOTSUP, - P_MSG_SEEK_UNKOWN, "don't know what to seek"); - goto out; - } - - ret = posix_fd_ctx_get (fd, this, &pfd, &err); - if (ret < 0) { - gf_msg_debug (this->name, 0, "pfd is NULL from fd=%p", fd); - goto out; - } - - ret = sys_lseek (pfd->fd, offset, whence); - if (ret == -1) { - err = errno; - gf_msg (this->name, GF_LOG_ERROR, err, P_MSG_SEEK_FAILED, - "seek failed on fd %d length %" PRId64 , pfd->fd, - offset); - goto out; - } - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (seek, frame, (ret == -1 ? -1 : 0), err, - (ret == -1 ? 
-1 : ret), xdata); - return 0; -} -#endif - -int32_t -posix_opendir (call_frame_t *frame, xlator_t *this, - loc_t *loc, fd_t *fd, dict_t *xdata) -{ - char * real_path = NULL; - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - DIR * dir = NULL; - struct posix_fd * pfd = NULL; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - VALIDATE_OR_GOTO (fd, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_INODE_HANDLE (real_path, this, loc, NULL); - if (!real_path) { - op_errno = ESTALE; - goto out; - } - - op_ret = -1; - dir = sys_opendir (real_path); - - if (dir == NULL) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_OPENDIR_FAILED, - "opendir failed on %s", real_path); - goto out; - } - - op_ret = dirfd (dir); - if (op_ret < 0) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_DIRFD_FAILED, - "dirfd() failed on %s", real_path); - goto out; - } - - pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd); - if (!pfd) { - op_errno = errno; - goto out; - } - - pfd->dir = dir; - pfd->dir_eof = -1; - pfd->fd = op_ret; - - op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd); - if (op_ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - P_MSG_FD_PATH_SETTING_FAILED, "failed to set the fd" - "context path=%s fd=%p", real_path, fd); - - op_ret = 0; - -out: - if (op_ret == -1) { - if (dir) { - (void) sys_closedir (dir); - dir = NULL; - } - if (pfd) { - GF_FREE (pfd); - pfd = NULL; - } - } - - SET_TO_OLD_FS_ID (); - STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, NULL); - return 0; -} - -int32_t -posix_releasedir (xlator_t *this, - fd_t *fd) -{ - struct posix_fd * pfd = NULL; - uint64_t tmp_pfd = 0; - int ret = 0; - - struct posix_private *priv = NULL; - - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - ret = fd_ctx_del (fd, this, &tmp_pfd); - if (ret < 0) { - gf_msg_debug (this->name, 0, "pfd from fd=%p is NULL", fd); - 
goto out; - } - - pfd = (struct posix_fd *)(long)tmp_pfd; - if (!pfd->dir) { - gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL, - "pfd->dir is NULL for fd=%p", fd); - goto out; - } - - priv = this->private; - - pthread_mutex_lock (&priv->janitor_lock); - { - INIT_LIST_HEAD (&pfd->list); - list_add_tail (&pfd->list, &priv->janitor_fds); - pthread_cond_signal (&priv->janitor_cond); - } - pthread_mutex_unlock (&priv->janitor_lock); - -out: - return 0; -} - - -int32_t -posix_readlink (call_frame_t *frame, xlator_t *this, - loc_t *loc, size_t size, dict_t *xdata) -{ - char * dest = NULL; - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = NULL; - struct iatt stbuf = {0,}; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - - dest = alloca (size + 1); - - MAKE_INODE_HANDLE (real_path, this, loc, &stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "lstat on %s failed", - loc->path ? 
loc->path : "<null>"); - goto out; - } - - op_ret = sys_readlink (real_path, dest, size); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_READYLINK_FAILED, - "readlink on %s failed", real_path); - goto out; - } - - dest[op_ret] = 0; -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, dest, &stbuf, NULL); - - return 0; -} - - -int -posix_mknod (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata) -{ - int tmp_fd = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_path = 0; - char *par_path = 0; - struct iatt stbuf = { 0, }; - struct posix_private *priv = NULL; - gid_t gid = 0; - struct iatt preparent = {0,}; - struct iatt postparent = {0,}; - void * uuid_req = NULL; - int32_t nlink_samepgfid = 0; - char *pgfid_xattr_key = NULL; - gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; - gf_boolean_t linked = _gf_false; - gf_loglevel_t level = GF_LOG_NONE; - mode_t mode_bit = 0; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno, - out); - MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL); - - mode_bit = (priv->create_mask & mode) | priv->force_create_mode; - mode = override_umask (mode, mode_bit); - - gid = frame->root->gid; - - SET_FS_ID (frame->root->uid, gid); - DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); - if (!real_path || !par_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - - op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "pre-operation lstat on parent of %s failed", - real_path); - goto out; - } - - if (preparent.ia_prot.sgid) { - gid = 
preparent.ia_gid; - } - - /* Check if the 'gfid' already exists, because this mknod may be an - internal call from distribute for creating 'linkfile', and that - linkfile may be for a hardlinked file */ - if (dict_get (xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { - dict_del (xdata, GLUSTERFS_INTERNAL_FOP_KEY); - op_ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); - if (op_ret) { - gf_msg_debug (this->name, 0, "failed to get the gfid from " - "dict for %s", loc->path); - goto real_op; - } - op_ret = posix_create_link_if_gfid_exists (this, uuid_req, - real_path, - loc->inode->table); - if (!op_ret) { - linked = _gf_true; - goto post_op; - } - } - -real_op: -#ifdef __NetBSD__ - if (S_ISFIFO(mode)) - op_ret = mkfifo (real_path, mode); - else -#endif /* __NetBSD__ */ - op_ret = sys_mknod (real_path, mode, dev); - - if (op_ret == -1) { - op_errno = errno; - if ((op_errno == EINVAL) && S_ISREG (mode)) { - /* Over Darwin, mknod with (S_IFREG|mode) - doesn't work */ - tmp_fd = sys_creat (real_path, mode); - if (tmp_fd == -1) { - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_CREATE_FAILED, "create failed on" - "%s", real_path); - goto out; - } - sys_close (tmp_fd); - } else { - if (op_errno == EEXIST) - level = GF_LOG_DEBUG; - else - level = GF_LOG_ERROR; - gf_msg (this->name, level, errno, P_MSG_MKNOD_FAILED, - "mknod on %s failed", real_path); - goto out; - } - } - - entry_created = _gf_true; - -#ifndef HAVE_SET_FSID - op_ret = sys_lchown (real_path, frame->root->uid, gid); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LCHOWN_FAILED, - "lchown on %s failed", real_path); - goto out; - } -#endif - -post_op: - op_ret = posix_acl_xattr_set (this, real_path, xdata); - if (op_ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_ACL_FAILED, - "setting ACLs on %s failed", real_path); - } - - if (priv->update_pgfid_nlinks) { - MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, - loc->pargfid); - nlink_samepgfid = 1; - - 
SET_PGFID_XATTR (real_path, pgfid_xattr_key, nlink_samepgfid, - XATTR_CREATE, op_ret, this, ignore); - } - - if (priv->gfid2path) { - posix_set_gfid2path_xattr (this, real_path, loc->pargfid, - loc->name); - } - -ignore: - op_ret = posix_entry_create_xattr_set (this, real_path, xdata); - if (op_ret) { - if (errno != EEXIST) - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_XATTR_FAILED, - "setting xattrs on %s failed", real_path); - else - gf_msg_debug (this->name, 0, - "setting xattrs on %s failed", real_path); - } - - if (!linked) { - op_ret = posix_gfid_set (this, real_path, loc, xdata); - if (op_ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_GFID_FAILED, - "setting gfid on %s failed", real_path); - } else { - gfid_set = _gf_true; - } - } - - op_ret = posix_pstat (this, NULL, real_path, &stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_MKNOD_FAILED, - "mknod on %s failed", real_path); - goto out; - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "post-operation lstat on parent %s failed", - par_path); - goto out; - } - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, - (loc)?loc->inode:NULL, &stbuf, &preparent, - &postparent, NULL); - - if (op_ret < 0) { - if (entry_created) { - if (S_ISREG (mode)) - sys_unlink (real_path); - else - sys_rmdir (real_path); - } - - if (gfid_set) - posix_gfid_unset (this, xdata); - } - - return 0; -} - -int -posix_mkdir (call_frame_t *frame, xlator_t *this, - loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_path = NULL, *gfid_path = NULL; - char *par_path = NULL, *xattr_name = NULL; - struct iatt stbuf = {0, }; - struct posix_private *priv = NULL; - gid_t gid = 0; - struct iatt preparent = {0,}; - struct iatt postparent = {0,}; - 
gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; - void *uuid_req = NULL; - ssize_t size = 0; - dict_t *xdata_rsp = NULL; - void *disk_xattr = NULL; - data_t *arg_data = NULL; - char pgfid[GF_UUID_BUF_SIZE] = {0}; - char value_buf[4096] = {0,}; - gf_boolean_t have_val = _gf_false; - mode_t mode_bit = 0; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - /* The Hidden directory should be for housekeeping purpose and it - should not get created from a user request */ - if (__is_root_gfid (loc->pargfid) && - (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) { - gf_msg (this->name, GF_LOG_WARNING, EPERM, - P_MSG_MKDIR_NOT_PERMITTED, "mkdir issued on %s, which" - "is not permitted", GF_HIDDEN_PATH); - op_errno = EPERM; - op_ret = -1; - goto out; - } - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno, - out); - DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); - - MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, NULL); - if (!real_path || !par_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - if (loc->parent) - gf_uuid_unparse (loc->parent->gfid, pgfid); - else - gf_uuid_unparse (loc->pargfid, pgfid); - - gid = frame->root->gid; - - op_ret = posix_pstat (this, NULL, real_path, &stbuf); - - SET_FS_ID (frame->root->uid, gid); - - mode_bit = (priv->create_directory_mask & mode) - | priv->force_directory_mode; - mode = override_umask (mode, mode_bit); - - if (xdata) { - op_ret = dict_get_ptr (xdata, "gfid-req", &uuid_req); - if (!op_ret && !gf_uuid_compare (stbuf.ia_gfid, uuid_req)) { - op_ret = -1; - op_errno = EEXIST; - goto out; - } - } - - if (uuid_req && !gf_uuid_is_null (uuid_req)) { - op_ret = posix_istat (this, uuid_req, NULL, &stbuf); - if ((op_ret == 0) && IA_ISDIR (stbuf.ia_type)) { - size = posix_handle_path (this, uuid_req, NULL, NULL, - 0); - if (size > 0) - 
gfid_path = alloca (size); - - if (gfid_path) - posix_handle_path (this, uuid_req, NULL, - gfid_path, size); - - if (frame->root->pid != GF_CLIENT_PID_SELF_HEALD) { - gf_msg (this->name, GF_LOG_WARNING, 0, - P_MSG_DIR_OF_SAME_ID, "mkdir (%s): " - "gfid (%s) is already associated with " - "directory (%s). Hence, both " - "directories will share same gfid and " - "this can lead to inconsistencies.", - loc->path, uuid_utoa (uuid_req), - gfid_path ? gfid_path : "<NULL>"); - - gf_event (EVENT_POSIX_SAME_GFID, "gfid=%s;" - "path=%s;newpath=%s;brick=%s:%s", - uuid_utoa (uuid_req), - gfid_path ? gfid_path : "<NULL>", - loc->path, priv->hostname, - priv->base_path); - } - if (!posix_symlinks_match (this, loc, uuid_req)) - /* For afr selfheal of dir renames, we need to - * remove the old symlink in order for - * posix_gfid_set to set the symlink to the - * new dir.*/ - posix_handle_unset (this, stbuf.ia_gfid, NULL); - } - } else if (!uuid_req && frame->root->pid != GF_SERVER_PID_TRASH) { - op_ret = -1; - op_errno = EPERM; - gf_msg_callingfn (this->name, GF_LOG_WARNING, op_errno, - P_MSG_NULL_GFID, "mkdir (%s): is issued without " - "gfid-req %p", loc->path, xdata); - goto out; - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "pre-operation lstat on parent %s failed", - par_path); - goto out; - } - - if (preparent.ia_prot.sgid) { - gid = preparent.ia_gid; - mode |= S_ISGID; - } - - op_ret = dict_get_str (xdata, GF_PREOP_PARENT_KEY, &xattr_name); - if (xattr_name != NULL) { - arg_data = dict_get (xdata, xattr_name); - if (arg_data) { - size = sys_lgetxattr (par_path, xattr_name, value_buf, - sizeof(value_buf) - 1); - if (size >= 0) { - have_val = _gf_true; - } else { - if (errno == ERANGE) { - gf_msg (this->name, GF_LOG_INFO, errno, - P_MSG_PREOP_CHECK_FAILED, - "mkdir (%s/%s): getxattr on key " - "(%s) path (%s) failed due to " - " buffer 
overflow", pgfid, - loc->name, xattr_name, - par_path); - size = sys_lgetxattr (par_path, - xattr_name, NULL, - 0); - } - if (size < 0) { - op_ret = -1; - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_PREOP_CHECK_FAILED, - "mkdir (%s/%s): getxattr on key (%s)" - " path (%s) failed ", pgfid, - loc->name, xattr_name, - par_path); - goto out; - } - } - disk_xattr = alloca (size); - if (disk_xattr == NULL) { - op_ret = -1; - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_PREOP_CHECK_FAILED, - "mkdir (%s/%s): alloca failed during" - " preop of mkdir (%s)", pgfid, - loc->name, real_path); - goto out; - } - if (have_val) { - memcpy (disk_xattr, value_buf, size); - } else { - size = sys_lgetxattr (par_path, xattr_name, - disk_xattr, size); - if (size < 0) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_PREOP_CHECK_FAILED, - "mkdir (%s/%s): getxattr on " - " key (%s) path (%s) failed " - "(%s)", pgfid, loc->name, - xattr_name, par_path, - strerror (errno)); - goto out; - } - } - if ((arg_data->len != size) - || (memcmp (arg_data->data, disk_xattr, size))) { - gf_msg (this->name, GF_LOG_INFO, EIO, - P_MSG_PREOP_CHECK_FAILED, - "mkdir (%s/%s): failing preop of " - "mkdir (%s) as on-disk" - " xattr value differs from argument " - "value for key %s", pgfid, loc->name, - real_path, xattr_name); - op_ret = -1; - op_errno = EIO; - - xdata_rsp = dict_new (); - if (xdata_rsp == NULL) { - gf_msg (this->name, GF_LOG_ERROR, - ENOMEM, - P_MSG_PREOP_CHECK_FAILED, - "mkdir (%s/%s): " - "dict allocation failed", pgfid, - loc->name); - op_errno = ENOMEM; - goto out; - } - - op_errno = dict_set_int8 (xdata_rsp, - GF_PREOP_CHECK_FAILED, 1); - goto out; - } - - dict_del (xdata, xattr_name); - } - - dict_del (xdata, GF_PREOP_PARENT_KEY); - } - - op_ret = sys_mkdir (real_path, mode); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_MKDIR_FAILED, - "mkdir of %s failed", real_path); - 
goto out; - } - - entry_created = _gf_true; - -#ifndef HAVE_SET_FSID - op_ret = sys_chown (real_path, frame->root->uid, gid); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_CHOWN_FAILED, - "chown on %s failed", real_path); - goto out; - } -#endif - op_ret = posix_acl_xattr_set (this, real_path, xdata); - if (op_ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_ACL_FAILED, - "setting ACLs on %s failed ", real_path); - } - - op_ret = posix_entry_create_xattr_set (this, real_path, xdata); - if (op_ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, - "setting xattrs on %s failed", real_path); - } - - op_ret = posix_gfid_set (this, real_path, loc, xdata); - if (op_ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_GFID_FAILED, - "setting gfid on %s failed", real_path); - } else { - gfid_set = _gf_true; - } - - op_ret = posix_pstat (this, NULL, real_path, &stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "lstat on %s failed", real_path); - goto out; - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "post-operation lstat on parent of %s failed", - real_path); - goto out; - } - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, - (loc)?loc->inode:NULL, &stbuf, &preparent, - &postparent, xdata_rsp); - - if (op_ret < 0) { - if (entry_created) - sys_rmdir (real_path); - - if (gfid_set) - posix_gfid_unset (this, xdata); - } - - if (xdata_rsp) - dict_unref (xdata_rsp); - - return 0; -} - -int -posix_add_unlink_to_ctx (inode_t *inode, xlator_t *this, char *unlink_path) -{ - uint64_t ctx = GF_UNLINK_FALSE; - int ret = 0; - - if (!unlink_path) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - P_MSG_UNLINK_FAILED, - "Creation of unlink entry failed for gfid: %s", - 
unlink_path); - ret = -1; - goto out; - } - - ctx = GF_UNLINK_TRUE; - ret = posix_inode_ctx_set_unlink_flag (inode, this, ctx); - if (ret < 0) { - goto out; - } - -out: - return ret; -} - -int32_t -posix_move_gfid_to_unlink (xlator_t *this, uuid_t gfid, loc_t *loc) -{ - char *unlink_path = NULL; - char *gfid_path = NULL; - int ret = 0; - struct posix_private *priv_posix = NULL; - - priv_posix = (struct posix_private *) this->private; - - MAKE_HANDLE_GFID_PATH (gfid_path, this, gfid, NULL); - - POSIX_GET_FILE_UNLINK_PATH (priv_posix->base_path, - loc->inode->gfid, unlink_path); - if (!unlink_path) { - ret = -1; - goto out; - } - gf_msg_debug (this->name, 0, - "Moving gfid: %s to unlink_path : %s", - gfid_path, unlink_path); - ret = sys_rename (gfid_path, unlink_path); - if (ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_UNLINK_FAILED, - "Creation of unlink entry failed for gfid: %s", - unlink_path); - goto out; - } - ret = posix_add_unlink_to_ctx (loc->inode, this, unlink_path); - if (ret < 0) - goto out; - -out: - return ret; -} - -int32_t -posix_unlink_gfid_handle_and_entry (xlator_t *this, const char *real_path, - struct iatt *stbuf, int32_t *op_errno, - loc_t *loc, gf_boolean_t get_link_count, - dict_t *rsp_dict) -{ - int32_t ret = 0; - struct iatt prebuf = {0,}; - gf_boolean_t locked = _gf_false; - - /* Unlink the gfid_handle_first */ - if (stbuf && stbuf->ia_nlink == 1) { - - LOCK (&loc->inode->lock); - - if (loc->inode->fd_count == 0) { - UNLOCK (&loc->inode->lock); - ret = posix_handle_unset (this, stbuf->ia_gfid, NULL); - } else { - UNLOCK (&loc->inode->lock); - ret = posix_move_gfid_to_unlink (this, stbuf->ia_gfid, - loc); - } - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_UNLINK_FAILED, "unlink of gfid handle " - "failed for path:%s with gfid %s", - real_path, uuid_utoa (stbuf->ia_gfid)); - } - } - - if (get_link_count) { - LOCK (&loc->inode->lock); - locked = _gf_true; - ret = posix_pstat (this, loc->gfid, real_path, 
&prebuf); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_LSTAT_FAILED, "lstat on %s failed", - real_path); - goto err; - } - } - - /* Unlink the actual file */ - ret = sys_unlink (real_path); - if (ret == -1) { - if (op_errno) - *op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_UNLINK_FAILED, - "unlink of %s failed", real_path); - goto err; - } - - if (locked) { - UNLOCK (&loc->inode->lock); - locked = _gf_false; - } - - ret = dict_set_uint32 (rsp_dict, GET_LINK_COUNT, prebuf.ia_nlink); - if (ret) - gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL, - "failed to set "GET_LINK_COUNT" for %s", real_path); - - return 0; - -err: - if (locked) { - UNLOCK (&loc->inode->lock); - locked = _gf_false; - } - return -1; -} - -static -int32_t posix_set_iatt_in_dict (dict_t *dict, struct iatt *in_stbuf) -{ - int ret = -1; - struct iatt *stbuf = NULL; - int32_t len = sizeof(struct iatt); - - if (!dict || !in_stbuf) - return ret; - - stbuf = GF_CALLOC (1, len, gf_common_mt_char); - if (!stbuf) - return ret; - - memcpy (stbuf, in_stbuf, len); - - ret = dict_set_bin (dict, DHT_IATT_IN_XDATA_KEY, stbuf, len); - if (ret) - GF_FREE (stbuf); - - return ret; -} - -gf_boolean_t -posix_skip_non_linkto_unlink (dict_t *xdata, loc_t *loc, char *key, - const char *linkto_xattr, struct iatt *stbuf, - const char *real_path) -{ - gf_boolean_t skip_unlink = _gf_false; - gf_boolean_t is_dht_linkto_file = _gf_false; - int unlink_if_linkto = 0; - ssize_t xattr_size = -1; - int op_ret = -1; - - op_ret = dict_get_int32 (xdata, key, - &unlink_if_linkto); - - if (!op_ret && unlink_if_linkto) { - - is_dht_linkto_file = IS_DHT_LINKFILE_MODE (stbuf); - if (!is_dht_linkto_file) - return _gf_true; - - LOCK (&loc->inode->lock); - - xattr_size = sys_lgetxattr (real_path, linkto_xattr, NULL, 0); - - if (xattr_size <= 0) - skip_unlink = _gf_true; - - UNLOCK (&loc->inode->lock); - - gf_msg ("posix", GF_LOG_INFO, 0, P_MSG_XATTR_STATUS, - "linkto_xattr status: %"PRIu32" 
for %s", skip_unlink, - real_path); - } - return skip_unlink; - -} - -int32_t -posix_unlink (call_frame_t *frame, xlator_t *this, - loc_t *loc, int xflag, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_path = NULL; - char *par_path = NULL; - int32_t fd = -1; - struct iatt stbuf = {0,}; - struct iatt postbuf = {0,}; - struct posix_private *priv = NULL; - struct iatt preparent = {0,}; - struct iatt postparent = {0,}; - char *pgfid_xattr_key = NULL; - int32_t nlink_samepgfid = 0; - int32_t check_open_fd = 0; - int32_t skip_unlink = 0; - int32_t fdstat_requested = 0; - dict_t *unwind_dict = NULL; - void *uuid = NULL; - char uuid_str[GF_UUID_BUF_SIZE] = {0}; - char gfid_str[GF_UUID_BUF_SIZE] = {0}; - gf_boolean_t get_link_count = _gf_false; - posix_inode_ctx_t *ctx = NULL; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); - if (!real_path || !par_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "pre-operation lstat on parent %s failed", - par_path); - goto out; - } - - priv = this->private; - - op_ret = dict_get_ptr (xdata, TIER_LINKFILE_GFID, &uuid); - - if (!op_ret && gf_uuid_compare (uuid, stbuf.ia_gfid)) { - op_errno = ENOENT; - op_ret = -1; - gf_uuid_unparse (uuid, uuid_str); - gf_uuid_unparse (stbuf.ia_gfid, gfid_str); - gf_msg_debug (this->name, op_errno, "Mismatch in gfid for path " - "%s. Aborting the unlink. 
loc->gfid = %s, " - "stbuf->ia_gfid = %s", real_path, - uuid_str, gfid_str); - goto out; - } - - op_ret = dict_get_int32 (xdata, DHT_SKIP_OPEN_FD_UNLINK, - &check_open_fd); - - if (!op_ret && check_open_fd) { - - LOCK (&loc->inode->lock); - - if (loc->inode->fd_count) { - skip_unlink = 1; - } - - UNLOCK (&loc->inode->lock); - - gf_msg (this->name, GF_LOG_INFO, 0, P_MSG_KEY_STATUS_INFO, - "open-fd-key-status: %"PRIu32" for %s", skip_unlink, - real_path); - - if (skip_unlink) { - op_ret = -1; - op_errno = EBUSY; - goto out; - } - } - /* - * If either of the function return true, skip_unlink. - * If first first function itself return true, - * we don't need to call second function, skip unlink. - */ - skip_unlink = posix_skip_non_linkto_unlink (xdata, loc, - DHT_SKIP_NON_LINKTO_UNLINK, - DHT_LINKTO, &stbuf, - real_path); - skip_unlink = skip_unlink || posix_skip_non_linkto_unlink (xdata, loc, - TIER_SKIP_NON_LINKTO_UNLINK, - TIER_LINKTO, &stbuf, - real_path); - if (skip_unlink) { - op_ret = -1; - op_errno = EBUSY; - goto out; - } - - if (IA_ISREG (loc->inode->ia_type) && - xdata && dict_get (xdata, DHT_IATT_IN_XDATA_KEY)) { - fdstat_requested = 1; - } - - if (fdstat_requested || - (priv->background_unlink && IA_ISREG (loc->inode->ia_type))) { - fd = sys_open (real_path, O_RDONLY, 0); - if (fd == -1) { - op_ret = -1; - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_OPEN_FAILED, - "open of %s failed", real_path); - goto out; - } - } - - if (priv->update_pgfid_nlinks && (stbuf.ia_nlink > 1)) { - MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, - loc->pargfid); - op_ret = posix_inode_ctx_get_all (loc->inode, this, &ctx); - if (op_ret < 0) { - op_errno = ENOMEM; - goto out; - } - pthread_mutex_lock (&ctx->pgfid_lock); - { - UNLINK_MODIFY_PGFID_XATTR (real_path, pgfid_xattr_key, - nlink_samepgfid, 0, op_ret, - this, unlock); - } - unlock: - pthread_mutex_unlock (&ctx->pgfid_lock); - - if (op_ret < 0) { - gf_msg (this->name, 
GF_LOG_WARNING, 0, - P_MSG_XATTR_FAILED, "modification of " - "parent gfid xattr failed (path:%s gfid:%s)", - real_path, uuid_utoa (loc->inode->gfid)); - if (op_errno != ENOATTR) - /* Allow unlink if pgfid xattr is not set. */ - goto out; - } - } - - if (priv->gfid2path && (stbuf.ia_nlink > 1)) { - op_ret = posix_remove_gfid2path_xattr (this, real_path, - loc->pargfid, - loc->name); - if (op_ret < 0) { - /* Allow unlink if pgfid xattr is not set. */ - if (errno != ENOATTR) - goto out; - } - } - - unwind_dict = dict_new (); - if (!unwind_dict) { - op_errno = -ENOMEM; - op_ret = -1; - goto out; - } - - if (xdata && dict_get (xdata, GET_LINK_COUNT)) - get_link_count = _gf_true; - op_ret = posix_unlink_gfid_handle_and_entry (this, real_path, &stbuf, - &op_errno, loc, - get_link_count, - unwind_dict); - if (op_ret == -1) { - goto out; - } - - if (fdstat_requested) { - op_ret = posix_fdstat (this, fd, &postbuf); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_FSTAT_FAILED, "post operation " - "fstat failed on fd=%d", fd); - goto out; - } - op_ret = posix_set_iatt_in_dict (unwind_dict, &postbuf); - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "post-operation lstat on parent %s failed", - par_path); - goto out; - } - - unwind_dict = posix_dict_set_nlink (xdata, unwind_dict, stbuf.ia_nlink); - op_ret = 0; -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, - &preparent, &postparent, unwind_dict); - - if (fd != -1) { - sys_close (fd); - } - - /* unref unwind_dict*/ - if (unwind_dict) { - dict_unref (unwind_dict); - } - - return 0; -} - - -int -posix_rmdir (call_frame_t *frame, xlator_t *this, - loc_t *loc, int flags, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_path = NULL; - char *par_path = NULL; - char *gfid_str = NULL; - struct 
iatt preparent = {0,}; - struct iatt postparent = {0,}; - struct iatt stbuf = {0,}; - struct posix_private *priv = NULL; - char tmp_path[PATH_MAX] = {0,}; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - - /* The Hidden directory should be for housekeeping purpose and it - should not get deleted from inside process */ - if (__is_root_gfid (loc->pargfid) && - (strcmp (loc->name, GF_HIDDEN_PATH) == 0)) { - gf_msg (this->name, GF_LOG_WARNING, EPERM, - P_MSG_RMDIR_NOT_PERMITTED, "rmdir issued on %s, which" - "is not permitted", GF_HIDDEN_PATH); - op_errno = EPERM; - op_ret = -1; - goto out; - } - - priv = this->private; - - MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); - if (!real_path || !par_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "pre-operation lstat on parent %s failed", - par_path); - goto out; - } - - if (flags) { - gfid_str = uuid_utoa (stbuf.ia_gfid); - - op_ret = sys_mkdir (priv->trash_path, 0755); - if (errno != EEXIST && op_ret == -1) { - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_MKDIR_FAILED, - "mkdir of %s failed", priv->trash_path); - } else { - (void) snprintf (tmp_path, sizeof(tmp_path), "%s/%s", - priv->trash_path, gfid_str); - op_ret = sys_rename (real_path, tmp_path); - pthread_cond_signal (&priv->janitor_cond); - } - } else { - op_ret = sys_rmdir (real_path); - } - op_errno = errno; - - if (op_ret == 0) { - if (posix_symlinks_match (this, loc, stbuf.ia_gfid)) - posix_handle_unset (this, stbuf.ia_gfid, NULL); - } - - if (op_errno == EEXIST) - /* Solaris sets errno = EEXIST instead of ENOTEMPTY */ - op_errno = ENOTEMPTY; - - /* No need to log a common error as ENOTEMPTY */ - if (op_ret == -1 && op_errno != 
ENOTEMPTY) { - gf_msg (this->name, GF_LOG_ERROR, op_errno, P_MSG_RMDIR_FAILED, - "rmdir of %s failed", real_path); - } - - if (op_ret == -1) { - if (op_errno == ENOTEMPTY) { - gf_msg_debug (this->name, 0, "%s on %s failed", (flags) - ? "rename" : "rmdir", real_path); - } else { - gf_msg (this->name, GF_LOG_ERROR, op_errno, - P_MSG_DIR_OPERATION_FAILED, "%s on %s failed", - (flags) ? "rename" : "rmdir", real_path); - } - goto out; - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "post-operation lstat on parent of %s failed", - par_path); - goto out; - } - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, - &preparent, &postparent, NULL); - - return 0; -} - - -int -posix_symlink (call_frame_t *frame, xlator_t *this, - const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = 0; - char * par_path = 0; - struct iatt stbuf = { 0, }; - struct posix_private *priv = NULL; - gid_t gid = 0; - struct iatt preparent = {0,}; - struct iatt postparent = {0,}; - char *pgfid_xattr_key = NULL; - int32_t nlink_samepgfid = 0; - gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (linkname, out); - VALIDATE_OR_GOTO (loc, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno, - out); - DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); - - MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); - - gid = frame->root->gid; - if (!real_path || !par_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - SET_FS_ID (frame->root->uid, gid); - - op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); - if (op_ret 
== -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "pre-operation lstat on parent %s failed", - par_path); - goto out; - } - - if (preparent.ia_prot.sgid) { - gid = preparent.ia_gid; - } - - op_ret = sys_symlink (linkname, real_path); - - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_SYMLINK_FAILED, - "symlink of %s --> %s failed", - real_path, linkname); - goto out; - } - - entry_created = _gf_true; - -#ifndef HAVE_SET_FSID - op_ret = sys_lchown (real_path, frame->root->uid, gid); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LCHOWN_FAILED, - "lchown failed on %s", real_path); - goto out; - } -#endif - op_ret = posix_acl_xattr_set (this, real_path, xdata); - if (op_ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_ACL_FAILED, - "setting ACLs on %s failed", real_path); - } - - if (priv->update_pgfid_nlinks) { - MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, - loc->pargfid); - nlink_samepgfid = 1; - SET_PGFID_XATTR (real_path, pgfid_xattr_key, nlink_samepgfid, - XATTR_CREATE, op_ret, this, ignore); - } - - if (priv->gfid2path) { - posix_set_gfid2path_xattr (this, real_path, loc->pargfid, - loc->name); - } - -ignore: - op_ret = posix_entry_create_xattr_set (this, real_path, xdata); - if (op_ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, - "setting xattrs on %s failed ", real_path); - } - - op_ret = posix_gfid_set (this, real_path, loc, xdata); - if (op_ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_GFID_FAILED, - "setting gfid on %s failed", real_path); - } else { - gfid_set = _gf_true; - } - - op_ret = posix_pstat (this, NULL, real_path, &stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "lstat failed on %s", real_path); - goto out; - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); - if (op_ret == -1) 
{ - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "post-operation lstat on parent %s failed", - par_path); - goto out; - } - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, - (loc)?loc->inode:NULL, &stbuf, &preparent, - &postparent, NULL); - - if (op_ret < 0) { - if (entry_created) - sys_unlink (real_path); - - if (gfid_set) - posix_gfid_unset (this, xdata); - } - - return 0; -} - - -int -posix_rename (call_frame_t *frame, xlator_t *this, - loc_t *oldloc, loc_t *newloc, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_oldpath = NULL; - char *real_newpath = NULL; - char *par_oldpath = NULL; - char *par_newpath = NULL; - struct iatt stbuf = {0, }; - struct posix_private *priv = NULL; - char was_present = 1; - struct iatt preoldparent = {0, }; - struct iatt postoldparent = {0, }; - struct iatt prenewparent = {0, }; - struct iatt postnewparent = {0, }; - char olddirid[64]; - char newdirid[64]; - uuid_t victim = {0}; - int was_dir = 0; - int nlink = 0; - char *pgfid_xattr_key = NULL; - int32_t nlink_samepgfid = 0; - char *gfid_path = NULL; - dict_t *unwind_dict = NULL; - gf_boolean_t locked = _gf_false; - gf_boolean_t get_link_count = _gf_false; - posix_inode_ctx_t *ctx_old = NULL; - posix_inode_ctx_t *ctx_new = NULL; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (oldloc, out); - VALIDATE_OR_GOTO (newloc, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_ENTRY_HANDLE (real_oldpath, par_oldpath, this, oldloc, NULL); - if (!real_oldpath || !par_oldpath) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - MAKE_ENTRY_HANDLE (real_newpath, par_newpath, this, newloc, &stbuf); - if (!real_newpath || !par_newpath) { - op_ret = -1; - op_errno = 
ESTALE; - goto out; - } - - unwind_dict = dict_new (); - if (!unwind_dict) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - op_ret = posix_pstat (this, oldloc->pargfid, par_oldpath, &preoldparent); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "pre-operation lstat on parent %s failed", - par_oldpath); - goto out; - } - - op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &prenewparent); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "pre-operation lstat on parent of %s failed", - par_newpath); - goto out; - } - - op_ret = posix_pstat (this, NULL, real_newpath, &stbuf); - if ((op_ret == -1) && (errno == ENOENT)){ - was_present = 0; - } else { - gf_uuid_copy (victim, stbuf.ia_gfid); - if (IA_ISDIR (stbuf.ia_type)) - was_dir = 1; - nlink = stbuf.ia_nlink; - } - - if (was_present && IA_ISDIR(stbuf.ia_type) && !newloc->inode) { - gf_msg (this->name, GF_LOG_WARNING, EEXIST, P_MSG_DIR_FOUND, - "found directory at %s while expecting ENOENT", - real_newpath); - op_ret = -1; - op_errno = EEXIST; - goto out; - } - - if (was_present && IA_ISDIR(stbuf.ia_type) && - gf_uuid_compare (newloc->inode->gfid, stbuf.ia_gfid)) { - gf_msg (this->name, GF_LOG_WARNING, EEXIST, P_MSG_DIR_FOUND, - "found directory %s at %s while renaming %s", - uuid_utoa_r (newloc->inode->gfid, olddirid), - real_newpath, - uuid_utoa_r (stbuf.ia_gfid, newdirid)); - op_ret = -1; - op_errno = EEXIST; - goto out; - } - - op_ret = posix_inode_ctx_get_all (oldloc->inode, this, &ctx_old); - if (op_ret < 0) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - if (newloc->inode) { - op_ret = posix_inode_ctx_get_all (newloc->inode, this, &ctx_new); - if (op_ret < 0) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - } - - if (IA_ISDIR (oldloc->inode->ia_type)) - posix_handle_unset (this, oldloc->inode->gfid, NULL); - - pthread_mutex_lock (&ctx_old->pgfid_lock); - { - if 
(!IA_ISDIR (oldloc->inode->ia_type) - && priv->update_pgfid_nlinks) { - MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, - PGFID_XATTR_KEY_PREFIX, - oldloc->pargfid); - UNLINK_MODIFY_PGFID_XATTR (real_oldpath, - pgfid_xattr_key, - nlink_samepgfid, 0, - op_ret, - this, unlock); - } - - if ((xdata) && (dict_get (xdata, GET_LINK_COUNT)) - && (real_newpath) && (was_present)) { - pthread_mutex_lock (&ctx_new->pgfid_lock); - locked = _gf_true; - get_link_count = _gf_true; - op_ret = posix_pstat (this, newloc->gfid, real_newpath, - &stbuf); - if ((op_ret == -1) && (errno != ENOENT)) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_LSTAT_FAILED, - "lstat on %s failed", real_newpath); - goto unlock; - } - } - - op_ret = sys_rename (real_oldpath, real_newpath); - if (op_ret == -1) { - op_errno = errno; - if (op_errno == ENOTEMPTY) { - gf_msg_debug (this->name, 0, "rename of %s to" - " %s failed: %s", real_oldpath, - real_newpath, - strerror (op_errno)); - } else { - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_RENAME_FAILED, - "rename of %s to %s failed", - real_oldpath, real_newpath); - } - - if (priv->update_pgfid_nlinks - && !IA_ISDIR (oldloc->inode->ia_type)) { - LINK_MODIFY_PGFID_XATTR (real_oldpath, - pgfid_xattr_key, - nlink_samepgfid, 0, - op_ret, - this, unlock); - } - - goto unlock; - } - - if (locked) { - pthread_mutex_unlock (&ctx_new->pgfid_lock); - locked = _gf_false; - } - - if ((get_link_count) && - (dict_set_uint32 (unwind_dict, GET_LINK_COUNT, - stbuf.ia_nlink))) - gf_msg (this->name, GF_LOG_WARNING, 0, - P_MSG_SET_XDATA_FAIL, "failed to set " - GET_LINK_COUNT" for %s", real_newpath); - - if (!IA_ISDIR (oldloc->inode->ia_type) - && priv->update_pgfid_nlinks) { - MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, - PGFID_XATTR_KEY_PREFIX, - newloc->pargfid); - LINK_MODIFY_PGFID_XATTR (real_newpath, - pgfid_xattr_key, - nlink_samepgfid, 0, - op_ret, - this, unlock); - } - - if (!IA_ISDIR (oldloc->inode->ia_type) && priv->gfid2path) { - 
MAKE_HANDLE_ABSPATH (gfid_path, this, - oldloc->inode->gfid); - - posix_remove_gfid2path_xattr (this, gfid_path, - oldloc->pargfid, - oldloc->name); - posix_set_gfid2path_xattr (this, gfid_path, - newloc->pargfid, - newloc->name); - } - } - -unlock: - if (locked) { - pthread_mutex_unlock (&ctx_new->pgfid_lock); - locked = _gf_false; - } - pthread_mutex_unlock (&ctx_old->pgfid_lock); - - if (op_ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_FAILED, - "modification of " - "parent gfid xattr failed (gfid:%s)", - uuid_utoa (oldloc->inode->gfid)); - goto out; - } - - if (was_dir) - posix_handle_unset (this, victim, NULL); - - if (was_present && !was_dir && nlink == 1) - posix_handle_unset (this, victim, NULL); - - if (IA_ISDIR (oldloc->inode->ia_type)) { - posix_handle_soft (this, real_newpath, newloc, - oldloc->inode->gfid, NULL); - } - - op_ret = posix_pstat (this, NULL, real_newpath, &stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "lstat on %s failed", real_newpath); - goto out; - } - - op_ret = posix_pstat (this, oldloc->pargfid, par_oldpath, &postoldparent); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "post-operation lstat on parent %s failed", - par_oldpath); - goto out; - } - - op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &postnewparent); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "post-operation lstat on parent %s failed", - par_newpath); - goto out; - } - - if (was_present) - unwind_dict = posix_dict_set_nlink (xdata, unwind_dict, nlink); - op_ret = 0; -out: - - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, &stbuf, - &preoldparent, &postoldparent, - &prenewparent, &postnewparent, unwind_dict); - - if (unwind_dict) - dict_unref (unwind_dict); - - return 0; -} - - -int -posix_link (call_frame_t *frame, 
xlator_t *this, - loc_t *oldloc, loc_t *newloc, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_oldpath = 0; - char *real_newpath = 0; - char *par_newpath = 0; - struct iatt stbuf = {0, }; - struct posix_private *priv = NULL; - struct iatt preparent = {0,}; - struct iatt postparent = {0,}; - int32_t nlink_samepgfid = 0; - char *pgfid_xattr_key = NULL; - gf_boolean_t entry_created = _gf_false; - posix_inode_ctx_t *ctx = NULL; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (oldloc, out); - VALIDATE_OR_GOTO (newloc, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_INODE_HANDLE (real_oldpath, this, oldloc, &stbuf); - if (!real_oldpath) { - op_errno = errno; - goto out; - } - - MAKE_ENTRY_HANDLE (real_newpath, par_newpath, this, newloc, &stbuf); - if (!real_newpath || !par_newpath) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &preparent); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "lstat failed: %s", par_newpath); - goto out; - } - - - op_ret = sys_link (real_oldpath, real_newpath); - - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LINK_FAILED, - "link %s to %s failed", - real_oldpath, real_newpath); - goto out; - } - - entry_created = _gf_true; - - op_ret = posix_pstat (this, NULL, real_newpath, &stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "lstat on %s failed", real_newpath); - goto out; - } - - op_ret = posix_pstat (this, newloc->pargfid, par_newpath, &postparent); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "lstat failed: 
%s", par_newpath); - goto out; - } - - if (priv->update_pgfid_nlinks) { - MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, - newloc->pargfid); - - op_ret = posix_inode_ctx_get_all (newloc->inode, this, &ctx); - if (op_ret < 0) { - op_errno = ENOMEM; - goto out; - } - - pthread_mutex_lock (&ctx->pgfid_lock); - { - LINK_MODIFY_PGFID_XATTR (real_newpath, pgfid_xattr_key, - nlink_samepgfid, 0, op_ret, - this, unlock); - } - unlock: - pthread_mutex_unlock (&ctx->pgfid_lock); - - if (op_ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, - P_MSG_XATTR_FAILED, "modification of " - "parent gfid xattr failed (path:%s gfid:%s)", - real_newpath, uuid_utoa (newloc->inode->gfid)); - goto out; - } - } - - if (priv->gfid2path) { - if (stbuf.ia_nlink <= MAX_GFID2PATH_LINK_SUP) { - op_ret = posix_set_gfid2path_xattr (this, real_newpath, - newloc->pargfid, - newloc->name); - if (op_ret) { - op_errno = errno; - goto out; - } - } else { - gf_msg (this->name, GF_LOG_INFO, 0, - P_MSG_XATTR_NOTSUP, "Link count exceeded. 
" - "gfid2path xattr not set (path:%s gfid:%s)", - real_newpath, uuid_utoa (newloc->inode->gfid)); - } - } - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, - (oldloc)?oldloc->inode:NULL, &stbuf, &preparent, - &postparent, NULL); - - if (op_ret < 0) { - if (entry_created) - sys_unlink (real_newpath); - } - - return 0; -} - - -int32_t -posix_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, - dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_path = 0; - struct posix_private *priv = NULL; - struct iatt prebuf = {0,}; - struct iatt postbuf = {0,}; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - - MAKE_INODE_HANDLE (real_path, this, loc, &prebuf); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "pre-operation lstat on %s failed", - real_path ? 
real_path : "<null>"); - goto out; - } - - op_ret = sys_truncate (real_path, offset); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED, - "truncate on %s failed", real_path); - goto out; - } - - op_ret = posix_pstat (this, loc->gfid, real_path, &postbuf); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "lstat on %s failed", real_path); - goto out; - } - - op_ret = 0; -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, - &prebuf, &postbuf, NULL); - - return 0; -} - - -int -posix_create (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, mode_t mode, - mode_t umask, fd_t *fd, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - int32_t _fd = -1; - int _flags = 0; - char * real_path = NULL; - char * par_path = NULL; - struct iatt stbuf = {0, }; - struct posix_fd * pfd = NULL; - struct posix_private * priv = NULL; - char was_present = 1; - - gid_t gid = 0; - struct iatt preparent = {0,}; - struct iatt postparent = {0,}; - - int nlink_samepgfid = 0; - char * pgfid_xattr_key = NULL; - gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; - mode_t mode_bit = 0; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - VALIDATE_OR_GOTO (loc, out); - VALIDATE_OR_GOTO (fd, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - GFID_NULL_CHECK_AND_GOTO (frame, this, loc, xdata, op_ret, op_errno, - out); - DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); - - MAKE_ENTRY_HANDLE (real_path, par_path, this, loc, &stbuf); - - gid = frame->root->gid; - - SET_FS_ID (frame->root->uid, gid); - if (!real_path || !par_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &preparent); - if (op_ret == -1) { - op_errno = errno; - 
gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "pre-operation lstat on parent %s failed", - par_path); - goto out; - } - - if (preparent.ia_prot.sgid) { - gid = preparent.ia_gid; - } - - if (!flags) { - _flags = O_CREAT | O_RDWR | O_EXCL; - } - else { - _flags = flags | O_CREAT; - } - - op_ret = posix_pstat (this, NULL, real_path, &stbuf); - if ((op_ret == -1) && (errno == ENOENT)) { - was_present = 0; - } - - if (priv->o_direct) - _flags |= O_DIRECT; - - - mode_bit = (priv->create_mask & mode) | priv->force_create_mode; - mode = override_umask (mode, mode_bit); - _fd = sys_open (real_path, _flags, mode); - - if (_fd == -1) { - op_errno = errno; - op_ret = -1; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_OPEN_FAILED, - "open on %s failed", real_path); - goto out; - } - - if ((_flags & O_CREAT) && (_flags & O_EXCL)) { - entry_created = _gf_true; - } - - - if (was_present) - goto fill_stat; - -#ifndef HAVE_SET_FSID - op_ret = sys_chown (real_path, frame->root->uid, gid); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_CHOWN_FAILED, - "chown on %s failed", real_path); - } -#endif - op_ret = posix_acl_xattr_set (this, real_path, xdata); - if (op_ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_ACL_FAILED, - "setting ACLs on %s failed", real_path); - } - - if (priv->update_pgfid_nlinks) { - MAKE_PGFID_XATTR_KEY (pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, - loc->pargfid); - nlink_samepgfid = 1; - SET_PGFID_XATTR (real_path, pgfid_xattr_key, nlink_samepgfid, - XATTR_CREATE, op_ret, this, ignore); - } - - if (priv->gfid2path) { - posix_set_gfid2path_xattr (this, real_path, loc->pargfid, - loc->name); - } -ignore: - op_ret = posix_entry_create_xattr_set (this, real_path, xdata); - if (op_ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, - "setting xattrs on %s failed ", real_path); - } - -fill_stat: - op_ret = posix_gfid_set (this, real_path, loc, xdata); - if (op_ret) { - gf_msg 
(this->name, GF_LOG_ERROR, 0, P_MSG_GFID_FAILED, - "setting gfid on %s failed", real_path); - } else { - gfid_set = _gf_true; - } - - op_ret = posix_fdstat (this, _fd, &stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, - "fstat on %d failed", _fd); - goto out; - } - - op_ret = posix_pstat (this, loc->pargfid, par_path, &postparent); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "post-operation lstat on parent %s failed", - par_path); - goto out; - } - - op_ret = -1; - pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd); - if (!pfd) { - op_errno = errno; - goto out; - } - - pfd->flags = flags; - pfd->fd = _fd; - - op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd); - if (op_ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - P_MSG_FD_PATH_SETTING_FAILED, - "failed to set the fd context path=%s fd=%p", - real_path, fd); - - LOCK (&priv->lock); - { - priv->nr_files++; - } - UNLOCK (&priv->lock); - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - if ((-1 == op_ret) && (_fd != -1)) { - sys_close (_fd); - } - - STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, - fd, (loc)?loc->inode:NULL, &stbuf, &preparent, - &postparent, xdata); - - if (op_ret < 0) { - if (entry_created) - sys_unlink (real_path); - - if (gfid_set) - posix_gfid_unset (this, xdata); - } - - return 0; -} - -int32_t -posix_open (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_path = NULL; - int32_t _fd = -1; - struct posix_fd *pfd = NULL; - struct posix_private *priv = NULL; - struct iatt stbuf = {0, }; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (this->private, out); - VALIDATE_OR_GOTO (loc, out); - VALIDATE_OR_GOTO (fd, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - if (flags & 
O_CREAT) - DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); - - MAKE_INODE_HANDLE (real_path, this, loc, &stbuf); - if (!real_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - if (IA_ISLNK (stbuf.ia_type)) { - op_ret = -1; - op_errno = ELOOP; - goto out; - } - - op_ret = -1; - SET_FS_ID (frame->root->uid, frame->root->gid); - - if (priv->o_direct) - flags |= O_DIRECT; - - _fd = sys_open (real_path, flags, priv->force_create_mode); - if (_fd == -1) { - op_ret = -1; - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FILE_OP_FAILED, - "open on %s, flags: %d", real_path, flags); - goto out; - } - - pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd); - if (!pfd) { - op_errno = errno; - goto out; - } - - pfd->flags = flags; - pfd->fd = _fd; - - op_ret = fd_ctx_set (fd, this, (uint64_t)(long)pfd); - if (op_ret) - gf_msg (this->name, GF_LOG_WARNING, 0, - P_MSG_FD_PATH_SETTING_FAILED, - "failed to set the fd context path=%s fd=%p", - real_path, fd); - - LOCK (&priv->lock); - { - priv->nr_files++; - } - UNLOCK (&priv->lock); - - op_ret = 0; - -out: - if (op_ret == -1) { - if (_fd != -1) { - sys_close (_fd); - } - } - - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, NULL); - - return 0; -} - -int -posix_readv (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - int _fd = -1; - struct posix_private * priv = NULL; - struct iobuf * iobuf = NULL; - struct iobref * iobref = NULL; - struct iovec vec = {0,}; - struct posix_fd * pfd = NULL; - struct iatt stbuf = {0,}; - int ret = -1; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - VALIDATE_OR_GOTO (this->private, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, 
op_errno, P_MSG_PFD_NULL, - "pfd is NULL from fd=%p", fd); - goto out; - } - - if (!size) { - op_errno = EINVAL; - gf_msg (this->name, GF_LOG_WARNING, EINVAL, - P_MSG_INVALID_ARGUMENT, "size=%"GF_PRI_SIZET, size); - goto out; - } - - iobuf = iobuf_get_page_aligned (this->ctx->iobuf_pool, size, - ALIGN_SIZE); - if (!iobuf) { - op_errno = ENOMEM; - goto out; - } - - _fd = pfd->fd; - op_ret = sys_pread (_fd, iobuf->ptr, size, offset); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_READ_FAILED, "read failed on gfid=%s, " - "fd=%p, offset=%"PRIu64" size=%"GF_PRI_SIZET", " - "buf=%p", uuid_utoa (fd->inode->gfid), fd, - offset, size, iobuf->ptr); - goto out; - } - - LOCK (&priv->lock); - { - priv->read_value += op_ret; - } - UNLOCK (&priv->lock); - - vec.iov_base = iobuf->ptr; - vec.iov_len = op_ret; - - iobref = iobref_new (); - - iobref_add (iobref, iobuf); - - /* - * readv successful, and we need to get the stat of the file - * we read from - */ - - op_ret = posix_fdstat (this, _fd, &stbuf); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, - "fstat failed on fd=%p", fd); - goto out; - } - - /* Hack to notify higher layers of EOF. 
*/ - if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size) - op_errno = ENOENT; - - op_ret = vec.iov_len; -out: - - STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, - &vec, 1, &stbuf, iobref, NULL); - - if (iobref) - iobref_unref (iobref); - if (iobuf) - iobuf_unref (iobuf); - - return 0; -} - - -int32_t -__posix_pwritev (int fd, struct iovec *vector, int count, off_t offset) -{ - int32_t op_ret = 0; - int idx = 0; - int retval = 0; - off_t internal_off = 0; - - if (!vector) - return -EFAULT; - - internal_off = offset; - for (idx = 0; idx < count; idx++) { - retval = sys_pwrite (fd, vector[idx].iov_base, vector[idx].iov_len, - internal_off); - if (retval == -1) { - op_ret = -errno; - goto err; - } - op_ret += retval; - internal_off += retval; - } - -err: - return op_ret; -} - -int32_t -__posix_writev (int fd, struct iovec *vector, int count, off_t startoff, - int odirect) -{ - int32_t op_ret = 0; - int idx = 0; - int max_buf_size = 0; - int retval = 0; - char *buf = NULL; - char *alloc_buf = NULL; - off_t internal_off = 0; - - /* Check for the O_DIRECT flag during open() */ - if (!odirect) - return __posix_pwritev (fd, vector, count, startoff); - - for (idx = 0; idx < count; idx++) { - if (max_buf_size < vector[idx].iov_len) - max_buf_size = vector[idx].iov_len; - } - - alloc_buf = _page_aligned_alloc (max_buf_size, &buf); - if (!alloc_buf) { - op_ret = -errno; - goto err; - } - - internal_off = startoff; - for (idx = 0; idx < count; idx++) { - memcpy (buf, vector[idx].iov_base, vector[idx].iov_len); - - /* not sure whether writev works on O_DIRECT'd fd */ - retval = sys_pwrite (fd, buf, vector[idx].iov_len, internal_off); - if (retval == -1) { - op_ret = -errno; - goto err; - } - - op_ret += retval; - internal_off += retval; - } - -err: - GF_FREE (alloc_buf); - - return op_ret; -} - -dict_t* -_fill_writev_xdata (fd_t *fd, dict_t *xdata, xlator_t *this, int is_append) -{ - dict_t *rsp_xdata = NULL; - int32_t ret = 0; - inode_t *inode = NULL; - - if 
(fd) - inode = fd->inode; - - if (!fd || !fd->inode || gf_uuid_is_null (fd->inode->gfid)) { - gf_msg_callingfn (this->name, GF_LOG_ERROR, EINVAL, - P_MSG_XATTR_FAILED, "fd: %p inode: %p" - "gfid:%s", fd, inode?inode:0, - inode?uuid_utoa(inode->gfid):"N/A"); - goto out; - } - - if (!xdata) - goto out; - - rsp_xdata = dict_new(); - if (!rsp_xdata) - goto out; - - if (dict_get (xdata, GLUSTERFS_OPEN_FD_COUNT)) { - ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_OPEN_FD_COUNT, - fd->inode->fd_count); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, - P_MSG_DICT_SET_FAILED, "%s: Failed to set " - "dictionary value for %s", - uuid_utoa (fd->inode->gfid), - GLUSTERFS_OPEN_FD_COUNT); - } - } - - if (dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) { - ret = dict_set_uint32 (rsp_xdata, GLUSTERFS_WRITE_IS_APPEND, - is_append); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, - P_MSG_DICT_SET_FAILED, "%s: Failed to set " - "dictionary value for %s", - uuid_utoa (fd->inode->gfid), - GLUSTERFS_WRITE_IS_APPEND); - } - } -out: - return rsp_xdata; -} - -int32_t -posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t offset, - uint32_t flags, struct iobref *iobref, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - int _fd = -1; - struct posix_private * priv = NULL; - struct posix_fd * pfd = NULL; - struct iatt preop = {0,}; - struct iatt postop = {0,}; - int ret = -1; - dict_t *rsp_xdata = NULL; - int is_append = 0; - gf_boolean_t locked = _gf_false; - gf_boolean_t write_append = _gf_false; - gf_boolean_t update_atomic = _gf_false; - posix_inode_ctx_t *ctx = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - VALIDATE_OR_GOTO (vector, out); - VALIDATE_OR_GOTO (this->private, out); - - priv = this->private; - - VALIDATE_OR_GOTO (priv, out); - DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); - - ret = posix_fd_ctx_get (fd, this, 
&pfd, &op_errno); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, - "pfd is NULL from fd=%p", fd); - goto out; - } - - _fd = pfd->fd; - - if (xdata) { - if (dict_get (xdata, GLUSTERFS_WRITE_IS_APPEND)) - write_append = _gf_true; - if (dict_get (xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) - update_atomic = _gf_true; - } - - /* The write_is_append check and write must happen - atomically. Else another write can overtake this - write after the check and get written earlier. - - So lock before preop-stat and unlock after write. - */ - - /* - * The update_atomic option is to instruct posix to do prestat, - * write and poststat atomically. This is to prevent any modification to - * ia_size and ia_blocks until poststat and the diff in their values - * between pre and poststat could be of use for some translators (shard - * as of today). - */ - - op_ret = posix_inode_ctx_get_all (fd->inode, this, &ctx); - if (op_ret < 0) { - op_errno = ENOMEM; - goto out; - } - - if (write_append || update_atomic) { - locked = _gf_true; - pthread_mutex_lock (&ctx->write_atomic_lock); - } - - op_ret = posix_fdstat (this, _fd, &preop); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, - "pre-operation fstat failed on fd=%p", fd); - goto out; - } - - if (locked && write_append) { - if (preop.ia_size == offset || (fd->flags & O_APPEND)) - is_append = 1; - } - - op_ret = __posix_writev (_fd, vector, count, offset, - (pfd->flags & O_DIRECT)); - - if (locked && (!update_atomic)) { - pthread_mutex_unlock (&ctx->write_atomic_lock); - locked = _gf_false; - } - - if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; - gf_msg (this->name, GF_LOG_ERROR, op_errno, P_MSG_WRITE_FAILED, - "write failed: offset %"PRIu64 - ",", offset); - goto out; - } - - rsp_xdata = _fill_writev_xdata (fd, xdata, this, is_append); - /* writev successful, we also need to get the stat of - * the file we wrote to - */ - - ret = posix_fdstat (this, 
_fd, &postop); - if (ret == -1) { - op_ret = -1; - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_FSTAT_FAILED, - "post-operation fstat failed on fd=%p", - fd); - goto out; - } - - if (locked) { - pthread_mutex_unlock (&ctx->write_atomic_lock); - locked = _gf_false; - } - - if (flags & (O_SYNC|O_DSYNC)) { - ret = sys_fsync (_fd); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_WRITEV_FAILED, - "fsync() in writev on fd %d failed", - _fd); - op_ret = -1; - op_errno = errno; - goto out; - } - } - - LOCK (&priv->lock); - { - priv->write_value += op_ret; - } - UNLOCK (&priv->lock); - -out: - - if (locked) { - pthread_mutex_unlock (&ctx->write_atomic_lock); - locked = _gf_false; - } - - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, &preop, &postop, - rsp_xdata); - - if (rsp_xdata) - dict_unref (rsp_xdata); - return 0; -} - - -int32_t -posix_statfs (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xdata) -{ - char * real_path = NULL; - int32_t op_ret = -1; - int32_t op_errno = 0; - struct statvfs buf = {0, }; - struct posix_private * priv = NULL; - int shared_by = 1; - int percent = 0; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - VALIDATE_OR_GOTO (this->private, out); - - MAKE_INODE_HANDLE (real_path, this, loc, NULL); - if (!real_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - priv = this->private; - - op_ret = sys_statvfs (real_path, &buf); - - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED, - "statvfs failed on %s", real_path); - goto out; - } - - percent = priv->disk_reserve; - buf.f_bfree = (buf.f_bfree - ((buf.f_blocks * percent) / 100)); - - shared_by = priv->shared_brick_count; - if (shared_by > 1) { - buf.f_blocks /= shared_by; - buf.f_bfree /= shared_by; - buf.f_bavail /= shared_by; - buf.f_files /= shared_by; - buf.f_ffree /= shared_by; - buf.f_favail /= shared_by; - } - - if 
(!priv->export_statfs) { - buf.f_blocks = 0; - buf.f_bfree = 0; - buf.f_bavail = 0; - buf.f_files = 0; - buf.f_ffree = 0; - buf.f_favail = 0; - } - - op_ret = 0; - -out: - STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, &buf, NULL); - return 0; -} - - -int32_t -posix_flush (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - int ret = -1; - struct posix_fd *pfd = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, - "pfd is NULL on fd=%p", fd); - goto out; - } - - op_ret = 0; - -out: - STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, NULL); - - return 0; -} - - -int32_t -posix_release (xlator_t *this, fd_t *fd) -{ - struct posix_private * priv = NULL; - struct posix_fd * pfd = NULL; - int ret = -1; - uint64_t tmp_pfd = 0; - - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - priv = this->private; - - ret = fd_ctx_del (fd, this, &tmp_pfd); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL, - "pfd is NULL from fd=%p", fd); - goto out; - } - pfd = (struct posix_fd *)(long)tmp_pfd; - - if (pfd->dir) { - gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_DIR_NOT_NULL, - "pfd->dir is %p (not NULL) for file fd=%p", - pfd->dir, fd); - } - - pthread_mutex_lock (&priv->janitor_lock); - { - INIT_LIST_HEAD (&pfd->list); - list_add_tail (&pfd->list, &priv->janitor_fds); - pthread_cond_signal (&priv->janitor_cond); - } - pthread_mutex_unlock (&priv->janitor_lock); - - LOCK (&priv->lock); - { - priv->nr_files--; - } - UNLOCK (&priv->lock); - -out: - return 0; -} - - -int -posix_batch_fsync (call_frame_t *frame, xlator_t *this, - fd_t *fd, int datasync, dict_t *xdata) -{ - call_stub_t *stub = NULL; - struct posix_private *priv = NULL; - - priv = this->private; - - stub = fop_fsync_stub (frame, 
default_fsync, fd, datasync, xdata); - if (!stub) { - STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0); - return 0; - } - - pthread_mutex_lock (&priv->fsync_mutex); - { - list_add_tail (&stub->list, &priv->fsyncs); - priv->fsync_queue_count++; - pthread_cond_signal (&priv->fsync_cond); - } - pthread_mutex_unlock (&priv->fsync_mutex); - - return 0; -} - - -int32_t -posix_fsync (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t datasync, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - int _fd = -1; - struct posix_fd * pfd = NULL; - int ret = -1; - struct iatt preop = {0,}; - struct iatt postop = {0,}; - struct posix_private *priv = NULL; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - -#ifdef GF_DARWIN_HOST_OS - /* Always return success in case of fsync in MAC OS X */ - op_ret = 0; - goto out; -#endif - - priv = this->private; - - if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) { - posix_batch_fsync (frame, this, fd, datasync, xdata); - return 0; - } - - ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, - "pfd not found in fd's ctx"); - goto out; - } - - _fd = pfd->fd; - - op_ret = posix_fdstat (this, _fd, &preop); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_WARNING, errno, P_MSG_FSTAT_FAILED, - "pre-operation fstat failed on fd=%p", fd); - goto out; - } - - if (datasync) { - op_ret = sys_fdatasync (_fd); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_FSYNC_FAILED, "fdatasync on fd=%p" - "failed:", fd); - goto out; - } - } else { - op_ret = sys_fsync (_fd); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_FSYNC_FAILED, "fsync on fd=%p " - "failed", fd); - goto out; - } - } - - op_ret = 
posix_fdstat (this, _fd, &postop); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_WARNING, errno, P_MSG_FSTAT_FAILED, - "post-operation fstat failed on fd=%p", fd); - goto out; - } - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, &preop, &postop, - NULL); - - return 0; -} - -static int gf_posix_xattr_enotsup_log; -static int -_handle_setxattr_keyvalue_pair (dict_t *d, char *k, data_t *v, - void *tmp) -{ - posix_xattr_filler_t *filler = NULL; - - filler = tmp; - - return posix_handle_pair (filler->this, filler->real_path, k, v, - filler->flags, filler->stbuf); -} - -#ifdef GF_DARWIN_HOST_OS -static int -map_xattr_flags(int flags) -{ - /* DARWIN has different defines on XATTR_ flags. - There do not seem to be a POSIX standard - Parse any other flags over. - */ - int darwinflags = flags & ~(GF_XATTR_CREATE | GF_XATTR_REPLACE | XATTR_REPLACE); - if (GF_XATTR_CREATE & flags) - darwinflags |= XATTR_CREATE; - if (GF_XATTR_REPLACE & flags) - darwinflags |= XATTR_REPLACE; - return darwinflags; -} -#endif - -int32_t -posix_setxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *dict, int flags, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char * real_path = NULL; - char *acl_xattr = NULL; - struct iatt stbuf = {0}; - int32_t ret = 0; - ssize_t acl_size = 0; - dict_t *xattr = NULL; - posix_xattr_filler_t filler = {0,}; - struct posix_private *priv = NULL; - - DECLARE_OLD_FS_ID_VAR; - SET_FS_ID (frame->root->uid, frame->root->gid); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - VALIDATE_OR_GOTO (dict, out); - - priv = this->private; - DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); - - MAKE_INODE_HANDLE (real_path, this, loc, NULL); - if (!real_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - - posix_pstat(this, loc->gfid, real_path, &stbuf); - - op_ret = -1; - - dict_del 
(dict, GFID_XATTR_KEY); - dict_del (dict, GF_XATTR_VOL_ID_KEY); - /* the io-stats-dump key should not reach disk */ - dict_del (dict, GF_XATTR_IOSTATS_DUMP_KEY); - - filler.real_path = real_path; - filler.this = this; - filler.stbuf = &stbuf; - -#ifdef GF_DARWIN_HOST_OS - filler.flags = map_xattr_flags(flags); -#else - filler.flags = flags; -#endif - op_ret = dict_foreach (dict, _handle_setxattr_keyvalue_pair, - &filler); - if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; - goto out; - } - - xattr = dict_new(); - if (!xattr) - goto out; - -/* - * FIXFIX: Send the stbuf info in the xdata for now - * This is used by DHT to redirect FOPs if the file is being migrated - * Ignore errors for now - */ - if (xdata && dict_get (xdata, DHT_IATT_IN_XDATA_KEY)) { - ret = posix_pstat(this, loc->gfid, real_path, &stbuf); - if (ret) - goto out; - - ret = posix_set_iatt_in_dict (xattr, &stbuf); - } - -/* - * ACL can be set on a file/folder using GF_POSIX_ACL_*_KEY xattrs which - * won't aware of access-control xlator. To update its context correctly, - * POSIX_ACL_*_XATTR stored in xdata which is send in the call_back path. - */ - if (dict_get (dict, GF_POSIX_ACL_ACCESS)) { - - /* - * The size of buffer will be know after calling sys_lgetxattr, - * so first we allocate buffer with large size(~4k), then we - * reduced into required size using GF_REALLO(). 
- */ - acl_xattr = GF_CALLOC (1, ACL_BUFFER_MAX, gf_posix_mt_char); - if (!acl_xattr) - goto out; - - acl_size = sys_lgetxattr (real_path, POSIX_ACL_ACCESS_XATTR, - acl_xattr, ACL_BUFFER_MAX); - - if (acl_size < 0) { - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_XATTR_FAILED, "Posix acl is not set " - "properly at the backend"); - goto out; - } - - /* If acl_size is more than max buffer size, just ignore it */ - if (acl_size >= ACL_BUFFER_MAX) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - P_MSG_BUFFER_OVERFLOW, "size of acl is more" - "than the buffer"); - goto out; - } - - acl_xattr = GF_REALLOC (acl_xattr, acl_size); - if (!acl_xattr) - goto out; - - ret = dict_set_bin (xattr, POSIX_ACL_ACCESS_XATTR, - acl_xattr, acl_size); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, - P_MSG_SET_XDATA_FAIL, "failed to set" - "xdata for acl"); - GF_FREE (acl_xattr); - goto out; - } - } - - if (dict_get (dict, GF_POSIX_ACL_DEFAULT)) { - - acl_xattr = GF_CALLOC (1, ACL_BUFFER_MAX, gf_posix_mt_char); - if (!acl_xattr) - goto out; - - acl_size = sys_lgetxattr (real_path, POSIX_ACL_DEFAULT_XATTR, - acl_xattr, ACL_BUFFER_MAX); - - if (acl_size < 0) { - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_XATTR_FAILED, "Posix acl is not set " - "properly at the backend"); - goto out; - } - - if (acl_size >= ACL_BUFFER_MAX) { - gf_msg (this->name, GF_LOG_WARNING, ENOMEM, - P_MSG_BUFFER_OVERFLOW, "size of acl is more" - "than the buffer"); - goto out; - } - - acl_xattr = GF_REALLOC (acl_xattr, acl_size); - if (!acl_xattr) - goto out; - - ret = dict_set_bin (xattr, POSIX_ACL_DEFAULT_XATTR, - acl_xattr, acl_size); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, - P_MSG_SET_XDATA_FAIL, "failed to set" - "xdata for acl"); - GF_FREE (acl_xattr); - goto out; - } - } -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xattr); - - if (xattr) - dict_unref (xattr); - - return 0; -} - - -int -posix_xattr_get_real_filename (call_frame_t 
*frame, xlator_t *this, loc_t *loc, - const char *key, dict_t *dict, dict_t *xdata) -{ - int ret = -1; - int op_ret = -1; - const char *fname = NULL; - char *real_path = NULL; - char *found = NULL; - DIR *fd = NULL; - struct dirent *entry = NULL; - struct dirent scratch[2] = {{0,},}; - - MAKE_INODE_HANDLE (real_path, this, loc, NULL); - if (!real_path) { - return -ESTALE; - } - if (op_ret == -1) { - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "posix_xattr_get_real_filename (lstat) on %s failed", - real_path); - return -errno; - } - - fd = sys_opendir (real_path); - if (!fd) - return -errno; - - fname = key + strlen (GF_XATTR_GET_REAL_FILENAME_KEY); - - for (;;) { - errno = 0; - entry = sys_readdir (fd, scratch); - if (!entry || errno != 0) - break; - - if (strcasecmp (entry->d_name, fname) == 0) { - found = gf_strdup (entry->d_name); - if (!found) { - (void) sys_closedir (fd); - return -ENOMEM; - } - break; - } - } - - (void) sys_closedir (fd); - - if (!found) - return -ENOENT; - - ret = dict_set_dynstr (dict, (char *)key, found); - if (ret) { - GF_FREE (found); - return -ENOMEM; - } - ret = strlen (found) + 1; - - return ret; -} - -int -posix_get_ancestry_directory (xlator_t *this, inode_t *leaf_inode, - gf_dirent_t *head, char **path, int type, - int32_t *op_errno, dict_t *xdata) -{ - ssize_t handle_size = 0; - struct posix_private *priv = NULL; - inode_t *inode = NULL; - int ret = -1; - char dirpath[PATH_MAX] = {0,}; - - priv = this->private; - - handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length); - - ret = posix_make_ancestryfromgfid (this, dirpath, PATH_MAX + 1, head, - type | POSIX_ANCESTRY_PATH, - leaf_inode->gfid, - handle_size, priv->base_path, - leaf_inode->table, &inode, xdata, - op_errno); - if (ret < 0) - goto out; - - - /* there is already a reference in loc->inode */ - inode_unref (inode); - - if ((type & POSIX_ANCESTRY_PATH) && (path != NULL)) { - if (strcmp (dirpath, "/")) - dirpath[strlen (dirpath) - 1] = '\0'; - - 
*path = gf_strdup (dirpath); - } - -out: - return ret; -} - -int32_t -posix_links_in_same_directory (char *dirpath, int count, inode_t *leaf_inode, - inode_t *parent, struct stat *stbuf, - gf_dirent_t *head, char **path, - int type, dict_t *xdata, int32_t *op_errno) -{ - int op_ret = -1; - gf_dirent_t *gf_entry = NULL; - xlator_t *this = NULL; - struct posix_private *priv = NULL; - DIR *dirp = NULL; - struct dirent *entry = NULL; - struct dirent scratch[2] = {{0,},}; - char temppath[PATH_MAX] = {0,}; - char scr[PATH_MAX * 4] = {0,}; - - this = THIS; - - priv = this->private; - - dirp = sys_opendir (dirpath); - if (!dirp) { - *op_errno = errno; - gf_msg (this->name, GF_LOG_WARNING, errno, P_MSG_OPEN_FAILED, - "could not opendir %s", dirpath); - goto out; - } - - while (count > 0) { - errno = 0; - entry = sys_readdir (dirp, scratch); - if (!entry || errno != 0) - break; - - if (entry->d_ino != stbuf->st_ino) - continue; - - /* Linking an inode here, can cause a race in posix_acl. - Parent inode gets linked here, but before - it reaches posix_acl_readdirp_cbk, create/lookup can - come on a leaf-inode, as parent-inode-ctx not yet updated - in posix_acl_readdirp_cbk, create and lookup can fail - with EACCESS. 
So do the inode linking in the quota xlator - - linked_inode = inode_link (leaf_inode, parent, - entry->d_name, NULL); - - GF_ASSERT (linked_inode == leaf_inode); - inode_unref (linked_inode);*/ - - if (type & POSIX_ANCESTRY_DENTRY) { - loc_t loc = {0, }; - - loc.inode = inode_ref (leaf_inode); - gf_uuid_copy (loc.gfid, leaf_inode->gfid); - - (void) snprintf (temppath, sizeof(temppath), "%s/%s", - dirpath, entry->d_name); - - gf_entry = gf_dirent_for_name (entry->d_name); - gf_entry->inode = inode_ref (leaf_inode); - gf_entry->dict - = posix_xattr_fill (this, temppath, &loc, NULL, - -1, xdata, NULL); - iatt_from_stat (&(gf_entry->d_stat), stbuf); - - list_add_tail (&gf_entry->list, &head->list); - loc_wipe (&loc); - } - - if (type & POSIX_ANCESTRY_PATH) { - (void) snprintf (temppath, sizeof(temppath), "%s/%s", - &dirpath[priv->base_path_length], - entry->d_name); - if (!*path) { - *path = gf_strdup (temppath); - } else { - /* creating a colon separated */ - /* list of hard links */ - (void) snprintf (scr, sizeof(scr), "%s:%s", - *path, temppath); - - GF_FREE (*path); - *path = gf_strdup (scr); - } - if (!*path) { - op_ret = -1; - *op_errno = ENOMEM; - goto out; - } - } - - count--; - } - - op_ret = 0; -out: - if (dirp) { - op_ret = sys_closedir (dirp); - if (op_ret == -1) { - *op_errno = errno; - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_CLOSE_FAILED, "closedir failed"); - } - } - - return op_ret; -} - -int -posix_get_ancestry_non_directory (xlator_t *this, inode_t *leaf_inode, - gf_dirent_t *head, char **path, int type, - int32_t *op_errno, dict_t *xdata) -{ - size_t remaining_size = 0; - int op_ret = -1, pathlen = -1; - ssize_t handle_size = 0; - uuid_t pgfid = {0,}; - int nlink_samepgfid = 0; - struct stat stbuf = {0,}; - char *list = NULL; - int32_t list_offset = 0; - struct posix_private *priv = NULL; - ssize_t size = 0; - inode_t *parent = NULL; - loc_t *loc = NULL; - char *leaf_path = NULL; - char key[4096] = {0,}; - char dirpath[PATH_MAX] = {0,}; - 
char pgfidstr[UUID_CANONICAL_FORM_LEN+1] = {0,}; - - priv = this->private; - - loc = GF_CALLOC (1, sizeof (*loc), gf_posix_mt_char); - if (loc == NULL) { - op_ret = -1; - *op_errno = ENOMEM; - goto out; - } - - gf_uuid_copy (loc->gfid, leaf_inode->gfid); - - MAKE_INODE_HANDLE (leaf_path, this, loc, NULL); - if (!leaf_path) { - GF_FREE (loc); - *op_errno = ESTALE; - goto out; - } - GF_FREE (loc); - - size = sys_llistxattr (leaf_path, NULL, 0); - if (size == -1) { - *op_errno = errno; - if ((errno == ENOTSUP) || (errno == ENOSYS)) { - GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, - this->name, GF_LOG_WARNING, - "Extended attributes not " - "supported (try remounting brick" - " with 'user_xattr' flag)"); - - } else { - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_XATTR_FAILED, "listxattr failed on" - "%s", leaf_path); - - } - - goto out; - } - - if (size == 0) { - op_ret = 0; - goto out; - } - - list = alloca (size); - if (!list) { - *op_errno = errno; - goto out; - } - - size = sys_llistxattr (leaf_path, list, size); - if (size < 0) { - op_ret = -1; - *op_errno = errno; - goto out; - } - remaining_size = size; - list_offset = 0; - - op_ret = sys_lstat (leaf_path, &stbuf); - if (op_ret == -1) { - *op_errno = errno; - gf_msg (this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED, - "lstat failed on %s", leaf_path); - goto out; - } - - while (remaining_size > 0) { - strncpy (key, list + list_offset, sizeof(key)-1); - key[sizeof(key)-1] = '\0'; - if (strncmp (key, PGFID_XATTR_KEY_PREFIX, - strlen (PGFID_XATTR_KEY_PREFIX)) != 0) - goto next; - - op_ret = sys_lgetxattr (leaf_path, key, - &nlink_samepgfid, - sizeof(nlink_samepgfid)); - if (op_ret == -1) { - *op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_XATTR_FAILED, "getxattr failed on " - "%s: key = %s ", leaf_path, key); - goto out; - } - - nlink_samepgfid = ntoh32 (nlink_samepgfid); - - strncpy (pgfidstr, key + strlen(PGFID_XATTR_KEY_PREFIX), - sizeof(pgfidstr)-1); - 
pgfidstr[sizeof(pgfidstr)-1] = '\0'; - gf_uuid_parse (pgfidstr, pgfid); - - handle_size = POSIX_GFID_HANDLE_SIZE(priv->base_path_length); - - /* constructing the absolute real path of parent dir */ - strncpy (dirpath, priv->base_path, sizeof(dirpath)-1); - dirpath[sizeof(dirpath)-1] = '\0'; - pathlen = PATH_MAX + 1 - priv->base_path_length; - - op_ret = posix_make_ancestryfromgfid (this, - dirpath + priv->base_path_length, - pathlen, - head, - type | POSIX_ANCESTRY_PATH, - pgfid, - handle_size, - priv->base_path, - leaf_inode->table, - &parent, xdata, op_errno); - if (op_ret < 0) { - goto next; - } - - dirpath[strlen (dirpath) - 1] = '\0'; - - posix_links_in_same_directory (dirpath, nlink_samepgfid, - leaf_inode, parent, &stbuf, head, - path, type, xdata, op_errno); - - if (parent != NULL) { - inode_unref (parent); - parent = NULL; - } - - next: - remaining_size -= strlen (key) + 1; - list_offset += strlen (key) + 1; - } /* while (remaining_size > 0) */ - - op_ret = 0; - -out: - return op_ret; -} - -int -posix_get_ancestry (xlator_t *this, inode_t *leaf_inode, - gf_dirent_t *head, char **path, int type, int32_t *op_errno, - dict_t *xdata) -{ - int ret = -1; - struct posix_private *priv = NULL; - - priv = this->private; - - if (IA_ISDIR (leaf_inode->ia_type)) { - ret = posix_get_ancestry_directory (this, leaf_inode, - head, path, type, op_errno, - xdata); - } else { - - if (!priv->update_pgfid_nlinks) - goto out; - ret = posix_get_ancestry_non_directory (this, leaf_inode, - head, path, type, - op_errno, xdata); - } - -out: - if (ret && path && *path) { - GF_FREE (*path); - *path = NULL; - } - - return ret; -} - -/** - * posix_getxattr - this function returns a dictionary with all the - * key:value pair present as xattr. used for - * both 'listxattr' and 'getxattr'. 
- */ -int32_t -posix_getxattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name, dict_t *xdata) -{ - struct posix_private *priv = NULL; - int32_t op_ret = -1; - int32_t op_errno = 0; - char *value = NULL; - char *real_path = NULL; - dict_t *dict = NULL; - char *file_contents = NULL; - int ret = -1; - char *path = NULL; - char *rpath = NULL; - ssize_t size = 0; - char *list = NULL; - int32_t list_offset = 0; - size_t remaining_size = 0; - char *host_buf = NULL; - char *keybuffer = NULL; - char *value_buf = NULL; - gf_boolean_t have_val = _gf_false; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - MAKE_INODE_HANDLE (real_path, this, loc, NULL); - - op_ret = -1; - priv = this->private; - - ret = posix_handle_georep_xattrs (frame, name, &op_errno, _gf_true); - if (ret == -1) { - op_ret = -1; - goto out; - } - - if (name && posix_is_gfid2path_xattr (name)) { - op_ret = -1; - op_errno = ENOATTR; - goto out; - } - - if (loc->inode && IA_ISDIR(loc->inode->ia_type) && name && - ZR_FILE_CONTENT_REQUEST(name)) { - ret = posix_get_file_contents (this, loc->gfid, &name[15], - &file_contents); - if (ret < 0) { - op_errno = -ret; - gf_msg (this->name, GF_LOG_ERROR, op_errno, - P_MSG_FILE_FAILED, "getting file contents" - "failed"); - goto out; - } - } - - dict = dict_new (); - if (!dict) { - op_errno = ENOMEM; - goto out; - } - - if (loc->inode && name && GF_POSIX_ACL_REQUEST (name)) { - ret = posix_pacl_get (real_path, name, &value); - if (ret || !value) { - op_errno = errno; - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_ACL_FAILED, "could not get acl (%s) for" - "%s", name, real_path); - op_ret = -1; - goto out; - } - - ret = dict_set_dynstr (dict, (char *)name, value); - if (ret < 0) { - GF_FREE (value); - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_ACL_FAILED, "could not set acl (%s) for" - "%s in dictionary", name, 
real_path); - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - size = ret; - goto done; - } - - if (loc->inode && name && - (strncmp (name, GF_XATTR_GET_REAL_FILENAME_KEY, - strlen (GF_XATTR_GET_REAL_FILENAME_KEY)) == 0)) { - ret = posix_xattr_get_real_filename (frame, this, loc, - name, dict, xdata); - if (ret < 0) { - op_ret = -1; - op_errno = -ret; - if (op_errno == ENOENT) { - gf_msg_debug (this->name, 0, "Failed to get " - "real filename (%s, %s)", - loc->path, name); - } else { - gf_msg (this->name, GF_LOG_WARNING, op_errno, - P_MSG_GETTING_FILENAME_FAILED, - "Failed to get real filename (%s, %s):" - , loc->path, name); - } - goto out; - } - - size = ret; - goto done; - } - - if (loc->inode && name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) { - if (!fd_list_empty (loc->inode)) { - ret = dict_set_uint32 (dict, (char *)name, 1); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, - P_MSG_DICT_SET_FAILED, "Failed to set " - "dictionary value for %s", name); - op_errno = ENOMEM; - goto out; - } - } else { - ret = dict_set_uint32 (dict, (char *)name, 0); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, - P_MSG_DICT_SET_FAILED, "Failed to set " - "dictionary value for %s", name); - op_errno = ENOMEM; - goto out; - } - } - goto done; - } - if (loc->inode && name && (XATTR_IS_PATHINFO (name))) { - if (LOC_HAS_ABSPATH (loc)) - MAKE_REAL_PATH (rpath, this, loc->path); - else - rpath = real_path; - - size = gf_asprintf (&host_buf, "<POSIX(%s):%s:%s>", - priv->base_path, - ((priv->node_uuid_pathinfo && - !gf_uuid_is_null(priv->glusterd_uuid)) - ? 
uuid_utoa (priv->glusterd_uuid) - : priv->hostname), rpath); - if (size < 0) { - op_errno = ENOMEM; - goto out; - } - ret = dict_set_dynstr (dict, (char *)name, host_buf); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, - P_MSG_DICT_SET_FAILED, "could not set value" - " (%s) in dictionary", host_buf); - GF_FREE (host_buf); - op_errno = ENOMEM; - goto out; - } - - goto done; - } - - if (loc->inode && name && - (strcmp (name, GF_XATTR_NODE_UUID_KEY) == 0) - && !gf_uuid_is_null (priv->glusterd_uuid)) { - size = gf_asprintf (&host_buf, "%s", - uuid_utoa (priv->glusterd_uuid)); - if (size == -1) { - op_errno = ENOMEM; - goto out; - } - ret = dict_set_dynstr (dict, GF_XATTR_NODE_UUID_KEY, - host_buf); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - P_MSG_DICT_SET_FAILED, "could not set value" - "(%s) in dictionary", host_buf); - GF_FREE (host_buf); - op_errno = -ret; - goto out; - } - goto done; - } - - if (loc->inode && name && - (strcmp (name, GFID_TO_PATH_KEY) == 0)) { - ret = inode_path (loc->inode, NULL, &path); - if (ret < 0) { - op_errno = -ret; - gf_msg (this->name, GF_LOG_WARNING, op_errno, - P_MSG_INODE_PATH_GET_FAILED, - "%s: could not get " - "inode path", uuid_utoa (loc->inode->gfid)); - goto out; - } - - size = ret; - ret = dict_set_dynstr (dict, GFID_TO_PATH_KEY, path); - if (ret < 0) { - op_errno = ENOMEM; - GF_FREE (path); - goto out; - } - goto done; - } - - if (loc->inode && name && - (strcmp (name, GFID2PATH_VIRT_XATTR_KEY) == 0)) { - if (!priv->gfid2path) { - op_errno = ENOATTR; - op_ret = -1; - goto out; - } - ret = posix_get_gfid2path (this, loc->inode, real_path, - &op_errno, dict); - if (ret < 0) { - op_ret = -1; - goto out; - } - size = ret; - goto done; - } - - if (loc->inode && name - && (strcmp (name, GET_ANCESTRY_PATH_KEY) == 0)) { - int type = POSIX_ANCESTRY_PATH; - - op_ret = posix_get_ancestry (this, loc->inode, NULL, - &path, type, &op_errno, - xdata); - if (op_ret < 0) { - op_ret = -1; - op_errno = ENODATA; - 
goto out; - } - size = op_ret; - op_ret = dict_set_dynstr (dict, GET_ANCESTRY_PATH_KEY, path); - if (op_ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, -op_ret, - P_MSG_GET_KEY_VALUE_FAILED, "could not get " - "value for key (%s)", GET_ANCESTRY_PATH_KEY); - GF_FREE (path); - op_errno = ENOMEM; - goto out; - } - - goto done; - } - - if (loc->inode && name - && (strncmp (name, GLUSTERFS_GET_OBJECT_SIGNATURE, - strlen (GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0)) { - op_ret = posix_get_objectsignature (real_path, dict); - if (op_ret < 0) { - op_errno = -op_ret; - goto out; - } - - goto done; - } - - /* here allocate value_buf of 8192 bytes to avoid one extra getxattr - call,If buffer size is small to hold the xattr result then it will - allocate a new buffer value of required size and call getxattr again - */ - - value_buf = alloca (XATTR_VAL_BUF_SIZE); - if (name) { - char *key = (char *)name; - - keybuffer = key; -#if defined(GF_DARWIN_HOST_OS_DISABLED) - if (priv->xattr_user_namespace == XATTR_STRIP) { - if (strncmp(key, "user.",5) == 0) { - key += 5; - gf_msg_debug (this->name, 0, "getxattr for file %s" - " stripping user key: %s -> %s", - real_path, keybuffer, key); - } - } -#endif - memset (value_buf, '\0', XATTR_VAL_BUF_SIZE); - size = sys_lgetxattr (real_path, key, value_buf, - XATTR_VAL_BUF_SIZE-1); - if (size >= 0) { - have_val = _gf_true; - } else { - if (errno == ERANGE) { - gf_msg (this->name, GF_LOG_INFO, errno, - P_MSG_XATTR_FAILED, - "getxattr failed due to overflow of buffer" - " on %s: %s ", real_path, key); - size = sys_lgetxattr (real_path, key, NULL, 0); - } - if (size == -1) { - op_errno = errno; - if ((op_errno == ENOTSUP) || - (op_errno == ENOSYS)) { - GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, - this->name, - GF_LOG_WARNING, - "Extended attributes not " - "supported (try remounting" - " brick with 'user_xattr' " - "flag)"); - } - if ((op_errno == ENOATTR) || - (op_errno == ENODATA)) { - gf_msg_debug (this->name, 0, - "No such attribute:%s 
for file %s", - key, real_path); - } else { - gf_msg (this->name, GF_LOG_ERROR, - op_errno, P_MSG_XATTR_FAILED, - "getxattr failed on %s: %s ", - real_path, key); - } - goto out; - } - } - value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); - if (!value) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - if (have_val) { - memcpy (value, value_buf, size); - } else { - size = sys_lgetxattr (real_path, key, value, size); - if (size == -1) { - op_ret = -1; - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_XATTR_FAILED, - "getxattr failed on %s: key = %s", - real_path, key); - GF_FREE (value); - goto out; - } - } - value [size] = '\0'; - op_ret = dict_set_dynptr (dict, key, value, size); - if (op_ret < 0) { - op_errno = -op_ret; - gf_msg (this->name, GF_LOG_ERROR, op_errno, - P_MSG_DICT_SET_FAILED, "dict set operation " - "on %s for the key %s failed.", real_path, key); - GF_FREE (value); - goto out; - } - - goto done; - } - - have_val = _gf_false; - memset (value_buf, '\0', XATTR_VAL_BUF_SIZE); - size = sys_llistxattr (real_path, value_buf, XATTR_VAL_BUF_SIZE-1); - if (size > 0) { - have_val = _gf_true; - } else { - if (errno == ERANGE) { - gf_msg (this->name, GF_LOG_INFO, errno, - P_MSG_XATTR_FAILED, - "listxattr failed due to overflow of buffer" - " on %s ", real_path); - size = sys_llistxattr (real_path, NULL, 0); - } - if (size == -1) { - op_errno = errno; - if ((errno == ENOTSUP) || (errno == ENOSYS)) { - GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, - this->name, GF_LOG_WARNING, - "Extended attributes not " - "supported (try remounting" - " brick with 'user_xattr' " - "flag)"); - } else { - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_XATTR_FAILED, - "listxattr failed on %s", real_path); - } - goto out; - } - if (size == 0) - goto done; - } - list = alloca (size); - if (!list) { - op_errno = errno; - goto out; - } - if (have_val) { - memcpy (list, value_buf, size); - } else { - size = sys_llistxattr (real_path, list, 
size); - if (size < 0) { - op_ret = -1; - op_errno = errno; - goto out; - } - } - remaining_size = size; - list_offset = 0; - keybuffer = alloca (XATTR_KEY_BUF_SIZE); - while (remaining_size > 0) { - strncpy (keybuffer, list + list_offset, XATTR_KEY_BUF_SIZE-1); - keybuffer[XATTR_KEY_BUF_SIZE-1] = '\0'; - - ret = posix_handle_georep_xattrs (frame, keybuffer, NULL, - _gf_false); - if (ret == -1) - goto ignore; - - if (posix_is_gfid2path_xattr (keybuffer)) { - goto ignore; - } - - memset (value_buf, '\0', XATTR_VAL_BUF_SIZE); - have_val = _gf_false; - size = sys_lgetxattr (real_path, keybuffer, value_buf, - XATTR_VAL_BUF_SIZE-1); - if (size >= 0) { - have_val = _gf_true; - } else { - if (errno == ERANGE) { - gf_msg (this->name, GF_LOG_INFO, op_errno, - P_MSG_XATTR_FAILED, - "getxattr failed due to overflow of" - " buffer on %s: %s ", real_path, - keybuffer); - size = sys_lgetxattr (real_path, keybuffer, - NULL, 0); - } - if (size == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_XATTR_FAILED, "getxattr failed on" - " %s: key = %s ", real_path, keybuffer); - goto out; - } - } - value = GF_CALLOC (size + 1, sizeof(char), - gf_posix_mt_char); - if (!value) { - op_errno = errno; - goto out; - } - if (have_val) { - memcpy (value, value_buf, size); - } else { - size = sys_lgetxattr (real_path, keybuffer, value, size); - if (size == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_XATTR_FAILED, "getxattr failed on" - " %s: key = %s ", real_path, keybuffer); - GF_FREE (value); - goto out; - } - } - value [size] = '\0'; -#ifdef GF_DARWIN_HOST_OS - /* The protocol expect namespace for now */ - char *newkey = NULL; - gf_add_prefix (XATTR_USER_PREFIX, keybuffer, &newkey); - strncpy (keybuffer, newkey, sizeof(keybuffer)); - GF_FREE (newkey); -#endif - op_ret = dict_set_dynptr (dict, keybuffer, value, size); - if (op_ret < 0) { - op_errno = -op_ret; - gf_msg (this->name, GF_LOG_ERROR, op_errno, - P_MSG_DICT_SET_FAILED, 
"dict set operation " - "on %s for the key %s failed.", real_path, - keybuffer); - GF_FREE (value); - goto out; - } - -ignore: - remaining_size -= strlen (keybuffer) + 1; - list_offset += strlen (keybuffer) + 1; - - } /* while (remaining_size > 0) */ - -done: - op_ret = size; - - if (dict) { - dict_del (dict, GFID_XATTR_KEY); - dict_del (dict, GF_XATTR_VOL_ID_KEY); - } - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, NULL); - - if (dict) { - dict_unref (dict); - } - - return 0; -} - - -int32_t -posix_fgetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *name, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = EINVAL; - struct posix_fd * pfd = NULL; - int _fd = -1; - int32_t list_offset = 0; - ssize_t size = 0; - size_t remaining_size = 0; - char * value = NULL; - char * list = NULL; - dict_t * dict = NULL; - int ret = -1; - char key[4096] = {0,}; - char *value_buf = NULL; - gf_boolean_t have_val = _gf_false; - - DECLARE_OLD_FS_ID_VAR; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - SET_FS_ID (frame->root->uid, frame->root->gid); - - ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); - if (ret < 0) { - op_ret = -1; - gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, - "pfd is NULL from fd=%p", fd); - goto out; - } - - _fd = pfd->fd; - - /* Get the total size */ - dict = dict_new (); - if (!dict) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - if (name && !strcmp (name, GLUSTERFS_OPEN_FD_COUNT)) { - ret = dict_set_uint32 (dict, (char *)name, 1); - if (ret < 0) { - op_ret = -1; - size = -1; - op_errno = ENOMEM; - gf_msg (this->name, GF_LOG_WARNING, 0, - P_MSG_DICT_SET_FAILED, "Failed to set " - "dictionary value for %s", name); - goto out; - } - goto done; - } - - if (name && strncmp (name, GLUSTERFS_GET_OBJECT_SIGNATURE, - strlen (GLUSTERFS_GET_OBJECT_SIGNATURE)) == 0) { - op_ret = posix_fdget_objectsignature (_fd, 
dict); - if (op_ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, 0, - "posix_fdget_objectsignature failed"); - op_errno = -op_ret; - op_ret = -1; - size = -1; - goto out; - } - - goto done; - } - - /* here allocate value_buf of 8192 bytes to avoid one extra getxattr - call,If buffer size is small to hold the xattr result then it will - allocate a new buffer value of required size and call getxattr again - */ - value_buf = alloca (XATTR_VAL_BUF_SIZE); - memset (value_buf, '\0', XATTR_VAL_BUF_SIZE); - - if (name) { - strncpy (key, name, sizeof(key)); -#ifdef GF_DARWIN_HOST_OS - struct posix_private *priv = NULL; - priv = this->private; - if (priv->xattr_user_namespace == XATTR_STRIP) { - char *newkey = NULL; - gf_add_prefix (XATTR_USER_PREFIX, key, &newkey); - strncpy (key, newkey, sizeof(key)); - GF_FREE (newkey); - } -#endif - size = sys_fgetxattr (_fd, key, value_buf, - XATTR_VAL_BUF_SIZE-1); - if (size >= 0) { - have_val = _gf_true; - } else { - if (errno == ERANGE) { - gf_msg (this->name, GF_LOG_INFO, errno, - P_MSG_XATTR_FAILED, - "fgetxattr failed due to overflow of" - "buffer on %s ", key); - size = sys_fgetxattr (_fd, key, NULL, 0); - } - if (size == -1) { - op_errno = errno; - if (errno == ENODATA || errno == ENOATTR) { - gf_msg_debug (this->name, 0, "fgetxattr" - " failed on key %s (%s)", - key, strerror (op_errno)); - } else { - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_XATTR_FAILED, "fgetxattr" - " failed on key %s", key); - } - goto done; - } - } - value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char); - if (!value) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - if (have_val) { - memcpy (value, value_buf, size); - } else { - size = sys_fgetxattr (_fd, key, value, size); - if (size == -1) { - op_ret = -1; - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_XATTR_FAILED, "fgetxattr" - " failed on fd %p for the key %s ", - fd, key); - GF_FREE (value); - goto out; - } - } - - value [size] = '\0'; - op_ret = 
dict_set_dynptr (dict, key, value, size); - if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - P_MSG_DICT_SET_FAILED, "dict set operation " - "on key %s failed", key); - GF_FREE (value); - goto out; - } - - goto done; - } - memset (value_buf, '\0', XATTR_VAL_BUF_SIZE); - size = sys_flistxattr (_fd, value_buf, XATTR_VAL_BUF_SIZE-1); - if (size > 0) { - have_val = _gf_true; - } else { - if (errno == ERANGE) { - gf_msg (this->name, GF_LOG_INFO, errno, - P_MSG_XATTR_FAILED, - "listxattr failed due to overflow of buffer" - " on %p ", fd); - size = sys_flistxattr (_fd, NULL, 0); - } - if (size == -1) { - op_ret = -1; - op_errno = errno; - if ((errno == ENOTSUP) || (errno == ENOSYS)) { - GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log, - this->name, GF_LOG_WARNING, - "Extended attributes not " - "supported (try remounting " - "brick with 'user_xattr' flag)"); - } else { - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_XATTR_FAILED, "listxattr failed " - "on %p:", fd); - } - goto out; - } - if (size == 0) - goto done; - } - list = alloca (size + 1); - if (!list) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - if (have_val) - memcpy (list, value_buf, size); - else - size = sys_flistxattr (_fd, list, size); - - remaining_size = size; - list_offset = 0; - while (remaining_size > 0) { - if(*(list + list_offset) == '\0') - break; - - strncpy (key, list + list_offset, sizeof(key)); - memset (value_buf, '\0', XATTR_VAL_BUF_SIZE); - have_val = _gf_false; - size = sys_fgetxattr (_fd, key, value_buf, - XATTR_VAL_BUF_SIZE-1); - if (size >= 0) { - have_val = _gf_true; - } else { - if (errno == ERANGE) { - gf_msg (this->name, GF_LOG_INFO, errno, - P_MSG_XATTR_FAILED, - "fgetxattr failed due to overflow of buffer" - " on fd %p: for the key %s ", fd, key); - size = sys_fgetxattr (_fd, key, NULL, 0); - } - if (size == -1) { - op_ret = -1; - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_XATTR_FAILED, "fgetxattr 
failed " - "on fd %p for the key %s ", fd, key); - break; - } - } - value = GF_CALLOC (size + 1, sizeof(char), - gf_posix_mt_char); - if (!value) { - op_ret = -1; - op_errno = errno; - goto out; - } - if (have_val) { - memcpy (value, value_buf, size); - } else { - size = sys_fgetxattr (_fd, key, value, size); - if (size == -1) { - op_ret = -1; - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_XATTR_FAILED, "fgetxattr failed o" - "n the fd %p for the key %s ", fd, key); - GF_FREE (value); - break; - } - } - value [size] = '\0'; - - op_ret = dict_set_dynptr (dict, key, value, size); - if (op_ret) { - op_errno = -op_ret; - op_ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - P_MSG_DICT_SET_FAILED, "dict set operation " - "failed on key %s", key); - GF_FREE (value); - goto out; - } - remaining_size -= strlen (key) + 1; - list_offset += strlen (key) + 1; - - } /* while (remaining_size > 0) */ - -done: - op_ret = size; - - if (dict) { - dict_del (dict, GFID_XATTR_KEY); - dict_del (dict, GF_XATTR_VOL_ID_KEY); - } - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, NULL); - - if (dict) - dict_unref (dict); - - return 0; -} - -static int -_handle_fsetxattr_keyvalue_pair (dict_t *d, char *k, data_t *v, - void *tmp) -{ - posix_xattr_filler_t *filler = NULL; - - filler = tmp; - - return posix_fhandle_pair (filler->this, filler->fdnum, k, v, - filler->flags, filler->stbuf); -} - -int32_t -posix_fsetxattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *dict, int flags, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - struct posix_fd *pfd = NULL; - int _fd = -1; - int ret = -1; - struct iatt stbuf = {0,}; - dict_t *xattr = NULL; - posix_xattr_filler_t filler = {0,}; - struct posix_private *priv = NULL; - - DECLARE_OLD_FS_ID_VAR; - SET_FS_ID (frame->root->uid, frame->root->gid); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - 
VALIDATE_OR_GOTO (dict, out); - - priv = this->private; - DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); - - ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, - "pfd is NULL from fd=%p", fd); - goto out; - } - _fd = pfd->fd; - - ret = posix_fdstat (this, pfd->fd, &stbuf); - if (ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, op_errno, - P_MSG_FSTAT_FAILED, "fsetxattr (fstat)" - "failed on fd=%p", fd); - goto out; - } - - dict_del (dict, GFID_XATTR_KEY); - dict_del (dict, GF_XATTR_VOL_ID_KEY); - - filler.fdnum = _fd; - filler.this = this; - filler.stbuf = &stbuf; -#ifdef GF_DARWIN_HOST_OS - filler.flags = map_xattr_flags(flags); -#else - filler.flags = flags; -#endif - op_ret = dict_foreach (dict, _handle_fsetxattr_keyvalue_pair, - &filler); - if (op_ret < 0) { - op_errno = -op_ret; - op_ret = -1; - } - - if (!ret && xdata && dict_get (xdata, GLUSTERFS_DURABLE_OP)) { - op_ret = sys_fsync (_fd); - if (op_ret < 0) { - op_ret = -1; - op_errno = errno; - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_DURABILITY_REQ_NOT_SATISFIED, - "could not satisfy durability request: " - "reason "); - } - } - - if (xdata && dict_get (xdata, DHT_IATT_IN_XDATA_KEY)) { - ret = posix_fdstat (this, pfd->fd, &stbuf); - if (ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, op_errno, - P_MSG_XATTR_FAILED, "fsetxattr (fstat)" - "failed on fd=%p", fd); - goto out; - } - - xattr = dict_new (); - if (!xattr) - goto out; - ret = posix_set_iatt_in_dict (xattr, &stbuf); - } - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xattr); - - if (xattr) - dict_unref (xattr); - - return 0; -} - -int -_posix_remove_xattr (dict_t *dict, char *key, data_t *value, void *data) -{ - int32_t op_ret = 0; - xlator_t *this = NULL; - posix_xattr_filler_t *filler = NULL; - - filler = (posix_xattr_filler_t *) data; - this = 
filler->this; -#ifdef GF_DARWIN_HOST_OS - struct posix_private *priv = NULL; - priv = (struct posix_private *) this->private; - char *newkey = NULL; - if (priv->xattr_user_namespace == XATTR_STRIP) { - gf_remove_prefix (XATTR_USER_PREFIX, key, &newkey); - gf_msg_debug ("remove_xattr", 0, "key %s => %s" , key, - newkey); - key = newkey; - } -#endif - /* Bulk remove xattr is internal fop in gluster. Some of the xattrs may - * have special behavior. Ex: removexattr("posix.system_acl_access"), - * removes more than one xattr on the file that could be present in the - * bulk-removal request. Removexattr of these deleted xattrs will fail - * with either ENODATA/ENOATTR. Since all this fop cares is removal of the - * xattrs in bulk-remove request and if they are already deleted, it can be - * treated as success. - */ - - if (filler->real_path) - op_ret = sys_lremovexattr (filler->real_path, key); - else - op_ret = sys_fremovexattr (filler->fdnum, key); - - if (op_ret == -1) { - if (errno == ENODATA || errno == ENOATTR) - op_ret = 0; - } - - if (op_ret == -1) { - filler->op_errno = errno; - if (errno != ENOATTR && errno != ENODATA && errno != EPERM) { - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_XATTR_FAILED, "removexattr failed on " - "file/dir %s with gfid: %s (for %s)", - filler->real_path?filler->real_path:"", - uuid_utoa (filler->inode->gfid), key); - } - } -#ifdef GF_DARWIN_HOST_OS - GF_FREE(newkey); -#endif - return op_ret; -} - -int -posix_common_removexattr (call_frame_t *frame, loc_t *loc, fd_t *fd, - const char *name, dict_t *xdata, int *op_errno, - dict_t **xdata_rsp) -{ - gf_boolean_t bulk_removexattr = _gf_false; - gf_boolean_t disallow = _gf_false; - char *real_path = NULL; - struct posix_fd *pfd = NULL; - int op_ret = 0; - struct iatt stbuf = {0}; - int ret = 0; - int _fd = -1; - xlator_t *this = frame->this; - inode_t *inode = NULL; - posix_xattr_filler_t filler = {0}; - - DECLARE_OLD_FS_ID_VAR; - - SET_FS_ID (frame->root->uid, frame->root->gid); - - 
if (loc) { - MAKE_INODE_HANDLE (real_path, this, loc, NULL); - if (!real_path) { - op_ret = -1; - *op_errno = ESTALE; - goto out; - } - inode = loc->inode; - } else { - op_ret = posix_fd_ctx_get (fd, this, &pfd, op_errno); - if (op_ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, *op_errno, - P_MSG_PFD_NULL, "pfd is NULL from fd=%p", fd); - goto out; - } - _fd = pfd->fd; - inode = fd->inode; - } - - if (posix_is_gfid2path_xattr (name)) { - op_ret = -1; - *op_errno = ENOATTR; - goto out; - } - - if (gf_get_index_by_elem (disallow_removexattrs, (char *)name) >= 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED, - "Remove xattr called on %s for file/dir %s with gfid: " - "%s", name, real_path?real_path:"", - uuid_utoa(inode->gfid)); - op_ret = -1; - *op_errno = EPERM; - goto out; - } else if (posix_is_bulk_removexattr ((char *)name, xdata)) { - bulk_removexattr = _gf_true; - (void) dict_has_key_from_array (xdata, disallow_removexattrs, - &disallow); - if (disallow) { - gf_msg (this->name, GF_LOG_WARNING, 0, - P_MSG_XATTR_NOT_REMOVED, - "Bulk removexattr has keys that shouldn't be " - "removed for file/dir %s with gfid: %s", - real_path?real_path:"", uuid_utoa(inode->gfid)); - op_ret = -1; - *op_errno = EPERM; - goto out; - } - } - - if (bulk_removexattr) { - filler.real_path = real_path; - filler.this = this; - filler.fdnum = _fd; - filler.inode = inode; - op_ret = dict_foreach (xdata, _posix_remove_xattr, &filler); - if (op_ret) { - *op_errno = filler.op_errno; - goto out; - } - } else { - if (loc) - op_ret = sys_lremovexattr (real_path, name); - else - op_ret = sys_fremovexattr (_fd, name); - if (op_ret == -1) { - *op_errno = errno; - if (*op_errno != ENOATTR && *op_errno != ENODATA && - *op_errno != EPERM) { - gf_msg (this->name, GF_LOG_ERROR, *op_errno, - P_MSG_XATTR_FAILED, - "removexattr on %s with gfid %s " - "(for %s)", real_path, - uuid_utoa (inode->gfid), name); - } - goto out; - } - } - - if (xdata && dict_get (xdata, 
DHT_IATT_IN_XDATA_KEY)) { - if (loc) - ret = posix_pstat(this, loc->gfid, real_path, &stbuf); - else - ret = posix_fdstat (this, _fd, &stbuf); - if (ret) - goto out; - *xdata_rsp = dict_new(); - if (!*xdata_rsp) - goto out; - - ret = posix_set_iatt_in_dict (*xdata_rsp, &stbuf); - } - op_ret = 0; -out: - SET_TO_OLD_FS_ID (); - return op_ret; -} - -int32_t -posix_removexattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, const char *name, dict_t *xdata) -{ - int op_ret = 0; - int op_errno = 0; - dict_t *xdata_rsp = NULL; - - op_ret = posix_common_removexattr (frame, loc, NULL, name, xdata, - &op_errno, &xdata_rsp); - STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata_rsp); - - if (xdata_rsp) - dict_unref (xdata_rsp); - - return 0; -} - -int32_t -posix_fremovexattr (call_frame_t *frame, xlator_t *this, - fd_t *fd, const char *name, dict_t *xdata) -{ - int32_t op_ret = 0; - int32_t op_errno = 0; - dict_t *xdata_rsp = NULL; - - op_ret = posix_common_removexattr (frame, NULL, fd, name, xdata, - &op_errno, &xdata_rsp); - STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata_rsp); - - if (xdata_rsp) - dict_unref (xdata_rsp); - - return 0; -} - - -int32_t -posix_fsyncdir (call_frame_t *frame, xlator_t *this, - fd_t *fd, int datasync, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - int ret = -1; - struct posix_fd *pfd = NULL; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, - "pfd is NULL, fd=%p", fd); - goto out; - } - - op_ret = 0; - -out: - STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, NULL); - - return 0; -} - - -void -posix_print_xattr (dict_t *this, - char *key, - data_t *value, - void *data) -{ - gf_msg_debug ("posix", 0, - "(key/val) = (%s/%d)", key, data_to_int32 (value)); -} - - -/** - * add_array - add two arrays of 32-bit 
numbers (stored in network byte order) - * dest = dest + src - * @count: number of 32-bit numbers - * FIXME: handle overflow - */ - -static void -__add_array (int32_t *dest, int32_t *src, int count) -{ - int i = 0; - int32_t destval = 0; - for (i = 0; i < count; i++) { - destval = ntoh32 (dest[i]); - dest[i] = hton32 (destval + ntoh32 (src[i])); - } -} - -static void -__add_long_array (int64_t *dest, int64_t *src, int count) -{ - int i = 0; - for (i = 0; i < count; i++) { - dest[i] = hton64 (ntoh64 (dest[i]) + ntoh64 (src[i])); - } -} - - -/* functions: - __add_array_with_default - __add_long_array_with_default - - xattrop type: - GF_XATTROP_ADD_ARRAY_WITH_DEFAULT - GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT - - These operations are similar to 'GF_XATTROP_ADD_ARRAY', - except that it adds a default value if xattr is missing - or its value is zero on disk. - - One use-case of this operation is in inode-quota. - When a new directory is created, its default dir_count - should be set to 1. So when a xattrop performed setting - inode-xattrs, it should account initial dir_count - 1 if the xattrs are not present - - Here is the usage of this operation - - value required in xdata for each key - struct array { - int32_t newvalue_1; - int32_t newvalue_2; - ... - int32_t newvalue_n; - int32_t default_1; - int32_t default_2; - ... - int32_t default_n; - }; - - or - - struct array { - int32_t value_1; - int32_t value_2; - ... - int32_t value_n; - } data[2]; - fill data[0] with new value to add - fill data[1] with default value - - xattrop GF_XATTROP_ADD_ARRAY_WITH_DEFAULT - for i from 1 to n - { - if (xattr (dest_i) is zero or not set in the disk) - dest_i = newvalue_i + default_i - else - dest_i = dest_i + newvalue_i - } - - value in xdata after xattrop is successful - struct array { - int32_t dest_1; - int32_t dest_2; - ... 
- int32_t dest_n; - }; -*/ -static void -__add_array_with_default (int32_t *dest, int32_t *src, int count) -{ - int i = 0; - int32_t destval = 0; - - for (i = 0; i < count; i++) { - destval = ntoh32 (dest[i]); - if (destval == 0) - dest[i] = hton32 (ntoh32 (src[i]) + - ntoh32 (src[count + i])); - else - dest[i] = hton32 (destval + ntoh32 (src[i])); - } -} - -static void -__add_long_array_with_default (int64_t *dest, int64_t *src, int count) -{ - int i = 0; - int64_t destval = 0; - - for (i = 0; i < count; i++) { - destval = ntoh64 (dest[i]); - if (destval == 0) - dest[i] = hton64 (ntoh64 (src[i]) + - ntoh64 (src[i + count])); - else - dest[i] = hton64 (destval + ntoh64 (src[i])); - } -} - -static int -_posix_handle_xattr_keyvalue_pair (dict_t *d, char *k, data_t *v, - void *tmp) -{ - int size = 0; - int count = 0; - int op_ret = 0; - int op_errno = 0; - gf_xattrop_flags_t optype = 0; - char *array = NULL; - char *dst_data = NULL; - inode_t *inode = NULL; - xlator_t *this = NULL; - posix_xattr_filler_t *filler = NULL; - posix_inode_ctx_t *ctx = NULL; - - filler = tmp; - - optype = (gf_xattrop_flags_t)(filler->flags); - this = filler->this; - inode = filler->inode; - count = v->len; - if (optype == GF_XATTROP_ADD_ARRAY_WITH_DEFAULT || - optype == GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT) - count = count / 2; - - array = GF_CALLOC (count, sizeof (char), gf_posix_mt_char); - -#ifdef GF_DARWIN_HOST_OS - struct posix_private *priv = NULL; - priv = this->private; - if (priv->xattr_user_namespace == XATTR_STRIP) { - if (strncmp(k, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) == 0) { - k += XATTR_USER_PREFIX_LEN; - } - } -#endif - op_ret = posix_inode_ctx_get_all (inode, this, &ctx); - if (op_ret < 0) { - op_errno = ENOMEM; - goto out; - } - - pthread_mutex_lock (&ctx->xattrop_lock); - { - if (filler->real_path) { - size = sys_lgetxattr (filler->real_path, k, - (char *)array, count); - } else { - size = sys_fgetxattr (filler->fdnum, k, (char *)array, - count); - } - - op_errno = 
errno; - if ((size == -1) && (op_errno != ENODATA) && - (op_errno != ENOATTR)) { - if (op_errno == ENOTSUP) { - GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, - this->name, GF_LOG_WARNING, - "Extended attributes not " - "supported by filesystem"); - } else if (op_errno != ENOENT || - !posix_special_xattr (marker_xattrs, - k)) { - if (filler->real_path) - gf_msg (this->name, fop_log_level (GF_FOP_XATTROP, - op_errno), op_errno, P_MSG_XATTR_FAILED, - "getxattr failed on %s while " - "doing xattrop: Key:%s ", - filler->real_path, k); - else - gf_msg (this->name, GF_LOG_ERROR, - op_errno, P_MSG_XATTR_FAILED, - "fgetxattr failed on gfid=%s " - "while doing xattrop: " - "Key:%s (%s)", - uuid_utoa (filler->inode->gfid), - k, strerror (op_errno)); - } - - op_ret = -1; - goto unlock; - } - - if (size == -1 && optype == GF_XATTROP_GET_AND_SET) { - GF_FREE (array); - array = NULL; - } - - /* We only write back the xattr if it has been really modified - * (i.e. v->data is not all 0's). Otherwise we return its value - * but we don't update anything. - * - * If the xattr does not exist, a value of all 0's is returned - * without creating it. */ - size = count; - if (optype != GF_XATTROP_GET_AND_SET && - mem_0filled(v->data, v->len) == 0) - goto unlock; - - dst_data = array; - switch (optype) { - - case GF_XATTROP_ADD_ARRAY: - __add_array ((int32_t *) array, - (int32_t *) v->data, count / 4); - break; - - case GF_XATTROP_ADD_ARRAY64: - __add_long_array ((int64_t *) array, - (int64_t *) v->data, - count / 8); - break; - - case GF_XATTROP_ADD_ARRAY_WITH_DEFAULT: - __add_array_with_default ((int32_t *) array, - (int32_t *) v->data, - count / 4); - break; - - case GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT: - __add_long_array_with_default ((int64_t *) array, - (int64_t *) v->data, - count / 8); - break; - - case GF_XATTROP_GET_AND_SET: - dst_data = v->data; - break; - - default: - gf_msg (this->name, GF_LOG_ERROR, EINVAL, - P_MSG_UNKNOWN_OP, "Unknown xattrop type (%d)" - " on %s. 
Please send a bug report to " - "gluster-devel@gluster.org", optype, - filler->real_path); - op_ret = -1; - op_errno = EINVAL; - goto unlock; - } - - if (filler->real_path) { - size = sys_lsetxattr (filler->real_path, k, - dst_data, count, 0); - } else { - size = sys_fsetxattr (filler->fdnum, k, - (char *)dst_data, - count, 0); - } - op_errno = errno; - } -unlock: - pthread_mutex_unlock (&ctx->xattrop_lock); - - if (op_ret == -1) - goto out; - - if (size == -1) { - if (filler->real_path) - gf_msg (this->name, GF_LOG_ERROR, op_errno, - P_MSG_XATTR_FAILED, "setxattr failed on %s " - "while doing xattrop: key=%s", - filler->real_path, k); - else - gf_msg (this->name, GF_LOG_ERROR, op_errno, - P_MSG_XATTR_FAILED, - "fsetxattr failed on gfid=%s while doing " - "xattrop: key=%s (%s)", - uuid_utoa (filler->inode->gfid), - k, strerror (op_errno)); - op_ret = -1; - goto out; - } else if (array) { - op_ret = dict_set_bin (filler->xattr, k, array, count); - if (op_ret) { - if (filler->real_path) - gf_msg_debug (this->name, 0, - "dict_set_bin failed (path=%s): " - "key=%s (%s)", filler->real_path, - k, strerror (-size)); - else - gf_msg_debug (this->name, 0, - "dict_set_bin failed (gfid=%s): " - "key=%s (%s)", - uuid_utoa (filler->inode->gfid), - k, strerror (-size)); - - op_ret = -1; - op_errno = EINVAL; - GF_FREE (array); - goto out; - } - array = NULL; - } - -out: - if (op_ret < 0) - filler->op_errno = op_errno; - - if (array) - GF_FREE (array); - - return op_ret; -} - -/** - * xattrop - xattr operations - for internal use by GlusterFS - * @optype: ADD_ARRAY: - * dict should contain: - * "key" ==> array of 32-bit numbers - */ - -int -do_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, - gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) -{ - int op_ret = 0; - int op_errno = 0; - int _fd = -1; - char *real_path = NULL; - struct posix_fd *pfd = NULL; - inode_t *inode = NULL; - posix_xattr_filler_t filler = {0,}; - dict_t *xattr_rsp = NULL; - dict_t 
*xdata_rsp = NULL; - struct iatt stbuf = {0}; - struct posix_private *priv = NULL; - - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (xattr, out); - VALIDATE_OR_GOTO (this, out); - - priv = this->private; - DISK_SPACE_CHECK_AND_GOTO (frame, priv, xdata, op_ret, op_errno, out); - - if (fd) { - op_ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); - if (op_ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, - fop_log_level(GF_FOP_FXATTROP, op_errno), - P_MSG_PFD_GET_FAILED, "failed to get pfd from" - " fd=%p", fd); - goto out; - } - _fd = pfd->fd; - } - - if (loc && !gf_uuid_is_null (loc->gfid)) { - MAKE_INODE_HANDLE (real_path, this, loc, NULL); - if (!real_path) { - op_ret = -1; - op_errno = ESTALE; - goto out; - } - } - - if (real_path) { - inode = loc->inode; - } else if (fd) { - inode = fd->inode; - } - - xattr_rsp = dict_new (); - if (xattr_rsp == NULL) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - filler.this = this; - filler.fdnum = _fd; - filler.real_path = real_path; - filler.flags = (int)optype; - filler.inode = inode; - filler.xattr = xattr_rsp; - - op_ret = dict_foreach (xattr, _posix_handle_xattr_keyvalue_pair, - &filler); - op_errno = filler.op_errno; - if (op_ret < 0) - goto out; - - if (!xdata) - goto out; - - if (fd) { - op_ret = posix_fdstat (this, _fd, &stbuf); - } else { - op_ret = posix_pstat (this, inode->gfid, real_path, - &stbuf); - } - if (op_ret < 0) { - op_errno = errno; - goto out; - } - xdata_rsp = posix_xattr_fill (this, real_path, loc, fd, _fd, - xdata, &stbuf); - if (!xdata_rsp) { - op_ret = -1; - op_errno = ENOMEM; - } -out: - - STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr_rsp, - xdata_rsp); - - if (xattr_rsp) - dict_unref (xattr_rsp); - - if (xdata_rsp) - dict_unref (xdata_rsp); - return 0; -} - - -int -posix_xattrop (call_frame_t *frame, xlator_t *this, - loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) -{ - do_xattrop (frame, this, loc, NULL, optype, xattr, xdata); - return 0; -} 
- - -int -posix_fxattrop (call_frame_t *frame, xlator_t *this, - fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) -{ - do_xattrop (frame, this, NULL, fd, optype, xattr, xdata); - return 0; -} - -int -posix_access (call_frame_t *frame, xlator_t *this, - loc_t *loc, int32_t mask, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - char *real_path = NULL; - - DECLARE_OLD_FS_ID_VAR; - SET_FS_ID (frame->root->uid, frame->root->gid); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (loc, out); - - MAKE_INODE_HANDLE (real_path, this, loc, NULL); - if (!real_path) { - op_ret = -1; - op_errno = errno; - goto out; - } - - op_ret = sys_access (real_path, mask & 07); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_ACCESS_FAILED, - "access failed on %s", real_path); - goto out; - } - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, NULL); - return 0; -} - - -int32_t -posix_ftruncate (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset, dict_t *xdata) -{ - int32_t op_ret = -1; - int32_t op_errno = 0; - int _fd = -1; - struct iatt preop = {0,}; - struct iatt postop = {0,}; - struct posix_fd *pfd = NULL; - int ret = -1; - struct posix_private *priv = NULL; - - DECLARE_OLD_FS_ID_VAR; - SET_FS_ID (frame->root->uid, frame->root->gid); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, - "pfd is NULL, fd=%p", fd); - goto out; - } - - _fd = pfd->fd; - - op_ret = posix_fdstat (this, _fd, &preop); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, - "pre-operation fstat failed on fd=%p", fd); - goto out; - } - - op_ret = 
sys_ftruncate (_fd, offset); - - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED, - "ftruncate failed on fd=%p (%"PRId64"", fd, offset); - goto out; - } - - op_ret = posix_fdstat (this, _fd, &postop); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, - "post-operation fstat failed on fd=%p", fd); - goto out; - } - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, &preop, - &postop, NULL); - - return 0; -} - - -int32_t -posix_fstat (call_frame_t *frame, xlator_t *this, - fd_t *fd, dict_t *xdata) -{ - int _fd = -1; - int32_t op_ret = -1; - int32_t op_errno = 0; - struct iatt buf = {0,}; - struct posix_fd *pfd = NULL; - dict_t *xattr_rsp = NULL; - int ret = -1; - struct posix_private *priv = NULL; - - DECLARE_OLD_FS_ID_VAR; - SET_FS_ID (frame->root->uid, frame->root->gid); - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - priv = this->private; - VALIDATE_OR_GOTO (priv, out); - - ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, - "pfd is NULL, fd=%p", fd); - goto out; - } - - _fd = pfd->fd; - - op_ret = posix_fdstat (this, _fd, &buf); - if (op_ret == -1) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, - "fstat failed on fd=%p", fd); - goto out; - } - - if (xdata) - xattr_rsp = posix_xattr_fill (this, NULL, NULL, fd, _fd, xdata, - &buf); - - op_ret = 0; - -out: - SET_TO_OLD_FS_ID (); - - STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, &buf, xattr_rsp); - if (xattr_rsp) - dict_unref (xattr_rsp); - return 0; -} - -int32_t -posix_lease (call_frame_t *frame, xlator_t *this, - loc_t *loc, struct gf_lease *lease, dict_t *xdata) -{ - struct gf_lease nullease = {0, }; - - gf_msg (this->name, GF_LOG_CRITICAL, EINVAL, P_MSG_LEASE_DISABLED, - 
"\"features/leases\" translator is not loaded. You need" - "to use it for proper functioning of your application"); - - STACK_UNWIND_STRICT (lease, frame, -1, ENOSYS, &nullease, NULL); - return 0; -} - -static int gf_posix_lk_log; - -int32_t -posix_lk (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata) -{ - struct gf_flock nullock = {0, }; - - GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, - "\"features/locks\" translator is " - "not loaded. You need to use it for proper " - "functioning of your application."); - - STACK_UNWIND_STRICT (lk, frame, -1, ENOSYS, &nullock, NULL); - return 0; -} - -int32_t -posix_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, - struct gf_flock *lock, dict_t *xdata) -{ - GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, - "\"features/locks\" translator is " - "not loaded. You need to use it for proper " - "functioning of your application."); - - STACK_UNWIND_STRICT (inodelk, frame, -1, ENOSYS, NULL); - return 0; -} - -int32_t -posix_finodelk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, int32_t cmd, - struct gf_flock *lock, dict_t *xdata) -{ - GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, - "\"features/locks\" translator is " - "not loaded. You need to use it for proper " - "functioning of your application."); - - STACK_UNWIND_STRICT (finodelk, frame, -1, ENOSYS, NULL); - return 0; -} - - -int32_t -posix_entrylk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type, dict_t *xdata) -{ - GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, - "\"features/locks\" translator is " - "not loaded. 
You need to use it for proper " - "functioning of your application."); - - STACK_UNWIND_STRICT (entrylk, frame, -1, ENOSYS, NULL); - return 0; -} - -int32_t -posix_fentrylk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, const char *basename, - entrylk_cmd cmd, entrylk_type type, dict_t *xdata) -{ - GF_LOG_OCCASIONALLY (gf_posix_lk_log, this->name, GF_LOG_CRITICAL, - "\"features/locks\" translator is " - "not loaded. You need to use it for proper " - "functioning of your application."); - - STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOSYS, NULL); - return 0; -} - - -int -posix_fill_readdir (fd_t *fd, DIR *dir, off_t off, size_t size, - gf_dirent_t *entries, xlator_t *this, int32_t skip_dirs) -{ - off_t in_case = -1; - off_t last_off = 0; - size_t filled = 0; - int count = 0; - int32_t this_size = -1; - gf_dirent_t *this_entry = NULL; - struct posix_fd *pfd = NULL; - struct stat stbuf = {0,}; - char *hpath = NULL; - int len = 0; - int ret = 0; - int op_errno = 0; - struct dirent *entry = NULL; - struct dirent scratch[2] = {{0,},}; - - ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, - "pfd is NULL, fd=%p", fd); - count = -1; - errno = op_errno; - goto out; - } - - if (skip_dirs) { - len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0); - if (len <= 0) { - errno = ESTALE; - count = -1; - goto out; - } - hpath = alloca (len + 256); /* NAME_MAX */ - - if (posix_handle_path (this, fd->inode->gfid, NULL, hpath, - len) <= 0) { - errno = ESTALE; - count = -1; - goto out; - } - - len = strlen (hpath); - hpath[len] = '/'; - } - - if (!off) { - rewinddir (dir); - } else { - seekdir (dir, off); -#ifndef GF_LINUX_HOST_OS - if ((u_long)telldir(dir) != off && off != pfd->dir_eof) { - gf_msg (THIS->name, GF_LOG_ERROR, EINVAL, - P_MSG_DIR_OPERATION_FAILED, - "seekdir(0x%llx) failed on dir=%p: " - "Invalid argument (offset reused from " - "another DIR * structure?)", 
off, dir); - errno = EINVAL; - count = -1; - goto out; - } -#endif /* GF_LINUX_HOST_OS */ - } - - while (filled <= size) { - in_case = (u_long)telldir (dir); - - if (in_case == -1) { - gf_msg (THIS->name, GF_LOG_ERROR, errno, - P_MSG_DIR_OPERATION_FAILED, - "telldir failed on dir=%p", dir); - goto out; - } - - errno = 0; - - entry = sys_readdir (dir, scratch); - - if (!entry || errno != 0) { - if (errno == EBADF) { - gf_msg (THIS->name, GF_LOG_WARNING, errno, - P_MSG_DIR_OPERATION_FAILED, - "readdir failed on dir=%p", - dir); - goto out; - } - break; - } - -#ifdef __NetBSD__ - /* - * NetBSD with UFS1 backend uses backing files for - * extended attributes. They can be found in a - * .attribute file located at the root of the filesystem - * We hide it to glusterfs clients, since chaos will occur - * when the cluster/dht xlator decides to distribute - * exended attribute backing file across storage servers. - */ - if (__is_root_gfid (fd->inode->gfid) == 0 - && (!strcmp(entry->d_name, ".attribute"))) - continue; -#endif /* __NetBSD__ */ - - if (__is_root_gfid (fd->inode->gfid) - && (!strcmp (GF_HIDDEN_PATH, entry->d_name))) { - continue; - } - - if (skip_dirs) { - if (DT_ISDIR (entry->d_type)) { - continue; - } else if (hpath) { - strcpy (&hpath[len+1], entry->d_name); - ret = sys_lstat (hpath, &stbuf); - if (!ret && S_ISDIR (stbuf.st_mode)) - continue; - } - } - - this_size = max (sizeof (gf_dirent_t), - sizeof (gfs3_dirplist)) - + strlen (entry->d_name) + 1; - - if (this_size + filled > size) { - seekdir (dir, in_case); -#ifndef GF_LINUX_HOST_OS - if ((u_long)telldir(dir) != in_case && - in_case != pfd->dir_eof) { - gf_msg (THIS->name, GF_LOG_ERROR, EINVAL, - P_MSG_DIR_OPERATION_FAILED, - "seekdir(0x%llx) failed on dir=%p: " - "Invalid argument (offset reused from " - "another DIR * structure?)", - in_case, dir); - errno = EINVAL; - count = -1; - goto out; - } -#endif /* GF_LINUX_HOST_OS */ - break; - } - - this_entry = gf_dirent_for_name (entry->d_name); - - if 
(!this_entry) { - gf_msg (THIS->name, GF_LOG_ERROR, errno, - P_MSG_GF_DIRENT_CREATE_FAILED, - "could not create " - "gf_dirent for entry %s", entry->d_name); - goto out; - } - /* - * we store the offset of next entry here, which is - * probably not intended, but code using syncop_readdir() - * (glfs-heal.c, afr-self-heald.c, pump.c) rely on it - * for directory read resumption. - */ - last_off = (u_long)telldir(dir); - this_entry->d_off = last_off; - this_entry->d_ino = entry->d_ino; - this_entry->d_type = entry->d_type; - - list_add_tail (&this_entry->list, &entries->list); - - filled += this_size; - count ++; - } - - if ((!sys_readdir (dir, scratch) && (errno == 0))) { - /* Indicate EOF */ - errno = ENOENT; - /* Remember EOF offset for later detection */ - pfd->dir_eof = (u_long)last_off; - } -out: - return count; -} - -dict_t * -posix_entry_xattr_fill (xlator_t *this, inode_t *inode, - fd_t *fd, char *entry_path, dict_t *dict, - struct iatt *stbuf) -{ - loc_t tmp_loc = {0,}; - - /* if we don't send the 'loc', open-fd-count be a problem. 
*/ - tmp_loc.inode = inode; - - return posix_xattr_fill (this, entry_path, &tmp_loc, NULL, -1, dict, - stbuf); - -} - - -#ifdef _DIRENT_HAVE_D_TYPE -static int -posix_d_type_from_ia_type (ia_type_t type) -{ - switch (type) { - case IA_IFDIR: return DT_DIR; - case IA_IFCHR: return DT_CHR; - case IA_IFBLK: return DT_BLK; - case IA_IFIFO: return DT_FIFO; - case IA_IFLNK: return DT_LNK; - case IA_IFREG: return DT_REG; - case IA_IFSOCK: return DT_SOCK; - default: return DT_UNKNOWN; - } -} -#endif - - -int -posix_readdirp_fill (xlator_t *this, fd_t *fd, gf_dirent_t *entries, dict_t *dict) -{ - gf_dirent_t *entry = NULL; - inode_table_t *itable = NULL; - inode_t *inode = NULL; - char *hpath = NULL; - int len = 0; - struct iatt stbuf = {0, }; - uuid_t gfid; - int ret = -1; - - if (list_empty(&entries->list)) - return 0; - - itable = fd->inode->table; - - len = posix_handle_path (this, fd->inode->gfid, NULL, NULL, 0); - if (len <= 0) - return -1; - hpath = alloca (len + 256); /* NAME_MAX */ - if (posix_handle_path (this, fd->inode->gfid, NULL, hpath, len) <= 0) - return -1; - len = strlen (hpath); - hpath[len] = '/'; - - list_for_each_entry (entry, &entries->list, list) { - memset (gfid, 0, 16); - inode = inode_grep (fd->inode->table, fd->inode, - entry->d_name); - if (inode) - gf_uuid_copy (gfid, inode->gfid); - - strcpy (&hpath[len+1], entry->d_name); - - ret = posix_pstat (this, gfid, hpath, &stbuf); - - if (ret == -1) { - if (inode) - inode_unref (inode); - continue; - } - - if (!inode) - inode = inode_find (itable, stbuf.ia_gfid); - - if (!inode) - inode = inode_new (itable); - - entry->inode = inode; - - if (dict) { - entry->dict = - posix_entry_xattr_fill (this, entry->inode, - fd, hpath, - dict, &stbuf); - } - - entry->d_stat = stbuf; - if (stbuf.ia_ino) - entry->d_ino = stbuf.ia_ino; - -#ifdef _DIRENT_HAVE_D_TYPE - if (entry->d_type == DT_UNKNOWN && !IA_ISINVAL(stbuf.ia_type)) { - /* The platform supports d_type but the underlying - filesystem doesn't. 
We set d_type to the correct - value from ia_type */ - entry->d_type = - posix_d_type_from_ia_type (stbuf.ia_type); - } -#endif - - inode = NULL; - } - - return 0; -} - - -int32_t -posix_do_readdir (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off, int whichop, dict_t *dict) -{ - struct posix_fd *pfd = NULL; - DIR *dir = NULL; - int ret = -1; - int count = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; - gf_dirent_t entries; - int32_t skip_dirs = 0; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - INIT_LIST_HEAD (&entries.list); - - ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, op_errno, P_MSG_PFD_NULL, - "pfd is NULL, fd=%p", fd); - goto out; - } - - dir = pfd->dir; - - if (!dir) { - gf_msg (this->name, GF_LOG_WARNING, EINVAL, P_MSG_PFD_NULL, - "dir is NULL for fd=%p", fd); - op_errno = EINVAL; - goto out; - } - - /* When READDIR_FILTER option is set to on, we can filter out - * directory's entry from the entry->list. - */ - ret = dict_get_int32 (dict, GF_READDIR_SKIP_DIRS, &skip_dirs); - - LOCK (&fd->lock); - { - /* posix_fill_readdir performs multiple separate individual - readdir() calls to fill up the buffer. - - In case of NFS where the same anonymous FD is shared between - different applications, reading a common directory can - result in the anonymous fd getting re-used unsafely between - the two readdir requests (in two different io-threads). - - It would also help, in the future, to replace the loop - around readdir() with a single large getdents() call. 
- */ - count = posix_fill_readdir (fd, dir, off, size, &entries, this, - skip_dirs); - } - UNLOCK (&fd->lock); - - /* pick ENOENT to indicate EOF */ - op_errno = errno; - op_ret = count; - - if (whichop != GF_FOP_READDIRP) - goto out; - - posix_readdirp_fill (this, fd, &entries, dict); - -out: - STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, NULL); - - gf_dirent_free (&entries); - - return 0; -} - - -int32_t -posix_readdir (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off, dict_t *xdata) -{ - posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR, xdata); - return 0; -} - - -int32_t -posix_readdirp (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t off, dict_t *dict) -{ - gf_dirent_t entries; - int32_t op_ret = -1, op_errno = 0; - gf_dirent_t *entry = NULL; - - - if ((dict != NULL) && (dict_get (dict, GET_ANCESTRY_DENTRY_KEY))) { - INIT_LIST_HEAD (&entries.list); - - op_ret = posix_get_ancestry (this, fd->inode, &entries, NULL, - POSIX_ANCESTRY_DENTRY, - &op_errno, dict); - if (op_ret >= 0) { - op_ret = 0; - - list_for_each_entry (entry, &entries.list, list) { - op_ret++; - } - } - - STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, - NULL); - - gf_dirent_free (&entries); - return 0; - } - - posix_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP, dict); - return 0; -} - -int32_t -posix_priv (xlator_t *this) -{ - struct posix_private *priv = NULL; - char key_prefix[GF_DUMP_MAX_BUF_LEN]; - - (void) snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", - this->type, this->name); - gf_proc_dump_add_section(key_prefix); - - if (!this) - return 0; - - priv = this->private; - - if (!priv) - return 0; - - gf_proc_dump_write("base_path","%s", priv->base_path); - gf_proc_dump_write("base_path_length","%d", priv->base_path_length); - gf_proc_dump_write("max_read","%d", priv->read_value); - gf_proc_dump_write("max_write","%d", priv->write_value); - gf_proc_dump_write("nr_files","%ld", 
priv->nr_files); - - return 0; -} - -int32_t -posix_inode (xlator_t *this) -{ - return 0; -} - - -int32_t -posix_rchecksum (call_frame_t *frame, xlator_t *this, - fd_t *fd, off_t offset, int32_t len, dict_t *xdata) -{ - char *alloc_buf = NULL; - char *buf = NULL; - int _fd = -1; - struct posix_fd *pfd = NULL; - int op_ret = -1; - int op_errno = 0; - int ret = 0; - ssize_t bytes_read = 0; - int32_t weak_checksum = 0; - int32_t zerofillcheck = 0; - unsigned char strong_checksum[MD5_DIGEST_LENGTH] = {0}; - struct posix_private *priv = NULL; - dict_t *rsp_xdata = NULL; - gf_boolean_t buf_has_zeroes = _gf_false; - - VALIDATE_OR_GOTO (frame, out); - VALIDATE_OR_GOTO (this, out); - VALIDATE_OR_GOTO (fd, out); - - priv = this->private; - memset (strong_checksum, 0, MD5_DIGEST_LENGTH); - - alloc_buf = _page_aligned_alloc (len, &buf); - if (!alloc_buf) { - op_errno = ENOMEM; - goto out; - } - - rsp_xdata = dict_new(); - if (!rsp_xdata) { - op_errno = ENOMEM; - goto out; - } - - ret = posix_fd_ctx_get (fd, this, &pfd, &op_errno); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, -ret, P_MSG_PFD_NULL, - "pfd is NULL, fd=%p", fd); - goto out; - } - - _fd = pfd->fd; - - LOCK (&fd->lock); - { - if (priv->aio_capable && priv->aio_init_done) - __posix_fd_set_odirect (fd, pfd, 0, offset, len); - - bytes_read = sys_pread (_fd, buf, len, offset); - if (bytes_read < 0) { - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_PREAD_FAILED, - "pread of %d bytes returned %zd", len, - bytes_read); - - op_errno = errno; - } - - } - UNLOCK (&fd->lock); - - if (bytes_read < 0) - goto out; - - if (xdata && dict_get_int32 (xdata, "check-zero-filled", - &zerofillcheck) == 0) { - buf_has_zeroes = (mem_0filled (buf, bytes_read)) ? 
_gf_false : - _gf_true; - ret = dict_set_uint32 (rsp_xdata, "buf-has-zeroes", - buf_has_zeroes); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, -ret, - P_MSG_DICT_SET_FAILED, "%s: Failed to set " - "dictionary value for key: %s", - uuid_utoa (fd->inode->gfid), "buf-has-zeroes"); - op_errno = -ret; - goto out; - } - } - weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf, (size_t) ret); - gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) bytes_read, - (unsigned char *) strong_checksum); - - op_ret = 0; -out: - STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, - weak_checksum, strong_checksum, rsp_xdata); - if (rsp_xdata) - dict_unref (rsp_xdata); - GF_FREE (alloc_buf); - - return 0; -} - - -/** - * notify - when parent sends PARENT_UP, send CHILD_UP event from here - */ -int32_t -notify (xlator_t *this, - int32_t event, - void *data, - ...) -{ - struct posix_private *priv = NULL; - - priv = this->private; - switch (event) - { - case GF_EVENT_PARENT_UP: - { - /* Tell the parent that posix xlator is up */ - default_notify (this, GF_EVENT_CHILD_UP, data); - } - break; - case GF_EVENT_CLEANUP: - if (priv->health_check) { - priv->health_check_active = _gf_false; - pthread_cancel (priv->health_check); - priv->health_check = 0; - } - if (priv->disk_space_check) { - priv->disk_space_check_active = _gf_false; - pthread_cancel (priv->disk_space_check); - priv->disk_space_check = 0; - } - if (priv->janitor) { - (void) gf_thread_cleanup_xint (priv->janitor); - priv->janitor = 0; - } - if (priv->fsyncer) { - (void) gf_thread_cleanup_xint (priv->fsyncer); - priv->fsyncer = 0; - } - if (priv->mount_lock) { - (void) sys_closedir (priv->mount_lock); - priv->mount_lock = NULL; - } - - break; - default: - /* */ - break; - } - return 0; -} - -int32_t -mem_acct_init (xlator_t *this) -{ - int ret = -1; - - if (!this) - return ret; - - ret = xlator_mem_acct_init (this, gf_posix_mt_end + 1); - - if (ret != 0) { - return ret; - } - - return ret; -} - -static int 
-posix_set_owner (xlator_t *this, uid_t uid, gid_t gid) -{ - struct posix_private *priv = NULL; - int ret = -1; - struct stat st = {0,}; - - priv = this->private; - - ret = sys_lstat (priv->base_path, &st); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_DIR_OPERATION_FAILED, "Failed to stat " - "brick path %s", - priv->base_path); - return ret; - } - - if ((uid == -1 || st.st_uid == uid) && - (gid == -1 || st.st_gid == gid)) - return 0; - - ret = sys_chown (priv->base_path, uid, gid); - if (ret) - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_DIR_OPERATION_FAILED, "Failed to set uid/gid for" - " brick path %s", priv->base_path); - - return ret; -} -static int -set_gfid2path_separator (struct posix_private *priv, const char *str) -{ - int str_len = 0; - - str_len = strlen(str); - if (str_len > 0 && str_len < 8) { - strcpy (priv->gfid2path_sep, str); - return 0; - } - - return -1; -} - -static int -set_batch_fsync_mode (struct posix_private *priv, const char *str) -{ - if (strcmp (str, "none") == 0) - priv->batch_fsync_mode = BATCH_NONE; - else if (strcmp (str, "syncfs") == 0) - priv->batch_fsync_mode = BATCH_SYNCFS; - else if (strcmp (str, "syncfs-single-fsync") == 0) - priv->batch_fsync_mode = BATCH_SYNCFS_SINGLE_FSYNC; - else if (strcmp (str, "syncfs-reverse-fsync") == 0) - priv->batch_fsync_mode = BATCH_SYNCFS_REVERSE_FSYNC; - else if (strcmp (str, "reverse-fsync") == 0) - priv->batch_fsync_mode = BATCH_REVERSE_FSYNC; - else - return -1; - - return 0; -} - -#ifdef GF_DARWIN_HOST_OS -static int -set_xattr_user_namespace_mode (struct posix_private *priv, const char *str) -{ - if (strcmp (str, "none") == 0) - priv->xattr_user_namespace = XATTR_NONE; - else if (strcmp (str, "strip") == 0) - priv->xattr_user_namespace = XATTR_STRIP; - else if (strcmp (str, "append") == 0) - priv->xattr_user_namespace = XATTR_APPEND; - else if (strcmp (str, "both") == 0) - priv->xattr_user_namespace = XATTR_BOTH; - else - return -1; - return 0; -} -#endif - -int 
-reconfigure (xlator_t *this, dict_t *options) -{ - int ret = -1; - struct posix_private *priv = NULL; - int32_t uid = -1; - int32_t gid = -1; - char *batch_fsync_mode_str = NULL; - char *gfid2path_sep = NULL; - int32_t force_create_mode = -1; - int32_t force_directory_mode = -1; - int32_t create_mask = -1; - int32_t create_directory_mask = -1; - - priv = this->private; - GF_OPTION_RECONF ("brick-uid", uid, options, int32, out); - GF_OPTION_RECONF ("brick-gid", gid, options, int32, out); - if (uid != -1 || gid != -1) - posix_set_owner (this, uid, gid); - - GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec, - options, uint32, out); - - GF_OPTION_RECONF ("batch-fsync-mode", batch_fsync_mode_str, - options, str, out); - - if (set_batch_fsync_mode (priv, batch_fsync_mode_str) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, - "Unknown mode string: %s", batch_fsync_mode_str); - goto out; - } - - GF_OPTION_RECONF ("gfid2path-separator", gfid2path_sep, options, - str, out); - if (set_gfid2path_separator (priv, gfid2path_sep) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, - "Length of separator exceeds 7: %s", gfid2path_sep); - goto out; - } - -#ifdef GF_DARWIN_HOST_OS - - char *xattr_user_namespace_mode_str = NULL; - - GF_OPTION_RECONF ("xattr-user-namespace-mode", xattr_user_namespace_mode_str, - options, str, out); - - if (set_xattr_user_namespace_mode (priv, xattr_user_namespace_mode_str) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_UNKNOWN_ARGUMENT, - "Unknown xattr user namespace mode string: %s", - xattr_user_namespace_mode_str); - goto out; - } - -#endif - - GF_OPTION_RECONF ("linux-aio", priv->aio_configured, - options, bool, out); - - if (priv->aio_configured) - posix_aio_on (this); - else - posix_aio_off (this); - - GF_OPTION_RECONF ("update-link-count-parent", priv->update_pgfid_nlinks, - options, bool, out); - - GF_OPTION_RECONF ("gfid2path", priv->gfid2path, - options, bool, out); - 
- GF_OPTION_RECONF ("node-uuid-pathinfo", priv->node_uuid_pathinfo, - options, bool, out); - - if (priv->node_uuid_pathinfo && - (gf_uuid_is_null (priv->glusterd_uuid))) { - gf_msg (this->name, GF_LOG_INFO, 0, P_MSG_UUID_NULL, - "glusterd uuid is NULL, pathinfo xattr would" - " fallback to <hostname>:<export>"); - } - - GF_OPTION_RECONF ("reserve", priv->disk_reserve, - options, uint32, out); - if (priv->disk_reserve) - posix_spawn_disk_space_check_thread (this); - - GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval, - options, uint32, out); - GF_OPTION_RECONF ("health-check-timeout", priv->health_check_timeout, - options, uint32, out); - posix_spawn_health_check_thread (this); - - GF_OPTION_RECONF ("shared-brick-count", priv->shared_brick_count, - options, int32, out); - GF_OPTION_RECONF ("force-create-mode", force_create_mode, - options, int32, out); - priv->force_create_mode = force_create_mode; - - GF_OPTION_RECONF ("force-directory-mode", force_directory_mode, - options, int32, out); - priv->force_directory_mode = force_directory_mode; - - GF_OPTION_RECONF ("create-mask", create_mask, - options, int32, out); - priv->create_mask = create_mask; - - GF_OPTION_RECONF ("create-directory-mask", create_directory_mask, - options, int32, out); - priv->create_directory_mask = create_directory_mask; - - ret = 0; -out: - return ret; -} - -int32_t -posix_delete_unlink_entry (const char *fpath, const struct stat *sb, - int typeflag, struct FTW *ftwbuf) { - - int ret = 0; - - if (!fpath) - goto out; - - switch (typeflag) { - case FTW_SL: - case FTW_NS: - case FTW_F: - case FTW_SLN: - ret = sys_unlink(fpath); - break; - case FTW_D: - case FTW_DP: - case FTW_DNR: - if (ftwbuf->level != 0) { - ret = sys_rmdir(fpath); - } - break; - default: - break; - } - if (ret) { - gf_msg ("posix_delete_unlink_entry", GF_LOG_WARNING, errno, - P_MSG_HANDLE_CREATE, - "Deletion of entries %s failed" - "Please delete it manually", - fpath); - } -out: - return 0; -} - 
-int32_t -posix_delete_unlink (const char *unlink_path) { - - int ret = -1; - int flags = 0; - - flags |= (FTW_DEPTH | FTW_PHYS); - - ret = nftw(unlink_path, posix_delete_unlink_entry, 2, flags); - if (ret) { - gf_msg ("posix_delete_unlink", GF_LOG_ERROR, 0, - P_MSG_HANDLE_CREATE, - "Deleting files from %s failed", - unlink_path); - } - return ret; -} - -int32_t -posix_create_unlink_dir (xlator_t *this) { - - struct posix_private *priv = NULL; - struct stat stbuf; - int ret = -1; - uuid_t gfid = {0}; - char gfid_str[64] = {0}; - char unlink_path[PATH_MAX] = {0,}; - char landfill_path[PATH_MAX] = {0,}; - - priv = this->private; - - (void) snprintf (unlink_path, sizeof(unlink_path), "%s/%s", - priv->base_path, GF_UNLINK_PATH); - - gf_uuid_generate (gfid); - uuid_utoa_r (gfid, gfid_str); - - (void) snprintf (landfill_path, sizeof(landfill_path), "%s/%s/%s", - priv->base_path, GF_LANDFILL_PATH, gfid_str); - - ret = sys_stat (unlink_path, &stbuf); - switch (ret) { - case -1: - if (errno != ENOENT) { - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_HANDLE_CREATE, - "Checking for %s failed", - unlink_path); - return -1; - } - break; - case 0: - if (!S_ISDIR (stbuf.st_mode)) { - gf_msg (this->name, GF_LOG_ERROR, 0, - P_MSG_HANDLE_CREATE, - "Not a directory: %s", - unlink_path); - return -1; - } - ret = posix_delete_unlink (unlink_path); - return 0; - default: - break; - } - ret = sys_mkdir (unlink_path, 0600); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_HANDLE_CREATE, - "Creating directory %s failed", - unlink_path); - return -1; - } - - return 0; -} - -/** - * init - - */ -int -init (xlator_t *this) -{ - struct posix_private *_private = NULL; - data_t *dir_data = NULL; - data_t *tmp_data = NULL; - struct stat buf = {0,}; - gf_boolean_t tmp_bool = 0; - int ret = 0; - int op_ret = -1; - int op_errno = 0; - ssize_t size = -1; - uuid_t old_uuid = {0,}; - uuid_t dict_uuid = {0,}; - uuid_t gfid = {0,}; - uuid_t rootgfid = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}; 
- char *guuid = NULL; - int32_t uid = -1; - int32_t gid = -1; - char *batch_fsync_mode_str; - char *gfid2path_sep = NULL; - int force_create = -1; - int force_directory = -1; - int create_mask = -1; - int create_directory_mask = -1; - - dir_data = dict_get (this->options, "directory"); - - if (this->children) { - gf_msg (this->name, GF_LOG_CRITICAL, 0, P_MSG_SUBVOLUME_ERROR, - "FATAL: storage/posix cannot have subvolumes"); - ret = -1; - goto out; - } - - if (!this->parents) { - gf_msg (this->name, GF_LOG_WARNING, 0, P_MSG_VOLUME_DANGLING, - "Volume is dangling. Please check the volume file."); - } - - if (!dir_data) { - gf_msg (this->name, GF_LOG_CRITICAL, 0, - P_MSG_EXPORT_DIR_MISSING, - "Export directory not specified in volume file."); - ret = -1; - goto out; - } - - umask (000); // umask `masking' is done at the client side - - /* Check whether the specified directory exists, if not log it. */ - op_ret = sys_stat (dir_data->data, &buf); - if ((op_ret != 0) || !S_ISDIR (buf.st_mode)) { - gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_DIR_OPERATION_FAILED, - "Directory '%s' doesn't exist, exiting.", - dir_data->data); - ret = -1; - goto out; - } - - _private = GF_CALLOC (1, sizeof (*_private), - gf_posix_mt_posix_private); - if (!_private) { - ret = -1; - goto out; - } - - _private->base_path = gf_strdup (dir_data->data); - _private->base_path_length = strlen (_private->base_path); - - ret = dict_get_str (this->options, "hostname", &_private->hostname); - if (ret) { - _private->hostname = GF_CALLOC (256, sizeof (char), - gf_common_mt_char); - if (!_private->hostname) { - goto out; - } - ret = gethostname (_private->hostname, 256); - if (ret < 0) { - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_HOSTNAME_MISSING, - "could not find hostname "); - } - } - - /* Check for Extended attribute support, if not present, log it */ - op_ret = sys_lsetxattr (dir_data->data, - "trusted.glusterfs.test", "working", 8, 0); - if (op_ret != -1) { - sys_lremovexattr 
(dir_data->data, "trusted.glusterfs.test"); - } else { - tmp_data = dict_get (this->options, - "mandate-attribute"); - if (tmp_data) { - if (gf_string2boolean (tmp_data->data, - &tmp_bool) == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - P_MSG_INVALID_OPTION, - "wrong option provided for key " - "\"mandate-attribute\""); - ret = -1; - goto out; - } - if (!tmp_bool) { - gf_msg (this->name, GF_LOG_WARNING, 0, - P_MSG_XATTR_NOTSUP, - "Extended attribute not supported, " - "starting as per option"); - } else { - gf_msg (this->name, GF_LOG_CRITICAL, 0, - P_MSG_XATTR_NOTSUP, - "Extended attribute not supported, " - "exiting."); - ret = -1; - goto out; - } - } else { - gf_msg (this->name, GF_LOG_CRITICAL, 0, - P_MSG_XATTR_NOTSUP, - "Extended attribute not supported, exiting."); - ret = -1; - goto out; - } - } - - tmp_data = dict_get (this->options, "volume-id"); - if (tmp_data) { - op_ret = gf_uuid_parse (tmp_data->data, dict_uuid); - if (op_ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - P_MSG_INVALID_VOLUME_ID, - "wrong volume-id (%s) set" - " in volume file", tmp_data->data); - ret = -1; - goto out; - } - size = sys_lgetxattr (dir_data->data, - "trusted.glusterfs.volume-id", old_uuid, 16); - if (size == 16) { - if (gf_uuid_compare (old_uuid, dict_uuid)) { - gf_msg (this->name, GF_LOG_ERROR, 0, - P_MSG_INVALID_VOLUME_ID, - "mismatching volume-id (%s) received. " - "already is a part of volume %s ", - tmp_data->data, uuid_utoa (old_uuid)); - gf_event (EVENT_POSIX_ALREADY_PART_OF_VOLUME, - "volume-id=%s;brick=%s:%s", - uuid_utoa (old_uuid), - _private->hostname, _private->base_path); - ret = -1; - goto out; - } - } else if ((size == -1) && - (errno == ENODATA || errno == ENOATTR)) { - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_VOLUME_ID_ABSENT, - "Extended attribute trusted.glusterfs." 
- "volume-id is absent"); - gf_event (EVENT_POSIX_BRICK_NOT_IN_VOLUME, - "brick=%s:%s", - _private->hostname, _private->base_path); - ret = -1; - goto out; - - } else if ((size == -1) && (errno != ENODATA) && - (errno != ENOATTR)) { - /* Wrong 'volume-id' is set, it should be error */ - gf_event (EVENT_POSIX_BRICK_VERIFICATION_FAILED, - "brick=%s:%s", - _private->hostname, _private->base_path); - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_VOLUME_ID_FETCH_FAILED, - "%s: failed to fetch volume-id", - dir_data->data); - ret = -1; - goto out; - } else { - ret = -1; - gf_event (EVENT_POSIX_BRICK_VERIFICATION_FAILED, - "brick=%s:%s", - _private->hostname, _private->base_path); - gf_msg (this->name, GF_LOG_ERROR, 0, - P_MSG_VOLUME_ID_FETCH_FAILED, - "failed to fetch proper volume id from export"); - goto out; - } - } - - /* Now check if the export directory has some other 'gfid', - other than that of root '/' */ - size = sys_lgetxattr (dir_data->data, "trusted.gfid", gfid, 16); - if (size == 16) { - if (!__is_root_gfid (gfid)) { - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_GFID_SET_FAILED, - "%s: gfid (%s) is not that of glusterfs '/' ", - dir_data->data, uuid_utoa (gfid)); - ret = -1; - goto out; - } - } else if (size != -1) { - /* Wrong 'gfid' is set, it should be error */ - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_GFID_SET_FAILED, - "%s: wrong value set as gfid", - dir_data->data); - ret = -1; - goto out; - } else if ((size == -1) && (errno != ENODATA) && - (errno != ENOATTR)) { - /* Wrong 'gfid' is set, it should be error */ - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_GFID_SET_FAILED, - "%s: failed to fetch gfid", - dir_data->data); - ret = -1; - goto out; - } else { - /* First time volume, set the GFID */ - size = sys_lsetxattr (dir_data->data, "trusted.gfid", rootgfid, - 16, XATTR_CREATE); - if (size == -1) { - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_GFID_SET_FAILED, - "%s: failed to set gfid", - dir_data->data); - ret = 
-1; - goto out; - } - } - - ret = 0; - - size = sys_lgetxattr (dir_data->data, POSIX_ACL_ACCESS_XATTR, - NULL, 0); - if ((size < 0) && (errno == ENOTSUP)) { - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_ACL_NOTSUP, - "Posix access control list is not supported."); - gf_event (EVENT_POSIX_ACL_NOT_SUPPORTED, - "brick=%s:%s", _private->hostname, _private->base_path); - } - - /* - * _XOPEN_PATH_MAX is the longest file path len we MUST - * support according to POSIX standard. When prepended - * by the brick base path it may exceed backed filesystem - * capacity (which MAY be bigger than _XOPEN_PATH_MAX). If - * this is the case, chdir() to the brick base path and - * use relative paths when they are too long. See also - * MAKE_REAL_PATH in posix-handle.h - */ - _private->path_max = pathconf(_private->base_path, _PC_PATH_MAX); - if (_private->path_max != -1 && - _XOPEN_PATH_MAX + _private->base_path_length > _private->path_max) { - ret = chdir(_private->base_path); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - P_MSG_BASEPATH_CHDIR_FAILED, - "chdir() to \"%s\" failed", - _private->base_path); - goto out; - } -#ifdef __NetBSD__ - /* - * At least on NetBSD, the chdir() above uncovers a - * race condition which cause file lookup to fail - * with ENODATA for a few seconds. The volume quickly - * reaches a sane state, but regression tests are fast - * enough to choke on it. The reason is obscure (as - * often with race conditions), but sleeping here for - * a second seems to workaround the problem. 
- */ - sleep(1); -#endif - } - - - LOCK_INIT (&_private->lock); - - _private->export_statfs = 1; - tmp_data = dict_get (this->options, "export-statfs-size"); - if (tmp_data) { - if (gf_string2boolean (tmp_data->data, - &_private->export_statfs) == -1) { - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - P_MSG_INVALID_OPTION_VAL, - "'export-statfs-size' takes only boolean " - "options"); - goto out; - } - if (!_private->export_statfs) - gf_msg_debug (this->name, 0, - "'statfs()' returns dummy size"); - } - - _private->background_unlink = 0; - tmp_data = dict_get (this->options, "background-unlink"); - if (tmp_data) { - if (gf_string2boolean (tmp_data->data, - &_private->background_unlink) == -1) { - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - P_MSG_INVALID_OPTION_VAL, "'background-unlink'" - " takes only boolean options"); - goto out; - } - - if (_private->background_unlink) - gf_msg_debug (this->name, 0, - "unlinks will be performed in background"); - } - - tmp_data = dict_get (this->options, "o-direct"); - if (tmp_data) { - if (gf_string2boolean (tmp_data->data, - &_private->o_direct) == -1) { - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - P_MSG_INVALID_OPTION_VAL, - "wrong option provided for 'o-direct'"); - goto out; - } - if (_private->o_direct) - gf_msg_debug (this->name, 0, "o-direct mode is enabled" - " (O_DIRECT for every open)"); - } - - tmp_data = dict_get (this->options, "update-link-count-parent"); - if (tmp_data) { - if (gf_string2boolean (tmp_data->data, - &_private->update_pgfid_nlinks) == -1) { - ret = -1; - gf_msg (this->name, GF_LOG_ERROR, 0, - P_MSG_INVALID_OPTION, "wrong value provided " - "for 'update-link-count-parent'"); - goto out; - } - if (_private->update_pgfid_nlinks) - gf_msg_debug (this->name, 0, "update-link-count-parent" - " is enabled. 
Thus for each file an " - "extended attribute representing the " - "number of hardlinks for that file " - "within the same parent directory is" - " set."); - } - - ret = dict_get_str (this->options, "glusterd-uuid", &guuid); - if (!ret) { - if (gf_uuid_parse (guuid, _private->glusterd_uuid)) - gf_msg (this->name, GF_LOG_WARNING, 0, - P_MSG_INVALID_NODE_UUID, "Cannot parse " - "glusterd (node) UUID, node-uuid xattr " - "request would return - \"No such attribute\""); - } else { - gf_msg_debug (this->name, 0, "No glusterd (node) UUID passed -" - " node-uuid xattr request will return \"No such" - " attribute\""); - } - ret = 0; - - GF_OPTION_INIT ("janitor-sleep-duration", - _private->janitor_sleep_duration, int32, out); - - /* performing open dir on brick dir locks the brick dir - * and prevents it from being unmounted - */ - _private->mount_lock = sys_opendir (dir_data->data); - if (!_private->mount_lock) { - ret = -1; - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, 0, - P_MSG_DIR_OPERATION_FAILED, - "Could not lock brick directory (%s)", - strerror (op_errno)); - goto out; - } -#ifndef GF_DARWIN_HOST_OS - { - struct rlimit lim; - lim.rlim_cur = 1048576; - lim.rlim_max = 1048576; - - if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_SET_ULIMIT_FAILED, - "Failed to set 'ulimit -n " - " 1048576'"); - lim.rlim_cur = 65536; - lim.rlim_max = 65536; - - if (setrlimit (RLIMIT_NOFILE, &lim) == -1) { - gf_msg (this->name, GF_LOG_WARNING, errno, - P_MSG_SET_FILE_MAX_FAILED, - "Failed to set maximum allowed open " - "file descriptors to 64k"); - } - else { - gf_msg (this->name, GF_LOG_INFO, 0, - P_MSG_MAX_FILE_OPEN, "Maximum allowed " - "open file descriptors set to 65536"); - } - } - } -#endif - _private->shared_brick_count = 1; - ret = dict_get_int32 (this->options, "shared-brick-count", - &_private->shared_brick_count); - if (ret == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - P_MSG_INVALID_OPTION_VAL, - 
"'shared-brick-count' takes only integer " - "values"); - goto out; - } - - this->private = (void *)_private; - - op_ret = posix_handle_init (this); - if (op_ret == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE, - "Posix handle setup failed"); - ret = -1; - goto out; - } - - op_ret = posix_handle_trash_init (this); - if (op_ret < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_HANDLE_CREATE_TRASH, - "Posix landfill setup failed"); - ret = -1; - goto out; - } - - op_ret = posix_create_unlink_dir (this); - if (op_ret == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - P_MSG_HANDLE_CREATE, - "Creation of unlink directory failed"); - ret = -1; - goto out; - } - - _private->aio_init_done = _gf_false; - _private->aio_capable = _gf_false; - - GF_OPTION_INIT ("brick-uid", uid, int32, out); - GF_OPTION_INIT ("brick-gid", gid, int32, out); - if (uid != -1 || gid != -1) - posix_set_owner (this, uid, gid); - - GF_OPTION_INIT ("linux-aio", _private->aio_configured, bool, out); - - if (_private->aio_configured) { - op_ret = posix_aio_on (this); - - if (op_ret == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_POSIX_AIO, - "Posix AIO init failed"); - ret = -1; - goto out; - } - } - - GF_OPTION_INIT ("node-uuid-pathinfo", - _private->node_uuid_pathinfo, bool, out); - if (_private->node_uuid_pathinfo && - (gf_uuid_is_null (_private->glusterd_uuid))) { - gf_msg (this->name, GF_LOG_INFO, 0, P_MSG_UUID_NULL, - "glusterd uuid is NULL, pathinfo xattr would" - " fallback to <hostname>:<export>"); - } - - _private->disk_space_check_active = _gf_false; - _private->disk_space_full = 0; - GF_OPTION_INIT ("reserve", - _private->disk_reserve, uint32, out); - if (_private->disk_reserve) - posix_spawn_disk_space_check_thread (this); - - _private->health_check_active = _gf_false; - GF_OPTION_INIT ("health-check-interval", - _private->health_check_interval, uint32, out); - GF_OPTION_INIT ("health-check-timeout", - _private->health_check_timeout, uint32, out); - if 
(_private->health_check_interval) - posix_spawn_health_check_thread (this); - - pthread_mutex_init (&_private->janitor_lock, NULL); - pthread_cond_init (&_private->janitor_cond, NULL); - INIT_LIST_HEAD (&_private->janitor_fds); - - posix_spawn_janitor_thread (this); - - pthread_mutex_init (&_private->fsync_mutex, NULL); - pthread_cond_init (&_private->fsync_cond, NULL); - INIT_LIST_HEAD (&_private->fsyncs); - - ret = gf_thread_create (&_private->fsyncer, NULL, posix_fsyncer, this, - "posixfsy"); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, errno, - P_MSG_FSYNCER_THREAD_CREATE_FAILED, - "fsyncer thread creation failed"); - goto out; - } - - GF_OPTION_INIT ("batch-fsync-mode", batch_fsync_mode_str, str, out); - - if (set_batch_fsync_mode (_private, batch_fsync_mode_str) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, - "Unknown mode string: %s", batch_fsync_mode_str); - goto out; - } - - GF_OPTION_INIT ("gfid2path", _private->gfid2path, bool, out); - - GF_OPTION_INIT ("gfid2path-separator", gfid2path_sep, str, out); - if (set_gfid2path_separator (_private, gfid2path_sep) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, - "Length of separator exceeds 7: %s", gfid2path_sep); - goto out; - } - -#ifdef GF_DARWIN_HOST_OS - - char *xattr_user_namespace_mode_str = NULL; - - GF_OPTION_INIT ("xattr-user-namespace-mode", - xattr_user_namespace_mode_str, str, out); - - if (set_xattr_user_namespace_mode (_private, - xattr_user_namespace_mode_str) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, P_MSG_INVALID_ARGUMENT, - "Unknown xattr user namespace mode string: %s", - xattr_user_namespace_mode_str); - goto out; - } -#endif - - GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec, - uint32, out); - GF_OPTION_INIT ("force-create-mode", force_create, int32, out); - _private->force_create_mode = force_create; - - GF_OPTION_INIT ("force-directory-mode", force_directory, int32, out); - _private->force_directory_mode 
= force_directory; - - GF_OPTION_INIT ("create-mask", - create_mask, int32, out); - _private->create_mask = create_mask; - - GF_OPTION_INIT ("create-directory-mask", - create_directory_mask, int32, out); - _private->create_directory_mask = create_directory_mask; -out: - if (ret) { - if (_private) { - GF_FREE (_private->base_path); - - GF_FREE (_private->hostname); - - GF_FREE (_private->trash_path); - - GF_FREE (_private); - } - - this->private = NULL; - } - return ret; -} - -void -fini (xlator_t *this) -{ - struct posix_private *priv = this->private; - if (!priv) - return; - this->private = NULL; - /*unlock brick dir*/ - if (priv->mount_lock) - (void) sys_closedir (priv->mount_lock); - - GF_FREE (priv->base_path); - GF_FREE (priv->hostname); - GF_FREE (priv->trash_path); - GF_FREE (priv); - - return; -} struct xlator_dumpops dumpops = { .priv = posix_priv, .inode = posix_inode, @@ -8081,8 +71,8 @@ struct xlator_fops fops = { .fxattrop = posix_fxattrop, .setattr = posix_setattr, .fsetattr = posix_fsetattr, - .fallocate = _posix_fallocate, - .discard = posix_discard, + .fallocate = posix_glfallocate, + .discard = posix_discard, .zerofill = posix_zerofill, .ipc = posix_ipc, #ifdef HAVE_SEEK_HOLE @@ -8096,217 +86,3 @@ struct xlator_cbks cbks = { .releasedir = posix_releasedir, .forget = posix_forget }; - -struct volume_options options[] = { - { .key = {"o-direct"}, - .type = GF_OPTION_TYPE_BOOL }, - { .key = {"directory"}, - .type = GF_OPTION_TYPE_PATH, - .default_value = "{{brick.path}}" - }, - { .key = {"hostname"}, - .type = GF_OPTION_TYPE_ANY }, - { .key = {"export-statfs-size"}, - .type = GF_OPTION_TYPE_BOOL }, - { .key = {"mandate-attribute"}, - .type = GF_OPTION_TYPE_BOOL }, - { .key = {"background-unlink"}, - .type = GF_OPTION_TYPE_BOOL }, - { .key = {"janitor-sleep-duration"}, - .type = GF_OPTION_TYPE_INT, - .min = 1, - .validate = GF_OPT_VALIDATE_MIN, - .default_value = "10", - .description = "Interval (in seconds) between times the internal " - "'landfill' 
directory is emptied." - }, - { .key = {"volume-id"}, - .type = GF_OPTION_TYPE_ANY, - .default_value = "{{brick.volumeid}}" - }, - { .key = {"glusterd-uuid"}, - .type = GF_OPTION_TYPE_STR }, - { - .key = {"linux-aio"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "Support for native Linux AIO", - .op_version = {1}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - { - .key = {"brick-uid"}, - .type = GF_OPTION_TYPE_INT, - .min = -1, - .validate = GF_OPT_VALIDATE_MIN, - .default_value = "-1", - .description = "Support for setting uid of brick's owner", - .op_version = {1}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - { - .key = {"brick-gid"}, - .type = GF_OPTION_TYPE_INT, - .min = -1, - .validate = GF_OPT_VALIDATE_MIN, - .default_value = "-1", - .description = "Support for setting gid of brick's owner", - .op_version = {1}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - { .key = {"node-uuid-pathinfo"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "return glusterd's node-uuid in pathinfo xattr" - " string instead of hostname", - .op_version = {3}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - { - .key = {"health-check-interval"}, - .type = GF_OPTION_TYPE_INT, - .min = 0, - .default_value = "30", - .validate = GF_OPT_VALIDATE_MIN, - .description = "Interval in seconds for a filesystem health check, " - "set to 0 to disable", - .op_version = {3}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - { - .key = {"health-check-timeout"}, - .type = GF_OPTION_TYPE_INT, - .min = 0, - .default_value = "10", - .validate = GF_OPT_VALIDATE_MIN, - .description = "Interval in seconds to wait aio_write finish for health check, " - "set to 0 to disable", - .op_version = {GD_OP_VERSION_4_0_0}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - { - .key = {"reserve"}, - .type = GF_OPTION_TYPE_INT, - .min = 0, - .default_value = "1", - .validate = GF_OPT_VALIDATE_MIN, - .description = "Percentage of disk space to be 
reserved." - " Set to 0 to disable", - .op_version = {GD_OP_VERSION_3_13_0}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - { .key = {"batch-fsync-mode"}, - .type = GF_OPTION_TYPE_STR, - .default_value = "reverse-fsync", - .description = "Possible values:\n" - "\t- syncfs: Perform one syncfs() on behalf oa batch" - "of fsyncs.\n" - "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch" - " of fsyncs and one fsync() per batch.\n" - "\t- syncfs-reverse-fsync: Preform one syncfs() on behalf of a batch" - " of fsyncs and fsync() each file in the batch in reverse order.\n" - " in reverse order.\n" - "\t- reverse-fsync: Perform fsync() of each file in the batch in" - " reverse order.", - .op_version = {3}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - { .key = {"batch-fsync-delay-usec"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "0", - .description = "Num of usecs to wait for aggregating fsync" - " requests", - .op_version = {3}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - { .key = {"update-link-count-parent"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", - .description = "Enable placeholders for gfid to path conversion", - .op_version = {GD_OP_VERSION_3_6_0}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, - { .key = {"gfid2path"}, - .type = GF_OPTION_TYPE_BOOL, -#ifdef __NetBSD__ - /* - * NetBSD storage of extended attributes for UFS1 badly - * scales when the list of extended attributes names rises. - * This option can add as many extended attributes names - * as we have files, hence we keep it disabled for performance - * sake. 
- */ - .default_value = "off", -#else - .default_value = "on", -#endif - .description = "Enable logging metadata for gfid to path conversion", - .op_version = {GD_OP_VERSION_3_12_0}, - .flags = OPT_FLAG_SETTABLE - }, - { .key = {"gfid2path-separator"}, - .type = GF_OPTION_TYPE_STR, - .default_value = ":", - .description = "Path separator for glusterfs.gfidtopath virt xattr", - .op_version = {GD_OP_VERSION_3_12_0}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, -#if GF_DARWIN_HOST_OS - { .key = {"xattr-user-namespace-mode"}, - .type = GF_OPTION_TYPE_STR, - .default_value = "none", - .description = "Option to control XATTR user namespace on the raw filesystem: " - "\t- None: Will use the user namespace, so files will be exchangable with Linux.\n" - " The raw filesystem will not be compatible with OS X Finder.\n" - "\t- Strip: Will strip the user namespace before setting. The raw filesystem will work in OS X.\n", - .op_version = {GD_OP_VERSION_3_6_0}, - .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC - }, -#endif - { .key = {"shared-brick-count"}, - .type = GF_OPTION_TYPE_INT, - .default_value = "1", - .description = "Number of bricks sharing the same backend export." - " Useful for displaying the proper usable size through statvfs() " - "call (df command)", - }, - { .key = {"force-create-mode"}, - .type = GF_OPTION_TYPE_INT, - .min = 0000, - .max = 0777, - .default_value = "0000", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, - .description = "Mode bit permission that will always be set on a file." 
- }, - { .key = {"force-directory-mode"}, - .type = GF_OPTION_TYPE_INT, - .min = 0000, - .max = 0777, - .default_value = "0000", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, - .description = "Mode bit permission that will be always set on directory" - }, - { .key = {"create-mask"}, - .type = GF_OPTION_TYPE_INT, - .min = 0000, - .max = 0777, - .default_value = "0777", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, - .description = "Any bit not set here will be removed from the" - "modes set on a file when it is created" - }, - { .key = {"create-directory-mask"}, - .type = GF_OPTION_TYPE_INT, - .min = 0000, - .max = 0777, - .default_value = "0777", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, - .description = "Any bit not set here will be removed from the" - "modes set on a directory when it is created" - }, - { .key = {NULL} } -}; diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index e2f1dfa2dd1..36348767870 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -363,4 +363,247 @@ posix_fdget_objectsignature (int, dict_t *); gf_boolean_t posix_is_bulk_removexattr (char *name, dict_t *dict); + +int32_t +posix_set_iatt_in_dict (dict_t *, struct iatt *); + +mode_t +posix_override_umask (mode_t , mode_t); + +int32_t +posix_priv (xlator_t *this); + +int32_t +posix_inode (xlator_t *this); + +void +posix_fini (xlator_t *this); + +int +posix_init (xlator_t *this); + +int +posix_reconfigure (xlator_t *this, dict_t *options); + +int32_t +posix_notify (xlator_t *this, int32_t event, void *data, ...); + +/* posix-entry-ops.c FOP signatures */ +int32_t +posix_lookup (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *xdata); + +int +posix_create (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, mode_t mode, + mode_t umask, fd_t *fd, dict_t *xdata); + +int +posix_symlink (call_frame_t *frame, xlator_t *this, + 
const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata); + +int +posix_rename (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc, dict_t *xdata); + +int +posix_link (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc, dict_t *xdata); + +int +posix_mknod (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata); + +int +posix_mkdir (call_frame_t *frame, xlator_t *this, + loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata); + +int32_t +posix_unlink (call_frame_t *frame, xlator_t *this, + loc_t *loc, int xflag, dict_t *xdata); + +int +posix_rmdir (call_frame_t *frame, xlator_t *this, + loc_t *loc, int flags, dict_t *xdata); + +/* posix-inode-fs-ops.c FOP signatures */ +int +posix_forget (xlator_t *this, inode_t *inode); + +int32_t +posix_discover (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *xdata); + +int32_t +posix_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); + +int +posix_setattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata); + +int +posix_fsetattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata); + +int32_t +posix_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata); + +int32_t +posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata); + +int32_t +posix_glfallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t keep_size, off_t offset, size_t len, dict_t *xdata); + +int32_t +posix_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata); + +int32_t +posix_seek (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + gf_seek_what_t what, dict_t *xdata); + +int32_t +posix_opendir (call_frame_t *frame, xlator_t *this, + loc_t *loc, fd_t *fd, dict_t *xdata); + +int32_t +posix_releasedir (xlator_t *this, + fd_t *fd); 
+ +int32_t +posix_readlink (call_frame_t *frame, xlator_t *this, + loc_t *loc, size_t size, dict_t *xdata); + +int32_t +posix_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata); + +int32_t +posix_open (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata); + +int +posix_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata); + +int32_t +posix_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, + uint32_t flags, struct iobref *iobref, dict_t *xdata); + +int32_t +posix_statfs (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *xdata); + +int32_t +posix_flush (call_frame_t *frame, xlator_t *this, + fd_t *fd, dict_t *xdata); + +int32_t +posix_release (xlator_t *this, fd_t *fd); + +int32_t +posix_fsync (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t datasync, dict_t *xdata); + +int32_t +posix_setxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *dict, int flags, dict_t *xdata); + +int +posix_get_ancestry_non_directory (xlator_t *this, inode_t *leaf_inode, + gf_dirent_t *head, char **path, int type, + int32_t *op_errno, dict_t *xdata); + +int +posix_get_ancestry (xlator_t *this, inode_t *leaf_inode, + gf_dirent_t *head, char **path, int type, int32_t *op_errno, + dict_t *xdata); + +int32_t +posix_getxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name, dict_t *xdata); + +int32_t +posix_fgetxattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *name, dict_t *xdata); + +int32_t +posix_fsetxattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, dict_t *dict, int flags, dict_t *xdata); + +int32_t +posix_removexattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name, dict_t *xdata); + +int32_t +posix_fremovexattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *name, dict_t *xdata); + 
+int32_t +posix_fsyncdir (call_frame_t *frame, xlator_t *this, + fd_t *fd, int datasync, dict_t *xdata); + +int +posix_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata); + +int +posix_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata); + +int +posix_access (call_frame_t *frame, xlator_t *this, + loc_t *loc, int32_t mask, dict_t *xdata); + +int32_t +posix_ftruncate (call_frame_t *frame, xlator_t *this, + fd_t *fd, off_t offset, dict_t *xdata); + +int32_t +posix_fstat (call_frame_t *frame, xlator_t *this, + fd_t *fd, dict_t *xdata); + +int32_t +posix_lease (call_frame_t *frame, xlator_t *this, + loc_t *loc, struct gf_lease *lease, dict_t *xdata); + +int32_t +posix_lk (call_frame_t *frame, xlator_t *this, + fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata); + +int32_t +posix_inodelk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, int32_t cmd, + struct gf_flock *lock, dict_t *xdata); + +int32_t +posix_finodelk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, int32_t cmd, + struct gf_flock *lock, dict_t *xdata); + +int32_t +posix_entrylk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata); + +int32_t +posix_fentrylk (call_frame_t *frame, xlator_t *this, + const char *volume, fd_t *fd, const char *basename, + entrylk_cmd cmd, entrylk_type type, dict_t *xdata); + +int32_t +posix_readdir (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off, dict_t *xdata); + +int32_t +posix_readdirp (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off, dict_t *dict); + +int32_t +posix_rchecksum (call_frame_t *frame, xlator_t *this, + fd_t *fd, off_t offset, int32_t len, dict_t *xdata); + #endif /* _POSIX_H */ diff --git a/xlators/xlator.sym b/xlators/xlator.sym index 
b50eb2848f8..67f7cdde1c2 100644 --- a/xlators/xlator.sym +++ b/xlators/xlator.sym @@ -1,3 +1,4 @@ +class_methods xlator_api init fini |