diff options
Diffstat (limited to 'xlators/storage/posix')
| -rw-r--r-- | xlators/storage/posix/src/Makefile.am | 2 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-aio.c | 20 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-aio.h | 3 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-common.c | 353 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-entry-ops.c | 470 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-gfid-path.c | 98 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-gfid-path.h | 13 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-handle.c | 209 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-handle.h | 33 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-helpers.c | 992 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-inode-fd-ops.c | 1118 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-inode-handle.h | 20 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-mem-types.h | 7 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-messages.h | 6 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-metadata.c | 557 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix-metadata.h | 25 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix.c | 34 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix.h | 183 |
18 files changed, 2877 insertions, 1266 deletions
diff --git a/xlators/storage/posix/src/Makefile.am b/xlators/storage/posix/src/Makefile.am index d8af6221e4e..c080a229ff3 100644 --- a/xlators/storage/posix/src/Makefile.am +++ b/xlators/storage/posix/src/Makefile.am @@ -17,7 +17,7 @@ noinst_HEADERS = posix.h posix-mem-types.h posix-handle.h posix-aio.h \ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ - -I$(top_srcdir)/rpc/rpc-lib/src + -I$(top_srcdir)/rpc/rpc-lib/src -I$(CONTRIBDIR)/timer-wheel AM_CFLAGS = -fno-strict-aliasing -Wall $(GF_CFLAGS) -I$(top_srcdir)/glusterfsd/src diff --git a/xlators/storage/posix/src/posix-aio.c b/xlators/storage/posix/src/posix-aio.c index cf570d5be41..d0cb0002bbf 100644 --- a/xlators/storage/posix/src/posix-aio.c +++ b/xlators/storage/posix/src/posix-aio.c @@ -7,8 +7,6 @@ later), or the GNU General Public License, version 2 (GPLv2), in all cases as published by the Free Software Foundation. */ -#include "xlator.h" -#include "glusterfs.h" #include "posix.h" #include <sys/uio.h> #include "posix-messages.h" @@ -133,11 +131,7 @@ posix_aio_readv_complete(struct posix_aio_cb *paiocb, int res, int res2) if (!postbuf.ia_size || (offset + iov.iov_len) >= postbuf.ia_size) op_errno = ENOENT; - LOCK(&priv->lock); - { - priv->read_value += op_ret; - } - UNLOCK(&priv->lock); + GF_ATOMIC_ADD(priv->read_value, op_ret); out: STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1, &postbuf, @@ -266,6 +260,12 @@ posix_aio_writev_complete(struct posix_aio_cb *paiocb, int res, int res2) struct posix_private *priv = NULL; fd_t *fd = NULL; + if (!paiocb) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } + frame = paiocb->frame; this = frame->this; priv = this->private; @@ -295,11 +295,7 @@ posix_aio_writev_complete(struct posix_aio_cb *paiocb, int res, int res2) op_ret = res; op_errno = 0; - LOCK(&priv->lock); - { - priv->write_value += op_ret; - } - UNLOCK(&priv->lock); + GF_ATOMIC_ADD(priv->write_value, op_ret); out: 
STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, &prebuf, &postbuf, diff --git a/xlators/storage/posix/src/posix-aio.h b/xlators/storage/posix/src/posix-aio.h index 9b4a7078ecf..b316deb3229 100644 --- a/xlators/storage/posix/src/posix-aio.h +++ b/xlators/storage/posix/src/posix-aio.h @@ -10,9 +10,6 @@ #ifndef _POSIX_AIO_H #define _POSIX_AIO_H -#include "xlator.h" -#include "glusterfs.h" - // Maximum number of concurrently submitted IO events. The heaviest load // GlusterFS has been able to handle had 60-80 concurrent calls #define POSIX_AIO_MAX_NR_EVENTS 256 diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c index 156a44588d9..f10722ec3fb 100644 --- a/xlators/storage/posix/src/posix-common.c +++ b/xlators/storage/posix/src/posix-common.c @@ -26,7 +26,6 @@ #include <signal.h> #include <sys/uio.h> #include <unistd.h> -#include <ftw.h> #ifndef GF_BSD_HOST_OS #include <alloca.h> @@ -36,30 +35,22 @@ #include <fcntl.h> #endif /* HAVE_LINKAT */ -#include "glusterfs.h" -#include "checksum.h" -#include "dict.h" -#include "logging.h" -#include "posix.h" #include "posix-inode-handle.h" -#include "xlator.h" -#include "defaults.h" -#include "common-utils.h" -#include "compat-errno.h" -#include "compat.h" -#include "byte-order.h" -#include "syscall.h" -#include "statedump.h" -#include "locking.h" -#include "timer.h" +#include <glusterfs/compat-errno.h> +#include <glusterfs/compat.h> +#include <glusterfs/byte-order.h> +#include <glusterfs/syscall.h> +#include <glusterfs/statedump.h> +#include <glusterfs/locking.h> +#include <glusterfs/timer.h> #include "glusterfs3-xdr.h" -#include "hashfn.h" #include "posix-aio.h" -#include "glusterfs-acl.h" +#include <glusterfs/glusterfs-acl.h> #include "posix-messages.h" -#include "events.h" +#include <glusterfs/events.h> #include "posix-gfid-path.h" -#include "compat-uuid.h" +#include <glusterfs/compat-uuid.h> +#include "timer-wheel.h" extern char *marker_xattrs[]; #define ALIGN_SIZE 4096 @@ 
-110,13 +101,13 @@ posix_priv(xlator_t *this) struct posix_private *priv = NULL; char key_prefix[GF_DUMP_MAX_BUF_LEN]; + if (!this) + return 0; + (void)snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); gf_proc_dump_add_section("%s", key_prefix); - if (!this) - return 0; - priv = this->private; if (!priv) @@ -124,9 +115,9 @@ posix_priv(xlator_t *this) gf_proc_dump_write("base_path", "%s", priv->base_path); gf_proc_dump_write("base_path_length", "%d", priv->base_path_length); - gf_proc_dump_write("max_read", "%" PRId64, priv->read_value); - gf_proc_dump_write("max_write", "%" PRId64, priv->write_value); - gf_proc_dump_write("nr_files", "%ld", priv->nr_files); + gf_proc_dump_write("max_read", "%" PRId64, GF_ATOMIC_GET(priv->read_value)); + gf_proc_dump_write("max_write", "%" PRId64, + GF_ATOMIC_GET(priv->write_value)); return 0; } @@ -143,11 +134,60 @@ posix_inode(xlator_t *this) int32_t posix_notify(xlator_t *this, int32_t event, void *data, ...) { + xlator_t *victim = data; + struct posix_private *priv = this->private; + int ret = 0; + struct timespec sleep_till = { + 0, + }; + glusterfs_ctx_t *ctx = this->ctx; + switch (event) { case GF_EVENT_PARENT_UP: { - /* Tell the parent that posix xlator is up */ + /* Notify the parent that posix xlator is up */ default_notify(this, GF_EVENT_CHILD_UP, data); } break; + + case GF_EVENT_PARENT_DOWN: { + if (!victim->cleanup_starting) + break; + + if (priv->janitor) { + pthread_mutex_lock(&priv->janitor_mutex); + { + priv->janitor_task_stop = _gf_true; + ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, + priv->janitor); + if (!ret) { + timespec_now_realtime(&sleep_till); + sleep_till.tv_sec += 1; + /* Wait to set janitor_task flag to _gf_false by + * janitor_task_done */ + while (priv->janitor_task_stop) { + (void)pthread_cond_timedwait(&priv->janitor_cond, + &priv->janitor_mutex, + &sleep_till); + timespec_now_realtime(&sleep_till); + sleep_till.tv_sec += 1; + } + } + } + 
pthread_mutex_unlock(&priv->janitor_mutex); + GF_FREE(priv->janitor); + } + priv->janitor = NULL; + pthread_mutex_lock(&ctx->fd_lock); + { + while (priv->rel_fdcount > 0) { + pthread_cond_wait(&priv->fd_cond, &ctx->fd_lock); + } + } + pthread_mutex_unlock(&ctx->fd_lock); + + gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", + victim->name); + default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); + } break; default: /* */ break; @@ -333,15 +373,31 @@ posix_reconfigure(xlator_t *this, dict_t *options) " fallback to <hostname>:<export>"); } - GF_OPTION_RECONF("reserve", priv->disk_reserve, options, uint32, out); - if (priv->disk_reserve) - posix_spawn_disk_space_check_thread(this); + GF_OPTION_RECONF("reserve", priv->disk_reserve, options, percent_or_size, + out); + /* option can be any one of percent or bytes */ + priv->disk_unit = 0; + if (priv->disk_reserve < 100.0) + priv->disk_unit = 'p'; + + if (priv->disk_reserve) { + ret = posix_spawn_disk_space_check_thread(this); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, + "Getting disk space check from thread failed"); + goto out; + } + } GF_OPTION_RECONF("health-check-interval", priv->health_check_interval, options, uint32, out); GF_OPTION_RECONF("health-check-timeout", priv->health_check_timeout, options, uint32, out); - posix_spawn_health_check_thread(this); + if (priv->health_check_interval) { + ret = posix_spawn_health_check_thread(this); + if (ret) + goto out; + } GF_OPTION_RECONF("shared-brick-count", priv->shared_brick_count, options, int32, out); @@ -496,6 +552,30 @@ posix_create_unlink_dir(xlator_t *this) return 0; } +int +posix_create_open_directory_based_fd(xlator_t *this, int pdirfd, char *dir_name) +{ + int ret = -1; + + ret = sys_openat(pdirfd, dir_name, (O_DIRECTORY | O_RDONLY), 0); + if (ret < 0 && errno == ENOENT) { + ret = sys_mkdirat(pdirfd, dir_name, 0700); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, 
+ "Creating directory %s failed", dir_name); + goto out; + } + ret = sys_openat(pdirfd, dir_name, (O_DIRECTORY | O_RDONLY), 0); + if (ret < 0 && errno != EEXIST) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "error mkdir hash-1 %s ", dir_name); + goto out; + } + } +out: + return ret; +} + /** * init - */ @@ -522,7 +602,7 @@ posix_init(xlator_t *this) uuid_t gfid = { 0, }; - uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + static uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; char *guuid = NULL; int32_t uid = -1; int32_t gid = -1; @@ -532,6 +612,15 @@ posix_init(xlator_t *this) int force_directory = -1; int create_mask = -1; int create_directory_mask = -1; + char dir_handle[PATH_MAX] = { + 0, + }; + int i; + char fhash[4] = { + 0, + }; + int hdirfd = -1; + char value; dir_data = dict_get(this->options, "directory"); @@ -572,7 +661,12 @@ posix_init(xlator_t *this) } _private->base_path = gf_strdup(dir_data->data); - _private->base_path_length = strlen(_private->base_path); + _private->base_path_length = dir_data->len - 1; + + _private->dirfd = -1; + _private->mount_lock = -1; + for (i = 0; i < 256; i++) + _private->arrdfd[i] = -1; ret = dict_get_str(this->options, "hostname", &_private->hostname); if (ret) { @@ -588,16 +682,11 @@ posix_init(xlator_t *this) } /* Check for Extended attribute support, if not present, log it */ - op_ret = sys_lsetxattr(dir_data->data, "trusted.glusterfs.test", "working", - 8, 0); - if (op_ret != -1) { - ret = sys_lremovexattr(dir_data->data, "trusted.glusterfs.test"); - if (ret) { - gf_msg(this->name, GF_LOG_DEBUG, errno, P_MSG_INVALID_OPTION, - "failed to remove xattr: " - "trusted.glusterfs.test"); - } - } else { + size = sys_lgetxattr(dir_data->data, "user.x", &value, sizeof(value)); + + if ((size == -1) && (errno == EOPNOTSUPP)) { + gf_msg(this->name, GF_LOG_DEBUG, 0, P_MSG_XDATA_GETXATTR, + "getxattr returned %zd", size); tmp_data = dict_get(this->options, 
"mandate-attribute"); if (tmp_data) { if (gf_string2boolean(tmp_data->data, &tmp_bool) == -1) { @@ -757,6 +846,8 @@ posix_init(xlator_t *this) } LOCK_INIT(&_private->lock); + GF_ATOMIC_INIT(_private->read_value, 0); + GF_ATOMIC_INIT(_private->write_value, 0); _private->export_statfs = 1; tmp_data = dict_get(this->options, "export-statfs-size"); @@ -844,8 +935,9 @@ posix_init(xlator_t *this) /* performing open dir on brick dir locks the brick dir * and prevents it from being unmounted */ - _private->mount_lock = sys_opendir(dir_data->data); - if (!_private->mount_lock) { + _private->mount_lock = sys_open(dir_data->data, (O_DIRECTORY | O_RDONLY), + 0); + if (_private->mount_lock < 0) { ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DIR_OPERATION_FAILED, @@ -889,6 +981,28 @@ posix_init(xlator_t *this) } this->private = (void *)_private; + snprintf(dir_handle, sizeof(dir_handle), "%s/%s", _private->base_path, + GF_HIDDEN_PATH); + hdirfd = posix_create_open_directory_based_fd(this, _private->mount_lock, + dir_handle); + if (hdirfd < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "error open directory failed for dir %s", dir_handle); + ret = -1; + goto out; + } + _private->dirfd = hdirfd; + for (i = 0; i < 256; i++) { + snprintf(fhash, sizeof(fhash), "%02x", i); + _private->arrdfd[i] = posix_create_open_directory_based_fd(this, hdirfd, + fhash); + if (_private->arrdfd[i] < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "error openat failed for file %s", fhash); + ret = -1; + goto out; + } + } op_ret = posix_handle_init(this); if (op_ret == -1) { @@ -946,27 +1060,45 @@ posix_init(xlator_t *this) _private->disk_space_check_active = _gf_false; _private->disk_space_full = 0; - GF_OPTION_INIT("reserve", _private->disk_reserve, uint32, out); - if (_private->disk_reserve) - posix_spawn_disk_space_check_thread(this); + + GF_OPTION_INIT("reserve", _private->disk_reserve, percent_or_size, out); + + /* option can 
be any one of percent or bytes */ + _private->disk_unit = 0; + if (_private->disk_reserve < 100.0) + _private->disk_unit = 'p'; + + if (_private->disk_reserve) { + ret = posix_spawn_disk_space_check_thread(this); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, + "Getting disk space check from thread failed "); + goto out; + } + } _private->health_check_active = _gf_false; GF_OPTION_INIT("health-check-interval", _private->health_check_interval, uint32, out); GF_OPTION_INIT("health-check-timeout", _private->health_check_timeout, uint32, out); - if (_private->health_check_interval) - posix_spawn_health_check_thread(this); - - pthread_mutex_init(&_private->janitor_lock, NULL); - pthread_cond_init(&_private->janitor_cond, NULL); - INIT_LIST_HEAD(&_private->janitor_fds); - - posix_spawn_janitor_thread(this); + if (_private->health_check_interval) { + ret = posix_spawn_health_check_thread(this); + if (ret) + goto out; + } + posix_janitor_timer_start(this); pthread_mutex_init(&_private->fsync_mutex, NULL); pthread_cond_init(&_private->fsync_cond, NULL); + pthread_mutex_init(&_private->janitor_mutex, NULL); + pthread_cond_init(&_private->janitor_cond, NULL); + pthread_cond_init(&_private->fd_cond, NULL); INIT_LIST_HEAD(&_private->fsyncs); + _private->rel_fdcount = 0; + ret = posix_spawn_ctx_janitor_thread(this); + if (ret) + goto out; ret = gf_thread_create(&_private->fsyncer, NULL, posix_fsyncer, this, "posixfsy"); @@ -1040,9 +1172,27 @@ posix_init(xlator_t *this) out); GF_OPTION_INIT("ctime", _private->ctime, bool, out); + out: if (ret) { if (_private) { + if (_private->dirfd >= 0) { + sys_close(_private->dirfd); + _private->dirfd = -1; + } + + for (i = 0; i < 256; i++) { + if (_private->arrdfd[i] >= 0) { + sys_close(_private->arrdfd[i]); + _private->arrdfd[i] = -1; + } + } + /*unlock brick dir*/ + if (_private->mount_lock >= 0) { + (void)sys_close(_private->mount_lock); + _private->mount_lock = -1; + } + GF_FREE(_private->base_path); 
GF_FREE(_private->hostname); @@ -1061,36 +1211,84 @@ void posix_fini(xlator_t *this) { struct posix_private *priv = this->private; + gf_boolean_t health_check = _gf_false; + glusterfs_ctx_t *ctx = this->ctx; + uint32_t count; + int ret = 0; + int i = 0; + if (!priv) return; LOCK(&priv->lock); - if (priv->health_check_active) { + { + health_check = priv->health_check_active; priv->health_check_active = _gf_false; - pthread_cancel(priv->health_check); - priv->health_check = 0; } UNLOCK(&priv->lock); + + if (priv->dirfd >= 0) { + sys_close(priv->dirfd); + priv->dirfd = -1; + } + + for (i = 0; i < 256; i++) { + if (priv->arrdfd[i] >= 0) { + sys_close(priv->arrdfd[i]); + priv->arrdfd[i] = -1; + } + } + + if (health_check) { + (void)gf_thread_cleanup_xint(priv->health_check); + priv->health_check = 0; + } + if (priv->disk_space_check) { priv->disk_space_check_active = _gf_false; - pthread_cancel(priv->disk_space_check); + (void)gf_thread_cleanup_xint(priv->disk_space_check); priv->disk_space_check = 0; } + if (priv->janitor) { - (void)gf_thread_cleanup_xint(priv->janitor); - priv->janitor = 0; + /*TODO: Make sure the synctask is also complete */ + ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TIMER_DELETE_FAILED, + "Failed to delete janitor timer"); + } + GF_FREE(priv->janitor); + priv->janitor = NULL; } + + pthread_mutex_lock(&ctx->fd_lock); + { + count = --ctx->pxl_count; + if (count == 0) { + pthread_cond_signal(&ctx->fd_cond); + } + } + pthread_mutex_unlock(&ctx->fd_lock); + + if (count == 0) { + pthread_join(ctx->janitor, NULL); + } + if (priv->fsyncer) { (void)gf_thread_cleanup_xint(priv->fsyncer); priv->fsyncer = 0; } /*unlock brick dir*/ - if (priv->mount_lock) - (void)sys_closedir(priv->mount_lock); + if (priv->mount_lock >= 0) { + (void)sys_close(priv->mount_lock); + priv->mount_lock = -1; + } GF_FREE(priv->base_path); LOCK_DESTROY(&priv->lock); - 
pthread_mutex_destroy(&priv->janitor_lock); pthread_mutex_destroy(&priv->fsync_mutex); + pthread_cond_destroy(&priv->fsync_cond); + pthread_mutex_destroy(&priv->janitor_mutex); + pthread_cond_destroy(&priv->janitor_cond); GF_FREE(priv->hostname); GF_FREE(priv->trash_path); GF_FREE(priv); @@ -1099,7 +1297,7 @@ posix_fini(xlator_t *this) return; } -struct volume_options options[] = { +struct volume_options posix_options[] = { {.key = {"o-direct"}, .type = GF_OPTION_TYPE_BOOL}, {.key = {"directory"}, .type = GF_OPTION_TYPE_PATH, @@ -1162,7 +1360,7 @@ struct volume_options options[] = { {.key = {"health-check-timeout"}, .type = GF_OPTION_TYPE_INT, .min = 0, - .default_value = "10", + .default_value = "20", .validate = GF_OPT_VALIDATE_MIN, .description = "Interval in seconds to wait aio_write finish for health check, " @@ -1170,11 +1368,11 @@ struct volume_options options[] = { .op_version = {GD_OP_VERSION_4_0_0}, .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, {.key = {"reserve"}, - .type = GF_OPTION_TYPE_INT, + .type = GF_OPTION_TYPE_PERCENT_OR_SIZET, .min = 0, .default_value = "1", .validate = GF_OPT_VALIDATE_MIN, - .description = "Percentage of disk space to be reserved." + .description = "Percentage/Size of disk space to be reserved." 
" Set to 0 to disable", .op_version = {GD_OP_VERSION_3_13_0}, .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, @@ -1268,24 +1466,21 @@ struct volume_options options[] = { .min = 0000, .max = 0777, .default_value = "0000", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Mode bit permission that will always be set on a file."}, {.key = {"force-directory-mode"}, .type = GF_OPTION_TYPE_INT, .min = 0000, .max = 0777, .default_value = "0000", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Mode bit permission that will be always set on directory"}, {.key = {"create-mask"}, .type = GF_OPTION_TYPE_INT, .min = 0000, .max = 0777, .default_value = "0777", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Any bit not set here will be removed from the" "modes set on a file when it is created"}, {.key = {"create-directory-mask"}, @@ -1293,8 +1488,7 @@ struct volume_options options[] = { .min = 0000, .max = 0777, .default_value = "0777", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Any bit not set here will be removed from the" "modes set on a directory when it is created"}, {.key = {"max-hardlinks"}, @@ -1317,7 +1511,7 @@ struct volume_options options[] = { "SHA256 checksum. MD5 otherwise."}, {.key = {"ctime"}, .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", + .default_value = "on", .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, .op_version = {GD_OP_VERSION_4_1_0}, .tags = {"ctime"}, @@ -1326,4 +1520,5 @@ struct volume_options options[] = { "are stored in xattr to keep it consistent across replica and " "distribute set. 
The time attributes stored at the backend are " "not considered "}, - {.key = {NULL}}}; + {.key = {NULL}}, +}; diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c index 0e998b20f9f..8cc3ccf8c00 100644 --- a/xlators/storage/posix/src/posix-entry-ops.c +++ b/xlators/storage/posix/src/posix-entry-ops.c @@ -26,7 +26,6 @@ #include <signal.h> #include <sys/uio.h> #include <unistd.h> -#include <ftw.h> #ifndef GF_BSD_HOST_OS #include <alloca.h> @@ -36,32 +35,26 @@ #include <fcntl.h> #endif /* HAVE_LINKAT */ -#include "glusterfs.h" -#include "checksum.h" -#include "dict.h" -#include "logging.h" +#include <glusterfs/dict.h> +#include <glusterfs/logging.h> #include "posix.h" #include "posix-handle.h" -#include "xlator.h" -#include "defaults.h" -#include "common-utils.h" -#include "compat-errno.h" -#include "compat.h" -#include "byte-order.h" -#include "syscall.h" -#include "statedump.h" -#include "locking.h" -#include "timer.h" +#include <glusterfs/compat-errno.h> +#include <glusterfs/compat.h> +#include <glusterfs/byte-order.h> +#include <glusterfs/syscall.h> +#include <glusterfs/statedump.h> +#include <glusterfs/locking.h> +#include <glusterfs/timer.h> #include "glusterfs3-xdr.h" -#include "hashfn.h" #include "posix-aio.h" -#include "glusterfs-acl.h" +#include <glusterfs/glusterfs-acl.h> #include "posix-messages.h" #include "posix-metadata.h" -#include "events.h" +#include <glusterfs/events.h> #include "posix-gfid-path.h" -#include "compat-uuid.h" -#include "syncop.h" +#include <glusterfs/compat-uuid.h> +#include <glusterfs/syncop.h> extern char *marker_xattrs[]; #define ALIGN_SIZE 4096 @@ -93,7 +86,7 @@ extern char *marker_xattrs[]; #endif -gf_boolean_t +static gf_boolean_t posix_symlinks_match(xlator_t *this, loc_t *loc, uuid_t gfid) { struct posix_private *priv = NULL; @@ -114,7 +107,7 @@ posix_symlinks_match(xlator_t *this, loc_t *loc, uuid_t gfid) loc->pargfid[0], loc->pargfid[1], uuid_utoa(loc->pargfid), loc->name); - 
MAKE_HANDLE_GFID_PATH(dir_handle, this, gfid, NULL); + MAKE_HANDLE_GFID_PATH(dir_handle, this, gfid); len = sys_readlink(dir_handle, linkname_actual, PATH_MAX); if (len < 0 || len == PATH_MAX) { if (len == PATH_MAX) { @@ -136,12 +129,12 @@ out: return ret; } -dict_t * +static dict_t * posix_dict_set_nlink(dict_t *req, dict_t *res, int32_t nlink) { int ret = -1; - if (req == NULL || !dict_get(req, GF_REQUEST_LINK_COUNT_XDATA)) + if (req == NULL || !dict_get_sizen(req, GF_REQUEST_LINK_COUNT_XDATA)) goto out; if (res == NULL) @@ -183,6 +176,7 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) struct posix_private *priv = NULL; posix_inode_ctx_t *ctx = NULL; int ret = 0; + int dfd = -1; VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); @@ -204,14 +198,31 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) goto out; } - op_ret = dict_get_int32(xdata, GF_GFIDLESS_LOOKUP, &gfidless); +#ifdef __NetBSD__ + /* Same for NetBSD's .attribute directory */ + if (__is_root_gfid(loc->pargfid) && loc->name && + (strcmp(loc->name, ".attribute") == 0)) { + gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_LOOKUP_NOT_PERMITTED, + "Lookup issued on .attribute," + " which is not permitted"); + op_errno = EPERM; + op_ret = -1; + goto out; + } +#endif /* __NetBSD__ */ + + op_ret = dict_get_int32_sizen(xdata, GF_GFIDLESS_LOOKUP, &gfidless); op_ret = -1; if (gf_uuid_is_null(loc->pargfid) || (loc->name == NULL)) { /* nameless lookup */ MAKE_INODE_HANDLE(real_path, this, loc, &buf); } else { MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &buf); - + if (!real_path || !par_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } if (gf_uuid_is_null(loc->inode->gfid)) { op_ret = posix_gfid_heal(this, real_path, loc, xdata); if (op_ret < 0) { @@ -235,12 +246,12 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) if (!op_errno) op_errno = ESTALE; loc_gfid(loc, gfid); - MAKE_HANDLE_ABSPATH(gfid_path, 
this, gfid); - ret = sys_stat(gfid_path, &statbuf); + MAKE_HANDLE_ABSPATH_FD(gfid_path, this, gfid, dfd); + ret = sys_fstatat(dfd, gfid_path, &statbuf, 0); if (ret == 0 && ((statbuf.st_mode & S_IFMT) == S_IFDIR)) /*Don't unset if it was a symlink to a dir.*/ goto parent; - ret = sys_lstat(gfid_path, &statbuf); + ret = sys_fstatat(dfd, gfid_path, &statbuf, AT_SYMLINK_NOFOLLOW); if (ret == 0 && statbuf.st_nlink == 1) { gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_HANDLE_DELETE, @@ -259,7 +270,7 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) posix_cs_maintenance(this, NULL, loc, NULL, &buf, real_path, xdata, &xattr, _gf_true); - if (dict_get(xdata, GF_CLEAN_WRITE_PROTECTION)) { + if (dict_get_sizen(xdata, GF_CLEAN_WRITE_PROTECTION)) { ret = sys_lremovexattr(real_path, GF_PROTECT_FROM_EXTERNAL_WRITES); if (ret == -1 && (errno != ENODATA && errno != ENOATTR)) gf_msg(this->name, GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED, errno, @@ -268,6 +279,7 @@ posix_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) } } + posix_update_iatt_buf(&buf, -1, real_path, xdata); if (priv->update_pgfid_nlinks) { if (!gf_uuid_is_null(loc->pargfid) && !IA_ISDIR(buf.ia_type)) { MAKE_PGFID_XATTR_KEY(pgfid_xattr_key, PGFID_XATTR_KEY_PREFIX, @@ -332,6 +344,38 @@ out: return 0; } +static int32_t +posix_set_gfid2path_xattr(xlator_t *this, const char *path, uuid_t pgfid, + const char *bname) +{ + char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = { + 0, + }; + char pgfid_bname[1024] = { + 0, + }; + char *key = NULL; + const size_t key_size = GFID2PATH_XATTR_KEY_PREFIX_LENGTH + + GF_XXH64_DIGEST_LENGTH * 2 + 1; + int ret = 0; + int len; + + len = snprintf(pgfid_bname, sizeof(pgfid_bname), "%s/%s", uuid_utoa(pgfid), + bname); + gf_xxh64_wrapper((unsigned char *)pgfid_bname, len, + GF_XXHSUM64_DEFAULT_SEED, xxh64); + key = alloca(key_size); + snprintf(key, key_size, GFID2PATH_XATTR_KEY_PREFIX "%s", xxh64); + + ret = sys_lsetxattr(path, key, pgfid_bname, 
len, XATTR_CREATE); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PGFID_OP, + "setting gfid2path xattr failed on %s: key = %s ", path, key); + } + + return ret; +} + int posix_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata) @@ -371,7 +415,8 @@ posix_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, priv = this->private; VALIDATE_OR_GOTO(priv, out); - GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, out); + GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, + uuid_req, out); MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, NULL); mode_bit = (priv->create_mask & mode) | priv->force_create_mode; @@ -403,16 +448,22 @@ posix_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, /* Check if the 'gfid' already exists, because this mknod may be an internal call from distribute for creating 'linkfile', and that linkfile may be for a hardlinked file */ - if (dict_get(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { - dict_del(xdata, GLUSTERFS_INTERNAL_FOP_KEY); - op_ret = dict_get_gfuuid(xdata, "gfid-req", &uuid_req); - if (op_ret) { - gf_msg_debug(this->name, 0, - "failed to get the gfid from " - "dict for %s", - loc->path); - goto real_op; + if (dict_get_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { + dict_del_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY); + /* trash xlator did not bring the uuid_via the call + * to GFID_NULL_CHECK_AND_GOTO() above. + * Fetch it explicitly here. 
+ */ + if (frame->root->pid == GF_SERVER_PID_TRASH) { + op_ret = dict_get_gfuuid(xdata, "gfid-req", &uuid_req); + if (op_ret) { + gf_msg_debug(this->name, 0, + "failed to get the gfid from dict for %s", + loc->path); + goto real_op; + } } + op_ret = posix_create_link_if_gfid_exists(this, uuid_req, real_path, loc->inode->table); if (!op_ret) { @@ -495,7 +546,7 @@ post_op: posix_set_gfid2path_xattr(this, real_path, loc->pargfid, loc->name); } - op_ret = posix_entry_create_xattr_set(this, real_path, xdata); + op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata); if (op_ret) { if (errno != EEXIST) gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, @@ -570,6 +621,7 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, int32_t op_errno = 0; char *real_path = NULL, *gfid_path = NULL; char *par_path = NULL, *xattr_name = NULL; + int xattr_name_len; struct iatt stbuf = { 0, }; @@ -587,13 +639,9 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, }; ssize_t size = 0; dict_t *xdata_rsp = NULL; - void *disk_xattr = NULL; + char *disk_xattr = NULL; data_t *arg_data = NULL; char pgfid[GF_UUID_BUF_SIZE] = {0}; - char value_buf[4096] = { - 0, - }; - gf_boolean_t have_val = _gf_false; mode_t mode_bit = 0; DECLARE_OLD_FS_ID_VAR; @@ -615,9 +663,23 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, goto out; } +#ifdef __NetBSD__ + /* Same for NetBSD's .attribute directory */ + if (__is_root_gfid(loc->pargfid) && + (strcmp(loc->name, ".attribute") == 0)) { + gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_MKDIR_NOT_PERMITTED, + "mkdir issued on .attribute, which" + "is not permitted"); + op_errno = EPERM; + op_ret = -1; + goto out; + } +#endif + priv = this->private; VALIDATE_OR_GOTO(priv, out); - GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, out); + GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, + uuid_req, out); DISK_SPACE_CHECK_AND_GOTO(frame, 
priv, xdata, op_ret, op_errno, out); MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, NULL); @@ -627,11 +689,6 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, goto out; } - if (loc->parent) - gf_uuid_unparse(loc->parent->gfid, pgfid); - else - gf_uuid_unparse(loc->pargfid, pgfid); - gid = frame->root->gid; op_ret = posix_pstat(this, loc->inode, NULL, real_path, &stbuf, _gf_false); @@ -643,8 +700,7 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, mode = posix_override_umask(mode, mode_bit); if (xdata) { - op_ret = dict_get_gfuuid(xdata, "gfid-req", &uuid_req); - if (!op_ret && !gf_uuid_compare(stbuf.ia_gfid, uuid_req)) { + if (!gf_uuid_compare(stbuf.ia_gfid, uuid_req)) { op_ret = -1; op_errno = EEXIST; goto out; @@ -654,12 +710,13 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, if (!gf_uuid_is_null(uuid_req)) { op_ret = posix_istat(this, loc->inode, uuid_req, NULL, &stbuf); if ((op_ret == 0) && IA_ISDIR(stbuf.ia_type)) { - size = posix_handle_path(this, uuid_req, NULL, NULL, 0); - if (size > 0) - gfid_path = alloca(size); - - if (gfid_path) - posix_handle_path(this, uuid_req, NULL, gfid_path, size); + gfid_path = alloca(PATH_MAX); + size = posix_handle_path(this, uuid_req, NULL, gfid_path, PATH_MAX); + if (size <= 0) { + op_errno = ESTALE; + op_ret = -1; + goto out; + } if (frame->root->pid != GF_CLIENT_PID_SELF_HEALD) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DIR_OF_SAME_ID, @@ -708,25 +765,53 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, mode |= S_ISGID; } - op_ret = dict_get_str(xdata, GF_PREOP_PARENT_KEY, &xattr_name); + op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name); if (xattr_name != NULL) { - arg_data = dict_get(xdata, xattr_name); + xattr_name_len = strlen(xattr_name); + arg_data = dict_getn(xdata, xattr_name, xattr_name_len); if (arg_data) { - size = sys_lgetxattr(par_path, xattr_name, value_buf, - sizeof(value_buf) 
- 1); - if (size >= 0) { - have_val = _gf_true; - } else { - if (errno == ERANGE) { - gf_msg(this->name, GF_LOG_INFO, errno, + if (loc->parent) + gf_uuid_unparse(loc->parent->gfid, pgfid); + else + gf_uuid_unparse(loc->pargfid, pgfid); + + size = 256; + disk_xattr = GF_MALLOC(size + 1, gf_posix_mt_char); + if (!disk_xattr) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, + P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): GF_MALLOC failed during" + " preop of mkdir (%s)", + pgfid, loc->name, real_path); + goto out; + } + disk_xattr[size] = '\0'; + + size = sys_lgetxattr(par_path, xattr_name, disk_xattr, size); + if (size == -1) { + if (disk_xattr) { + GF_FREE(disk_xattr); + disk_xattr = NULL; + } + if (errno != ERANGE) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, - "mkdir (%s/%s): getxattr on key " - "(%s) path (%s) failed due to " - " buffer overflow", - pgfid, loc->name, xattr_name, par_path); - size = sys_lgetxattr(par_path, xattr_name, NULL, 0); + "mkdir (%s/%s): getxattr failed during" + " preop of mkdir (%s).", + pgfid, loc->name, real_path); + goto out; } - if (size < 0) { + gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): getxattr on key " + "(%s) path (%s) failed due to " + " buffer overflow", + pgfid, loc->name, xattr_name, par_path); + size = sys_lgetxattr(par_path, xattr_name, NULL, 0); + if (size == -1) { op_ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, @@ -736,23 +821,20 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, pgfid, loc->name, xattr_name, par_path); goto out; } - } - disk_xattr = alloca(size); - if (disk_xattr == NULL) { - op_ret = -1; - op_errno = errno; - gf_msg(this->name, GF_LOG_ERROR, errno, - P_MSG_PREOP_CHECK_FAILED, - "mkdir (%s/%s): alloca failed during" - " preop of mkdir (%s)", - pgfid, loc->name, real_path); - goto out; - } - if (have_val) { - memcpy(disk_xattr, 
value_buf, size); - } else { + disk_xattr = GF_MALLOC(size + 1, gf_posix_mt_char); + if (!disk_xattr) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, + P_MSG_PREOP_CHECK_FAILED, + "mkdir (%s/%s): GF_MALLOC failed during" + " preop of mkdir (%s)", + pgfid, loc->name, real_path); + goto out; + } + disk_xattr[size] = '\0'; size = sys_lgetxattr(par_path, xattr_name, disk_xattr, size); - if (size < 0) { + if (size == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, @@ -787,13 +869,15 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, } op_errno = dict_set_int8(xdata_rsp, GF_PREOP_CHECK_FAILED, 1); + if (op_errno < 0) + op_errno = errno; goto out; } - dict_del(xdata, xattr_name); + dict_deln(xdata, xattr_name, xattr_name_len); } - dict_del(xdata, GF_PREOP_PARENT_KEY); + dict_del_sizen(xdata, GF_PREOP_PARENT_KEY); } op_ret = sys_mkdir(real_path, mode); @@ -821,7 +905,7 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, "setting ACLs on %s failed ", real_path); } - op_ret = posix_entry_create_xattr_set(this, real_path, xdata); + op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata); if (op_ret) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "setting xattrs on %s failed", real_path); @@ -863,6 +947,9 @@ posix_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, out: SET_TO_OLD_FS_ID(); + if (disk_xattr) + GF_FREE(disk_xattr); + if (op_ret < 0) { if (entry_created) sys_rmdir(real_path); @@ -881,7 +968,7 @@ out: return 0; } -int +static int posix_add_unlink_to_ctx(inode_t *inode, xlator_t *this, char *unlink_path) { uint64_t ctx = GF_UNLINK_FALSE; @@ -889,7 +976,7 @@ posix_add_unlink_to_ctx(inode_t *inode, xlator_t *this, char *unlink_path) if (!unlink_path) { gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED, - "Creation of unlink entry failed for gfid: %s", unlink_path); + "Creation of unlink entry 
failed"); ret = -1; goto out; } @@ -904,17 +991,17 @@ out: return ret; } -int32_t +static int32_t posix_move_gfid_to_unlink(xlator_t *this, uuid_t gfid, loc_t *loc) { char *unlink_path = NULL; char *gfid_path = NULL; - int ret = 0; + int ret = -1; struct posix_private *priv_posix = NULL; priv_posix = (struct posix_private *)this->private; - MAKE_HANDLE_GFID_PATH(gfid_path, this, gfid, NULL); + MAKE_HANDLE_GFID_PATH(gfid_path, this, gfid); POSIX_GET_FILE_UNLINK_PATH(priv_posix->base_path, loc->inode->gfid, unlink_path); @@ -938,7 +1025,7 @@ out: return ret; } -int32_t +static int32_t posix_unlink_gfid_handle_and_entry(call_frame_t *frame, xlator_t *this, const char *real_path, struct iatt *stbuf, int32_t *op_errno, loc_t *loc, @@ -981,6 +1068,8 @@ posix_unlink_gfid_handle_and_entry(call_frame_t *frame, xlator_t *this, */ ret = posix_pstat(this, NULL, loc->gfid, real_path, &prebuf, _gf_true); if (ret) { + UNLOCK(&loc->inode->lock); + locked = _gf_false; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, "lstat on %s failed", real_path); goto err; @@ -989,6 +1078,12 @@ posix_unlink_gfid_handle_and_entry(call_frame_t *frame, xlator_t *this, /* Unlink the actual file */ ret = sys_unlink(real_path); + + if (locked) { + UNLOCK(&loc->inode->lock); + locked = _gf_false; + } + if (ret == -1) { if (op_errno) *op_errno = errno; @@ -997,11 +1092,6 @@ posix_unlink_gfid_handle_and_entry(call_frame_t *frame, xlator_t *this, goto err; } - if (locked) { - UNLOCK(&loc->inode->lock); - locked = _gf_false; - } - if (update_ctime) { posix_set_ctime(frame, this, NULL, -1, loc->inode, stbuf); } @@ -1021,10 +1111,10 @@ err: return -1; } -gf_boolean_t +static gf_boolean_t posix_skip_non_linkto_unlink(dict_t *xdata, loc_t *loc, char *key, - const char *linkto_xattr, struct iatt *stbuf, - const char *real_path) + const int keylen, const char *linkto_xattr, + struct iatt *stbuf, const char *real_path) { gf_boolean_t skip_unlink = _gf_false; gf_boolean_t is_dht_linkto_file = _gf_false; 
@@ -1032,7 +1122,7 @@ posix_skip_non_linkto_unlink(dict_t *xdata, loc_t *loc, char *key, ssize_t xattr_size = -1; int op_ret = -1; - op_ret = dict_get_int32(xdata, key, &unlink_if_linkto); + op_ret = dict_get_int32n(xdata, key, keylen, &unlink_if_linkto); if (!op_ret && unlink_if_linkto) { is_dht_linkto_file = IS_DHT_LINKFILE_MODE(stbuf); @@ -1043,11 +1133,11 @@ posix_skip_non_linkto_unlink(dict_t *xdata, loc_t *loc, char *key, xattr_size = sys_lgetxattr(real_path, linkto_xattr, NULL, 0); + UNLOCK(&loc->inode->lock); + if (xattr_size <= 0) skip_unlink = _gf_true; - UNLOCK(&loc->inode->lock); - gf_msg("posix", GF_LOG_INFO, 0, P_MSG_XATTR_STATUS, "linkto_xattr status: %" PRIu32 " for %s", skip_unlink, real_path); @@ -1055,6 +1145,38 @@ posix_skip_non_linkto_unlink(dict_t *xdata, loc_t *loc, char *key, return skip_unlink; } +static int32_t +posix_remove_gfid2path_xattr(xlator_t *this, const char *path, uuid_t pgfid, + const char *bname) +{ + char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = { + 0, + }; + char pgfid_bname[1024] = { + 0, + }; + int ret = 0; + char *key = NULL; + const size_t key_size = GFID2PATH_XATTR_KEY_PREFIX_LENGTH + + GF_XXH64_DIGEST_LENGTH * 2 + 1; + int len; + + len = snprintf(pgfid_bname, sizeof(pgfid_bname), "%s/%s", uuid_utoa(pgfid), + bname); + gf_xxh64_wrapper((unsigned char *)pgfid_bname, len, + GF_XXHSUM64_DEFAULT_SEED, xxh64); + key = alloca(key_size); + snprintf(key, key_size, GFID2PATH_XATTR_KEY_PREFIX "%s", xxh64); + + ret = sys_lremovexattr(path, key); + if (ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PGFID_OP, + "removing gfid2path xattr failed on %s: key = %s", path, key); + } + + return ret; +} + int32_t posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, dict_t *xdata) @@ -1064,6 +1186,7 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, char *real_path = NULL; char *par_path = NULL; int32_t fd = -1; + int ret = -1; struct iatt stbuf = { 0, }; @@ -1083,9 +1206,6 @@ 
posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, int32_t skip_unlink = 0; int32_t fdstat_requested = 0; dict_t *unwind_dict = NULL; - void *uuid = NULL; - char uuid_str[GF_UUID_BUF_SIZE] = {0}; - char gfid_str[GF_UUID_BUF_SIZE] = {0}; gf_boolean_t get_link_count = _gf_false; posix_inode_ctx_t *ctx = NULL; @@ -1115,22 +1235,8 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, priv = this->private; - op_ret = dict_get_ptr(xdata, TIER_LINKFILE_GFID, &uuid); - - if (!op_ret && gf_uuid_compare(uuid, stbuf.ia_gfid)) { - op_errno = ENOENT; - op_ret = -1; - gf_uuid_unparse(uuid, uuid_str); - gf_uuid_unparse(stbuf.ia_gfid, gfid_str); - gf_msg_debug(this->name, op_errno, - "Mismatch in gfid for path " - "%s. Aborting the unlink. loc->gfid = %s, " - "stbuf->ia_gfid = %s", - real_path, uuid_str, gfid_str); - goto out; - } - - op_ret = dict_get_int32(xdata, DHT_SKIP_OPEN_FD_UNLINK, &check_open_fd); + op_ret = dict_get_int32_sizen(xdata, DHT_SKIP_OPEN_FD_UNLINK, + &check_open_fd); if (!op_ret && check_open_fd) { LOCK(&loc->inode->lock); @@ -1157,10 +1263,8 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, * we don't need to call second function, skip unlink. 
*/ skip_unlink = posix_skip_non_linkto_unlink( - xdata, loc, DHT_SKIP_NON_LINKTO_UNLINK, DHT_LINKTO, &stbuf, real_path); - skip_unlink = skip_unlink || posix_skip_non_linkto_unlink( - xdata, loc, TIER_SKIP_NON_LINKTO_UNLINK, - TIER_LINKTO, &stbuf, real_path); + xdata, loc, DHT_SKIP_NON_LINKTO_UNLINK, + SLEN(DHT_SKIP_NON_LINKTO_UNLINK), DHT_LINKTO, &stbuf, real_path); if (skip_unlink) { op_ret = -1; op_errno = EBUSY; @@ -1168,7 +1272,7 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, } if (IA_ISREG(loc->inode->ia_type) && xdata && - dict_get(xdata, DHT_IATT_IN_XDATA_KEY)) { + dict_get_sizen(xdata, DHT_IATT_IN_XDATA_KEY)) { fdstat_requested = 1; } @@ -1223,12 +1327,20 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, unwind_dict = dict_new(); if (!unwind_dict) { - op_errno = -ENOMEM; + op_errno = ENOMEM; op_ret = -1; goto out; } - if (xdata && dict_get(xdata, GET_LINK_COUNT)) + if (xdata && dict_get_sizen(xdata, GF_GET_FILE_BLOCK_COUNT)) { + ret = dict_set_uint64(unwind_dict, GF_GET_FILE_BLOCK_COUNT, + stbuf.ia_blocks); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_SET_XDATA_FAIL, + "Failed to set %s in rsp dict", GF_GET_FILE_BLOCK_COUNT); + } + + if (xdata && dict_get_sizen(xdata, GET_LINK_COUNT)) get_link_count = _gf_true; op_ret = posix_unlink_gfid_handle_and_entry(frame, this, real_path, &stbuf, &op_errno, loc, get_link_count, @@ -1248,6 +1360,11 @@ posix_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, goto out; } op_ret = posix_set_iatt_in_dict(unwind_dict, NULL, &postbuf); + if (op_ret == -1) { + op_errno = ENOMEM; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_DICT_SET_FAILED, + "failed to set fdstat in dict"); + } } op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &postparent, @@ -1325,6 +1442,19 @@ posix_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, goto out; } +#ifdef __NetBSD__ + /* Same for NetBSD's .attribute directory */ + if 
(__is_root_gfid(loc->pargfid) && + (strcmp(loc->name, ".attribute") == 0)) { + gf_msg(this->name, GF_LOG_WARNING, EPERM, P_MSG_RMDIR_NOT_PERMITTED, + "rmdir issued on .attribute, which" + "is not permitted"); + op_errno = EPERM; + op_ret = -1; + goto out; + } +#endif + priv = this->private; MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf); @@ -1344,18 +1474,16 @@ posix_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, } if (flags) { - gfid_str = uuid_utoa(stbuf.ia_gfid); - op_ret = sys_mkdir(priv->trash_path, 0755); if (errno != EEXIST && op_ret == -1) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_MKDIR_FAILED, "mkdir of %s failed", priv->trash_path); } else { + gfid_str = uuid_utoa(stbuf.ia_gfid); (void)snprintf(tmp_path, sizeof(tmp_path), "%s/%s", priv->trash_path, gfid_str); gf_msg_debug(this->name, 0, "Moving %s to %s", real_path, tmp_path); op_ret = sys_rename(real_path, tmp_path); - pthread_cond_signal(&priv->janitor_cond); } } else { op_ret = sys_rmdir(real_path); @@ -1431,6 +1559,9 @@ posix_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, char *pgfid_xattr_key = NULL; int32_t nlink_samepgfid = 0; gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; + uuid_t uuid_req = { + 0, + }; DECLARE_OLD_FS_ID_VAR; @@ -1441,7 +1572,8 @@ posix_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, priv = this->private; VALIDATE_OR_GOTO(priv, out); - GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, out); + GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, + uuid_req, out); DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf); @@ -1509,7 +1641,7 @@ posix_symlink(call_frame_t *frame, xlator_t *this, const char *linkname, } ignore: - op_ret = posix_entry_create_xattr_set(this, real_path, xdata); + op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata); if (op_ret) { gf_msg(this->name, 
GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "setting xattrs on %s failed ", real_path); @@ -1614,7 +1746,6 @@ posix_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, priv = this->private; VALIDATE_OR_GOTO(priv, out); - DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); SET_FS_ID(frame->root->uid, frame->root->gid); MAKE_ENTRY_HANDLE(real_oldpath, par_oldpath, this, oldloc, NULL); @@ -2050,6 +2181,11 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, char *pgfid_xattr_key = NULL; gf_boolean_t entry_created = _gf_false, gfid_set = _gf_false; mode_t mode_bit = 0; + uuid_t uuid_req = { + 0, + }; + + dict_t *xdata_rsp = dict_ref(xdata); DECLARE_OLD_FS_ID_VAR; @@ -2061,7 +2197,8 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, priv = this->private; VALIDATE_OR_GOTO(priv, out); - GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, out); + GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xdata, op_ret, op_errno, + uuid_req, out); DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf); @@ -2099,6 +2236,28 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, was_present = 0; } + if (!was_present) { + if (posix_is_layout_stale(xdata, par_path, this)) { + op_ret = -1; + op_errno = EIO; + if (!xdata_rsp) { + xdata_rsp = dict_new(); + if (!xdata_rsp) { + op_errno = ENOMEM; + goto out; + } + } + + if (dict_set_int32_sizen(xdata_rsp, GF_PREOP_CHECK_FAILED, 1) == + -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DICT_SET_FAILED, + "setting key %s in dict failed", GF_PREOP_CHECK_FAILED); + } + + goto out; + } + } + if (priv->o_direct) _flags |= O_DIRECT; @@ -2147,7 +2306,7 @@ posix_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, posix_set_gfid2path_xattr(this, real_path, loc->pargfid, loc->name); } ignore: - op_ret = 
posix_entry_create_xattr_set(this, real_path, xdata); + op_ret = posix_entry_create_xattr_set(this, loc, real_path, xdata); if (op_ret) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "setting xattrs on %s failed ", real_path); @@ -2200,12 +2359,6 @@ fill_stat: gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, "failed to set the fd context path=%s fd=%p", real_path, fd); - LOCK(&priv->lock); - { - priv->nr_files++; - } - UNLOCK(&priv->lock); - op_ret = 0; out: @@ -2224,7 +2377,10 @@ out: STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, (loc) ? loc->inode : NULL, &stbuf, &preparent, - &postparent, xdata); + &postparent, xdata_rsp); + + if (xdata_rsp) + dict_unref(xdata_rsp); return 0; } @@ -2255,6 +2411,12 @@ posix_put(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, MAKE_ENTRY_HANDLE(real_path, par_path, this, loc, &stbuf); + if (!real_path || !par_path) { + op_ret = -1; + op_errno = ESTALE; + goto out; + } + op_ret = posix_pstat(this, loc->parent, loc->pargfid, par_path, &preparent, _gf_false); if (op_ret < 0) { @@ -2293,8 +2455,8 @@ posix_put(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, goto out; } - op_ret = syncop_writev(this, fd, vector, count, offset, iobref, flags, - xdata, NULL); + op_ret = syncop_writev(this, fd, vector, count, offset, iobref, flags, NULL, + NULL, xdata, NULL); if (op_ret < 0) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_WRITE_FAILED, diff --git a/xlators/storage/posix/src/posix-gfid-path.c b/xlators/storage/posix/src/posix-gfid-path.c index de8b4d70c07..1b38e9b0479 100644 --- a/xlators/storage/posix/src/posix-gfid-path.c +++ b/xlators/storage/posix/src/posix-gfid-path.c @@ -8,102 +8,24 @@ cases as published by the Free Software Foundation. 
*/ -#include "common-utils.h" -#include "xlator.h" -#include "syscall.h" -#include "logging.h" +#include <stdint.h> + +#include <glusterfs/compat-errno.h> +#include <glusterfs/syscall.h> +#include <glusterfs/logging.h> #include "posix-messages.h" #include "posix-mem-types.h" #include "posix-gfid-path.h" #include "posix.h" -int32_t -posix_set_gfid2path_xattr(xlator_t *this, const char *path, uuid_t pgfid, - const char *bname) -{ - char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = { - 0, - }; - char pgfid_bname[1024] = { - 0, - }; - char *key = NULL; - char *val = NULL; - size_t key_size = 0; - size_t val_size = 0; - int ret = 0; - - GF_VALIDATE_OR_GOTO("posix", this, err); - - snprintf(pgfid_bname, sizeof(pgfid_bname), "%s/%s", uuid_utoa(pgfid), - bname); - gf_xxh64_wrapper((unsigned char *)pgfid_bname, strlen(pgfid_bname), - GF_XXHSUM64_DEFAULT_SEED, xxh64); - key_size = GFID2PATH_XATTR_KEY_PREFIX_LENGTH + GF_XXH64_DIGEST_LENGTH * 2 + - 1; - key = alloca(key_size); - snprintf(key, key_size, GFID2PATH_XATTR_KEY_PREFIX "%s", xxh64); - - val_size = UUID_CANONICAL_FORM_LEN + NAME_MAX + 2; - val = alloca(val_size); - snprintf(val, val_size, "%s/%s", uuid_utoa(pgfid), bname); - - ret = sys_lsetxattr(path, key, val, strlen(val), XATTR_CREATE); - if (ret == -1) { - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PGFID_OP, - "setting gfid2path xattr failed on %s: key = %s ", path, key); - goto err; - } - return 0; -err: - return -1; -} - -int32_t -posix_remove_gfid2path_xattr(xlator_t *this, const char *path, uuid_t pgfid, - const char *bname) -{ - char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = { - 0, - }; - char pgfid_bname[1024] = { - 0, - }; - int ret = 0; - char *key = NULL; - size_t key_size = 0; - - GF_VALIDATE_OR_GOTO("posix", this, err); - - snprintf(pgfid_bname, sizeof(pgfid_bname), "%s/%s", uuid_utoa(pgfid), - bname); - gf_xxh64_wrapper((unsigned char *)pgfid_bname, strlen(pgfid_bname), - GF_XXHSUM64_DEFAULT_SEED, xxh64); - key_size = GFID2PATH_XATTR_KEY_PREFIX_LENGTH + 
GF_XXH64_DIGEST_LENGTH * 2 + - 1; - key = alloca(key_size); - snprintf(key, key_size, GFID2PATH_XATTR_KEY_PREFIX "%s", xxh64); - - ret = sys_lremovexattr(path, key); - if (ret == -1) { - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PGFID_OP, - "removing gfid2path xattr failed on %s: key = %s", path, key); - goto err; - } - return 0; -err: - return -1; -} - gf_boolean_t posix_is_gfid2path_xattr(const char *name) { if (name && strncmp(GFID2PATH_XATTR_KEY_PREFIX, name, - GFID2PATH_XATTR_KEY_PREFIX_LENGTH) == 0) { + GFID2PATH_XATTR_KEY_PREFIX_LENGTH) == 0) return _gf_true; - } else { - return _gf_false; - } + + return _gf_false; } static int gf_posix_xattr_enotsup_log; @@ -213,7 +135,8 @@ posix_get_gfid2path(xlator_t *this, inode_t *inode, const char *real_path, remaining_size = size; list_offset = 0; while (remaining_size > 0) { - snprintf(keybuffer, sizeof(keybuffer), "%s", list + list_offset); + len = snprintf(keybuffer, sizeof(keybuffer), "%s", + list + list_offset); if (!posix_is_gfid2path_xattr(keybuffer)) { goto ignore; @@ -243,7 +166,6 @@ posix_get_gfid2path(xlator_t *this, inode_t *inode, const char *real_path, i++; ignore: - len = strlen(keybuffer); remaining_size -= (len + 1); list_offset += (len + 1); } /* while (remaining_size > 0) */ diff --git a/xlators/storage/posix/src/posix-gfid-path.h b/xlators/storage/posix/src/posix-gfid-path.h index 21f849dd276..79096e5893f 100644 --- a/xlators/storage/posix/src/posix-gfid-path.h +++ b/xlators/storage/posix/src/posix-gfid-path.h @@ -11,16 +11,15 @@ #ifndef _POSIX_GFID_PATH_H #define _POSIX_GFID_PATH_H -#include "xlator.h" -#include "common-utils.h" -#include "compat-errno.h" +#include <glusterfs/compat-errno.h> +#include <stdint.h> // for int32_t +#include "glusterfs/dict.h" // for dict_t +#include "glusterfs/glusterfs.h" // for gf_boolean_t +#include "glusterfs/inode.h" // for inode_t +#include "uuid.h" // for uuid_t #define MAX_GFID2PATH_LINK_SUP 500 -int32_t -posix_set_gfid2path_xattr(xlator_t *, const char 
*, uuid_t, const char *); -int32_t -posix_remove_gfid2path_xattr(xlator_t *, const char *, uuid_t, const char *); gf_boolean_t posix_is_gfid2path_xattr(const char *name); int32_t diff --git a/xlators/storage/posix/src/posix-handle.c b/xlators/storage/posix/src/posix-handle.c index 5c50d413640..410b38da8cb 100644 --- a/xlators/storage/posix/src/posix-handle.c +++ b/xlators/storage/posix/src/posix-handle.c @@ -16,19 +16,16 @@ #include <alloca.h> #endif -#include "common-utils.h" - #include "posix-handle.h" #include "posix.h" -#include "xlator.h" -#include "syscall.h" +#include <glusterfs/syscall.h> #include "posix-messages.h" #include "posix-metadata.h" -#include "compat-errno.h" +#include <glusterfs/compat-errno.h> int -posix_handle_mkdir_hashes(xlator_t *this, const char *newpath); +posix_handle_mkdir_hashes(xlator_t *this, int dfd, uuid_t gfid); inode_t * posix_resolve(xlator_t *this, inode_table_t *itable, inode_t *parent, @@ -266,9 +263,7 @@ posix_handle_relpath(xlator_t *this, uuid_t gfid, const char *basename, char *uuid_str = NULL; int len = 0; - len = SLEN("../") + SLEN("../") + SLEN("00/") + SLEN("00/") + - SLEN(UUID0_STR) + 1 /* '\0' */ - ; + len = POSIX_GFID_HANDLE_RELSIZE; if (basename) { len += (strlen(basename) + 1); @@ -336,9 +331,23 @@ posix_handle_pump(xlator_t *this, char *buf, int len, int maxlen, int ret = 0; int blen = 0; int link_len = 0; + char tmpstr[POSIX_GFID_HASH2_LEN] = { + 0, + }; + char d2[3] = { + 0, + }; + int index = 0; + int dirfd = 0; + struct posix_private *priv = this->private; + + strncpy(tmpstr, (base_str + pfx_len + 3), 40); + strncpy(d2, (base_str + pfx_len), 2); + index = strtoul(d2, NULL, 16); + dirfd = priv->arrdfd[index]; /* is a directory's symlink-handle */ - ret = sys_readlink(base_str, linkname, 512); + ret = readlinkat(dirfd, tmpstr, linkname, 512); if (ret == -1) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READLINK_FAILED, "internal readlink failed on %s ", base_str); @@ -403,6 +412,11 @@ 
posix_handle_path(xlator_t *this, uuid_t gfid, const char *basename, char *ubuf, int pfx_len; int maxlen; char *buf; + int index = 0; + int dfd = 0; + char newstr[POSIX_GFID_HASH2_LEN] = { + 0, + }; priv = this->private; @@ -416,12 +430,14 @@ posix_handle_path(xlator_t *this, uuid_t gfid, const char *basename, char *ubuf, buf = alloca(maxlen); } + index = gfid[0]; + dfd = priv->arrdfd[index]; + base_len = (priv->base_path_length + SLEN(GF_HIDDEN_PATH) + 45); base_str = alloca(base_len + 1); base_len = snprintf(base_str, base_len + 1, "%s/%s/%02x/%02x/%s", priv->base_path, GF_HIDDEN_PATH, gfid[0], gfid[1], uuid_str); - pfx_len = priv->base_path_length + 1 + SLEN(GF_HIDDEN_PATH) + 1; if (basename) { @@ -430,7 +446,8 @@ posix_handle_path(xlator_t *this, uuid_t gfid, const char *basename, char *ubuf, len = snprintf(buf, maxlen, "%s", base_str); } - ret = sys_lstat(base_str, &stat); + snprintf(newstr, sizeof(newstr), "%02x/%s", gfid[1], uuid_str); + ret = sys_fstatat(dfd, newstr, &stat, AT_SYMLINK_NOFOLLOW); if (!(ret == 0 && S_ISLNK(stat.st_mode) && stat.st_nlink == 1)) goto out; @@ -443,7 +460,6 @@ posix_handle_path(xlator_t *this, uuid_t gfid, const char *basename, char *ubuf, if (ret == -1) break; - ret = sys_lstat(buf, &stat); } while ((ret == -1) && errno == ELOOP); @@ -452,8 +468,7 @@ out: } int -posix_handle_gfid_path(xlator_t *this, uuid_t gfid, const char *basename, - char *buf, size_t buflen) +posix_handle_gfid_path(xlator_t *this, uuid_t gfid, char *buf, size_t buflen) { struct posix_private *priv = NULL; char *uuid_str = NULL; @@ -461,16 +476,9 @@ posix_handle_gfid_path(xlator_t *this, uuid_t gfid, const char *basename, priv = this->private; - len = priv->base_path_length /* option directory "/export" */ - + SLEN("/") + SLEN(GF_HIDDEN_PATH) + SLEN("/") + SLEN("00/") + - SLEN("00/") + SLEN(UUID0_STR) + 1 /* '\0' */ - ; + len = POSIX_GFID_HANDLE_SIZE(priv->base_path_length); - if (basename) { - len += (strlen(basename) + 1); - } else { - len += 256; /* 
worst-case for directory's symlink-handle expansion */ - } + len += 256; /* worst-case for directory's symlink-handle expansion */ if ((buflen < len) || !buf) return len; @@ -478,22 +486,12 @@ posix_handle_gfid_path(xlator_t *this, uuid_t gfid, const char *basename, uuid_str = uuid_utoa(gfid); if (__is_root_gfid(gfid)) { - if (basename) { - len = snprintf(buf, buflen, "%s/%s", priv->base_path, basename); - } else { - len = snprintf(buf, buflen, "%s", priv->base_path); - } - goto out; - } - - if (basename) { - len = snprintf(buf, buflen, "%s/%s/%02x/%02x/%s/%s", priv->base_path, - GF_HIDDEN_PATH, gfid[0], gfid[1], uuid_str, basename); + len = snprintf(buf, buflen, "%s", priv->base_path); } else { len = snprintf(buf, buflen, "%s/%s/%02x/%02x/%s", priv->base_path, GF_HIDDEN_PATH, gfid[0], gfid[1], uuid_str); } -out: + return len; } @@ -507,7 +505,8 @@ posix_handle_init(xlator_t *this) struct stat rootbuf; struct stat exportbuf; char *rootstr = NULL; - uuid_t gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + static uuid_t gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + int dfd = 0; priv = this->private; @@ -557,9 +556,8 @@ posix_handle_init(xlator_t *this) return -1; } - MAKE_HANDLE_ABSPATH(rootstr, this, gfid); - - ret = sys_stat(rootstr, &rootbuf); + MAKE_HANDLE_ABSPATH_FD(rootstr, this, gfid, dfd); + ret = sys_fstatat(dfd, rootstr, &rootbuf, 0); switch (ret) { case -1: if (errno != ENOENT) { @@ -567,15 +565,14 @@ posix_handle_init(xlator_t *this) "%s", priv->base_path); return -1; } - - ret = posix_handle_mkdir_hashes(this, rootstr); + ret = posix_handle_mkdir_hashes(this, dfd, gfid); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, "mkdir %s failed", rootstr); return -1; } - ret = sys_symlink("../../..", rootstr); + ret = sys_symlinkat("../../..", dfd, rootstr); if (ret) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, "symlink %s creation failed", rootstr); @@ -704,30 +701,18 @@ out: } int 
-posix_handle_mkdir_hashes(xlator_t *this, const char *newpath) +posix_handle_mkdir_hashes(xlator_t *this, int dirfd, uuid_t gfid) { - char *duppath = NULL; - char *parpath = NULL; - int ret = 0; - - duppath = strdupa(newpath); - parpath = dirname(duppath); - parpath = dirname(duppath); - - ret = sys_mkdir(parpath, 0700); - if (ret == -1 && errno != EEXIST) { - gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, - "error mkdir hash-1 %s ", parpath); - return -1; - } - - strcpy(duppath, newpath); - parpath = dirname(duppath); + int ret = -1; + char d2[3] = { + 0, + }; - ret = sys_mkdir(parpath, 0700); + snprintf(d2, sizeof(d2), "%02x", gfid[1]); + ret = sys_mkdirat(dirfd, d2, 0700); if (ret == -1 && errno != EEXIST) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, - "error mkdir hash-2 %s ", parpath); + "error mkdir hash-2 %s ", uuid_utoa(gfid)); return -1; } @@ -738,51 +723,59 @@ int posix_handle_hard(xlator_t *this, const char *oldpath, uuid_t gfid, struct stat *oldbuf) { - char *newpath = NULL; struct stat newbuf; + struct stat hashbuf; int ret = -1; gf_boolean_t link_exists = _gf_false; + char d2[3] = { + 0, + }; + int dfd = -1; + char *newstr = NULL; - MAKE_HANDLE_ABSPATH(newpath, this, gfid); + MAKE_HANDLE_ABSPATH_FD(newstr, this, gfid, dfd); + ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW); - ret = sys_lstat(newpath, &newbuf); if (ret == -1 && errno != ENOENT) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, "%s", - newpath); + uuid_utoa(gfid)); return -1; } if (ret == -1 && errno == ENOENT) { - ret = posix_handle_mkdir_hashes(this, newpath); + snprintf(d2, sizeof(d2), "%02x", gfid[1]); + ret = sys_fstatat(dfd, d2, &hashbuf, 0); if (ret) { - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, - "mkdir %s failed ", newpath); - return -1; + ret = posix_handle_mkdir_hashes(this, dfd, gfid); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, + "mkdir %s failed ", 
uuid_utoa(gfid)); + return -1; + } } - - ret = sys_link(oldpath, newpath); + ret = sys_linkat(AT_FDCWD, oldpath, dfd, newstr); if (ret) { if (errno != EEXIST) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, "link %s -> %s" "failed ", - oldpath, newpath); + oldpath, newstr); return -1; } else { link_exists = _gf_true; } } + ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW); - ret = sys_lstat(newpath, &newbuf); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, - "lstat on %s failed", newpath); + "lstat on %s failed", uuid_utoa(gfid)); return -1; } if ((link_exists) && (!S_ISREG(newbuf.st_mode))) { gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_HANDLE_CREATE, - "%s - Expected regular file", newpath); + "%s - Expected regular file", uuid_utoa(gfid)); return -1; } } @@ -792,7 +785,8 @@ posix_handle_hard(xlator_t *this, const char *oldpath, uuid_t gfid, "mismatching ino/dev between file %s (%lld/%lld) " "and handle %s (%lld/%lld)", oldpath, (long long)oldbuf->st_ino, (long long)oldbuf->st_dev, - newpath, (long long)newbuf.st_ino, (long long)newbuf.st_dev); + uuid_utoa(gfid), (long long)newbuf.st_ino, + (long long)newbuf.st_dev); ret = -1; } @@ -806,15 +800,23 @@ posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc, char *oldpath = NULL; char *newpath = NULL; struct stat newbuf; + struct stat hashbuf; int ret = -1; + char d2[3] = { + 0, + }; + int dfd = -1; + char *newstr = NULL; MAKE_HANDLE_ABSPATH(newpath, this, gfid); + MAKE_HANDLE_ABSPATH_FD(newstr, this, gfid, dfd); MAKE_HANDLE_RELPATH(oldpath, this, loc->pargfid, loc->name); - ret = sys_lstat(newpath, &newbuf); + ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW); + if (ret == -1 && errno != ENOENT) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, "%s", - newpath); + newstr); return -1; } @@ -824,24 +826,30 @@ posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc, errno = EINVAL; return -1; } - ret = 
posix_handle_mkdir_hashes(this, newpath); + + snprintf(d2, sizeof(d2), "%02x", gfid[1]); + ret = sys_fstatat(dfd, d2, &hashbuf, 0); + if (ret) { - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, - "mkdir %s failed ", newpath); - return -1; + ret = posix_handle_mkdir_hashes(this, dfd, gfid); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, + "mkdir %s failed ", newstr); + return -1; + } } - - ret = sys_symlink(oldpath, newpath); + ret = sys_symlinkat(oldpath, dfd, newstr); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, - "symlink %s -> %s failed", oldpath, newpath); + "symlink %s -> %s failed", oldpath, newstr); return -1; } - ret = sys_lstat(newpath, &newbuf); + ret = sys_fstatat(dfd, newstr, &newbuf, AT_SYMLINK_NOFOLLOW); + if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, - "stat on %s failed ", newpath); + "stat on %s failed ", newstr); return -1; } } @@ -849,7 +857,7 @@ posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc, ret = sys_stat(real_path, &newbuf); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_CREATE, - "stat on %s failed ", newpath); + "stat on %s failed ", real_path); return -1; } @@ -871,26 +879,33 @@ posix_handle_soft(xlator_t *this, const char *real_path, loc_t *loc, int posix_handle_unset_gfid(xlator_t *this, uuid_t gfid) { - char *path = NULL; int ret = 0; struct stat stat; + int index = 0; + int dfd = 0; + char newstr[POSIX_GFID_HASH2_LEN] = { + 0, + }; + struct posix_private *priv = this->private; - MAKE_HANDLE_GFID_PATH(path, this, gfid, NULL); + index = gfid[0]; + dfd = priv->arrdfd[index]; - ret = sys_lstat(path, &stat); + snprintf(newstr, sizeof(newstr), "%02x/%s", gfid[1], uuid_utoa(gfid)); + ret = sys_fstatat(dfd, newstr, &stat, AT_SYMLINK_NOFOLLOW); if (ret == -1) { if (errno != ENOENT) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_DELETE, "%s", - path); + newstr); } goto out; } - ret = 
sys_unlink(path); - if (ret == -1) { + ret = sys_unlinkat(dfd, newstr); + if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HANDLE_DELETE, - "unlink %s failed ", path); + "unlink %s is failed", newstr); } out: diff --git a/xlators/storage/posix/src/posix-handle.h b/xlators/storage/posix/src/posix-handle.h index c4d7cb14503..f33ed92620d 100644 --- a/xlators/storage/posix/src/posix-handle.h +++ b/xlators/storage/posix/src/posix-handle.h @@ -18,7 +18,7 @@ #define MAKE_PGFID_XATTR_KEY(var, prefix, pgfid) \ do { \ - var = alloca(strlen(prefix) + UUID_CANONICAL_FORM_LEN + 1); \ + var = alloca(SLEN(prefix) + UUID_CANONICAL_FORM_LEN + 1); \ strcpy(var, prefix); \ strcat(var, uuid_utoa(pgfid)); \ } while (0) @@ -111,22 +111,23 @@ } \ } while (0) -#define MAKE_HANDLE_GFID_PATH(var, this, gfid, base) \ +#define MAKE_HANDLE_GFID_PATH(var, this, gfid) \ do { \ int __len = 0; \ - __len = posix_handle_gfid_path(this, gfid, base, NULL, 0); \ - if (__len <= 0) \ - break; \ + struct posix_private *__priv = this->private; \ + __len = POSIX_GFID_HANDLE_SIZE(__priv->base_path_length); \ + __len += 256; \ var = alloca(__len); \ - __len = posix_handle_gfid_path(this, gfid, base, var, __len); \ + __len = posix_handle_gfid_path(this, gfid, var, __len); \ } while (0) #define MAKE_HANDLE_RELPATH(var, this, gfid, base) \ do { \ int __len; \ - __len = posix_handle_relpath(this, gfid, base, NULL, 0); \ - if (__len <= 0) \ - break; \ + __len = POSIX_GFID_HANDLE_RELSIZE; \ + if (base) { \ + __len += (strlen(base) + 1); \ + } \ var = alloca(__len); \ __len = posix_handle_relpath(this, gfid, base, var, __len); \ } while (0) @@ -140,6 +141,16 @@ __priv->base_path, gfid[0], gfid[1], uuid_utoa(gfid)); \ } while (0) +#define MAKE_HANDLE_ABSPATH_FD(var, this, gfid, dfd) \ + do { \ + struct posix_private *__priv = this->private; \ + int findex = gfid[0]; \ + int __len = POSIX_GFID_HASH2_LEN; \ + var = alloca(__len); \ + snprintf(var, __len, "%02x/%s", gfid[1], uuid_utoa(gfid)); \ + dfd = 
__priv->arrdfd[findex]; \ + } while (0) + #define MAKE_ENTRY_HANDLE(entp, parp, this, loc, ent_p) \ do { \ char *__parp; \ @@ -183,9 +194,9 @@ /* expand ELOOP */ \ } while (0) +#define POSIX_GFID_HASH2_LEN 45 int -posix_handle_gfid_path(xlator_t *this, uuid_t gfid, const char *basename, - char *buf, size_t len); +posix_handle_gfid_path(xlator_t *this, uuid_t gfid, char *buf, size_t len); int posix_handle_hard(xlator_t *this, const char *path, uuid_t gfid, diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index 1200692db4e..67db3324083 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -33,49 +33,42 @@ #endif /* GF_BSD_HOST_OS */ #include <fnmatch.h> -#include "glusterfs.h" -#include "checksum.h" -#include "dict.h" -#include "logging.h" #include "posix.h" #include "posix-messages.h" #include "posix-metadata.h" #include "posix-handle.h" -#include "xlator.h" -#include "defaults.h" -#include "common-utils.h" -#include "compat-errno.h" -#include "compat.h" -#include "byte-order.h" -#include "syscall.h" -#include "statedump.h" -#include "locking.h" -#include "timer.h" +#include <glusterfs/compat-errno.h> +#include <glusterfs/compat.h> +#include <glusterfs/byte-order.h> +#include <glusterfs/syscall.h> +#include <glusterfs/statedump.h> +#include <glusterfs/locking.h> +#include <glusterfs/timer.h> #include "glusterfs3-xdr.h" -#include "hashfn.h" -#include "glusterfs-acl.h" +#include <glusterfs/glusterfs-acl.h> #include "posix-gfid-path.h" -#include "events.h" -#include "glusterfsd.h" +#include <glusterfs/events.h> +#include "glusterfs/syncop.h" +#include "timer-wheel.h" #include <sys/types.h> char *marker_xattrs[] = {"trusted.glusterfs.quota.*", "trusted.glusterfs.*.xtime", NULL}; -char *marker_contri_key = "trusted.*.*.contri"; +static char *marker_contri_key = "trusted.*.*.contri"; static char *posix_ignore_xattrs[] = {"gfid-req", + GLUSTERFS_INTERNAL_FOP_KEY, 
GLUSTERFS_ENTRYLK_COUNT, GLUSTERFS_INODELK_COUNT, GLUSTERFS_POSIXLK_COUNT, GLUSTERFS_PARENT_ENTRYLK, GF_GFIDLESS_LOOKUP, GLUSTERFS_INODELK_DOM_COUNT, - GLUSTERFS_INTERNAL_FOP_KEY, NULL}; -static char *list_xattr_ignore_xattrs[] = { - GF_SELINUX_XATTR_KEY, GF_XATTR_VOL_ID_KEY, GFID_XATTR_KEY, NULL}; +static char *list_xattr_ignore_xattrs[] = {GFID_XATTR_KEY, GF_XATTR_VOL_ID_KEY, + GF_SELINUX_XATTR_KEY, NULL}; gf_boolean_t posix_special_xattr(char **pattern, char *key) @@ -144,9 +137,6 @@ posix_handle_georep_xattrs(call_frame_t *frame, const char *name, int *op_errno, static const char *georep_xattr[] = { "*.glusterfs.*.stime", "*.glusterfs.*.xtime", "*.glusterfs.*.entry_stime", "*.glusterfs.volume-mark.*", NULL}; - if (frame && frame->root) { - pid = frame->root->pid; - } if (!name) { /* No need to do anything here */ @@ -154,6 +144,10 @@ posix_handle_georep_xattrs(call_frame_t *frame, const char *name, int *op_errno, goto out; } + if (frame && frame->root) { + pid = frame->root->pid; + } + if (pid == GF_CLIENT_PID_GSYNCD && is_getxattr) { filter_xattr = _gf_false; @@ -219,14 +213,11 @@ posix_xattr_ignorable(char *key) static int _posix_xattr_get_set_from_backend(posix_xattr_filler_t *filler, char *key) { - ssize_t xattr_size = -1; - int ret = 0; + ssize_t xattr_size = 256; /* guesstimated initial size of xattr */ + int ret = -1; char *value = NULL; - char val_buf[256] = {0}; - gf_boolean_t have_val = _gf_false; if (!gf_is_valid_xattr_namespace(key)) { - ret = -1; goto out; } @@ -235,46 +226,54 @@ _posix_xattr_get_set_from_backend(posix_xattr_filler_t *filler, char *key) * of getxattr with NULL buf to find the length and then getxattr with * allocated buf to fill the data. This way we reduce lot of getxattrs. 
*/ - if (filler->real_path) - xattr_size = sys_lgetxattr(filler->real_path, key, val_buf, - sizeof(val_buf) - 1); - else - xattr_size = sys_fgetxattr(filler->fdnum, key, val_buf, - sizeof(val_buf) - 1); - if (xattr_size >= 0) { - have_val = _gf_true; - } else if (xattr_size == -1 && errno != ERANGE) { - ret = -1; + value = GF_MALLOC(xattr_size + 1, gf_posix_mt_char); + if (!value) { goto out; } - if (have_val) { - /*No need to do getxattr*/ - } else if (filler->real_path) { - xattr_size = sys_lgetxattr(filler->real_path, key, NULL, 0); - } else { - xattr_size = sys_fgetxattr(filler->fdnum, key, NULL, 0); - } + if (filler->real_path) + xattr_size = sys_lgetxattr(filler->real_path, key, value, xattr_size); + else + xattr_size = sys_fgetxattr(filler->fdnum, key, value, xattr_size); + + if (xattr_size == -1) { + if (value) { + GF_FREE(value); + value = NULL; + } + /* xattr_size == -1 - failed to fetch the xattr with + * current settings. + * If it was not because value was too small, abort + */ + if (errno != ERANGE) { + goto out; + } + + /* Get the real length needed */ + if (filler->real_path) { + xattr_size = sys_lgetxattr(filler->real_path, key, NULL, 0); + } else { + xattr_size = sys_fgetxattr(filler->fdnum, key, NULL, 0); + } + if (xattr_size == -1) { + goto out; + } - if (xattr_size != -1) { value = GF_MALLOC(xattr_size + 1, gf_posix_mt_char); - if (!value) + if (!value) { goto out; + } - if (have_val) { - memcpy(value, val_buf, xattr_size); + if (filler->real_path) { + xattr_size = sys_lgetxattr(filler->real_path, key, value, + xattr_size); } else { - bzero(value, xattr_size + 1); - if (filler->real_path) { - xattr_size = sys_lgetxattr(filler->real_path, key, value, - xattr_size); - } else { - xattr_size = sys_fgetxattr(filler->fdnum, key, value, - xattr_size); - } + xattr_size = sys_fgetxattr(filler->fdnum, key, value, xattr_size); } if (xattr_size == -1) { + GF_FREE(value); + value = NULL; if (filler->real_path) gf_msg(filler->this->name, GF_LOG_WARNING, 0, 
P_MSG_XATTR_FAILED, "getxattr failed. path: %s, key: %s", @@ -283,24 +282,25 @@ _posix_xattr_get_set_from_backend(posix_xattr_filler_t *filler, char *key) gf_msg(filler->this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_FAILED, "getxattr failed. gfid: %s, key: %s", uuid_utoa(filler->fd->inode->gfid), key); - GF_FREE(value); goto out; } + } - value[xattr_size] = '\0'; - ret = dict_set_bin(filler->xattr, key, value, xattr_size); - if (ret < 0) { - if (filler->real_path) - gf_msg_debug(filler->this->name, 0, - "dict set failed. path: %s, key: %s", - filler->real_path, key); - else - gf_msg_debug(filler->this->name, 0, - "dict set failed. gfid: %s, key: %s", - uuid_utoa(filler->fd->inode->gfid), key); + value[xattr_size] = '\0'; + ret = dict_set_bin(filler->xattr, key, value, xattr_size); + + if (ret < 0) { + if (value) GF_FREE(value); - goto out; - } + if (filler->real_path) + gf_msg_debug(filler->this->name, 0, + "dict set failed. path: %s, key: %s", + filler->real_path, key); + else + gf_msg_debug(filler->this->name, 0, + "dict set failed. 
gfid: %s, key: %s", + uuid_utoa(filler->fd->inode->gfid), key); + goto out; } ret = 0; out: @@ -365,11 +365,10 @@ _posix_get_marker_all_contributions(posix_xattr_filler_t *filler) list_offset = 0; while (remaining_size > 0) { - strcpy(key, list + list_offset); + len = snprintf(key, sizeof(key), "%s", list + list_offset); if (fnmatch(marker_contri_key, key, 0) == 0) { - ret = _posix_xattr_get_set_from_backend(filler, key); + (void)_posix_xattr_get_set_from_backend(filler, key); } - len = strlen(key); remaining_size -= (len + 1); list_offset += (len + 1); } @@ -388,6 +387,9 @@ _posix_get_marker_quota_contributions(posix_xattr_filler_t *filler, char *key) int i = 0, ret = 0; tmp_key = ptr = gf_strdup(key); + if (tmp_key == NULL) { + return -1; + } for (i = 0; i < 4; i++) { token = strtok_r(tmp_key, ".", &saveptr); tmp_key = NULL; @@ -421,7 +423,7 @@ _posix_xattr_get_set(dict_t *xattr_req, char *key, data_t *data, { posix_xattr_filler_t *filler = xattrargs; int ret = -1; - int len; + int len = 0; char *databuf = NULL; int _fd = -1; ssize_t req_size = 0; @@ -429,11 +431,18 @@ _posix_xattr_get_set(dict_t *xattr_req, char *key, data_t *data, ssize_t remaining_size = 0; char *xattr = NULL; inode_t *inode = NULL; + char *value = NULL; + struct iatt stbuf = { + 0, + }; if (posix_xattr_ignorable(key)) goto out; + + len = strlen(key); /* should size be put into the data_t ? 
*/ - if (!strcmp(key, GF_CONTENT_KEY) && IA_ISREG(filler->stbuf->ia_type)) { + if ((filler->stbuf != NULL && IA_ISREG(filler->stbuf->ia_type)) && + (len == SLEN(GF_CONTENT_KEY) && !strcmp(key, GF_CONTENT_KEY))) { if (!filler->real_path) goto out; @@ -498,7 +507,8 @@ _posix_xattr_get_set(dict_t *xattr_req, char *key, data_t *data, sys_close(_fd); GF_FREE(databuf); } - } else if (!strcmp(key, GLUSTERFS_OPEN_FD_COUNT)) { + } else if (len == SLEN(GLUSTERFS_OPEN_FD_COUNT) && + !strcmp(key, GLUSTERFS_OPEN_FD_COUNT)) { inode = _get_filler_inode(filler); if (!inode || gf_uuid_is_null(inode->gfid)) goto out; @@ -507,7 +517,8 @@ _posix_xattr_get_set(dict_t *xattr_req, char *key, data_t *data, gf_msg(filler->this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, "Failed to set dictionary value for %s", key); } - } else if (!strcmp(key, GLUSTERFS_ACTIVE_FD_COUNT)) { + } else if (len == SLEN(GLUSTERFS_ACTIVE_FD_COUNT) && + !strcmp(key, GLUSTERFS_ACTIVE_FD_COUNT)) { inode = _get_filler_inode(filler); if (!inode || gf_uuid_is_null(inode->gfid)) goto out; @@ -516,7 +527,8 @@ _posix_xattr_get_set(dict_t *xattr_req, char *key, data_t *data, gf_msg(filler->this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, "Failed to set dictionary value for %s", key); } - } else if (!strcmp(key, GET_ANCESTRY_PATH_KEY)) { + } else if (len == SLEN(GET_ANCESTRY_PATH_KEY) && + !strcmp(key, GET_ANCESTRY_PATH_KEY)) { /* As of now, the only consumers of POSIX_ANCESTRY_PATH attempt * fetching it via path-based fops. Hence, leaving it as it is * for now. 
@@ -531,7 +543,7 @@ _posix_xattr_get_set(dict_t *xattr_req, char *key, data_t *data, goto out; } - ret = dict_set_dynstr(filler->xattr, GET_ANCESTRY_PATH_KEY, path); + ret = dict_set_dynstr_sizen(filler->xattr, GET_ANCESTRY_PATH_KEY, path); if (ret < 0) { GF_FREE(path); goto out; @@ -539,13 +551,55 @@ _posix_xattr_get_set(dict_t *xattr_req, char *key, data_t *data, } else if (fnmatch(marker_contri_key, key, 0) == 0) { ret = _posix_get_marker_quota_contributions(filler, key); - } else if (strcmp(key, GF_REQUEST_LINK_COUNT_XDATA) == 0) { - ret = dict_set(filler->xattr, GF_REQUEST_LINK_COUNT_XDATA, data); - } else if (strcmp(key, GF_GET_SIZE) == 0) { + } else if (len == SLEN(GF_REQUEST_LINK_COUNT_XDATA) && + strcmp(key, GF_REQUEST_LINK_COUNT_XDATA) == 0) { + ret = dict_set_sizen(filler->xattr, GF_REQUEST_LINK_COUNT_XDATA, data); + } else if (len == SLEN(GF_GET_SIZE) && strcmp(key, GF_GET_SIZE) == 0) { if (filler->stbuf && IA_ISREG(filler->stbuf->ia_type)) { ret = dict_set_uint64(filler->xattr, GF_GET_SIZE, filler->stbuf->ia_size); } + } else if (GF_POSIX_ACL_REQUEST(key)) { + if (filler->real_path) + ret = posix_pstat(filler->this, NULL, NULL, filler->real_path, + &stbuf, _gf_false); + else + ret = posix_fdstat(filler->this, filler->fd->inode, filler->fdnum, + &stbuf); + if (ret < 0) { + gf_msg(filler->this->name, GF_LOG_ERROR, errno, + P_MSG_XDATA_GETXATTR, "lstat on %s failed", + filler->real_path ?: uuid_utoa(filler->fd->inode->gfid)); + goto out; + } + + /* Avoid link follow in virt_pacl_get, donot fill acl for symlink.*/ + if (IA_ISLNK(stbuf.ia_type)) + goto out; + + /* ACL_TYPE_DEFAULT is not supported for non-directory, skip */ + if (!IA_ISDIR(stbuf.ia_type) && + !strncmp(key, GF_POSIX_ACL_DEFAULT, SLEN(GF_POSIX_ACL_DEFAULT))) + goto out; + + ret = posix_pacl_get(filler->real_path, filler->fdnum, key, &value); + if (ret || !value) { + gf_msg(filler->this->name, GF_LOG_ERROR, errno, + P_MSG_XDATA_GETXATTR, "could not get acl (%s) for %s, %d", + key, 
filler->real_path ?: uuid_utoa(filler->fd->inode->gfid), + ret); + goto out; + } + + ret = dict_set_dynstrn(filler->xattr, (char *)key, len, value); + if (ret < 0) { + GF_FREE(value); + gf_msg(filler->this->name, GF_LOG_ERROR, errno, + P_MSG_XDATA_GETXATTR, + "could not set acl (%s) for %s in dictionary", key, + filler->real_path ?: uuid_utoa(filler->fd->inode->gfid)); + goto out; + } } else { remaining_size = filler->list_size; while (remaining_size > 0) { @@ -770,6 +824,11 @@ posix_pstat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *path, gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED, "lstat failed on %s", path); errno = op_errno; /*gf_msg could have changed errno*/ + } else { + op_errno = errno; + gf_msg_debug(this->name, 0, "lstat failed on %s (%s)", path, + strerror(errno)); + errno = op_errno; /*gf_msg could have changed errno*/ } goto out; } @@ -785,17 +844,26 @@ posix_pstat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *path, iatt_from_stat(&stbuf, &lstatbuf); - if (inode && priv->ctime) { - if (!inode_locked) { - ret = posix_get_mdata_xattr(this, path, -1, inode, &stbuf); + if (priv->ctime) { + if (inode) { + if (!inode_locked) { + ret = posix_get_mdata_xattr(this, path, -1, inode, &stbuf); + } else { + ret = __posix_get_mdata_xattr(this, path, -1, inode, &stbuf); + } + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED, + "posix get mdata failed on gfid: %s", + uuid_utoa(inode->gfid)); + goto out; + } } else { - ret = __posix_get_mdata_xattr(this, path, -1, inode, &stbuf); - } - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED, - "posix get mdata failed on gfid: %s", - uuid_utoa(inode->gfid)); - goto out; + ret = __posix_get_mdata_xattr(this, path, -1, NULL, &stbuf); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_GETMDATA_FAILED, + "posix get mdata failed on path: %s", path); + goto out; + } } } @@ -838,18 +906,17 @@ out: } static void 
-_handle_list_xattr(dict_t *xattr_req, const char *real_path, int fdnum, - posix_xattr_filler_t *filler) +_handle_list_xattr(posix_xattr_filler_t *filler) { int32_t list_offset = 0; ssize_t remaining_size = 0; char *key = NULL; int len; - list_offset = 0; remaining_size = filler->list_size; while (remaining_size > 0) { key = filler->list + list_offset; + len = strlen(key); if (gf_get_index_by_elem(list_xattr_ignore_xattrs, key) >= 0) goto next; @@ -863,12 +930,11 @@ _handle_list_xattr(dict_t *xattr_req, const char *real_path, int fdnum, if (posix_is_gfid2path_xattr(key)) goto next; - if (dict_get(filler->xattr, key)) + if (dict_getn(filler->xattr, key, len)) goto next; (void)_posix_xattr_get_set_from_backend(filler, key); next: - len = strlen(key); remaining_size -= (len + 1); list_offset += (len + 1); @@ -886,8 +952,8 @@ posix_xattr_fill(xlator_t *this, const char *real_path, loc_t *loc, fd_t *fd, }; gf_boolean_t list = _gf_false; - if (dict_get(xattr_req, "list-xattr")) { - dict_del(xattr_req, "list-xattr"); + if (dict_get_sizen(xattr_req, "list-xattr")) { + dict_del_sizen(xattr_req, "list-xattr"); list = _gf_true; } @@ -907,7 +973,7 @@ posix_xattr_fill(xlator_t *this, const char *real_path, loc_t *loc, fd_t *fd, _get_list_xattr(&filler); dict_foreach(xattr_req, _posix_xattr_get_set, &filler); if (list) - _handle_list_xattr(xattr_req, real_path, fdnum, &filler); + _handle_list_xattr(&filler); GF_FREE(filler.list); out: @@ -961,6 +1027,7 @@ posix_gfid_set(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req, if (sys_lstat(path, &stat) != 0) { ret = -1; + *op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, "lstat on %s failed", path); goto out; @@ -1003,23 +1070,41 @@ verify_handle: ret = posix_handle_soft(this, path, loc, uuid_curr, &stat); out: - if (!(*op_errno)) + if (ret && !(*op_errno)) *op_errno = errno; return ret; } #ifdef HAVE_SYS_ACL_H -int -posix_pacl_set(const char *path, const char *key, const char *acl_s) +static 
int +posix_pacl_set(const char *path, int fdnum, const char *key, const char *acl_s) { int ret = -1; acl_t acl = NULL; acl_type_t type = 0; + if ((!path) && (fdnum < 0)) { + errno = -EINVAL; + return -1; + } + type = gf_posix_acl_get_type(key); + if (!type) + return -1; acl = acl_from_text(acl_s); - ret = acl_set_file(path, type, acl); + if (!acl) + return -1; + + if (path) + ret = acl_set_file(path, type, acl); + else if (type == ACL_TYPE_ACCESS) + ret = acl_set_fd(fdnum, acl); + else { + errno = -EINVAL; + return -1; + } + if (ret) /* posix_handle_pair expects ret to be the errno */ ret = -errno; @@ -1030,18 +1115,31 @@ posix_pacl_set(const char *path, const char *key, const char *acl_s) } int -posix_pacl_get(const char *path, const char *key, char **acl_s) +posix_pacl_get(const char *path, int fdnum, const char *key, char **acl_s) { int ret = -1; acl_t acl = NULL; acl_type_t type = 0; char *acl_tmp = NULL; + if ((!path) && (fdnum < 0)) { + errno = -EINVAL; + return -1; + } + type = gf_posix_acl_get_type(key); if (!type) return -1; - acl = acl_get_file(path, type); + if (path) + acl = acl_get_file(path, type); + else if (type == ACL_TYPE_ACCESS) + acl = acl_get_fd(fdnum); + else { + errno = -EINVAL; + return -1; + } + if (!acl) return -1; @@ -1066,14 +1164,14 @@ free_acl: } #else /* !HAVE_SYS_ACL_H (NetBSD) */ int -posix_pacl_set(const char *path, const char *key, const char *acl_s) +posix_pacl_set(const char *path, int fdnum, const char *key, const char *acl_s) { errno = ENOTSUP; return -1; } int -posix_pacl_get(const char *path, const char *key, char **acl_s) +posix_pacl_get(const char *path, int fdnum, const char *key, char **acl_s) { errno = ENOTSUP; return -1; @@ -1101,11 +1199,15 @@ posix_dump_buffer(xlator_t *this, const char *real_path, const char *key, #endif int -posix_handle_pair(xlator_t *this, const char *real_path, char *key, +posix_handle_pair(xlator_t *this, loc_t *loc, const char *real_path, char *key, data_t *value, int flags, struct iatt 
*stbuf) { int sys_ret = -1; int ret = 0; + int op_errno = 0; + struct mdata_iatt mdata_iatt = { + 0, + }; #ifdef GF_DARWIN_HOST_OS const int error_code = EINVAL; #else @@ -1121,14 +1223,32 @@ posix_handle_pair(xlator_t *this, const char *real_path, char *key, } else if (GF_POSIX_ACL_REQUEST(key)) { if (stbuf && IS_DHT_LINKFILE_MODE(stbuf)) goto out; - ret = posix_pacl_set(real_path, key, value->data); - } else if (!strncmp(key, POSIX_ACL_ACCESS_XATTR, strlen(key)) && stbuf && - IS_DHT_LINKFILE_MODE(stbuf)) { + ret = posix_pacl_set(real_path, -1, key, value->data); + } else if (!strncmp(key, POSIX_ACL_ACCESS_XATTR, + SLEN(POSIX_ACL_ACCESS_XATTR)) && + stbuf && IS_DHT_LINKFILE_MODE(stbuf)) { goto out; - } else if (!strncmp(key, GF_INTERNAL_CTX_KEY, strlen(key))) { + } else if (!strncmp(key, GF_INTERNAL_CTX_KEY, SLEN(GF_INTERNAL_CTX_KEY))) { /* ignore this key value pair */ ret = 0; goto out; + } else if (!strncmp(key, GF_XATTR_MDATA_KEY, strlen(key))) { + /* This is either by rebalance or self heal. Create the xattr if it's + * not present. Compare and update the larger value if the xattr is + * already present. 
+ */ + if (loc == NULL) { + ret = -EINVAL; + goto out; + } + posix_mdata_iatt_from_disk(&mdata_iatt, + (posix_mdata_disk_t *)value->data); + ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path, + &mdata_iatt, &op_errno); + if (ret != 0) { + ret = -op_errno; + } + goto out; } else { sys_ret = sys_lsetxattr(real_path, key, value->data, value->len, flags); #ifdef GF_DARWIN_HOST_OS @@ -1175,8 +1295,9 @@ posix_fhandle_pair(call_frame_t *frame, xlator_t *this, int fd, char *key, } else if (posix_is_gfid2path_xattr(key)) { ret = -ENOTSUP; goto out; - } else if (!strncmp(key, POSIX_ACL_ACCESS_XATTR, strlen(key)) && stbuf && - IS_DHT_LINKFILE_MODE(stbuf)) { + } else if (!strncmp(key, POSIX_ACL_ACCESS_XATTR, + SLEN(POSIX_ACL_ACCESS_XATTR)) && + stbuf && IS_DHT_LINKFILE_MODE(stbuf)) { goto out; } @@ -1235,7 +1356,7 @@ del_stale_dir_handle(xlator_t *this, uuid_t gfid) 0, }; - MAKE_HANDLE_GFID_PATH(hpath, this, gfid, NULL); + MAKE_HANDLE_GFID_PATH(hpath, this, gfid); /* check that it is valid directory handle */ size = sys_lstat(hpath, &stbuf); @@ -1333,131 +1454,255 @@ janitor_walker(const char *fpath, const struct stat *sb, int typeflag, return 0; /* 0 = FTW_CONTINUE */ } -static struct posix_fd * -janitor_get_next_fd(xlator_t *this) +void +__posix_janitor_timer_start(xlator_t *this); + +static int +posix_janitor_task_done(int ret, call_frame_t *frame, void *data) { + xlator_t *this = NULL; struct posix_private *priv = NULL; - struct posix_fd *pfd = NULL; - - struct timespec timeout; + this = data; priv = this->private; - pthread_mutex_lock(&priv->janitor_lock); + pthread_mutex_lock(&priv->janitor_mutex); { - if (list_empty(&priv->janitor_fds)) { - time(&timeout.tv_sec); - timeout.tv_sec += priv->janitor_sleep_duration; - timeout.tv_nsec = 0; - - pthread_cond_timedwait(&priv->janitor_cond, &priv->janitor_lock, - &timeout); - goto unlock; + if (priv->janitor_task_stop) { + priv->janitor_task_stop = _gf_false; + pthread_cond_signal(&priv->janitor_cond); + 
pthread_mutex_unlock(&priv->janitor_mutex); + goto out; } + } + pthread_mutex_unlock(&priv->janitor_mutex); - pfd = list_entry(priv->janitor_fds.next, struct posix_fd, list); - - list_del(priv->janitor_fds.next); + LOCK(&priv->lock); + { + __posix_janitor_timer_start(this); } -unlock: - pthread_mutex_unlock(&priv->janitor_lock); + UNLOCK(&priv->lock); - return pfd; +out: + return 0; } -static void * -posix_janitor_thread_proc(void *data) +static int +posix_janitor_task(void *data) { xlator_t *this = NULL; struct posix_private *priv = NULL; - struct posix_fd *pfd; + xlator_t *old_this = NULL; time_t now; this = data; priv = this->private; - + /* We need THIS to be set for janitor_walker */ + old_this = THIS; THIS = this; - while (1) { - time(&now); - if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) { - if (priv->disable_landfill_purge) { - gf_msg_debug(this->name, 0, - "Janitor would have " - "cleaned out %s, but purge" - "is disabled.", - priv->trash_path); - } else { - gf_msg_trace(this->name, 0, "janitor cleaning out %s", - priv->trash_path); + if (!priv) + goto out; - nftw(priv->trash_path, janitor_walker, 32, - FTW_DEPTH | FTW_PHYS); - } - priv->last_landfill_check = now; + now = gf_time(); + if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) { + if (priv->disable_landfill_purge) { + gf_msg_debug(this->name, 0, + "Janitor would have " + "cleaned out %s, but purge" + "is disabled.", + priv->trash_path); + } else { + gf_msg_trace(this->name, 0, "janitor cleaning out %s", + priv->trash_path); + + nftw(priv->trash_path, janitor_walker, 32, FTW_DEPTH | FTW_PHYS); } + priv->last_landfill_check = now; + } - pfd = janitor_get_next_fd(this); - if (pfd) { - if (pfd->dir == NULL) { - gf_msg_trace(this->name, 0, "janitor: closing file fd=%d", - pfd->fd); - sys_close(pfd->fd); - } else { - gf_msg_debug(this->name, 0, - "janitor: closing" - " dir fd=%p", - pfd->dir); - sys_closedir(pfd->dir); - } + THIS = old_this; - GF_FREE(pfd); - } 
+out: + return 0; +} + +static void +posix_janitor_task_initator(struct gf_tw_timer_list *timer, void *data, + unsigned long calltime) +{ + xlator_t *this = NULL; + int ret = 0; + + this = data; + + ret = synctask_new(this->ctx->env, posix_janitor_task, + posix_janitor_task_done, NULL, this); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED, + "spawning janitor " + "thread failed"); } - return NULL; + return; } void -posix_spawn_janitor_thread(xlator_t *this) +__posix_janitor_timer_start(xlator_t *this) { struct posix_private *priv = NULL; - int ret = 0; + struct gf_tw_timer_list *timer = NULL; + + priv = this->private; + timer = priv->janitor; + + INIT_LIST_HEAD(&timer->entry); + timer->expires = priv->janitor_sleep_duration; + timer->function = posix_janitor_task_initator; + timer->data = this; + gf_tw_add_timer(glusterfs_ctx_tw_get(this->ctx), timer); + + return; +} + +void +posix_janitor_timer_start(xlator_t *this) +{ + struct posix_private *priv = NULL; + struct gf_tw_timer_list *timer = NULL; priv = this->private; LOCK(&priv->lock); { - if (!priv->janitor_present) { - ret = gf_thread_create(&priv->janitor, NULL, - posix_janitor_thread_proc, this, "posixjan"); - - if (ret < 0) { - gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED, - "spawning janitor " - "thread failed"); + if (!priv->janitor) { + timer = GF_CALLOC(1, sizeof(struct gf_tw_timer_list), + gf_common_mt_tw_timer_list); + if (!timer) { goto unlock; } - - priv->janitor_present = _gf_true; + priv->janitor = timer; + __posix_janitor_timer_start(this); } } unlock: UNLOCK(&priv->lock); + + return; +} + +static struct posix_fd * +janitor_get_next_fd(glusterfs_ctx_t *ctx) +{ + struct posix_fd *pfd = NULL; + + while (list_empty(&ctx->janitor_fds)) { + if (ctx->pxl_count == 0) { + return NULL; + } + + pthread_cond_wait(&ctx->fd_cond, &ctx->fd_lock); + } + + pfd = list_first_entry(&ctx->janitor_fds, struct posix_fd, list); + list_del_init(&pfd->list); + + return pfd; +} 
+ +static void +posix_close_pfd(xlator_t *xl, struct posix_fd *pfd) +{ + THIS = xl; + + if (pfd->dir == NULL) { + gf_msg_trace(xl->name, 0, "janitor: closing file fd=%d", pfd->fd); + sys_close(pfd->fd); + } else { + gf_msg_debug(xl->name, 0, "janitor: closing dir fd=%p", pfd->dir); + sys_closedir(pfd->dir); + } + + GF_FREE(pfd); +} + +static void * +posix_ctx_janitor_thread_proc(void *data) +{ + xlator_t *xl; + struct posix_fd *pfd; + glusterfs_ctx_t *ctx = NULL; + struct posix_private *priv_fd; + + ctx = data; + + pthread_mutex_lock(&ctx->fd_lock); + + while ((pfd = janitor_get_next_fd(ctx)) != NULL) { + pthread_mutex_unlock(&ctx->fd_lock); + + xl = pfd->xl; + posix_close_pfd(xl, pfd); + + pthread_mutex_lock(&ctx->fd_lock); + + priv_fd = xl->private; + priv_fd->rel_fdcount--; + if (!priv_fd->rel_fdcount) + pthread_cond_signal(&priv_fd->fd_cond); + } + + pthread_mutex_unlock(&ctx->fd_lock); + + return NULL; +} + +int +posix_spawn_ctx_janitor_thread(xlator_t *this) +{ + int ret = 0; + glusterfs_ctx_t *ctx = NULL; + + ctx = this->ctx; + + pthread_mutex_lock(&ctx->fd_lock); + { + if (ctx->pxl_count++ == 0) { + ret = gf_thread_create(&ctx->janitor, NULL, + posix_ctx_janitor_thread_proc, ctx, + "posixctxjan"); + + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_THREAD_FAILED, + "spawning janitor thread failed"); + ctx->pxl_count--; + } + } + } + pthread_mutex_unlock(&ctx->fd_lock); + + return ret; } static int -is_fresh_file(int64_t ctime_sec) +is_fresh_file(struct timespec *ts) { - struct timeval tv; + struct timespec now; + int64_t elapsed; - gettimeofday(&tv, NULL); + timespec_now_realtime(&now); + elapsed = (int64_t)gf_tsdiff(ts, &now); - if ((ctime_sec >= (tv.tv_sec - 1)) && (ctime_sec <= tv.tv_sec)) - return 1; + if (elapsed < 0) { + /* The file has been modified in the future !!! + * Is it fresh ? previous implementation considered this as a + * non-fresh file, so maintaining the same behavior. 
*/ + return 0; + } - return 0; + /* If the file is newer than a second, we consider it fresh. */ + return elapsed < 1000000; } int @@ -1518,7 +1763,11 @@ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req) } ret = sys_lgetxattr(path, GFID_XATTR_KEY, uuid_curr, 16); if (ret != 16) { - if (is_fresh_file(stbuf.ia_ctime)) { + /* TODO: This is a very hacky way of doing this, and very prone to + * errors and unexpected behavior. This should be changed. */ + struct timespec ts = {.tv_sec = stbuf.ia_ctime, + .tv_nsec = stbuf.ia_ctime_nsec}; + if (is_fresh_file(&ts)) { gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE, "Fresh file: %s", path); return -ENOENT; @@ -1530,7 +1779,9 @@ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req) } ret = sys_lgetxattr(path, GFID_XATTR_KEY, uuid_curr, 16); if (ret != 16) { - if (is_fresh_file(stat.st_ctime)) { + /* TODO: This is a very hacky way of doing this, and very prone to + * errors and unexpected behavior. This should be changed. 
*/ + if (is_fresh_file(&stat.st_ctim)) { gf_msg(this->name, GF_LOG_ERROR, ENOENT, P_MSG_FRESHFILE, "Fresh file: %s", path); return -ENOENT; @@ -1538,7 +1789,7 @@ posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req) } } - posix_gfid_set(this, path, loc, xattr_req, GF_CLIENT_PID_MAX, &ret); + (void)posix_gfid_set(this, path, loc, xattr_req, GF_CLIENT_PID_MAX, &ret); return 0; } @@ -1601,8 +1852,8 @@ _handle_entry_create_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp) return 0; } - ret = posix_handle_pair(filler->this, filler->real_path, k, v, XATTR_CREATE, - filler->stbuf); + ret = posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v, + XATTR_CREATE, filler->stbuf); if (ret < 0) { errno = -ret; return -1; @@ -1611,7 +1862,8 @@ _handle_entry_create_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp) } int -posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict) +posix_entry_create_xattr_set(xlator_t *this, loc_t *loc, const char *path, + dict_t *dict) { int ret = -1; @@ -1625,6 +1877,7 @@ posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict) filler.this = this; filler.real_path = path; filler.stbuf = NULL; + filler.loc = loc; ret = dict_foreach(dict, _handle_entry_create_keyvalue_pair, &filler); @@ -1657,8 +1910,8 @@ __posix_fd_ctx_get(fd_t *fd, xlator_t *this, struct posix_fd **pfd_p, if (!fd_is_anonymous(fd)) { gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_READ_FAILED, "Failed to get fd context for a non-anonymous fd, " - "file: %s, gfid: %s", - real_path, uuid_utoa(fd->inode->gfid)); + "gfid: %s", + uuid_utoa(fd->inode->gfid)); op_errno = EINVAL; goto out; } @@ -1756,13 +2009,12 @@ posix_fd_ctx_get(fd_t *fd, xlator_t *this, struct posix_fd **pfd, int *op_errno) return ret; } -int -posix_fs_health_check(xlator_t *this) +static int +posix_fs_health_check(xlator_t *this, char *file_path) { struct posix_private *priv = NULL; int ret = -1; - char *subvol_path = NULL; - char 
timestamp[256] = { + char timestamp[GF_TIMESTR_SIZE] = { 0, }; int fd = -1; @@ -1771,25 +2023,15 @@ posix_fs_health_check(xlator_t *this) 0, }; char buff[256] = {0}; - char file_path[PATH_MAX] = {0}; char *op = NULL; int op_errno = 0; - int cnt = 0; + int cnt; int timeout = 0; struct aiocb aiocb; - GF_VALIDATE_OR_GOTO(this->name, this, out); priv = this->private; - GF_VALIDATE_OR_GOTO("posix-helpers", priv, out); - subvol_path = priv->base_path; timeout = priv->health_check_timeout; - snprintf(file_path, sizeof(file_path) - 1, "%s/%s/health_check", - subvol_path, GF_HIDDEN_PATH); - - time_sec = time(NULL); - gf_time_fmt(timestamp, sizeof timestamp, time_sec, gf_timefmt_FT); - timelen = strlen(timestamp); fd = open(file_path, O_CREAT | O_WRONLY | O_TRUNC, 0644); if (fd == -1) { @@ -1797,6 +2039,11 @@ posix_fs_health_check(xlator_t *this) op = "open_for_write"; goto out; } + + time_sec = gf_time(); + gf_time_fmt(timestamp, sizeof timestamp, time_sec, gf_timefmt_FT); + timelen = strlen(timestamp); + memset(&aiocb, 0, sizeof(struct aiocb)); aiocb.aio_fildes = fd; aiocb.aio_buf = timestamp; @@ -1808,6 +2055,7 @@ posix_fs_health_check(xlator_t *this) goto out; } + cnt = 0; /* Wait until write completion */ while ((aio_error(&aiocb) == EINPROGRESS) && (++cnt <= timeout)) sleep(1); @@ -1816,7 +2064,6 @@ posix_fs_health_check(xlator_t *this) if (ret != 0) { op_errno = errno; op = "aio_write_error"; - ret = -1; goto out; } @@ -1855,7 +2102,6 @@ posix_fs_health_check(xlator_t *this) if (ret != 0) { op_errno = errno; op = "aio_read_error"; - ret = -1; goto out; } @@ -1878,13 +2124,20 @@ out: if (fd != -1) { sys_close(fd); } + if (ret && file_path[0]) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_HEALTHCHECK_FAILED, - "%s() on %s returned", op, file_path); - gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED, - "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op, - file_path, strerror(op_errno), priv->hostname, priv->base_path, - timeout); + "%s() on %s returned ret is %d error is 
%s", op, file_path, ret, + ret != -1 ? strerror(ret) : strerror(op_errno)); + + if ((op_errno == EAGAIN) || (ret == EAGAIN)) { + ret = 0; + } else { + gf_event(EVENT_POSIX_HEALTH_CHECK_FAILED, + "op=%s;path=%s;error=%s;brick=%s:%s timeout is %d", op, + file_path, strerror(op_errno), priv->hostname, + priv->base_path, timeout); + } } return ret; } @@ -1892,30 +2145,30 @@ out: static void * posix_health_check_thread_proc(void *data) { - xlator_t *this = NULL; - struct posix_private *priv = NULL; - uint32_t interval = 0; + xlator_t *this = data; + struct posix_private *priv = this->private; + uint32_t interval = priv->health_check_interval; int ret = -1; xlator_t *top = NULL; xlator_t *victim = NULL; xlator_list_t **trav_p = NULL; int count = 0; gf_boolean_t victim_found = _gf_false; - glusterfs_ctx_t *ctx = NULL; - - this = data; - priv = this->private; - ctx = THIS->ctx; + glusterfs_ctx_t *ctx = THIS->ctx; + char file_path[PATH_MAX]; /* prevent races when the interval is updated */ - interval = priv->health_check_interval; if (interval == 0) goto out; + snprintf(file_path, sizeof(file_path) - 1, "%s/%s/health_check", + priv->base_path, GF_HIDDEN_PATH); + gf_msg_debug(this->name, 0, "health-check thread started, " + "on path %s, " "interval = %d seconds", - interval); + file_path, interval); while (1) { /* aborting sleep() is a request to exit this thread, sleep() * will normally not return when cancelled */ @@ -1926,7 +2179,7 @@ posix_health_check_thread_proc(void *data) pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); /* Do the health-check.*/ - ret = posix_fs_health_check(this); + ret = posix_fs_health_check(this, file_path); if (ret < 0 && priv->health_check_active) goto abort; if (!priv->health_check_active) @@ -1986,7 +2239,7 @@ abort: if (ret == 0) kill(getpid(), SIGKILL); - } else { + } else if (top) { LOCK(&ctx->volfile_lock); for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) { victim = (*trav_p)->xlator; @@ -2010,7 +2263,7 @@ abort: 
return NULL; } -void +int posix_spawn_health_check_thread(xlator_t *xl) { struct posix_private *priv = NULL; @@ -2032,7 +2285,7 @@ posix_spawn_health_check_thread(xlator_t *xl) ret = gf_thread_create(&priv->health_check, NULL, posix_health_check_thread_proc, xl, "posixhc"); - if (ret < 0) { + if (ret) { priv->health_check_interval = 0; priv->health_check_active = _gf_false; gf_msg(xl->name, GF_LOG_ERROR, errno, P_MSG_HEALTHCHECK_FAILED, @@ -2040,12 +2293,11 @@ posix_spawn_health_check_thread(xlator_t *xl) goto unlock; } - /* run the thread detached, resources will be freed on exit */ - pthread_detach(priv->health_check); priv->health_check_active = _gf_true; } unlock: UNLOCK(&priv->lock); + return ret; } void @@ -2054,17 +2306,17 @@ posix_disk_space_check(xlator_t *this) struct posix_private *priv = NULL; char *subvol_path = NULL; int op_ret = 0; - int percent = 0; + double size = 0; + double percent = 0; struct statvfs buf = {0}; - uint64_t totsz = 0; - uint64_t freesz = 0; + double totsz = 0; + double freesz = 0; - GF_VALIDATE_OR_GOTO(this->name, this, out); + GF_VALIDATE_OR_GOTO("posix-helpers", this, out); priv = this->private; GF_VALIDATE_OR_GOTO(this->name, priv, out); subvol_path = priv->base_path; - percent = priv->disk_reserve; op_ret = sys_statvfs(subvol_path, &buf); @@ -2073,10 +2325,17 @@ posix_disk_space_check(xlator_t *this) "statvfs failed on %s", subvol_path); goto out; } - totsz = (buf.f_blocks * buf.f_bsize); - freesz = (buf.f_bfree * buf.f_bsize); - if (freesz <= ((totsz * percent) / 100)) { + if (priv->disk_unit == 'p') { + percent = priv->disk_reserve; + totsz = (buf.f_blocks * buf.f_bsize); + size = ((totsz * percent) / 100); + } else { + size = priv->disk_reserve; + } + + freesz = (buf.f_bfree * buf.f_bsize); + if (freesz <= size) { priv->disk_space_full = 1; } else { priv->disk_space_full = 0; @@ -2128,7 +2387,7 @@ out: return NULL; } -void +int posix_spawn_disk_space_check_thread(xlator_t *xl) { struct posix_private *priv = NULL; @@ 
-2144,10 +2403,10 @@ posix_spawn_disk_space_check_thread(xlator_t *xl) priv->disk_space_check_active = _gf_false; } - ret = gf_thread_create_detached(&priv->disk_space_check, - posix_disk_space_check_thread_proc, xl, - "posix_reserve"); - if (ret < 0) { + ret = gf_thread_create(&priv->disk_space_check, NULL, + posix_disk_space_check_thread_proc, xl, + "posixrsv"); + if (ret) { priv->disk_space_check_active = _gf_false; gf_msg(xl->name, GF_LOG_ERROR, errno, P_MSG_DISK_SPACE_CHECK_FAILED, "unable to setup disk space check thread"); @@ -2158,6 +2417,7 @@ posix_spawn_disk_space_check_thread(xlator_t *xl) } unlock: UNLOCK(&priv->lock); + return ret; } int @@ -2197,7 +2457,7 @@ posix_fsyncer_process(xlator_t *this, call_stub_t *stub, gf_boolean_t do_fsync) return; } - if (do_fsync) { + if (do_fsync && pfd) { if (stub->args.datasync) ret = sys_fdatasync(pfd->fd); else @@ -2225,23 +2485,8 @@ posix_fsyncer_syncfs(xlator_t *this, struct list_head *head) stub = list_entry(head->prev, call_stub_t, list); ret = posix_fd_ctx_get(stub->args.fd, this, &pfd, NULL); - if (ret) - return; - -#ifdef GF_LINUX_HOST_OS - /* syncfs() is not "declared" in RHEL's glibc even though - the kernel has support. 
- */ -#include <sys/syscall.h> -#include <unistd.h> -#ifdef SYS_syncfs - syscall(SYS_syncfs, pfd->fd); -#else - sync(); -#endif -#else - sync(); -#endif + if (!ret) + (void)gf_syncfs(pfd->fd); } void * @@ -2262,7 +2507,7 @@ posix_fsyncer(void *d) count = posix_fsyncer_pick(this, &list); - usleep(priv->batch_fsync_delay_usec); + gf_nanosleep(priv->batch_fsync_delay_usec * GF_US_IN_NS); gf_msg_debug(this->name, 0, "picked %d fsyncs", count); @@ -2534,10 +2779,10 @@ posix_resolve_dirgfid_to_path(const uuid_t dirgfid, const char *brick_path, linkname[len] = '\0'; - pgfidstr = strtok_r(linkname + strlen("../../00/00/"), "/", &saveptr); + pgfidstr = strtok_r(linkname + SLEN("../../00/00/"), "/", &saveptr); dir_name = strtok_r(NULL, "/", &saveptr); - if (strlen(pre_dir_name) != 0) { /* Remove '/' at the end */ + if (pre_dir_name[0] != '\0') { /* Remove '/' at the end */ len = snprintf(result, PATH_MAX, "%s/%s", dir_name, pre_dir_name); } else { len = snprintf(result, PATH_MAX, "%s", dir_name); @@ -2582,7 +2827,7 @@ __posix_inode_ctx_get(inode_t *inode, xlator_t *this) ret = __inode_ctx_get(inode, this, &ctx_uint); if (ret == 0) { - return (posix_inode_ctx_t *)ctx_uint; + return (posix_inode_ctx_t *)(uintptr_t)ctx_uint; } ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_posix_mt_inode_ctx_t); @@ -2593,7 +2838,8 @@ __posix_inode_ctx_get(inode_t *inode, xlator_t *this) pthread_mutex_init(&ctx_p->write_atomic_lock, NULL); pthread_mutex_init(&ctx_p->pgfid_lock, NULL); - ret = __inode_ctx_set(inode, this, (uint64_t *)&ctx_p); + ctx_uint = (uint64_t)(uintptr_t)ctx_p; + ret = __inode_ctx_set(inode, this, &ctx_uint); if (ret < 0) { pthread_mutex_destroy(&ctx_p->xattrop_lock); pthread_mutex_destroy(&ctx_p->write_atomic_lock); @@ -2665,7 +2911,7 @@ posix_inode_ctx_get_all(inode_t *inode, xlator_t *this, posix_inode_ctx_t **ctx) gf_boolean_t posix_is_bulk_removexattr(char *name, dict_t *xdata) { - if (name && (strlen(name) == 0) && xdata) + if (name && (name[0] == '\0') && xdata) return 
_gf_true; return _gf_false; } @@ -2745,7 +2991,7 @@ posix_check_internal_writes(xlator_t *this, fd_t *fd, int sysfd, dict_t *xdata) LOCK(&fd->inode->lock); { - val = dict_get(xdata, GF_PROTECT_FROM_EXTERNAL_WRITES); + val = dict_get_sizen(xdata, GF_PROTECT_FROM_EXTERNAL_WRITES); if (val) { ret = sys_fsetxattr(sysfd, GF_PROTECT_FROM_EXTERNAL_WRITES, val->data, val->len, 0); @@ -2758,7 +3004,7 @@ posix_check_internal_writes(xlator_t *this, fd_t *fd, int sysfd, dict_t *xdata) goto out; } - if (dict_get(xdata, GF_AVOID_OVERWRITE)) { + if (dict_get_sizen(xdata, GF_AVOID_OVERWRITE)) { xattrsize = sys_fgetxattr(sysfd, GF_PROTECT_FROM_EXTERNAL_WRITES, NULL, 0); if ((xattrsize == -1) && @@ -3035,6 +3281,11 @@ posix_cs_set_state(xlator_t *this, dict_t **rsp, gf_cs_obj_state state, char *value = NULL; size_t xattrsize = 0; + if (!rsp) { + ret = -1; + goto out; + } + if (!(*rsp)) { *rsp = dict_new(); if (!(*rsp)) { @@ -3128,14 +3379,20 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, { gf_cs_obj_state state = GF_CS_ERROR; int ret = 0; + gf_boolean_t is_cs_obj_status = _gf_false; + gf_boolean_t is_cs_obj_repair = _gf_false; + + if (dict_get_sizen(xattr_req, GF_CS_OBJECT_STATUS)) + is_cs_obj_status = _gf_true; + if (dict_get_sizen(xattr_req, GF_CS_OBJECT_REPAIR)) + is_cs_obj_repair = _gf_true; - if (!(dict_get(xattr_req, GF_CS_OBJECT_STATUS) || - dict_get(xattr_req, GF_CS_OBJECT_REPAIR))) + if (!(is_cs_obj_status || is_cs_obj_repair)) return 0; if (fd) { LOCK(&fd->inode->lock); - if (dict_get(xattr_req, GF_CS_OBJECT_STATUS)) { + if (is_cs_obj_status) { state = posix_cs_check_status(this, NULL, pfd, buf); gf_msg_debug(this->name, 0, "state : %d", state); ret = posix_cs_set_state(this, xattr_rsp, state, NULL, pfd); @@ -3155,7 +3412,7 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, } } - if (dict_get(xattr_req, GF_CS_OBJECT_REPAIR)) { + if (is_cs_obj_repair) { state = posix_cs_check_status(this, NULL, pfd, buf); 
gf_msg_debug(this->name, 0, "state : %d", state); @@ -3186,7 +3443,7 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, } LOCK(&loc->inode->lock); - if (dict_get(xattr_req, GF_CS_OBJECT_STATUS)) { + if (is_cs_obj_status) { state = posix_cs_check_status(this, realpath, NULL, buf); gf_msg_debug(this->name, 0, "state : %d", state); ret = posix_cs_set_state(this, xattr_rsp, state, realpath, NULL); @@ -3206,7 +3463,7 @@ posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, } } - if (dict_get(xattr_req, GF_CS_OBJECT_REPAIR)) { + if (is_cs_obj_repair) { state = posix_cs_check_status(this, realpath, NULL, buf); gf_msg_debug(this->name, 0, "state : %d", state); @@ -3240,3 +3497,170 @@ unlock: out: return ret; } + +int +posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno) +{ + int ret = -1; + + if (inode->ia_type == IA_IFBLK || inode->ia_type == IA_IFCHR) { + *op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_INVALID_ARGUMENT, + "%s received on %s file (%s)", fop, + (inode->ia_type == IA_IFBLK) ? 
"block" : "char", + uuid_utoa(inode->gfid)); + goto out; + } + + ret = 0; + +out: + return ret; +} + +void +posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xattr_req) +{ + int ret = 0; + char val[4096] = { + 0, + }; + + if (!xattr_req) + return; + + if (!dict_get_sizen(xattr_req, GF_CS_OBJECT_STATUS)) + return; + + if (fd != -1) { + ret = sys_fgetxattr(fd, GF_CS_OBJECT_SIZE, &val, sizeof(val)); + if (ret > 0) { + buf->ia_size = atoll(val); + } else { + /* Safe to assume that the other 2 xattrs are also not set*/ + return; + } + ret = sys_fgetxattr(fd, GF_CS_BLOCK_SIZE, &val, sizeof(val)); + if (ret > 0) { + buf->ia_blksize = atoll(val); + } + ret = sys_fgetxattr(fd, GF_CS_NUM_BLOCKS, &val, sizeof(val)); + if (ret > 0) { + buf->ia_blocks = atoll(val); + } + } else { + ret = sys_lgetxattr(loc, GF_CS_OBJECT_SIZE, &val, sizeof(val)); + if (ret > 0) { + buf->ia_size = atoll(val); + } else { + /* Safe to assume that the other 2 xattrs are also not set*/ + return; + } + ret = sys_lgetxattr(loc, GF_CS_BLOCK_SIZE, &val, sizeof(val)); + if (ret > 0) { + buf->ia_blksize = atoll(val); + } + ret = sys_lgetxattr(loc, GF_CS_NUM_BLOCKS, &val, sizeof(val)); + if (ret > 0) { + buf->ia_blocks = atoll(val); + } + } +} + +gf_boolean_t +posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this) +{ + int op_ret = 0; + ssize_t size = 0; + char value_buf[4096] = { + 0, + }; + gf_boolean_t have_val = _gf_false; + data_t *arg_data = NULL; + char *xattr_name = NULL; + size_t xattr_len = 0; + gf_boolean_t is_stale = _gf_false; + + op_ret = dict_get_str_sizen(xdata, GF_PREOP_PARENT_KEY, &xattr_name); + if (xattr_name == NULL) { + op_ret = 0; + return is_stale; + } + + xattr_len = strlen(xattr_name); + arg_data = dict_getn(xdata, xattr_name, xattr_len); + if (!arg_data) { + op_ret = 0; + dict_del_sizen(xdata, GF_PREOP_PARENT_KEY); + return is_stale; + } + + size = sys_lgetxattr(par_path, xattr_name, value_buf, + sizeof(value_buf) - 1); + + if (size >= 0) { + have_val 
= _gf_true; + } else { + if (errno == ERANGE) { + gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_PREOP_CHECK_FAILED, + "getxattr on key (%s) path (%s) failed due to" + " buffer overflow", + xattr_name, par_path); + size = sys_lgetxattr(par_path, xattr_name, NULL, 0); + } + if (size < 0) { + op_ret = -1; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, + "getxattr on key (%s) failed, path : %s", xattr_name, + par_path); + goto out; + } + } + + if (!have_val) { + size = sys_lgetxattr(par_path, xattr_name, value_buf, size); + if (size < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_PREOP_CHECK_FAILED, + "getxattr on key (%s) failed (%s)", xattr_name, + strerror(errno)); + goto out; + } + } + + if ((arg_data->len != size) || (memcmp(arg_data->data, value_buf, size))) { + gf_msg(this->name, GF_LOG_INFO, EIO, P_MSG_PREOP_CHECK_FAILED, + "failing preop as on-disk xattr value differs from argument " + "value for key %s", + xattr_name); + op_ret = -1; + } + +out: + dict_deln(xdata, xattr_name, xattr_len); + dict_del_sizen(xdata, GF_PREOP_PARENT_KEY); + + if (op_ret == -1) { + is_stale = _gf_true; + } + + return is_stale; +} + +/* Delete user xattr from the file at the file-path specified by data and from + * dict */ +int +posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data) +{ + int ret; + char *real_path = data; + + ret = sys_lremovexattr(real_path, k); + if (ret) { + gf_msg("posix-helpers", GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED, errno, + "removexattr failed. 
key %s path %s", k, real_path); + } + + dict_del(dict, k); + + return ret; +} diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c index 2cb65d7fd0e..6d54d37e5aa 100644 --- a/xlators/storage/posix/src/posix-inode-fd-ops.c +++ b/xlators/storage/posix/src/posix-inode-fd-ops.c @@ -26,7 +26,6 @@ #include <signal.h> #include <sys/uio.h> #include <unistd.h> -#include <ftw.h> #include <regex.h> #ifndef GF_BSD_HOST_OS @@ -37,30 +36,25 @@ #include <fcntl.h> #endif /* HAVE_LINKAT */ -#include "glusterfs.h" -#include "checksum.h" -#include "dict.h" -#include "logging.h" -#include "posix.h" -#include "xlator.h" -#include "defaults.h" -#include "common-utils.h" -#include "compat-errno.h" -#include "compat.h" -#include "byte-order.h" -#include "syscall.h" -#include "statedump.h" -#include "locking.h" -#include "timer.h" +#include <glusterfs/checksum.h> +#include <glusterfs/dict.h> +#include <glusterfs/logging.h> +#include "posix-handle.h" +#include <glusterfs/compat-errno.h> +#include <glusterfs/compat.h> +#include <glusterfs/byte-order.h> +#include <glusterfs/syscall.h> +#include <glusterfs/statedump.h> +#include <glusterfs/locking.h> +#include <glusterfs/timer.h> #include "glusterfs3-xdr.h" -#include "hashfn.h" -#include "posix-aio.h" -#include "glusterfs-acl.h" +#include <glusterfs/glusterfs-acl.h> #include "posix-messages.h" #include "posix-metadata.h" -#include "events.h" +#include <glusterfs/events.h> #include "posix-gfid-path.h" -#include "compat-uuid.h" +#include <glusterfs/compat-uuid.h> +#include <glusterfs/common-utils.h> extern char *marker_xattrs[]; #define ALIGN_SIZE 4096 @@ -108,6 +102,61 @@ extern char *marker_xattrs[]; static char *disallow_removexattrs[] = {GF_XATTR_VOL_ID_KEY, GFID_XATTR_KEY, NULL}; +void +posix_cs_build_xattr_rsp(xlator_t *this, dict_t **rsp, dict_t *req, int fd, + char *loc) +{ + int ret = 0; + uuid_t uuid; + + if (!dict_get_sizen(req, GF_CS_OBJECT_STATUS)) + return; + + if (!(*rsp)) { 
+ *rsp = dict_new(); + if (!(*rsp)) { + return; + } + } + + if (fd != -1) { + if (dict_get_sizen(req, GF_CS_XATTR_ARCHIVE_UUID)) { + ret = sys_fgetxattr(fd, GF_CS_XATTR_ARCHIVE_UUID, uuid, 16); + if (ret > 0) { + ret = dict_set_gfuuid(*rsp, GF_CS_XATTR_ARCHIVE_UUID, uuid, + true); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "%s: Failed to set " + "dictionary value for %s for fd %d", + uuid_utoa(uuid), GF_CS_XATTR_ARCHIVE_UUID, fd); + } + } else { + gf_msg_debug(this->name, 0, "getxattr failed on %s for fd %d", + GF_CS_XATTR_ARCHIVE_UUID, fd); + } + } + } else { + if (dict_get_sizen(req, GF_CS_XATTR_ARCHIVE_UUID)) { + ret = sys_lgetxattr(loc, GF_CS_XATTR_ARCHIVE_UUID, uuid, 16); + if (ret > 0) { + ret = dict_set_gfuuid(*rsp, GF_CS_XATTR_ARCHIVE_UUID, uuid, + true); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DICT_SET_FAILED, + "%s: Failed to set " + "dictionary value for %s for loc %s", + uuid_utoa(uuid), GF_CS_XATTR_ARCHIVE_UUID, loc); + } + } else { + gf_msg_debug(this->name, 0, "getxattr failed on %s for %s", + GF_CS_XATTR_ARCHIVE_UUID, loc); + } + } + } + return; +} + int32_t posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { @@ -136,11 +185,15 @@ posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) if (op_ret == -1) { op_errno = errno; if (op_errno == ENOENT) { - gf_msg_debug(this->name, 0, "lstat on %s failed: %s", - real_path ? real_path : "<null>", strerror(op_errno)); + gf_msg_debug(this->name, 0, + "lstat on gfid-handle %s (path: %s)" + "failed: %s", + real_path ? real_path : "<null>", loc->path, + strerror(op_errno)); } else { gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_LSTAT_FAILED, - "lstat on %s failed", real_path ? real_path : "<null>"); + "lstat on gfid-handle %s (path: %s) failed", + real_path ? 
real_path : "<null>", loc->path); } goto out; } @@ -150,8 +203,11 @@ posix_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) posix_cs_maintenance(this, NULL, loc, NULL, &buf, real_path, xdata, &xattr_rsp, _gf_true); + + posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, -1, real_path); } + posix_update_iatt_buf(&buf, -1, real_path, xdata); op_ret = 0; out: @@ -326,8 +382,8 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "setattr (lstat) on %s failed", - real_path ? real_path : "<null>"); + "setattr (lstat) on gfid-handle %s (path: %s) failed", + real_path ? real_path : "<null>", loc->path); goto out; } @@ -348,9 +404,9 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_CHMOD_FAILED, - "setattr (chmod) on %s " + "setattr (chmod) on gfid-handle %s (path: %s) " "failed", - real_path); + real_path, loc->path); goto out; } } @@ -360,31 +416,18 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UTIMES_FAILED, - "setattr (utimes) on %s " + "setattr (utimes) on gfid-handle %s (path: %s) " "failed", - real_path); + real_path, loc->path); goto out; } - posix_update_utime_in_mdata(this, real_path, -1, loc->inode, stbuf, - valid); + posix_update_utime_in_mdata(this, real_path, -1, loc->inode, + &frame->root->ctime, stbuf, valid); } - if (valid & GF_SET_ATTR_CTIME && !priv->ctime) { - /* - * If ctime is not enabled, we have no means to associate an - * arbitrary ctime with the file, so as a fallback, we ignore - * the ctime payload and update the file ctime to current time - * (which is possible directly with the POSIX API). 
- */ - op_ret = PATH_SET_TIMESPEC_OR_TIMEVAL(real_path, NULL); - if (op_ret == -1) { - op_errno = errno; - gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_UTIMES_FAILED, - "setattr (utimes) on %s " - "failed", - real_path); - goto out; - } + if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) { + posix_update_ctime_in_mdata(this, real_path, -1, loc->inode, + &frame->root->ctime, stbuf, valid); } if (!valid) { @@ -392,9 +435,9 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LCHOWN_FAILED, - "lchown (%s, -1, -1) " + "lchown (gfid-handle: %s, path: %s, -1, -1) " "failed", - real_path); + real_path, loc->path); goto out; } @@ -405,23 +448,18 @@ posix_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "setattr (lstat) on %s failed", real_path); + "setattr (lstat) on gfid-handle %s (path: %s) failed", real_path, + loc->path); goto out; } - if (valid & GF_SET_ATTR_CTIME && priv->ctime) { - /* - * If we got ctime payload, we override - * the ctime of statpost with that. 
- */ - statpost.ia_ctime = stbuf->ia_ctime; - statpost.ia_ctime_nsec = stbuf->ia_ctime_nsec; - } posix_set_ctime(frame, this, real_path, -1, loc->inode, &statpost); if (xdata) xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, &statpost); + posix_update_iatt_buf(&statpre, -1, real_path, xdata); + posix_update_iatt_buf(&statpost, -1, real_path, xdata); op_ret = 0; out: @@ -484,18 +522,19 @@ posix_do_futimes(xlator_t *this, int fd, struct iatt *stbuf, int valid) struct stat stat = { 0, }; - - ret = sys_fstat(fd, &stat); - if (ret != 0) { - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED, "%d", - fd); - goto out; - } + gf_boolean_t fstat_executed = _gf_false; if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { tv[0].tv_sec = stbuf->ia_atime; tv[0].tv_usec = stbuf->ia_atime_nsec / 1000; } else { + ret = sys_fstat(fd, &stat); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED, + "%d", fd); + goto out; + } + fstat_executed = _gf_true; /* atime is not given, use current values */ tv[0].tv_sec = ST_ATIM_SEC(&stat); tv[0].tv_usec = ST_ATIM_NSEC(&stat) / 1000; @@ -505,6 +544,14 @@ posix_do_futimes(xlator_t *this, int fd, struct iatt *stbuf, int valid) tv[1].tv_sec = stbuf->ia_mtime; tv[1].tv_usec = stbuf->ia_mtime_nsec / 1000; } else { + if (!fstat_executed) { + ret = sys_fstat(fd, &stat); + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FILE_OP_FAILED, + "%d", fd); + goto out; + } + } /* mtime is not given, use current values */ tv[1].tv_sec = ST_MTIM_SEC(&stat); tv[1].tv_usec = ST_MTIM_NSEC(&stat) / 1000; @@ -530,6 +577,7 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt statpost = { 0, }; + struct posix_private *priv = NULL; struct posix_fd *pfd = NULL; dict_t *xattr_rsp = NULL; int32_t ret = -1; @@ -542,6 +590,9 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); + priv = this->private; + 
VALIDATE_OR_GOTO(priv, out); + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (ret < 0) { gf_msg_debug(this->name, 0, "pfd is NULL from fd=%p", fd); @@ -590,8 +641,13 @@ posix_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, fd); goto out; } - posix_update_utime_in_mdata(this, NULL, pfd->fd, fd->inode, stbuf, - valid); + posix_update_utime_in_mdata(this, NULL, pfd->fd, fd->inode, + &frame->root->ctime, stbuf, valid); + } + + if ((valid & GF_SET_ATTR_CTIME) && priv->ctime) { + posix_update_ctime_in_mdata(this, NULL, pfd->fd, fd->inode, + &frame->root->ctime, stbuf, valid); } if (!valid) { @@ -642,6 +698,10 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, gf_boolean_t locked = _gf_false; posix_inode_ctx_t *ctx = NULL; struct posix_private *priv = NULL; + gf_boolean_t check_space_error = _gf_false; + struct stat statbuf = { + 0, + }; DECLARE_OLD_FS_ID_VAR; @@ -661,7 +721,10 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, if (priv->disk_reserve) posix_disk_space_check(this); - DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, out); + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, ret, ret, unlock); + +overwrite: + check_space_error = _gf_true; ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); if (ret < 0) { @@ -685,7 +748,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, ret = -errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "fallocate (fstat) failed on fd=%p", fd); - goto out; + goto unlock; } if (xdata) { @@ -695,7 +758,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, gf_msg(this->name, GF_LOG_ERROR, 0, 0, "file state check failed, fd %p", fd); ret = -EIO; - goto out; + goto unlock; } } @@ -706,7 +769,7 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, "fallocate failed on %s offset: %jd, " "len:%zu, flags: %d", uuid_utoa(fd->inode->gfid), offset, len, 
flags); - goto out; + goto unlock; } ret = posix_fdstat(this, fd->inode, pfd->fd, statpost); @@ -714,16 +777,47 @@ posix_do_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, ret = -errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, "fallocate (fstat) failed on fd=%p", fd); - goto out; + goto unlock; } posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, statpost); -out: +unlock: if (locked) { pthread_mutex_unlock(&ctx->write_atomic_lock); locked = _gf_false; } + + if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) { +#ifdef FALLOC_FL_KEEP_SIZE + if (flags & FALLOC_FL_KEEP_SIZE) { + goto overwrite; + } +#endif + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + + if (sys_fstat(pfd->fd, &statbuf) < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED, + "%d", pfd->fd); + goto out; + } + + if (offset + len <= statbuf.st_size) { + gf_msg_debug(this->name, 0, + "io vector size will not" + " change disk size so allow overwrite for" + " fd %d", + pfd->fd); + goto overwrite; + } + } + +out: SET_TO_OLD_FS_ID(); if (ret == ENOSPC) ret = -ENOSPC; @@ -898,6 +992,7 @@ posix_do_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, } } + posix_update_iatt_buf(statpre, pfd->fd, NULL, xdata); /* See if we can use FALLOC_FL_ZERO_RANGE to perform the zero fill. * If it fails, fall back to _posix_do_zerofill() and an optional fsync. 
*/ @@ -965,6 +1060,7 @@ posix_glfallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt statpost = { 0, }; + dict_t *rsp_xdata = NULL; #ifdef FALLOC_FL_KEEP_SIZE if (keep_size) @@ -972,15 +1068,15 @@ posix_glfallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, #endif /* FALLOC_FL_KEEP_SIZE */ ret = posix_do_fallocate(frame, this, fd, flags, offset, len, &statpre, - &statpost, xdata, NULL); + &statpost, xdata, &rsp_xdata); if (ret < 0) goto err; - STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, NULL); + STACK_UNWIND_STRICT(fallocate, frame, 0, 0, &statpre, &statpost, rsp_xdata); return 0; err: - STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, NULL); + STACK_UNWIND_STRICT(fallocate, frame, -1, -ret, NULL, NULL, rsp_xdata); return 0; } @@ -1029,27 +1125,59 @@ posix_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, }; struct posix_private *priv = NULL; int op_ret = -1; - int op_errno = -EINVAL; + int op_errno = EINVAL; dict_t *rsp_xdata = NULL; + gf_boolean_t check_space_error = _gf_false; + struct posix_fd *pfd = NULL; + struct stat statbuf = { + 0, + }; - VALIDATE_OR_GOTO(frame, out); - VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(frame, unwind); + VALIDATE_OR_GOTO(this, unwind); priv = this->private; DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); +overwrite: + check_space_error = _gf_true; ret = posix_do_zerofill(frame, this, fd, offset, len, &statpre, &statpost, xdata, &rsp_xdata); if (ret < 0) { op_ret = -1; op_errno = -ret; - goto out; + goto unwind; } STACK_UNWIND_STRICT(zerofill, frame, 0, 0, &statpre, &statpost, rsp_xdata); return 0; out: + if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) { + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto out; + } + + if (sys_fstat(pfd->fd, &statbuf) < 0) { + gf_msg(this->name, GF_LOG_WARNING, 
op_errno, P_MSG_FILE_OP_FAILED, + "%d", pfd->fd); + goto out; + } + + if (offset + len <= statbuf.st_size) { + gf_msg_debug(this->name, 0, + "io vector size will not" + " change disk size so allow overwrite for" + " fd %d", + pfd->fd); + goto overwrite; + } + } + +unwind: STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, NULL, NULL, rsp_xdata); return 0; @@ -1065,7 +1193,7 @@ posix_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) */ gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_IPC_NOT_HANDLE, "GF_LOG_IPC(%d) not handled", op); - STACK_UNWIND_STRICT(ipc, frame, -1, -EOPNOTSUPP, NULL); + STACK_UNWIND_STRICT(ipc, frame, -1, EOPNOTSUPP, NULL); return 0; } @@ -1180,7 +1308,8 @@ posix_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, if (dir == NULL) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_OPENDIR_FAILED, - "opendir failed on %s", real_path); + "opendir failed on gfid-handle: %s (path: %s)", real_path, + loc->path); goto out; } @@ -1188,7 +1317,8 @@ posix_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, if (op_ret < 0) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_DIRFD_FAILED, - "dirfd() failed on %s", real_path); + "dirfd() failed (path: %s, gfid-handle: %s", loc->path, + real_path); goto out; } @@ -1206,8 +1336,9 @@ posix_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, if (op_ret) gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, "failed to set the fd" - "context path=%s fd=%p", - real_path, fd); + "context path=%s " + "gfid-handle= %s,fd=%p", + loc->path, real_path, fd); posix_set_ctime(frame, this, NULL, pfd->fd, fd->inode, NULL); @@ -1230,6 +1361,22 @@ out: return 0; } +static void +posix_add_fd_to_cleanup(xlator_t *this, struct posix_fd *pfd) +{ + glusterfs_ctx_t *ctx = this->ctx; + struct posix_private *priv = this->private; + + pfd->xl = this; + pthread_mutex_lock(&ctx->fd_lock); + { + list_add_tail(&pfd->list, 
&ctx->janitor_fds); + priv->rel_fdcount++; + pthread_cond_signal(&ctx->fd_cond); + } + pthread_mutex_unlock(&ctx->fd_lock); +} + int32_t posix_releasedir(xlator_t *this, fd_t *fd) { @@ -1237,8 +1384,6 @@ posix_releasedir(xlator_t *this, fd_t *fd) uint64_t tmp_pfd = 0; int ret = 0; - struct posix_private *priv = NULL; - VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); @@ -1254,18 +1399,7 @@ posix_releasedir(xlator_t *this, fd_t *fd) "pfd->dir is NULL for fd=%p", fd); goto out; } - - priv = this->private; - if (!priv) - goto out; - - pthread_mutex_lock(&priv->janitor_lock); - { - INIT_LIST_HEAD(&pfd->list); - list_add_tail(&pfd->list, &priv->janitor_fds); - pthread_cond_signal(&priv->janitor_cond); - } - pthread_mutex_unlock(&priv->janitor_lock); + posix_add_fd_to_cleanup(this, pfd); out: return 0; @@ -1304,7 +1438,8 @@ posix_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_READYLINK_FAILED, - "readlink on %s failed", real_path); + "readlink on gfid-handle: %s (path: %s) failed", real_path, + loc->path); goto out; } @@ -1348,8 +1483,9 @@ posix_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "pre-operation lstat on %s failed", - real_path ? real_path : "<null>"); + "pre-operation lstat on (path: %s gfid-handle: %s) " + "failed", + loc->path, real_path ? 
real_path : "<null>"); goto out; } @@ -1364,11 +1500,13 @@ posix_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, } } + posix_update_iatt_buf(&prebuf, -1, real_path, xdata); op_ret = sys_truncate(real_path, offset); if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TRUNCATE_FAILED, - "truncate on %s failed", real_path); + "truncate on gfid-handle: %s (path: %s) failed", real_path, + loc->path); goto out; } @@ -1377,7 +1515,8 @@ posix_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "lstat on %s failed", real_path); + "lstat on gfid-handle %s (path: %s) failed", real_path, + loc->path); goto out; } @@ -1403,6 +1542,10 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, int32_t _fd = -1; struct posix_fd *pfd = NULL; struct posix_private *priv = NULL; + struct iatt preop = { + 0, + }; + dict_t *rsp_xdata = NULL; struct iatt stbuf = { 0, }; @@ -1454,7 +1597,8 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, op_ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FILE_OP_FAILED, - "open on %s, flags: %d", real_path, flags); + "open on gfid-handle %s (path: %s), flags: %d", real_path, + loc->path, flags); goto out; } @@ -1469,16 +1613,24 @@ posix_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, pfd->flags = flags; pfd->fd = _fd; + if (xdata) { + op_ret = posix_fdstat(this, fd->inode, pfd->fd, &preop); + if (op_ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd); + GF_FREE(pfd); + goto out; + } + + posix_cs_maintenance(this, fd, NULL, &pfd->fd, &preop, NULL, xdata, + &rsp_xdata, _gf_true); + } + op_ret = fd_ctx_set(fd, this, (uint64_t)(long)pfd); if (op_ret) gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_FD_PATH_SETTING_FAILED, - "failed 
to set the fd context path=%s fd=%p", real_path, fd); - - LOCK(&priv->lock); - { - priv->nr_files++; - } - UNLOCK(&priv->lock); + "failed to set the fd context gfid-handle=%s path=%s fd=%p", + real_path, loc->path, fd); op_ret = 0; @@ -1491,7 +1643,7 @@ out: SET_TO_OLD_FS_ID(); - STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, NULL); + STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, rsp_xdata); return 0; } @@ -1522,13 +1674,13 @@ posix_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, VALIDATE_OR_GOTO(frame, out); VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); + VALIDATE_OR_GOTO(fd->inode, out); VALIDATE_OR_GOTO(this->private, out); priv = this->private; VALIDATE_OR_GOTO(priv, out); - if (fd->inode && ((fd->inode->ia_type == IA_IFBLK) || - (fd->inode->ia_type == IA_IFCHR))) { + if ((fd->inode->ia_type == IA_IFBLK) || (fd->inode->ia_type == IA_IFCHR)) { gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT, "readv received on a block/char file (%s)", uuid_utoa(fd->inode->gfid)); @@ -1576,6 +1728,7 @@ posix_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, } } + posix_update_iatt_buf(&preop, _fd, NULL, xdata); op_ret = sys_pread(_fd, iobuf->ptr, size, offset); if (op_ret == -1) { op_errno = errno; @@ -1588,11 +1741,7 @@ posix_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, goto out; } - LOCK(&priv->lock); - { - priv->read_value += op_ret; - } - UNLOCK(&priv->lock); + GF_ATOMIC_ADD(priv->read_value, op_ret); vec.iov_base = iobuf->ptr; vec.iov_len = op_ret; @@ -1794,20 +1943,29 @@ posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, gf_boolean_t write_append = _gf_false; gf_boolean_t update_atomic = _gf_false; posix_inode_ctx_t *ctx = NULL; + gf_boolean_t check_space_error = _gf_false; + struct stat statbuf = { + 0, + }; + int totlen = 0; + int idx = 0; - VALIDATE_OR_GOTO(frame, out); - VALIDATE_OR_GOTO(this, out); - VALIDATE_OR_GOTO(fd, out); - VALIDATE_OR_GOTO(vector, out); - 
VALIDATE_OR_GOTO(this->private, out); + VALIDATE_OR_GOTO(frame, unwind); + VALIDATE_OR_GOTO(this, unwind); + VALIDATE_OR_GOTO(fd, unwind); + VALIDATE_OR_GOTO(fd->inode, unwind); + VALIDATE_OR_GOTO(vector, unwind); + VALIDATE_OR_GOTO(this->private, unwind); priv = this->private; - VALIDATE_OR_GOTO(priv, out); + VALIDATE_OR_GOTO(priv, unwind); DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); - if (fd->inode && ((fd->inode->ia_type == IA_IFBLK) || - (fd->inode->ia_type == IA_IFCHR))) { +overwrite: + + check_space_error = _gf_true; + if ((fd->inode->ia_type == IA_IFBLK) || (fd->inode->ia_type == IA_IFCHR)) { gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_INVALID_ARGUMENT, "writev received on a block/char file (%s)", uuid_utoa(fd->inode->gfid)); @@ -1885,6 +2043,7 @@ posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, } } + posix_update_iatt_buf(&preop, _fd, NULL, xdata); if (locked && write_append) { if (preop.ia_size == offset || (fd->flags & O_APPEND)) is_append = 1; @@ -1938,11 +2097,7 @@ posix_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, } } - LOCK(&priv->lock); - { - priv->write_value += op_ret; - } - UNLOCK(&priv->lock); + GF_ATOMIC_ADD(priv->write_value, op_ret); out: @@ -1951,6 +2106,36 @@ out: locked = _gf_false; } + if (op_errno == ENOSPC && priv->disk_space_full && !check_space_error) { + ret = posix_fd_ctx_get(fd, this, &pfd, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd); + goto unwind; + } + + if (sys_fstat(pfd->fd, &statbuf) < 0) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FILE_OP_FAILED, + "%d", pfd->fd); + goto unwind; + } + + for (idx = 0; idx < count; idx++) { + totlen = vector[idx].iov_len; + } + + if ((offset + totlen <= statbuf.st_size) && + !(statbuf.st_blocks * statbuf.st_blksize < statbuf.st_size)) { + gf_msg_debug(this->name, 0, + "io vector size will not" + " change disk size so allow overwrite for" + " fd %d", + pfd->fd); 
+ goto overwrite; + } + } + +unwind: STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, &preop, &postop, rsp_xdata); @@ -1960,6 +2145,270 @@ out: } int32_t +posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, + off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len, + uint32_t flags, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd_in = -1; + int _fd_out = -1; + struct posix_private *priv = NULL; + struct posix_fd *pfd_in = NULL; + struct posix_fd *pfd_out = NULL; + struct iatt preop_dst = { + 0, + }; + struct iatt postop_dst = { + 0, + }; + struct iatt stbuf = { + 0, + }; + int ret = -1; + dict_t *rsp_xdata = NULL; + int is_append = 0; + gf_boolean_t locked = _gf_false; + gf_boolean_t update_atomic = _gf_false; + posix_inode_ctx_t *ctx = NULL; + char in_uuid_str[64] = {0}, out_uuid_str[64] = {0}; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd_in, out); + VALIDATE_OR_GOTO(fd_in->inode, out); + VALIDATE_OR_GOTO(fd_out, out); + VALIDATE_OR_GOTO(fd_out->inode, out); + VALIDATE_OR_GOTO(this->private, out); + + priv = this->private; + + VALIDATE_OR_GOTO(priv, out); + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + + if (posix_check_dev_file(this, fd_in->inode, "copy_file_range", &op_errno)) + goto out; + + if (posix_check_dev_file(this, fd_out->inode, "copy_file_range", &op_errno)) + goto out; + + ret = posix_fd_ctx_get(fd_in, this, &pfd_in, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd_in); + goto out; + } + + _fd_in = pfd_in->fd; + + ret = posix_fd_ctx_get(fd_out, this, &pfd_out, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd_out); + goto out; + } + + _fd_out = pfd_out->fd; + + /* + * Currently, the internal write is checked via xdata which + * is set by some xlator above. 
It could be due to several of + * the reasons such as healing or a snapshot operation happening + * using copy_file_range. As of now (i.e. writing the patch with + * this change) none of the xlators above posix are using the + * internal write with copy_file_range. In future it might + * change. At least as of now the hope is that, when that happens + * this function or fop does not require additional changes for + * handling internal writes. + */ + ret = posix_check_internal_writes(this, fd_out, _fd_out, xdata); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "possible overwrite from internal client, fd=%p", fd_out); + op_ret = -1; + op_errno = EBUSY; + goto out; + } + + if (xdata) { + if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) + update_atomic = _gf_true; + } + + /* + * The update_atomic option is to instruct posix to do prestat, + * write and poststat atomically. This is to prevent any modification to + * ia_size and ia_blocks until poststat and the diff in their values + * between pre and poststat could be of use for some translators. + * This is similar to the atomic write operation. atomic write is + * (i.e. prestat + write + poststat) used by shard as of now. In case, + * some xlator needs copy_file_range to be atomic from prestat and poststat + * perspective (i.e. prestat + copy_file_range + poststat) then it has + * to send "GLUSTERFS_WRITE_UPDATE_ATOMIC" key in xdata. 
+ */ + + op_ret = posix_inode_ctx_get_all(fd_out->inode, this, &ctx); + if (op_ret < 0) { + op_errno = ENOMEM; + goto out; + } + + if (update_atomic) { + ret = pthread_mutex_lock(&ctx->write_atomic_lock); + if (!ret) + locked = _gf_true; + else { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_MUTEX_FAILED, + "failed to hold write atomic lock on %s", + uuid_utoa(fd_out->inode->gfid)); + goto out; + } + } + + op_ret = posix_fdstat(this, fd_out->inode, _fd_out, &preop_dst); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd_out); + goto out; + } + + /* + * Since, only the destination file (fd_out) is undergoing + * modification, the write related tests are done on that. + * i.e. this is treated as if the destination file + * were undergoing a write fop from a maintenance perspective. + */ + if (xdata) { + op_ret = posix_cs_maintenance(this, fd_out, NULL, &_fd_out, &preop_dst, + NULL, xdata, &rsp_xdata, _gf_false); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd_out); + op_errno = EIO; + goto out; + } + } + + /* + * NOTE: This is just doing a single execution of copy_file_range + * system call. If the returned value of this system call is less + * than len, then should we keep doing it in a for loop until the + * copy_file_range of all the len bytes is done? + * Check the example program provided in the man page of + * copy_file_range. + * If so, then separate variables for both off_in and off_out + * should be used which are initialized to off_in and off_out + * that this function call receives, but then advanced by the + * value returned by sys_copy_file_range and then use that as + * off_in and off_out for next instance of copy_file_range execution. 
+ */ + op_ret = sys_copy_file_range(_fd_in, &off_in, _fd_out, &off_out, len, + flags); + + if (op_ret < 0) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_COPY_FILE_RANGE_FAILED, + "copy_file_range failed: fd_in: %p (gfid: %s) ," + " fd_out %p (gfid:%s)", + fd_in, uuid_utoa_r(fd_in->inode->gfid, in_uuid_str), fd_out, + uuid_utoa_r(fd_out->inode->gfid, out_uuid_str)); + goto out; + } + + /* + * Let this be as it is for now. This function collects + * information such as open fd count etc. So, even though + * is_append does not apply to copy_file_range, for now, + * allowing it to be recorded in the dict as _gf_false. + */ + rsp_xdata = _fill_writev_xdata(fd_out, xdata, this, is_append); + + /* copy_file_range successful, we also need to get the stat of + * the file we wrote to (i.e. destination file or fd_out). + */ + ret = posix_fdstat(this, fd_out->inode, _fd_out, &postop_dst); + if (ret == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "post-operation fstat failed on fd=%p", fd_out); + goto out; + } + + /* + * Also perform the stat on the source fd (i.e. fd_in). For now, + * allowing it to be done within the locked region if the request + * is for atomic operation (and update) of copy_file_range. + */ + ret = posix_fdstat(this, fd_in->inode, _fd_in, &stbuf); + if (ret == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "post-operation fstat failed on fd=%p", fd_in); + goto out; + } + + /* + * The core logic of what time attributes are to be updated + * on a fop is decided at client side xlator utime. + * All the remaining fops call posix_set_ctime function + * to update the {a,m,c}time. But, for all the other fops, + * the operation is happening on only one file (or inode). + * But here, there are 2 fds (source and destination). 
Hence + * the new function below to update the appropriate times for + * both the source and the destination file. + * For the source file, if at all anything has to be updated, + * it would be atime (as that file is only read, not updated). + * For the destination file, the attributes that require the + * modification would be mtime and ctime. + * What times have to be changed is actually determined by + * utime xlator. But, all of them would be in frame->root->flags. + * So, currently posix assumes that, the atime flag is for + * the source file and the other 2 flags are for the destination + * file. Since, the assumption is rigid (i.e. atime for source + * and {m,c}time for destination), the below function is called + * posix_set_ctime_cfr (cfr standing for copy_file_range). + * FUTURE TODO: + * In future, some other functionality or fop might operate + * simultaneously on 2 files. Then, depending upon what that new + * fop does or what are its requirements, the below function might + * require changes to become generic for consumption in case of + * simultaneous operations on 2 files. + */ + posix_set_ctime_cfr(frame, this, NULL, pfd_in->fd, fd_in->inode, &stbuf, + NULL, pfd_out->fd, fd_out->inode, &postop_dst); + + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + + /* + * Record copy_file_range in priv->write_value for now. + * If not needed, remove below section of code along with + * this comment (or add comment to explain why it is not + * needed). 
+ */ + GF_ATOMIC_ADD(priv->write_value, op_ret); + +out: + + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + + STACK_UNWIND_STRICT(copy_file_range, frame, op_ret, op_errno, &stbuf, + &preop_dst, &postop_dst, rsp_xdata); + + if (rsp_xdata) + dict_unref(rsp_xdata); + return 0; +} + +int32_t posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { char *real_path = NULL; @@ -1970,7 +2419,7 @@ posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) }; struct posix_private *priv = NULL; int shared_by = 1; - int percent = 0; + double percent = 0; uint64_t reserved_blocks = 0; VALIDATE_OR_GOTO(frame, out); @@ -1992,12 +2441,20 @@ posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) if (op_ret == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STATVFS_FAILED, - "statvfs failed on %s", real_path); + "statvfs failed on gfid-handle %s (path: %s)", real_path, + loc->path); goto out; } - percent = priv->disk_reserve; - reserved_blocks = (buf.f_blocks * percent) / 100; + if (priv->disk_unit == 'p') { + percent = priv->disk_reserve; + reserved_blocks = (((buf.f_blocks * percent) / 100) + 0.5); + } else { + if (buf.f_bsize) { + reserved_blocks = (priv->disk_reserve + buf.f_bsize - 1) / + buf.f_bsize; + } + } if (buf.f_bfree > reserved_blocks) { buf.f_bfree = (buf.f_bfree - reserved_blocks); @@ -2065,7 +2522,6 @@ out: int32_t posix_release(xlator_t *this, fd_t *fd) { - struct posix_private *priv = NULL; struct posix_fd *pfd = NULL; int ret = -1; uint64_t tmp_pfd = 0; @@ -2073,36 +2529,20 @@ posix_release(xlator_t *this, fd_t *fd) VALIDATE_OR_GOTO(this, out); VALIDATE_OR_GOTO(fd, out); - priv = this->private; - ret = fd_ctx_del(fd, this, &tmp_pfd); if (ret < 0) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_PFD_NULL, "pfd is NULL from fd=%p", fd); goto out; } - pfd = (struct posix_fd *)(long)tmp_pfd; + pfd = (struct posix_fd *)(long)tmp_pfd; if 
(pfd->dir) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_DIR_NOT_NULL, "pfd->dir is %p (not NULL) for file fd=%p", pfd->dir, fd); } - if (!priv) - goto out; - pthread_mutex_lock(&priv->janitor_lock); - { - INIT_LIST_HEAD(&pfd->list); - list_add_tail(&pfd->list, &priv->janitor_fds); - pthread_cond_signal(&priv->janitor_cond); - } - pthread_mutex_unlock(&priv->janitor_lock); - - LOCK(&priv->lock); - { - priv->nr_files--; - } - UNLOCK(&priv->lock); + posix_add_fd_to_cleanup(this, pfd); out: return 0; @@ -2237,7 +2677,7 @@ _handle_setxattr_keyvalue_pair(dict_t *d, char *k, data_t *v, void *tmp) filler = tmp; - return posix_handle_pair(filler->this, filler->real_path, k, v, + return posix_handle_pair(filler->this, filler->loc, filler->real_path, k, v, filler->flags, filler->stbuf); } @@ -2272,6 +2712,7 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, int32_t ret = 0; ssize_t acl_size = 0; dict_t *xattr = NULL; + dict_t *subvol_xattrs = NULL; posix_xattr_filler_t filler = { 0, }; @@ -2280,12 +2721,17 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, 0, }; data_t *tdata = NULL; - char stime[4096]; - char sxattr[4096]; + char *cs_var = NULL; gf_cs_obj_state state = -1; - char remotepath[4096] = {0}; int i = 0; int len; + struct mdata_iatt mdata_iatt = { + 0, + }; + int8_t sync_backend_xattrs = _gf_false; + data_pair_t *custom_xattrs; + data_t *keyval = NULL; + char **xattrs_to_heal = get_xattrs_to_heal(); DECLARE_OLD_FS_ID_VAR; SET_FS_ID(frame->root->uid, frame->root->gid); @@ -2306,6 +2752,20 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, goto out; } + ret = dict_get_mdata(dict, CTIME_MDATA_XDATA_KEY, &mdata_iatt); + if (ret == 0) { + /* This is initiated by lookup when ctime feature is enabled to create + * "trusted.glusterfs.mdata" xattr if not present. These are the files + * which were created when ctime feature is disabled. 
+ */ + ret = posix_set_mdata_xattr_legacy_files(this, loc->inode, real_path, + &mdata_iatt, &op_errno); + if (ret != 0) { + op_ret = -1; + } + goto out; + } + posix_pstat(this, loc->inode, loc->gfid, real_path, &preop, _gf_false); op_ret = -1; @@ -2337,10 +2797,11 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, goto unlock; } - sprintf(stime, "%lu", tmp_stbuf.ia_mtime); + cs_var = alloca(4096); + sprintf(cs_var, "%" PRId64, tmp_stbuf.ia_mtime); /*TODO: may be should consider nano-second also */ - if (strncmp(stime, tdata->data, tdata->len) != 0) { + if (strncmp(cs_var, tdata->data, tdata->len) > 0) { gf_msg(this->name, GF_LOG_ERROR, 0, 0, "mtime " "passed is different from seen by file now." @@ -2350,31 +2811,54 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, goto unlock; } - len = sprintf(sxattr, "%lu", tmp_stbuf.ia_size); + len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_size); - ret = sys_lsetxattr(real_path, GF_CS_OBJECT_SIZE, sxattr, len, + ret = sys_lsetxattr(real_path, GF_CS_OBJECT_SIZE, cs_var, len, flags); if (ret) { + op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, 0, 0, "setxattr failed. key %s err %d", GF_CS_OBJECT_SIZE, ret); + goto unlock; + } + + len = sprintf(cs_var, "%" PRIu64, tmp_stbuf.ia_blocks); + + ret = sys_lsetxattr(real_path, GF_CS_NUM_BLOCKS, cs_var, len, + flags); + if (ret) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "setxattr failed. key %s err %d", GF_CS_NUM_BLOCKS, ret); + goto unlock; + } + + len = sprintf(cs_var, "%" PRIu32, tmp_stbuf.ia_blksize); + + ret = sys_lsetxattr(real_path, GF_CS_BLOCK_SIZE, cs_var, len, + flags); + if (ret) { op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "setxattr failed. 
key %s err %d", GF_CS_BLOCK_SIZE, ret); goto unlock; } + memset(cs_var, 0, 4096); if (loc->path[0] == '/') { for (i = 1; i < strlen(loc->path); i++) { - remotepath[i - 1] = loc->path[i]; + cs_var[i - 1] = loc->path[i]; } - remotepath[i] = '\0'; - gf_msg_debug(this->name, GF_LOG_ERROR, "remotepath %s", - remotepath); + cs_var[i] = '\0'; + gf_msg_debug(this->name, GF_LOG_ERROR, "remotepath %s", cs_var); } - ret = sys_lsetxattr(real_path, GF_CS_OBJECT_REMOTE, remotepath, - strlen(loc->path), flags); + ret = sys_lsetxattr(real_path, GF_CS_OBJECT_REMOTE, cs_var, + strlen(cs_var), flags); if (ret) { + op_errno = errno; gf_log("POSIX", GF_LOG_ERROR, "setxattr failed - %s" " %d", @@ -2384,13 +2868,14 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, ret = sys_truncate(real_path, 0); if (ret) { + op_errno = errno; gf_log("POSIX", GF_LOG_ERROR, "truncate failed - %s" " %d", GF_CS_OBJECT_SIZE, ret); - op_errno = errno; ret = sys_lremovexattr(real_path, GF_CS_OBJECT_REMOTE); if (ret) { + op_errno = errno; gf_log("POSIX", GF_LOG_ERROR, "removexattr " "failed post processing- %s" @@ -2408,12 +2893,14 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, } unlock: UNLOCK(&loc->inode->lock); + op_ret = ret; goto out; } filler.real_path = real_path; filler.this = this; filler.stbuf = &preop; + filler.loc = loc; #ifdef GF_DARWIN_HOST_OS filler.flags = map_xattr_flags(flags); @@ -2427,6 +2914,66 @@ posix_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, goto out; } + ret = dict_get_int8(xdata, "sync_backend_xattrs", &sync_backend_xattrs); + if (ret) { + gf_msg_debug(this->name, -ret, "Unable to get sync_backend_xattrs"); + } + + if (sync_backend_xattrs) { + /* List all custom xattrs */ + subvol_xattrs = dict_new(); + if (!subvol_xattrs) + goto out; + + ret = dict_set_int32_sizen(xdata, "list-xattr", 1); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, + "Unable to set list-xattr in dict "); + 
goto out; + } + + subvol_xattrs = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, + NULL); + + /* Remove all user xattrs from the file */ + dict_foreach_fnmatch(subvol_xattrs, "user.*", posix_delete_user_xattr, + real_path); + + /* Remove all custom xattrs from the file */ + for (i = 1; xattrs_to_heal[i]; i++) { + keyval = dict_get(subvol_xattrs, xattrs_to_heal[i]); + if (keyval) { + ret = sys_lremovexattr(real_path, xattrs_to_heal[i]); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, P_MSG_XATTR_NOT_REMOVED, + errno, "removexattr failed. key %s path %s", + xattrs_to_heal[i], loc->path); + goto out; + } + + dict_del(subvol_xattrs, xattrs_to_heal[i]); + keyval = NULL; + } + } + + /* Set custom xattrs based on info provided by DHT */ + custom_xattrs = dict->members_list; + + while (custom_xattrs != NULL) { + ret = sys_lsetxattr(real_path, custom_xattrs->key, + custom_xattrs->value->data, + custom_xattrs->value->len, flags); + if (ret) { + op_errno = errno; + gf_log(this->name, GF_LOG_ERROR, "setxattr failed - %s %d", + custom_xattrs->key, ret); + goto out; + } + + custom_xattrs = custom_xattrs->next; + } + } + xattr = dict_new(); if (!xattr) goto out; @@ -2534,6 +3081,9 @@ out: if (xattr) dict_unref(xattr); + if (subvol_xattrs) + dict_unref(subvol_xattrs); + return 0; } @@ -2560,7 +3110,9 @@ posix_xattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc, } if (op_ret == -1) { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_LSTAT_FAILED, - "posix_xattr_get_real_filename (lstat) on %s failed", real_path); + "posix_xattr_get_real_filename (lstat) on " + "gfid-handle %s (path: %s) failed", + real_path, loc->path); return -errno; } @@ -2589,7 +3141,7 @@ posix_xattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc, (void)sys_closedir(fd); if (!found) - return -ENOENT; + return -ENOATTR; ret = dict_set_dynstr(dict, (char *)key, found); if (ret) { @@ -2709,6 +3261,13 @@ posix_links_in_same_directory(char *dirpath, int count, inode_t 
*leaf_inode, entry->d_name); gf_entry = gf_dirent_for_name(entry->d_name); + if (!gf_entry) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0, "gf_entry is NULL"); + op_ret = -1; + *op_errno = ENOMEM; + inode_unref(loc.inode); + goto out; + } gf_entry->inode = inode_ref(leaf_inode); gf_entry->dict = posix_xattr_fill(this, temppath, &loc, NULL, -1, xdata, NULL); @@ -2856,7 +3415,7 @@ posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode, } while (remaining_size > 0) { - snprintf(key, sizeof(key), "%s", list + list_offset); + len = snprintf(key, sizeof(key), "%s", list + list_offset); if (strncmp(key, PGFID_XATTR_KEY_PREFIX, SLEN(PGFID_XATTR_KEY_PREFIX)) != 0) goto next; @@ -2904,7 +3463,6 @@ posix_get_ancestry_non_directory(xlator_t *this, inode_t *leaf_inode, } next: - len = strlen(key); remaining_size -= (len + 1); list_offset += (len + 1); } /* while (remaining_size > 0) */ @@ -2970,6 +3528,10 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, int keybuff_len; char *value_buf = NULL; gf_boolean_t have_val = _gf_false; + struct iatt buf = { + 0, + }; + dict_t *xattr_rsp = NULL; DECLARE_OLD_FS_ID_VAR; @@ -3011,13 +3573,13 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, } if (loc->inode && name && GF_POSIX_ACL_REQUEST(name)) { - ret = posix_pacl_get(real_path, name, &value); + ret = posix_pacl_get(real_path, -1, name, &value); if (ret || !value) { op_errno = errno; gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_ACL_FAILED, "could not get acl (%s) for" - "%s", - name, real_path); + "gfid-handle %s (path: %s)", + name, real_path, loc->path); op_ret = -1; goto out; } @@ -3026,9 +3588,9 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (ret < 0) { GF_FREE(value); gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_ACL_FAILED, - "could not set acl (%s) for" - "%s in dictionary", - name, real_path); + "could not set acl (%s) for %s " + "(gfid-handle: %s) in dictionary", + name, loc->path, real_path); op_ret = 
-1; op_errno = ENOMEM; goto out; @@ -3046,7 +3608,7 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (ret < 0) { op_ret = -1; op_errno = -ret; - if (op_errno == ENOENT) { + if (op_errno == ENOATTR) { gf_msg_debug(this->name, 0, "Failed to get " "real filename (%s, %s)", @@ -3229,9 +3791,9 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (strncmp(key, "user.", 5) == 0) { key += 5; gf_msg_debug(this->name, 0, - "getxattr for file %s" + "getxattr for file %s (gfid-handle: %s)" " stripping user key: %s -> %s", - real_path, keybuffer, key); + loc->path, real_path, keybuffer, key); } } #endif @@ -3242,8 +3804,8 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (errno == ERANGE) { gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED, "getxattr failed due to overflow of buffer" - " on %s: %s ", - real_path, key); + " on gfid-handle %s (path: %s) : %s ", + real_path, loc->path, key); size = sys_lgetxattr(real_path, key, NULL, 0); } if (size == -1) { @@ -3258,12 +3820,14 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, } if ((op_errno == ENOATTR) || (op_errno == ENODATA)) { gf_msg_debug(this->name, 0, - "No such attribute:%s for file %s", key, - real_path); + "No such attribute:%s for file %s (path: %s)", + key, real_path, loc->path); } else { gf_msg(this->name, GF_LOG_ERROR, op_errno, - P_MSG_XATTR_FAILED, "getxattr failed on %s: %s ", - real_path, key); + P_MSG_XATTR_FAILED, + "getxattr failed on " + "%s (path: %s): %s ", + real_path, loc->path, key); } goto out; } @@ -3283,7 +3847,8 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, op_ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, - "getxattr failed on %s: key = %s", real_path, key); + "getxattr failed on %s (path: %s): key = %s", real_path, + loc->path, key); GF_FREE(value); goto out; } @@ -3294,8 +3859,8 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, op_errno 
= -op_ret; gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_DICT_SET_FAILED, "dict set operation " - "on %s for the key %s failed.", - real_path, key); + "on %s (gfid-handle: %s) for the key %s failed.", + loc->path, real_path, key); GF_FREE(value); goto out; } @@ -3311,8 +3876,8 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (errno == ERANGE) { gf_msg(this->name, GF_LOG_INFO, errno, P_MSG_XATTR_FAILED, "listxattr failed due to overflow of buffer" - " on %s ", - real_path); + " on %s (path: %s) ", + real_path, loc->path); size = sys_llistxattr(real_path, NULL, 0); } if (size == -1) { @@ -3326,7 +3891,8 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, "flag)"); } else { gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, - "listxattr failed on %s", real_path); + "listxattr failed on %s (path: %s)", real_path, + loc->path); } goto out; } @@ -3377,16 +3943,16 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, if (errno == ERANGE) { gf_msg(this->name, GF_LOG_INFO, op_errno, P_MSG_XATTR_FAILED, "getxattr failed due to overflow of" - " buffer on %s: %s ", - real_path, keybuffer); + " buffer on %s (path: %s): %s ", + real_path, loc->path, keybuffer); size = sys_lgetxattr(real_path, keybuffer, NULL, 0); } if (size == -1) { op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "getxattr failed on" - " %s: key = %s ", - real_path, keybuffer); + " %s (path: %s): key = %s ", + real_path, loc->path, keybuffer); goto out; } } @@ -3404,8 +3970,8 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "getxattr failed on" - " %s: key = %s ", - real_path, keybuffer); + " %s (path: %s): key = %s ", + real_path, loc->path, keybuffer); GF_FREE(value); goto out; } @@ -3423,8 +3989,8 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, op_errno = -op_ret; gf_msg(this->name, GF_LOG_ERROR, op_errno, 
P_MSG_DICT_SET_FAILED, "dict set operation " - "on %s for the key %s failed.", - real_path, keybuffer); + "on %s (gfid-handle: %s) for the key %s failed.", + loc->path, real_path, keybuffer); GF_FREE(value); goto out; } @@ -3438,6 +4004,11 @@ posix_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, done: op_ret = size; + if (xdata && (op_ret >= 0)) { + xattr_rsp = posix_xattr_fill(this, real_path, loc, NULL, -1, xdata, + &buf); + } + if (dict) { dict_del(dict, GFID_XATTR_KEY); dict_del(dict, GF_XATTR_VOL_ID_KEY); @@ -3446,7 +4017,10 @@ done: out: SET_TO_OLD_FS_ID(); - STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, NULL); + STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xattr_rsp); + + if (xattr_rsp) + dict_unref(xattr_rsp); if (dict) { dict_unref(dict); @@ -3476,6 +4050,10 @@ posix_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, int key_len; char *value_buf = NULL; gf_boolean_t have_val = _gf_false; + struct iatt buf = { + 0, + }; + dict_t *xattr_rsp = NULL; DECLARE_OLD_FS_ID_VAR; @@ -3678,7 +4256,6 @@ posix_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, size = sys_fgetxattr(_fd, key, NULL, 0); } if (size == -1) { - op_ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "fgetxattr failed " @@ -3699,7 +4276,6 @@ posix_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, bzero(value, size + 1); size = sys_fgetxattr(_fd, key, value, size); if (size == -1) { - op_ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, "fgetxattr failed o" @@ -3730,6 +4306,11 @@ posix_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, done: op_ret = size; + if (xdata && (op_ret >= 0)) { + xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, pfd->fd, xdata, + &buf); + } + if (dict) { dict_del(dict, GFID_XATTR_KEY); dict_del(dict, GF_XATTR_VOL_ID_KEY); @@ -3738,7 +4319,10 @@ done: out: 
SET_TO_OLD_FS_ID(); - STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, NULL); + STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, xattr_rsp); + + if (xattr_rsp) + dict_unref(xattr_rsp); if (dict) dict_unref(dict); @@ -3969,10 +4553,19 @@ posix_common_removexattr(call_frame_t *frame, loc_t *loc, fd_t *fd, goto out; } - if (loc) + if (loc) { ret = posix_pstat(this, inode, loc->gfid, real_path, &preop, _gf_false); - else + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PSTAT_FAILED, + "pstat operaton failed on %s", real_path); + } + } else { ret = posix_fdstat(this, inode, _fd, &preop); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FDSTAT_FAILED, + "fdstat operaton failed on %s", real_path ? real_path : ""); + } + } if (gf_get_index_by_elem(disallow_removexattrs, (char *)name) >= 0) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_XATTR_NOT_REMOVED, @@ -4028,9 +4621,17 @@ posix_common_removexattr(call_frame_t *frame, loc_t *loc, fd_t *fd, posix_set_ctime(frame, this, real_path, -1, inode, NULL); ret = posix_pstat(this, inode, loc->gfid, real_path, &postop, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_PSTAT_FAILED, + "pstat operaton failed on %s", real_path); + } } else { posix_set_ctime(frame, this, NULL, _fd, inode, NULL); ret = posix_fdstat(this, inode, _fd, &postop); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_FDSTAT_FAILED, + "fdstat operaton failed on %s", real_path); + } } if (ret) goto out; @@ -4413,6 +5014,7 @@ unlock: op_ret = -1; op_errno = EINVAL; GF_FREE(array); + array = NULL; goto out; } array = NULL; @@ -4645,6 +5247,7 @@ posix_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, } } + posix_update_iatt_buf(&preop, _fd, NULL, xdata); op_ret = sys_ftruncate(_fd, offset); if (op_ret == -1) { @@ -4662,6 +5265,8 @@ posix_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, goto out; } + posix_set_ctime(frame, this, NULL, 
pfd->fd, fd->inode, &postop); + op_ret = 0; out: @@ -4720,10 +5325,16 @@ posix_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) if (xdata) { xattr_rsp = posix_xattr_fill(this, NULL, NULL, fd, _fd, xdata, &buf); - posix_cs_maintenance(this, fd, NULL, &_fd, &buf, NULL, xdata, - &xattr_rsp, _gf_false); + op_ret = posix_cs_maintenance(this, fd, NULL, &_fd, &buf, NULL, xdata, + &xattr_rsp, _gf_false); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd); + } + posix_cs_build_xattr_rsp(this, &xattr_rsp, xdata, _fd, NULL); } + posix_update_iatt_buf(&buf, _fd, NULL, xdata); op_ret = 0; out: @@ -4859,20 +5470,13 @@ posix_fill_readdir(fd_t *fd, DIR *dir, off_t off, size_t size, } if (skip_dirs) { - len = posix_handle_path(this, fd->inode->gfid, NULL, NULL, 0); + hpath = alloca(PATH_MAX); + len = posix_handle_path(this, fd->inode->gfid, NULL, hpath, PATH_MAX); if (len <= 0) { errno = ESTALE; count = -1; goto out; } - hpath = alloca(len + 256); /* NAME_MAX */ - - if (posix_handle_path(this, fd->inode->gfid, NULL, hpath, len) <= 0) { - errno = ESTALE; - count = -1; - goto out; - } - len = strlen(hpath); hpath[len] = '/'; } @@ -5020,31 +5624,6 @@ posix_entry_xattr_fill(xlator_t *this, inode_t *inode, fd_t *fd, return posix_xattr_fill(this, entry_path, &tmp_loc, NULL, -1, dict, stbuf); } -#ifdef _DIRENT_HAVE_D_TYPE -static int -posix_d_type_from_ia_type(ia_type_t type) -{ - switch (type) { - case IA_IFDIR: - return DT_DIR; - case IA_IFCHR: - return DT_CHR; - case IA_IFBLK: - return DT_BLK; - case IA_IFIFO: - return DT_FIFO; - case IA_IFLNK: - return DT_LNK; - case IA_IFREG: - return DT_REG; - case IA_IFSOCK: - return DT_SOCK; - default: - return DT_UNKNOWN; - } -} -#endif - int posix_readdirp_fill(xlator_t *this, fd_t *fd, gf_dirent_t *entries, dict_t *dict) @@ -5065,22 +5644,14 @@ posix_readdirp_fill(xlator_t *this, fd_t *fd, gf_dirent_t *entries, itable = fd->inode->table; - len = posix_handle_path(this, 
fd->inode->gfid, NULL, NULL, 0); + hpath = alloca(PATH_MAX); + len = posix_handle_path(this, fd->inode->gfid, NULL, hpath, PATH_MAX); if (len <= 0) { gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLEPATH_FAILED, "Failed to create handle path, fd=%p, gfid=%s", fd, uuid_utoa(fd->inode->gfid)); return -1; } - - hpath = alloca(len + 256); /* NAME_MAX */ - if (posix_handle_path(this, fd->inode->gfid, NULL, hpath, len) <= 0) { - gf_msg(this->name, GF_LOG_WARNING, 0, P_MSG_HANDLEPATH_FAILED, - "Failed to create handle path, fd=%p, gfid=%s", fd, - uuid_utoa(fd->inode->gfid)); - return -1; - } - len = strlen(hpath); hpath[len] = '/'; @@ -5102,6 +5673,8 @@ posix_readdirp_fill(xlator_t *this, fd_t *fd, gf_dirent_t *entries, continue; } + posix_update_iatt_buf(&stbuf, -1, hpath, dict); + if (!inode) inode = inode_find(itable, stbuf.ia_gfid); @@ -5119,14 +5692,12 @@ posix_readdirp_fill(xlator_t *this, fd_t *fd, gf_dirent_t *entries, if (stbuf.ia_ino) entry->d_ino = stbuf.ia_ino; -#ifdef _DIRENT_HAVE_D_TYPE if (entry->d_type == DT_UNKNOWN && !IA_ISINVAL(stbuf.ia_type)) { /* The platform supports d_type but the underlying filesystem doesn't. We set d_type to the correct value from ia_type */ - entry->d_type = posix_d_type_from_ia_type(stbuf.ia_type); + entry->d_type = gf_d_type_from_ia_type(stbuf.ia_type); } -#endif inode = NULL; } @@ -5263,7 +5834,9 @@ posix_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ssize_t bytes_read = 0; int32_t weak_checksum = 0; int32_t zerofillcheck = 0; - unsigned char md5_checksum[MD5_DIGEST_LENGTH] = {0}; + /* Protocol version 4 uses 32 bytes i.e SHA256_DIGEST_LENGTH, + so this is used. 
*/ + unsigned char md5_checksum[SHA256_DIGEST_LENGTH] = {0}; unsigned char strong_checksum[SHA256_DIGEST_LENGTH] = {0}; unsigned char *checksum = NULL; struct posix_private *priv = NULL; @@ -5388,19 +5961,21 @@ posix_forget(xlator_t *this, inode_t *inode) { int ret = 0; char *unlink_path = NULL; - uint64_t ctx_uint = 0; + uint64_t ctx_uint1 = 0; + uint64_t ctx_uint2 = 0; posix_inode_ctx_t *ctx = NULL; + posix_mdata_t *mdata = NULL; struct posix_private *priv_posix = NULL; priv_posix = (struct posix_private *)this->private; if (!priv_posix) return 0; - ret = inode_ctx_del(inode, this, &ctx_uint); - if (!ctx_uint) - return 0; + ret = inode_ctx_del2(inode, this, &ctx_uint1, &ctx_uint2); + if (!ctx_uint1) + goto check_ctx2; - ctx = (posix_inode_ctx_t *)ctx_uint; + ctx = (posix_inode_ctx_t *)(uintptr_t)ctx_uint1; if (ctx->unlink_flag == GF_UNLINK_TRUE) { POSIX_GET_FILE_UNLINK_PATH(priv_posix->base_path, inode->gfid, @@ -5409,14 +5984,21 @@ posix_forget(xlator_t *this, inode_t *inode) gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED, "Failed to remove gfid :%s", uuid_utoa(inode->gfid)); ret = -1; - goto out; + goto ctx_free; } ret = sys_unlink(unlink_path); } -out: +ctx_free: pthread_mutex_destroy(&ctx->xattrop_lock); pthread_mutex_destroy(&ctx->write_atomic_lock); pthread_mutex_destroy(&ctx->pgfid_lock); GF_FREE(ctx); + +check_ctx2: + if (ctx_uint2) { + mdata = (posix_mdata_t *)(uintptr_t)ctx_uint2; + } + + GF_FREE(mdata); return ret; } diff --git a/xlators/storage/posix/src/posix-inode-handle.h b/xlators/storage/posix/src/posix-inode-handle.h index f225d94912b..36c47f2bebc 100644 --- a/xlators/storage/posix/src/posix-inode-handle.h +++ b/xlators/storage/posix/src/posix-inode-handle.h @@ -12,8 +12,7 @@ #include <limits.h> #include <sys/types.h> -#include "xlator.h" -#include "gf-dirent.h" +#include <glusterfs/gf-dirent.h> #include "posix.h" /* From Open Group Base Specifications Issue 6 */ @@ -45,14 +44,13 @@ #define MAKE_HANDLE_PATH(var, this, gfid, base) \ 
do { \ - int __len; \ - __len = posix_handle_path(this, gfid, base, NULL, 0); \ - if (__len <= 0) \ - break; \ - var = alloca(__len); \ - __len = posix_handle_path(this, gfid, base, var, __len); \ - if (__len <= 0) \ + int __len = 0; \ + int tot = PATH_MAX; \ + var = alloca(tot); \ + __len = posix_handle_path(this, gfid, base, var, tot); \ + if (__len <= 0) { \ var = NULL; \ + } \ } while (0) /* TODO: it is not a good idea to change a variable which @@ -91,8 +89,10 @@ (loc)->path); \ } \ break; \ + } /* __ret == -1 && errno == ELOOP */ \ + else { \ + op_ret = -1; \ } \ - /* __ret == -1 && errno == ELOOP */ \ } while (0) #define POSIX_ANCESTRY_PATH (1 << 0) diff --git a/xlators/storage/posix/src/posix-mem-types.h b/xlators/storage/posix/src/posix-mem-types.h index b9bd79ba3b8..2253f381ac5 100644 --- a/xlators/storage/posix/src/posix-mem-types.h +++ b/xlators/storage/posix/src/posix-mem-types.h @@ -10,15 +10,12 @@ #ifndef __POSIX_MEM_TYPES_H__ #define __POSIX_MEM_TYPES_H__ -#include "mem-types.h" +#include <glusterfs/mem-types.h> enum gf_posix_mem_types_ { - gf_posix_mt_dir_entry_t = gf_common_mt_end + 1, - gf_posix_mt_posix_fd, + gf_posix_mt_posix_fd = gf_common_mt_end + 1, gf_posix_mt_char, gf_posix_mt_posix_private, - gf_posix_mt_int32_t, - gf_posix_mt_posix_dev_t, gf_posix_mt_trash_path, gf_posix_mt_paiocb, gf_posix_mt_inode_ctx_t, diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h index 14167a5f829..f5bede266da 100644 --- a/xlators/storage/posix/src/posix-messages.h +++ b/xlators/storage/posix/src/posix-messages.h @@ -11,7 +11,7 @@ #ifndef _POSIX_MESSAGES_H_ #define _POSIX_MESSAGES_H_ -#include "glfs-message-id.h" +#include <glusterfs/glfs-message-id.h> /* To add new message IDs, append new identifiers at the end of the list. 
* @@ -67,6 +67,8 @@ GLFS_MSGID(POSIX, P_MSG_XATTR_FAILED, P_MSG_NULL_GFID, P_MSG_FCNTL_FAILED, P_MSG_ANCESTORY_FAILED, P_MSG_DISK_SPACE_CHECK_FAILED, P_MSG_FALLOCATE_FAILED, P_MSG_STOREMDATA_FAILED, P_MSG_FETCHMDATA_FAILED, P_MSG_GETMDATA_FAILED, - P_MSG_SETMDATA_FAILED, P_MSG_FRESHFILE); + P_MSG_SETMDATA_FAILED, P_MSG_FRESHFILE, P_MSG_MUTEX_FAILED, + P_MSG_COPY_FILE_RANGE_FAILED, P_MSG_TIMER_DELETE_FAILED, P_MSG_NOMEM, + P_MSG_PSTAT_FAILED, P_MSG_FDSTAT_FAILED); #endif /* !_GLUSTERD_MESSAGES_H_ */ diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c index 3fce0b23d1f..b1889052f11 100644 --- a/xlators/storage/posix/src/posix-metadata.c +++ b/xlators/storage/posix/src/posix-metadata.c @@ -8,14 +8,14 @@ cases as published by the Free Software Foundation. */ -#include "xlator.h" +#include <glusterfs/xlator.h> #include "posix-metadata.h" #include "posix-metadata-disk.h" #include "posix-handle.h" #include "posix-messages.h" -#include "syscall.h" -#include "compat-errno.h" -#include "compat.h" +#include <glusterfs/syscall.h> +#include <glusterfs/compat-errno.h> +#include <glusterfs/compat.h> static int gf_posix_xattr_enotsup_log; @@ -56,22 +56,32 @@ posix_mdata_from_disk(posix_mdata_t *out, posix_mdata_disk_t *in) out->atime.tv_nsec = be64toh(in->atime.tv_nsec); } +void +posix_mdata_iatt_from_disk(struct mdata_iatt *out, posix_mdata_disk_t *in) +{ + out->ia_ctime = be64toh(in->ctime.tv_sec); + out->ia_ctime_nsec = be64toh(in->ctime.tv_nsec); + + out->ia_mtime = be64toh(in->mtime.tv_sec); + out->ia_mtime_nsec = be64toh(in->mtime.tv_nsec); + + out->ia_atime = be64toh(in->atime.tv_sec); + out->ia_atime_nsec = be64toh(in->atime.tv_nsec); +} + /* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk */ static int posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd, inode_t *inode, posix_mdata_t *metadata, int *op_errno) { - size_t size = -1; + size_t size = 256; int op_ret = -1; char *value = NULL; 
gf_boolean_t fd_based_fop = _gf_false; char gfid_str[64] = {0}; char *real_path = NULL; - char *key = GF_XATTR_MDATA_KEY; - if (!metadata) { - op_ret = -1; goto out; } @@ -79,84 +89,101 @@ posix_fetch_mdata_xattr(xlator_t *this, const char *real_path_arg, int _fd, fd_based_fop = _gf_true; } if (!(fd_based_fop || real_path_arg)) { + GF_VALIDATE_OR_GOTO(this->name, inode, out); MAKE_HANDLE_PATH(real_path, this, inode->gfid, NULL); if (!real_path) { + *op_errno = errno; uuid_utoa_r(inode->gfid, gfid_str); - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_LSTAT_FAILED, + gf_msg(this->name, GF_LOG_WARNING, *op_errno, P_MSG_LSTAT_FAILED, "lstat on gfid %s failed", gfid_str); - op_ret = -1; - *op_errno = errno; goto out; } } + value = GF_MALLOC(size * sizeof(char), gf_posix_mt_char); + if (!value) { + *op_errno = ENOMEM; + goto out; + } + if (fd_based_fop) { - size = sys_fgetxattr(_fd, key, NULL, 0); + size = sys_fgetxattr(_fd, GF_XATTR_MDATA_KEY, value, size); } else if (real_path_arg) { - size = sys_lgetxattr(real_path_arg, key, NULL, 0); + size = sys_lgetxattr(real_path_arg, GF_XATTR_MDATA_KEY, value, size); } else if (real_path) { - size = sys_lgetxattr(real_path, key, NULL, 0); + size = sys_lgetxattr(real_path, GF_XATTR_MDATA_KEY, value, size); } if (size == -1) { *op_errno = errno; + if (value) { + GF_FREE(value); + value = NULL; + } if ((*op_errno == ENOTSUP) || (*op_errno == ENOSYS)) { GF_LOG_OCCASIONALLY(gf_posix_xattr_enotsup_log, this->name, GF_LOG_WARNING, - "Extended attributes not " - "supported (try remounting" - " brick with 'user_xattr' " + "Extended attributes not supported" + " (try remounting brick with 'user xattr' " "flag)"); } else if (*op_errno == ENOATTR || *op_errno == ENODATA) { gf_msg_debug(this->name, 0, - "No such attribute:%s for file %s " - "gfid: %s", - key, + "No such attribute:%s for file %s gfid: %s", + GF_XATTR_MDATA_KEY, real_path ? real_path : (real_path_arg ? 
real_path_arg : "null"), - uuid_utoa(inode->gfid)); - } else { - gf_msg(this->name, GF_LOG_DEBUG, *op_errno, P_MSG_XATTR_FAILED, - "getxattr failed" - " on %s gfid: %s key: %s ", + inode ? uuid_utoa(inode->gfid) : "null"); + goto out; + } + + if (fd_based_fop) { + size = sys_fgetxattr(_fd, GF_XATTR_MDATA_KEY, NULL, 0); + } else if (real_path_arg) { + size = sys_lgetxattr(real_path_arg, GF_XATTR_MDATA_KEY, NULL, 0); + } else if (real_path) { + size = sys_lgetxattr(real_path, GF_XATTR_MDATA_KEY, NULL, 0); + } + + if (size == -1) { /* give up now and exist with an error */ + *op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_XATTR_FAILED, + "getxattr failed on %s gfid: %s key: %s ", real_path ? real_path : (real_path_arg ? real_path_arg : "null"), - uuid_utoa(inode->gfid), key); + inode ? uuid_utoa(inode->gfid) : "null", GF_XATTR_MDATA_KEY); + goto out; } - op_ret = -1; - goto out; - } - value = GF_CALLOC(size + 1, sizeof(char), gf_posix_mt_char); - if (!value) { - op_ret = -1; - *op_errno = ENOMEM; - goto out; - } + value = GF_MALLOC(size * sizeof(char), gf_posix_mt_char); + if (!value) { + *op_errno = ENOMEM; + goto out; + } - if (fd_based_fop) { - size = sys_fgetxattr(_fd, key, value, size); - } else if (real_path_arg) { - size = sys_lgetxattr(real_path_arg, key, value, size); - } else if (real_path) { - size = sys_lgetxattr(real_path, key, value, size); - } - if (size == -1) { - op_ret = -1; - *op_errno = errno; - gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_XATTR_FAILED, - "getxattr failed on " - " on %s gfid: %s key: %s ", - real_path ? real_path : (real_path_arg ? 
real_path_arg : "null"), - uuid_utoa(inode->gfid), key); - goto out; + if (fd_based_fop) { + size = sys_fgetxattr(_fd, GF_XATTR_MDATA_KEY, value, size); + } else if (real_path_arg) { + size = sys_lgetxattr(real_path_arg, GF_XATTR_MDATA_KEY, value, + size); + } else if (real_path) { + size = sys_lgetxattr(real_path, GF_XATTR_MDATA_KEY, value, size); + } + if (size == -1) { + *op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_XATTR_FAILED, + "getxattr failed on %s gfid: %s key: %s ", + real_path ? real_path + : (real_path_arg ? real_path_arg : "null"), + inode ? uuid_utoa(inode->gfid) : "null", GF_XATTR_MDATA_KEY); + goto out; + } } - posix_mdata_from_disk(metadata, (posix_mdata_disk_t *)value); op_ret = 0; out: - GF_FREE(value); + if (value) + GF_FREE(value); return op_ret; } @@ -229,16 +256,29 @@ int __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, inode_t *inode, struct iatt *stbuf) { + uint64_t ctx; posix_mdata_t *mdata = NULL; int ret = -1; int op_errno = 0; - GF_VALIDATE_OR_GOTO(this->name, inode, out); + /* Handle readdirp: inode might be null, time attributes should be served + * from xattr not from backend's file attributes */ + if (inode) { + ret = __inode_ctx_get1(inode, this, &ctx); + if (ret == 0) { + mdata = (posix_mdata_t *)(uintptr_t)ctx; + } + } else { + ret = -1; + } - ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata); if (ret == -1 || !mdata) { mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr); if (!mdata) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM, + "Could not allocate mdata. file: %s: gfid: %s", + real_path ? real_path : "null", + inode ? 
uuid_utoa(inode->gfid) : "null"); ret = -1; goto out; } @@ -251,24 +291,18 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, * is hit when in-memory status is lost due to brick * down scenario */ - __inode_ctx_set1(inode, this, (uint64_t *)&mdata); + if (inode) { + ctx = (uint64_t)(uintptr_t)mdata; + __inode_ctx_set1(inode, this, &ctx); + } } else { /* Failed to get mdata from disk, xattr missing. - * This happens on two cases. - * 1. File is created before ctime is enabled. - * 2. On new file creation. - * - * Do nothing, just return success. It is as - * good as ctime feature is not enabled for this - * file. For files created before ctime is enabled, - * time attributes gets updated into ctime structure - * once the metadata modification fop happens and - * time attributes become consistent eventually. - * For new files, it would obviously get updated - * before the fop completion. + * This happens when the file is created before + * ctime is enabled. */ if (stbuf && op_errno != ENOENT) { ret = 0; + GF_FREE(mdata); goto out; } else { /* This case should not be hit. If it hits, @@ -277,7 +311,8 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, */ gf_msg(this->name, GF_LOG_WARNING, op_errno, P_MSG_FETCHMDATA_FAILED, "file: %s: gfid: %s key:%s ", - real_path ? real_path : "null", uuid_utoa(inode->gfid), + real_path ? real_path : "null", + inode ? 
uuid_utoa(inode->gfid) : "null", GF_XATTR_MDATA_KEY); GF_FREE(mdata); ret = 0; @@ -296,6 +331,10 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, stbuf->ia_atime = mdata->atime.tv_sec; stbuf->ia_atime_nsec = mdata->atime.tv_nsec; } + /* Not set in inode context, hence free mdata */ + if (!inode) { + GF_FREE(mdata); + } out: return ret; @@ -331,24 +370,129 @@ posix_compare_timespec(struct timespec *first, struct timespec *second) return first->tv_sec - second->tv_sec; } +int +posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode, + const char *realpath, + struct mdata_iatt *mdata_iatt, int *op_errno) +{ + uint64_t ctx; + posix_mdata_t *mdata = NULL; + posix_mdata_t imdata = { + 0, + }; + int ret = 0; + gf_boolean_t mdata_already_set = _gf_false; + + GF_VALIDATE_OR_GOTO("posix", this, out); + GF_VALIDATE_OR_GOTO(this->name, inode, out); + + LOCK(&inode->lock); + { + ret = __inode_ctx_get1(inode, this, &ctx); + if (ret == 0 && ctx) { + mdata = (posix_mdata_t *)(uintptr_t)ctx; + mdata_already_set = _gf_true; + } else { + mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr); + if (!mdata) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM, + "Could not allocate mdata. gfid: %s", + uuid_utoa(inode->gfid)); + ret = -1; + *op_errno = ENOMEM; + goto unlock; + } + + ret = posix_fetch_mdata_xattr(this, realpath, -1, inode, + (void *)mdata, op_errno); + if (ret == 0) { + /* Got mdata from disk. This is a race, another client + * has healed the xattr during lookup. 
So set it in inode + * ctx */ + ctx = (uint64_t)(uintptr_t)mdata; + __inode_ctx_set1(inode, this, &ctx); + mdata_already_set = _gf_true; + } else { + *op_errno = 0; + mdata->version = 1; + mdata->flags = 0; + mdata->ctime.tv_sec = mdata_iatt->ia_ctime; + mdata->ctime.tv_nsec = mdata_iatt->ia_ctime_nsec; + mdata->atime.tv_sec = mdata_iatt->ia_atime; + mdata->atime.tv_nsec = mdata_iatt->ia_atime_nsec; + mdata->mtime.tv_sec = mdata_iatt->ia_mtime; + mdata->mtime.tv_nsec = mdata_iatt->ia_mtime_nsec; + + ctx = (uint64_t)(uintptr_t)mdata; + __inode_ctx_set1(inode, this, &ctx); + } + } + + if (mdata_already_set) { + /* Compare and update the larger time */ + imdata.ctime.tv_sec = mdata_iatt->ia_ctime; + imdata.ctime.tv_nsec = mdata_iatt->ia_ctime_nsec; + imdata.atime.tv_sec = mdata_iatt->ia_atime; + imdata.atime.tv_nsec = mdata_iatt->ia_atime_nsec; + imdata.mtime.tv_sec = mdata_iatt->ia_mtime; + imdata.mtime.tv_nsec = mdata_iatt->ia_mtime_nsec; + + if (posix_compare_timespec(&imdata.ctime, &mdata->ctime) > 0) { + mdata->ctime = imdata.ctime; + } + if (posix_compare_timespec(&imdata.mtime, &mdata->mtime) > 0) { + mdata->mtime = imdata.mtime; + } + if (posix_compare_timespec(&imdata.atime, &mdata->atime) > 0) { + mdata->atime = imdata.atime; + } + } + + ret = posix_store_mdata_xattr(this, realpath, -1, inode, mdata); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_STOREMDATA_FAILED, + "gfid: %s key:%s ", uuid_utoa(inode->gfid), + GF_XATTR_MDATA_KEY); + *op_errno = errno; + goto unlock; + } + } +unlock: + UNLOCK(&inode->lock); +out: + return ret; +} + /* posix_set_mdata_xattr updates the posix_mdata_t based on the flag * in inode context and stores it on disk */ static int posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, - inode_t *inode, struct timespec *time, struct iatt *stbuf, - posix_mdata_flag_t *flag, gf_boolean_t update_utime) + inode_t *inode, struct timespec *time, + struct timespec *u_atime, struct timespec *u_mtime, + struct iatt 
*stbuf, posix_mdata_flag_t *flag, + gf_boolean_t update_utime) { + uint64_t ctx; posix_mdata_t *mdata = NULL; int ret = -1; int op_errno = 0; GF_VALIDATE_OR_GOTO("posix", this, out); GF_VALIDATE_OR_GOTO(this->name, inode, out); + GF_VALIDATE_OR_GOTO(this->name, time, out); + + if (update_utime && (flag->atime && !u_atime) && + (flag->mtime && !u_mtime)) { + goto out; + } LOCK(&inode->lock); { - ret = __inode_ctx_get1(inode, this, (uint64_t *)&mdata); + ret = __inode_ctx_get1(inode, this, &ctx); + if (ret == 0) { + mdata = (posix_mdata_t *)(uintptr_t)ctx; + } if (ret == -1 || !mdata) { /* * Do we need to fetch the data from xattr @@ -357,6 +501,9 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, */ mdata = GF_CALLOC(1, sizeof(posix_mdata_t), gf_posix_mt_mdata_attr); if (!mdata) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, P_MSG_NOMEM, + "Could not allocate mdata. file: %s: gfid: %s", + real_path ? real_path : "null", uuid_utoa(inode->gfid)); ret = -1; goto unlock; } @@ -368,39 +515,32 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, * is hit when in-memory status is lost due to brick * down scenario */ - __inode_ctx_set1(inode, this, (uint64_t *)&mdata); - } else if (ret && time) { + ctx = (uint64_t)(uintptr_t)mdata; + __inode_ctx_set1(inode, this, &ctx); + } else { /* - * This is the first time creating the time - * attr. This happens when you activate this - * feature, and the legacy file will not have - * any xattr set. - * - * New files will create extended attributes. + * This is the first time creating the time attr. This happens + * when you activate this feature. On this code path, only new + * files will create mdata xattr. The legacy files (files + * created before ctime enabled) will not have any xattr set. + * The xattr on legacy file will be set via lookup. */ - /* - * TODO: This is wrong approach, because before - * creating fresh xattr, we should consult - * to all replica and/or distribution set. 
- * - * We should contact the time management - * xlators, and ask them to create an xattr. - */ - /* We should not be relying on backend file's - * time attributes to load the initial ctime - * time attribute structure. This is incorrect - * as each replica set would have witnessed the - * file creation at different times. - * - * For new file creation, ctime, atime and mtime - * should be same, hence initiate the ctime - * structure with the time from the frame. But - * for the files which were created before ctime - * feature is enabled, this is not accurate but - * still fine as the times would get eventually - * accurate. + /* Don't create xattr with utimes/utimensat, only update if + * present. This otherwise causes issues during inservice + * upgrade. It causes inconsistent xattr values with in replica + * set. The scenario happens during upgrade where clients are + * older versions (without the ctime feature) and the server is + * upgraded to the new version (with the ctime feature which + * is enabled by default). */ + + if (update_utime) { + UNLOCK(&inode->lock); + GF_FREE(mdata); + return 0; + } + mdata->version = 1; mdata->flags = 0; mdata->ctime.tv_sec = time->tv_sec; @@ -410,36 +550,35 @@ posix_set_mdata_xattr(xlator_t *this, const char *real_path, int fd, mdata->mtime.tv_sec = time->tv_sec; mdata->mtime.tv_nsec = time->tv_nsec; - __inode_ctx_set1(inode, this, (uint64_t *)&mdata); + ctx = (uint64_t)(uintptr_t)mdata; + __inode_ctx_set1(inode, this, &ctx); } } - /* Earlier, mdata was updated only if the existing time is less - * than the time to be updated. This would fail the scenarios - * where mtime can be set to any time using the syscall. Hence - * just updating without comparison. But the ctime is not - * allowed to changed to older date. 
- */ - - if (flag->ctime && posix_compare_timespec(time, &mdata->ctime) > 0) { - mdata->ctime = *time; - } - /* In distributed systems, there could be races with fops * updating mtime/atime which could result in different * mtime/atime for same file. So this makes sure, only the * highest time is retained. If the mtime/atime update comes * from the explicit utime syscall, it is allowed to set to - * previous time + * previous or future time but the ctime is always set to + * current time. */ if (update_utime) { + if (flag->ctime && + posix_compare_timespec(time, &mdata->ctime) > 0) { + mdata->ctime = *time; + } if (flag->mtime) { - mdata->mtime = *time; + mdata->mtime = *u_mtime; } if (flag->atime) { - mdata->atime = *time; + mdata->atime = *u_atime; } } else { + if (flag->ctime && + posix_compare_timespec(time, &mdata->ctime) > 0) { + mdata->ctime = *time; + } if (flag->mtime && posix_compare_timespec(time, &mdata->mtime) > 0) { mdata->mtime = *time; @@ -492,15 +631,22 @@ out: */ void posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, - inode_t *inode, struct iatt *stbuf, int valid) + inode_t *inode, struct timespec *ctime, + struct iatt *stbuf, int valid) { int32_t ret = 0; #if defined(HAVE_UTIMENSAT) - struct timespec tv = { + struct timespec tv_atime = { + 0, + }; + struct timespec tv_mtime = { 0, }; #else - struct timeval tv = { + struct timeval tv_atime = { + 0, + }; + struct timeval tv_mtime = { 0, }; #endif @@ -512,37 +658,35 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, priv = this->private; + /* NOTE: + * This routine (utimes) is intentionally allowed for all internal and + * external clients even if ctime is not set. 
This is because AFR and + * WORM uses time attributes for it's internal operations + */ if (inode && priv->ctime) { if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) { - tv.tv_sec = stbuf->ia_atime; - SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_atime_nsec); + tv_atime.tv_sec = stbuf->ia_atime; + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_atime, stbuf->ia_atime_nsec); - flag.ctime = 0; - flag.mtime = 0; + flag.ctime = 1; flag.atime = 1; - ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv, NULL, - &flag, _gf_true); - if (ret) { - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, - "posix set mdata atime failed on file:" - " %s gfid:%s", - real_path, uuid_utoa(inode->gfid)); - } } if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) { - tv.tv_sec = stbuf->ia_mtime; - SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, stbuf->ia_mtime_nsec); + tv_mtime.tv_sec = stbuf->ia_mtime; + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_mtime, stbuf->ia_mtime_nsec); flag.ctime = 1; flag.mtime = 1; - flag.atime = 0; + } - ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv, NULL, - &flag, _gf_true); + if (flag.mtime || flag.atime) { + ret = posix_set_mdata_xattr(this, real_path, -1, inode, ctime, + &tv_atime, &tv_mtime, NULL, &flag, + _gf_true); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, - "posix set mdata mtime failed on file:" + "posix set mdata atime failed on file:" " %s gfid:%s", real_path, uuid_utoa(inode->gfid)); } @@ -551,6 +695,48 @@ posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, return; } +/* posix_update_ctime_in_mdata updates the posix_mdata_t when ctime needs + * to be modified + */ +void +posix_update_ctime_in_mdata(xlator_t *this, const char *real_path, int fd, + inode_t *inode, struct timespec *ctime, + struct iatt *stbuf, int valid) +{ + int32_t ret = 0; +#if defined(HAVE_UTIMENSAT) + struct timespec tv_ctime = { + 0, + }; +#else + struct timeval tv_ctime = { + 0, + }; +#endif + 
posix_mdata_flag_t flag = { + 0, + }; + + struct posix_private *priv = NULL; + priv = this->private; + + if (inode && priv->ctime) { + tv_ctime.tv_sec = stbuf->ia_ctime; + SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv_ctime, stbuf->ia_ctime_nsec); + flag.ctime = 1; + + ret = posix_set_mdata_xattr(this, real_path, -1, inode, &tv_ctime, NULL, + NULL, NULL, &flag, _gf_true); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata atime failed on file:" + " %s gfid:%s", + real_path, uuid_utoa(inode->gfid)); + } + } + return; +} + static void posix_get_mdata_flag(uint64_t flags, posix_mdata_flag_t *flag) { @@ -604,17 +790,9 @@ posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path, if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) { goto out; } - - if (frame->root->ctime.tv_sec == 0) { - gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, - "posix set mdata failed, No ctime : %s gfid:%s", real_path, - inode ? uuid_utoa(inode->gfid) : "No inode"); - goto out; - } - ret = posix_set_mdata_xattr(this, real_path, fd, inode, - &frame->root->ctime, stbuf, &flag, - _gf_false); + &frame->root->ctime, NULL, NULL, stbuf, + &flag, _gf_false); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, "posix set mdata failed on file: %s gfid:%s", real_path, @@ -644,8 +822,8 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, goto out; } ret = posix_set_mdata_xattr(this, real_path, fd, inode, - &frame->root->ctime, stbuf, &flag, - _gf_false); + &frame->root->ctime, NULL, NULL, stbuf, + &flag, _gf_false); if (ret) { gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, "posix set mdata failed on file: %s gfid:%s", real_path, @@ -655,3 +833,84 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, out: return; } + +void +posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this, + const char *real_path_in, int fd_in, inode_t *inode_in, + struct iatt *stbuf_in, 
const char *real_path_out, + int fd_out, inode_t *inode_out, struct iatt *stbuf_out) +{ + posix_mdata_flag_t flag = { + 0, + }; + posix_mdata_flag_t flag_dup = { + 0, + }; + int ret = 0; + struct posix_private *priv = NULL; + char in_uuid_str[64] = {0}, out_uuid_str[64] = {0}; + + priv = this->private; + + if (priv->ctime) { + (void)posix_get_mdata_flag(frame->root->flags, &flag); + if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) { + goto out; + } + + if (frame->root->ctime.tv_sec == 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed, No ctime : in: %s gfid_in:%s " + "out: %s gfid_out:%s", + real_path_in, + (inode_in ? uuid_utoa_r(inode_in->gfid, in_uuid_str) + : "No inode"), + real_path_out, + (inode_out ? uuid_utoa_r(inode_out->gfid, out_uuid_str) + : "No inode")); + goto out; + } + + flag_dup = flag; + + /* + * For the destination file, no need to update atime. + * It got modified. Hence the things that need to be + * changed are mtime and ctime (provided the utime + * xlator from the client has set those flags, which + * are just copied to flag_dup). + */ + if (flag.atime) + flag_dup.atime = 0; + + ret = posix_set_mdata_xattr(this, real_path_out, fd_out, inode_out, + &frame->root->ctime, NULL, NULL, stbuf_out, + &flag_dup, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path_out, + inode_out ? uuid_utoa(inode_out->gfid) : "No inode"); + } + + /* + * For the source file, no need to change the mtime and ctime. + * For source file, it is only read operation. So, if at all + * anything needs to be updated, it is only the atime. 
+ */ + if (flag.atime) + flag_dup.atime = flag.atime; + flag_dup.mtime = 0; + flag_dup.ctime = 0; + + ret = posix_set_mdata_xattr(this, real_path_in, fd_out, inode_out, + &frame->root->ctime, NULL, NULL, stbuf_out, + &flag_dup, _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path_in, + inode_in ? uuid_utoa(inode_in->gfid) : "No inode"); + } + } +out: + return; +} diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h index e1b549d55a1..d37014af93e 100644 --- a/xlators/storage/posix/src/posix-metadata.h +++ b/xlators/storage/posix/src/posix-metadata.h @@ -15,13 +15,15 @@ /* In memory representation posix metadata xattr */ typedef struct { - /* version of structure, bumped up if any new member is added */ - uint8_t version; /* flags indicates valid fields in the structure */ uint64_t flags; struct timespec ctime; struct timespec mtime; struct timespec atime; + /* version of structure, bumped up if any new member is added */ + uint8_t version; + + char _pad[7]; /* manual padding */ } posix_mdata_t; typedef struct { @@ -40,7 +42,12 @@ __posix_get_mdata_xattr(xlator_t *this, const char *real_path, int _fd, inode_t *inode, struct iatt *stbuf); void posix_update_utime_in_mdata(xlator_t *this, const char *real_path, int fd, - inode_t *inode, struct iatt *stbuf, int valid); + inode_t *inode, struct timespec *ctime, + struct iatt *stbuf, int valid); +void +posix_update_ctime_in_mdata(xlator_t *this, const char *real_path, int fd, + inode_t *inode, struct timespec *ctime, + struct iatt *stbuf, int valid); void posix_set_ctime(call_frame_t *frame, xlator_t *this, const char *real_path, int fd, inode_t *inode, struct iatt *stbuf); @@ -48,5 +55,17 @@ void posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, const char *real_path, int fd, inode_t *inode, struct iatt *stbuf); +void +posix_set_ctime_cfr(call_frame_t *frame, xlator_t 
*this, + const char *real_path_in, int fd_in, inode_t *inode_in, + struct iatt *stbuf_in, const char *read_path_put, + int fd_out, inode_t *inode_out, struct iatt *stbuf_out); +int +posix_set_mdata_xattr_legacy_files(xlator_t *this, inode_t *inode, + const char *realpath, + struct mdata_iatt *mdata_iatt, + int *op_errno); +void +posix_mdata_iatt_from_disk(struct mdata_iatt *out, posix_mdata_disk_t *in); #endif /* _POSIX_METADATA_H */ diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 61aa14d2827..42b965434b9 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -14,13 +14,13 @@ #define _GNU_SOURCE #endif -#include "xlator.h" +#include <glusterfs/xlator.h> #include "posix.h" -class_methods_t class_methods = {.init = posix_init, - .fini = posix_fini, - .reconfigure = posix_reconfigure, - .notify = posix_notify}; +int32_t +mem_acct_init(xlator_t *this); + +extern struct volume_options posix_options[]; struct xlator_dumpops dumpops = { .priv = posix_priv, @@ -76,8 +76,26 @@ struct xlator_fops fops = { .seek = posix_seek, .lease = posix_lease, .put = posix_put, + .copy_file_range = posix_copy_file_range, +}; + +struct xlator_cbks cbks = { + .release = posix_release, + .releasedir = posix_releasedir, + .forget = posix_forget, }; -struct xlator_cbks cbks = {.release = posix_release, - .releasedir = posix_releasedir, - .forget = posix_forget}; +xlator_api_t xlator_api = { + .init = posix_init, + .fini = posix_fini, + .notify = posix_notify, + .reconfigure = posix_reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = posix_options, + .identifier = "posix", + .category = GF_MAINTAINED, +}; diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index 4c32509cf04..b8db146eef2 100644 --- a/xlators/storage/posix/src/posix.h +++ 
b/xlators/storage/posix/src/posix.h @@ -16,12 +16,8 @@ #include <dirent.h> #include <time.h> -#ifdef linux -#ifdef __GLIBC__ +#ifdef HAVE_SET_FSID #include <sys/fsuid.h> -#else -#include <unistd.h> -#endif #endif #ifdef HAVE_SYS_XATTR_H @@ -32,12 +28,10 @@ #include <sys/extattr.h> #endif -#include "xlator.h" -#include "compat.h" -#include "timer.h" +#include <glusterfs/compat.h> +#include <glusterfs/timer.h> #include "posix-mem-types.h" -#include "posix-handle.h" -#include "call-stub.h" +#include <glusterfs/call-stub.h> #ifdef HAVE_LIBAIO #include <libaio.h> @@ -53,21 +47,21 @@ #define ACL_BUFFER_MAX 4096 /* size of character buffer */ #define DHT_LINKTO "trusted.glusterfs.dht.linkto" -/* - * TIER_MODE need to be changed when we stack tiers - */ -#define TIER_LINKTO "trusted.tier.tier-dht.linkto" #define POSIX_GFID_HANDLE_SIZE(base_path_len) \ (base_path_len + SLEN("/") + SLEN(GF_HIDDEN_PATH) + SLEN("/") + \ SLEN("00/") + SLEN("00/") + SLEN(UUID0_STR) + 1) /* '\0' */; + +#define POSIX_GFID_HANDLE_RELSIZE \ + SLEN("../") + SLEN("../") + SLEN("00/") + SLEN("00/") + SLEN(UUID0_STR) + 1; + #define GF_UNLINK_TRUE 0x0000000000000001 #define GF_UNLINK_FALSE 0x0000000000000000 #define DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out) \ do { \ if (frame->root->pid >= 0 && priv->disk_space_full && \ - !dict_get(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { \ + !dict_get_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) { \ op_ret = -1; \ op_errno = ENOSPC; \ gf_msg_debug("posix", ENOSPC, \ @@ -95,9 +89,8 @@ #endif #define GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xattr_req, op_ret, \ - op_errno, out) \ + op_errno, _uuid_req, out) \ do { \ - uuid_t _uuid_req; \ int _ret = 0; \ /* TODO: Remove pid check once trash implements client side \ * logic to assign gfid for entry creations inside .trashcan \ @@ -107,9 +100,7 @@ _ret = dict_get_gfuuid(xattr_req, "gfid-req", &_uuid_req); \ if (_ret) { \ gf_msg(this->name, GF_LOG_ERROR, EINVAL, P_MSG_NULL_GFID, \ - "failed to get the 
gfid from" \ - " dict for %s", \ - loc->path); \ + "failed to get the gfid from dict for %s", loc->path); \ op_ret = -1; \ op_errno = EINVAL; \ goto out; \ @@ -128,12 +119,14 @@ */ struct posix_fd { - int fd; /* fd returned by the kernel */ - int32_t flags; /* flags for open/creat */ - DIR *dir; /* handle returned by the kernel */ - off_t dir_eof; /* offset at dir EOF */ - int odirect; + int fd; /* fd returned by the kernel */ + int32_t flags; /* flags for open/creat */ + DIR *dir; /* handle returned by the kernel */ + off_t dir_eof; /* offset at dir EOF */ struct list_head list; /* to add to the janitor list */ + int odirect; + xlator_t *xl; + char _pad[4]; /* manual padding */ }; struct posix_private { @@ -144,69 +137,38 @@ struct posix_private { gf_lock_t lock; char *hostname; - /* Statistics, provides activity of the server */ - - struct timeval prev_fetch_time; - struct timeval init_time; time_t last_landfill_check; - int32_t janitor_sleep_duration; - struct list_head janitor_fds; - pthread_cond_t janitor_cond; - pthread_mutex_t janitor_lock; - - int64_t read_value; /* Total read, from init */ - int64_t write_value; /* Total write, from init */ - int64_t nr_files; - /* - In some cases, two exported volumes may reside on the same - partition on the server. Sending statvfs info for both - the volumes will lead to erroneous df output at the client, - since free space on the partition will be counted twice. - In such cases, user can disable exporting statvfs info - on one of the volumes by setting this option. - */ - gf_boolean_t export_statfs; + gf_atomic_t read_value; /* Total read, from init */ + gf_atomic_t write_value; /* Total write, from init */ - gf_boolean_t o_direct; /* always open files in O_DIRECT mode */ + /* janitor task which cleans up /.trash (created by replicate) */ + struct gf_tw_timer_list *janitor; - /* - decide whether posix_unlink does open (file), unlink (file), close (fd) - instead of just unlink (file). 
with the former approach there is no - lockout of access to parent directory during removal of very large files - for the entire duration of freeing of data blocks. - */ - gf_boolean_t background_unlink; - - /* janitor thread which cleans up /.trash (created by replicate) */ - pthread_t janitor; - gf_boolean_t janitor_present; char *trash_path; /* lock for brick dir */ - DIR *mount_lock; + int mount_lock; struct stat handledir; /* uuid of glusterd that swapned the brick process */ uuid_t glusterd_uuid; - gf_boolean_t aio_configured; - gf_boolean_t aio_init_done; - gf_boolean_t aio_capable; #ifdef HAVE_LIBAIO io_context_t ctxp; pthread_t aiothread; #endif - /* node-uuid in pathinfo xattr */ - gf_boolean_t node_uuid_pathinfo; - pthread_t fsyncer; struct list_head fsyncs; pthread_mutex_t fsync_mutex; pthread_cond_t fsync_cond; + pthread_mutex_t janitor_mutex; + pthread_cond_t janitor_cond; + pthread_cond_t fd_cond; int fsync_queue_count; + int32_t janitor_sleep_duration; enum { BATCH_NONE = 0, @@ -217,8 +179,6 @@ struct posix_private { } batch_fsync_mode; uint32_t batch_fsync_delay_usec; - gf_boolean_t update_pgfid_nlinks; - gf_boolean_t gfid2path; char gfid2path_sep[8]; /* seconds to sleep between health checks */ @@ -226,12 +186,10 @@ struct posix_private { /* seconds to sleep to wait for aio write finish for health checks */ uint32_t health_check_timeout; pthread_t health_check; - gf_boolean_t health_check_active; - uint32_t disk_reserve; - uint32_t disk_space_full; + double disk_reserve; pthread_t disk_space_check; - gf_boolean_t disk_space_check_active; + uint32_t disk_space_full; #ifdef GF_DARWIN_HOST_OS enum { @@ -246,9 +204,6 @@ struct posix_private { same backend. Very much usable in brick-splitting feature. */ int32_t shared_brick_count; - /* This option is used for either to call a landfill_purge or not */ - gf_boolean_t disable_landfill_purge; - /*Option to set mode bit permission that will always be set on file/directory. 
*/ mode_t force_create_mode; @@ -256,9 +211,47 @@ struct posix_private { mode_t create_mask; mode_t create_directory_mask; uint32_t max_hardlinks; + int32_t arrdfd[256]; + int dirfd; + + /* This option is used for either to call a landfill_purge or not */ + gf_boolean_t disable_landfill_purge; gf_boolean_t fips_mode_rchecksum; gf_boolean_t ctime; + gf_boolean_t janitor_task_stop; + + gf_boolean_t disk_space_check_active; + char disk_unit; + gf_boolean_t health_check_active; + gf_boolean_t update_pgfid_nlinks; + gf_boolean_t gfid2path; + /* node-uuid in pathinfo xattr */ + gf_boolean_t node_uuid_pathinfo; + /* + In some cases, two exported volumes may reside on the same + partition on the server. Sending statvfs info for both + the volumes will lead to erroneous df output at the client, + since free space on the partition will be counted twice. + + In such cases, user can disable exporting statvfs info + on one of the volumes by setting this option. + */ + gf_boolean_t export_statfs; + + gf_boolean_t o_direct; /* always open files in O_DIRECT mode */ + + /* + decide whether posix_unlink does open (file), unlink (file), close (fd) + instead of just unlink (file). with the former approach there is no + lockout of access to parent directory during removal of very large files + for the entire duration of freeing of data blocks. 
+ */ + gf_boolean_t background_unlink; + gf_boolean_t aio_configured; + gf_boolean_t aio_init_done; + gf_boolean_t aio_capable; + uint32_t rel_fdcount; }; typedef struct { @@ -272,9 +265,11 @@ typedef struct { fd_t *fd; int fdnum; int flags; - int32_t op_errno; char *list; size_t list_size; + int32_t op_errno; + + char _pad[4]; /* manual padding */ } posix_xattr_filler_t; typedef struct { @@ -298,7 +293,7 @@ typedef struct { char gfid_str[64] = {0}; \ uuid_utoa_r(gfid, gfid_str); \ path_len = strlen(base_path) + 1 + SLEN(GF_UNLINK_PATH) + 1 + \ - strlen(gfid_str) + 1; \ + UUID_CANONICAL_FORM_LEN + 1; \ unlink_path = alloca(path_len); \ if (!unlink_path) { \ gf_msg("posix", GF_LOG_ERROR, ENOMEM, P_MSG_UNLINK_FAILED, \ @@ -334,24 +329,26 @@ posix_istat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *basename, int posix_pstat(xlator_t *this, inode_t *inode, uuid_t gfid, const char *real_path, struct iatt *iatt, gf_boolean_t inode_locked); + dict_t * posix_xattr_fill(xlator_t *this, const char *path, loc_t *loc, fd_t *fd, int fdnum, dict_t *xattr, struct iatt *buf); int -posix_handle_pair(xlator_t *this, const char *real_path, char *key, +posix_handle_pair(xlator_t *this, loc_t *loc, const char *real_path, char *key, data_t *value, int flags, struct iatt *stbuf); int posix_fhandle_pair(call_frame_t *frame, xlator_t *this, int fd, char *key, data_t *value, int flags, struct iatt *stbuf, fd_t *_fd); void -posix_spawn_janitor_thread(xlator_t *this); +posix_janitor_timer_start(xlator_t *this); int posix_acl_xattr_set(xlator_t *this, const char *path, dict_t *xattr_req); int posix_gfid_heal(xlator_t *this, const char *path, loc_t *loc, dict_t *xattr_req); int -posix_entry_create_xattr_set(xlator_t *this, const char *path, dict_t *dict); +posix_entry_create_xattr_set(xlator_t *this, loc_t *loc, const char *path, + dict_t *dict); int posix_fd_ctx_get(fd_t *fd, xlator_t *this, struct posix_fd **pfd, @@ -365,10 +362,10 @@ posix_special_xattr(char **pattern, char *key); 
void __posix_fd_set_odirect(fd_t *fd, struct posix_fd *pfd, int opflags, off_t offset, size_t size); -void +int posix_spawn_health_check_thread(xlator_t *this); -void +int posix_spawn_disk_space_check_thread(xlator_t *this); void * @@ -387,10 +384,7 @@ void posix_gfid_unset(xlator_t *this, dict_t *xdata); int -posix_pacl_set(const char *path, const char *key, const char *acl_s); - -int -posix_pacl_get(const char *path, const char *key, char **acl_s); +posix_pacl_get(const char *path, int fdnum, const char *key, char **acl_s); int32_t posix_get_objectsignature(char *, dict_t *); @@ -638,6 +632,11 @@ posix_put(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata); int32_t +posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, + off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len, + uint32_t flags, dict_t *xdata); + +int32_t posix_set_mode_in_dict(dict_t *in_dict, dict_t *out_dict, struct iatt *in_stbuf); @@ -656,5 +655,19 @@ int posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, struct iatt *buf, const char *realpath, dict_t *xattr_req, dict_t **xattr_rsp, gf_boolean_t ignore_failure); +int +posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno); + +int +posix_spawn_ctx_janitor_thread(xlator_t *this); + +void +posix_update_iatt_buf(struct iatt *buf, int fd, char *loc, dict_t *xdata); + +gf_boolean_t +posix_is_layout_stale(dict_t *xdata, char *par_path, xlator_t *this); + +int +posix_delete_user_xattr(dict_t *dict, char *k, data_t *v, void *data); #endif /* _POSIX_H */ |
