diff options
Diffstat (limited to 'xlators/storage/posix/src/posix-common.c')
| -rw-r--r-- | xlators/storage/posix/src/posix-common.c | 355 |
1 files changed, 275 insertions, 80 deletions
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c index 9c9d52e3609..f10722ec3fb 100644 --- a/xlators/storage/posix/src/posix-common.c +++ b/xlators/storage/posix/src/posix-common.c @@ -26,7 +26,6 @@ #include <signal.h> #include <sys/uio.h> #include <unistd.h> -#include <ftw.h> #ifndef GF_BSD_HOST_OS #include <alloca.h> @@ -36,30 +35,22 @@ #include <fcntl.h> #endif /* HAVE_LINKAT */ -#include "glusterfs.h" -#include "checksum.h" -#include "dict.h" -#include "logging.h" -#include "posix.h" #include "posix-inode-handle.h" -#include "xlator.h" -#include "defaults.h" -#include "common-utils.h" -#include "compat-errno.h" -#include "compat.h" -#include "byte-order.h" -#include "syscall.h" -#include "statedump.h" -#include "locking.h" -#include "timer.h" +#include <glusterfs/compat-errno.h> +#include <glusterfs/compat.h> +#include <glusterfs/byte-order.h> +#include <glusterfs/syscall.h> +#include <glusterfs/statedump.h> +#include <glusterfs/locking.h> +#include <glusterfs/timer.h> #include "glusterfs3-xdr.h" -#include "hashfn.h" #include "posix-aio.h" -#include "glusterfs-acl.h" +#include <glusterfs/glusterfs-acl.h> #include "posix-messages.h" -#include "events.h" +#include <glusterfs/events.h> #include "posix-gfid-path.h" -#include "compat-uuid.h" +#include <glusterfs/compat-uuid.h> +#include "timer-wheel.h" extern char *marker_xattrs[]; #define ALIGN_SIZE 4096 @@ -110,13 +101,13 @@ posix_priv(xlator_t *this) struct posix_private *priv = NULL; char key_prefix[GF_DUMP_MAX_BUF_LEN]; - (void)snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, - this->name); - gf_proc_dump_add_section(key_prefix); - if (!this) return 0; + (void)snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, + this->name); + gf_proc_dump_add_section("%s", key_prefix); + priv = this->private; if (!priv) @@ -124,9 +115,9 @@ posix_priv(xlator_t *this) gf_proc_dump_write("base_path", "%s", priv->base_path); gf_proc_dump_write("base_path_length", "%d", priv->base_path_length); - gf_proc_dump_write("max_read", "%d", priv->read_value); - gf_proc_dump_write("max_write", "%d", priv->write_value); - gf_proc_dump_write("nr_files", "%ld", priv->nr_files); + gf_proc_dump_write("max_read", "%" PRId64, GF_ATOMIC_GET(priv->read_value)); + gf_proc_dump_write("max_write", "%" PRId64, + GF_ATOMIC_GET(priv->write_value)); return 0; } @@ -143,11 +134,60 @@ posix_inode(xlator_t *this) int32_t posix_notify(xlator_t *this, int32_t event, void *data, ...) { + xlator_t *victim = data; + struct posix_private *priv = this->private; + int ret = 0; + struct timespec sleep_till = { + 0, + }; + glusterfs_ctx_t *ctx = this->ctx; + switch (event) { case GF_EVENT_PARENT_UP: { - /* Tell the parent that posix xlator is up */ + /* Notify the parent that posix xlator is up */ default_notify(this, GF_EVENT_CHILD_UP, data); } break; + + case GF_EVENT_PARENT_DOWN: { + if (!victim->cleanup_starting) + break; + + if (priv->janitor) { + pthread_mutex_lock(&priv->janitor_mutex); + { + priv->janitor_task_stop = _gf_true; + ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, + priv->janitor); + if (!ret) { + timespec_now_realtime(&sleep_till); + sleep_till.tv_sec += 1; + /* Wait to set janitor_task flag to _gf_false by + * janitor_task_done */ + while (priv->janitor_task_stop) { + (void)pthread_cond_timedwait(&priv->janitor_cond, + &priv->janitor_mutex, + &sleep_till); + timespec_now_realtime(&sleep_till); + sleep_till.tv_sec += 1; + } + } + } + pthread_mutex_unlock(&priv->janitor_mutex); + GF_FREE(priv->janitor); + } + priv->janitor = NULL; + pthread_mutex_lock(&ctx->fd_lock); + { + while (priv->rel_fdcount > 0) { + pthread_cond_wait(&priv->fd_cond, &ctx->fd_lock); + } + } + pthread_mutex_unlock(&ctx->fd_lock); + + gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s", + victim->name); + default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data); + } break; default: /* */ break; @@ -333,15 +373,31 @@ posix_reconfigure(xlator_t *this, dict_t *options) " fallback to <hostname>:<export>"); } - GF_OPTION_RECONF("reserve", priv->disk_reserve, options, uint32, out); - if (priv->disk_reserve) - posix_spawn_disk_space_check_thread(this); + GF_OPTION_RECONF("reserve", priv->disk_reserve, options, percent_or_size, + out); + /* option can be any one of percent or bytes */ + priv->disk_unit = 0; + if (priv->disk_reserve < 100.0) + priv->disk_unit = 'p'; + + if (priv->disk_reserve) { + ret = posix_spawn_disk_space_check_thread(this); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, + "Getting disk space check from thread failed"); + goto out; + } + } GF_OPTION_RECONF("health-check-interval", priv->health_check_interval, options, uint32, out); GF_OPTION_RECONF("health-check-timeout", priv->health_check_timeout, options, uint32, out); - posix_spawn_health_check_thread(this); + if (priv->health_check_interval) { + ret = posix_spawn_health_check_thread(this); + if (ret) + goto out; + } GF_OPTION_RECONF("shared-brick-count", priv->shared_brick_count, options, int32, out); @@ -496,6 +552,30 @@ posix_create_unlink_dir(xlator_t *this) return 0; } +int +posix_create_open_directory_based_fd(xlator_t *this, int pdirfd, char *dir_name) +{ + int ret = -1; + + ret = sys_openat(pdirfd, dir_name, (O_DIRECTORY | O_RDONLY), 0); + if (ret < 0 && errno == ENOENT) { + ret = sys_mkdirat(pdirfd, dir_name, 0700); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "Creating directory %s failed", dir_name); + goto out; + } + ret = sys_openat(pdirfd, dir_name, (O_DIRECTORY | O_RDONLY), 0); + if (ret < 0 && errno != EEXIST) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "error mkdir hash-1 %s ", dir_name); + goto out; + } + } +out: + return ret; +} + /** * init - */ @@ -522,7 +602,7 @@ posix_init(xlator_t *this) uuid_t gfid = { 0, }; - uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; + static uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}; char *guuid = NULL; int32_t uid = -1; int32_t gid = -1; @@ -532,6 +612,15 @@ posix_init(xlator_t *this) int force_directory = -1; int create_mask = -1; int create_directory_mask = -1; + char dir_handle[PATH_MAX] = { + 0, + }; + int i; + char fhash[4] = { + 0, + }; + int hdirfd = -1; + char value; dir_data = dict_get(this->options, "directory"); @@ -572,7 +661,12 @@ posix_init(xlator_t *this) } _private->base_path = gf_strdup(dir_data->data); - _private->base_path_length = strlen(_private->base_path); + _private->base_path_length = dir_data->len - 1; + + _private->dirfd = -1; + _private->mount_lock = -1; + for (i = 0; i < 256; i++) + _private->arrdfd[i] = -1; ret = dict_get_str(this->options, "hostname", &_private->hostname); if (ret) { @@ -588,16 +682,11 @@ posix_init(xlator_t *this) } /* Check for Extended attribute support, if not present, log it */ - op_ret = sys_lsetxattr(dir_data->data, "trusted.glusterfs.test", "working", - 8, 0); - if (op_ret != -1) { - ret = sys_lremovexattr(dir_data->data, "trusted.glusterfs.test"); - if (ret) { - gf_msg(this->name, GF_LOG_DEBUG, errno, P_MSG_INVALID_OPTION, - "failed to remove xattr: " - "trusted.glusterfs.test"); - } - } else { + size = sys_lgetxattr(dir_data->data, "user.x", &value, sizeof(value)); + + if ((size == -1) && (errno == EOPNOTSUPP)) { + gf_msg(this->name, GF_LOG_DEBUG, 0, P_MSG_XDATA_GETXATTR, + "getxattr returned %zd", size); tmp_data = dict_get(this->options, "mandate-attribute"); if (tmp_data) { if (gf_string2boolean(tmp_data->data, &tmp_bool) == -1) { @@ -757,6 +846,8 @@ posix_init(xlator_t *this) } LOCK_INIT(&_private->lock); + GF_ATOMIC_INIT(_private->read_value, 0); + GF_ATOMIC_INIT(_private->write_value, 0); _private->export_statfs = 1; tmp_data = dict_get(this->options, "export-statfs-size"); @@ -844,8 +935,9 @@ posix_init(xlator_t *this) /* performing open dir on brick dir locks the brick dir * and prevents it from being unmounted */ - _private->mount_lock = sys_opendir(dir_data->data); - if (!_private->mount_lock) { + _private->mount_lock = sys_open(dir_data->data, (O_DIRECTORY | O_RDONLY), + 0); + if (_private->mount_lock < 0) { ret = -1; op_errno = errno; gf_msg(this->name, GF_LOG_ERROR, 0, P_MSG_DIR_OPERATION_FAILED, @@ -889,6 +981,28 @@ posix_init(xlator_t *this) } this->private = (void *)_private; + snprintf(dir_handle, sizeof(dir_handle), "%s/%s", _private->base_path, + GF_HIDDEN_PATH); + hdirfd = posix_create_open_directory_based_fd(this, _private->mount_lock, + dir_handle); + if (hdirfd < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "error open directory failed for dir %s", dir_handle); + ret = -1; + goto out; + } + _private->dirfd = hdirfd; + for (i = 0; i < 256; i++) { + snprintf(fhash, sizeof(fhash), "%02x", i); + _private->arrdfd[i] = posix_create_open_directory_based_fd(this, hdirfd, + fhash); + if (_private->arrdfd[i] < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_HANDLE_CREATE, + "error openat failed for file %s", fhash); + ret = -1; + goto out; + } + } op_ret = posix_handle_init(this); if (op_ret == -1) { @@ -946,27 +1060,45 @@ posix_init(xlator_t *this) _private->disk_space_check_active = _gf_false; _private->disk_space_full = 0; - GF_OPTION_INIT("reserve", _private->disk_reserve, uint32, out); - if (_private->disk_reserve) - posix_spawn_disk_space_check_thread(this); + + GF_OPTION_INIT("reserve", _private->disk_reserve, percent_or_size, out); + + /* option can be any one of percent or bytes */ + _private->disk_unit = 0; + if (_private->disk_reserve < 100.0) + _private->disk_unit = 'p'; + + if (_private->disk_reserve) { + ret = posix_spawn_disk_space_check_thread(this); + if (ret) { + gf_msg(this->name, GF_LOG_INFO, 0, P_MSG_DISK_SPACE_CHECK_FAILED, + "Getting disk space check from thread failed "); + goto out; + } + } _private->health_check_active = _gf_false; GF_OPTION_INIT("health-check-interval", _private->health_check_interval, uint32, out); GF_OPTION_INIT("health-check-timeout", _private->health_check_timeout, uint32, out); - if (_private->health_check_interval) - posix_spawn_health_check_thread(this); - - pthread_mutex_init(&_private->janitor_lock, NULL); - pthread_cond_init(&_private->janitor_cond, NULL); - INIT_LIST_HEAD(&_private->janitor_fds); - - posix_spawn_janitor_thread(this); + if (_private->health_check_interval) { + ret = posix_spawn_health_check_thread(this); + if (ret) + goto out; + } + posix_janitor_timer_start(this); pthread_mutex_init(&_private->fsync_mutex, NULL); pthread_cond_init(&_private->fsync_cond, NULL); + pthread_mutex_init(&_private->janitor_mutex, NULL); + pthread_cond_init(&_private->janitor_cond, NULL); + pthread_cond_init(&_private->fd_cond, NULL); INIT_LIST_HEAD(&_private->fsyncs); + _private->rel_fdcount = 0; + ret = posix_spawn_ctx_janitor_thread(this); + if (ret) + goto out; ret = gf_thread_create(&_private->fsyncer, NULL, posix_fsyncer, this, "posixfsy"); @@ -1040,9 +1172,27 @@ posix_init(xlator_t *this) out); GF_OPTION_INIT("ctime", _private->ctime, bool, out); + out: if (ret) { if (_private) { + if (_private->dirfd >= 0) { + sys_close(_private->dirfd); + _private->dirfd = -1; + } + + for (i = 0; i < 256; i++) { + if (_private->arrdfd[i] >= 0) { + sys_close(_private->arrdfd[i]); + _private->arrdfd[i] = -1; + } + } + /*unlock brick dir*/ + if (_private->mount_lock >= 0) { + (void)sys_close(_private->mount_lock); + _private->mount_lock = -1; + } + GF_FREE(_private->base_path); GF_FREE(_private->hostname); @@ -1061,36 +1211,84 @@ void posix_fini(xlator_t *this) { struct posix_private *priv = this->private; + gf_boolean_t health_check = _gf_false; + glusterfs_ctx_t *ctx = this->ctx; + uint32_t count; + int ret = 0; + int i = 0; + if (!priv) return; LOCK(&priv->lock); - if (priv->health_check_active) { + { + health_check = priv->health_check_active; priv->health_check_active = _gf_false; - pthread_cancel(priv->health_check); - priv->health_check = 0; } UNLOCK(&priv->lock); + + if (priv->dirfd >= 0) { + sys_close(priv->dirfd); + priv->dirfd = -1; + } + + for (i = 0; i < 256; i++) { + if (priv->arrdfd[i] >= 0) { + sys_close(priv->arrdfd[i]); + priv->arrdfd[i] = -1; + } + } + + if (health_check) { + (void)gf_thread_cleanup_xint(priv->health_check); + priv->health_check = 0; + } + if (priv->disk_space_check) { priv->disk_space_check_active = _gf_false; - pthread_cancel(priv->disk_space_check); + (void)gf_thread_cleanup_xint(priv->disk_space_check); priv->disk_space_check = 0; } + if (priv->janitor) { - (void)gf_thread_cleanup_xint(priv->janitor); - priv->janitor = 0; + /*TODO: Make sure the synctask is also complete */ + ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TIMER_DELETE_FAILED, + "Failed to delete janitor timer"); + } + GF_FREE(priv->janitor); + priv->janitor = NULL; } + + pthread_mutex_lock(&ctx->fd_lock); + { + count = --ctx->pxl_count; + if (count == 0) { + pthread_cond_signal(&ctx->fd_cond); + } + } + pthread_mutex_unlock(&ctx->fd_lock); + + if (count == 0) { + pthread_join(ctx->janitor, NULL); + } + if (priv->fsyncer) { (void)gf_thread_cleanup_xint(priv->fsyncer); priv->fsyncer = 0; } /*unlock brick dir*/ - if (priv->mount_lock) - (void)sys_closedir(priv->mount_lock); + if (priv->mount_lock >= 0) { + (void)sys_close(priv->mount_lock); + priv->mount_lock = -1; + } GF_FREE(priv->base_path); LOCK_DESTROY(&priv->lock); - pthread_mutex_destroy(&priv->janitor_lock); pthread_mutex_destroy(&priv->fsync_mutex); + pthread_cond_destroy(&priv->fsync_cond); + pthread_mutex_destroy(&priv->janitor_mutex); + pthread_cond_destroy(&priv->janitor_cond); GF_FREE(priv->hostname); GF_FREE(priv->trash_path); GF_FREE(priv); @@ -1099,7 +1297,7 @@ posix_fini(xlator_t *this) return; } -struct volume_options options[] = { +struct volume_options posix_options[] = { {.key = {"o-direct"}, .type = GF_OPTION_TYPE_BOOL}, {.key = {"directory"}, .type = GF_OPTION_TYPE_PATH, @@ -1162,7 +1360,7 @@ struct volume_options options[] = { {.key = {"health-check-timeout"}, .type = GF_OPTION_TYPE_INT, .min = 0, - .default_value = "10", + .default_value = "20", .validate = GF_OPT_VALIDATE_MIN, .description = "Interval in seconds to wait aio_write finish for health check, " @@ -1170,11 +1368,11 @@ struct volume_options options[] = { .op_version = {GD_OP_VERSION_4_0_0}, .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, {.key = {"reserve"}, - .type = GF_OPTION_TYPE_INT, + .type = GF_OPTION_TYPE_PERCENT_OR_SIZET, .min = 0, .default_value = "1", .validate = GF_OPT_VALIDATE_MIN, - .description = "Percentage of disk space to be reserved." + .description = "Percentage/Size of disk space to be reserved." " Set to 0 to disable", .op_version = {GD_OP_VERSION_3_13_0}, .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, @@ -1268,24 +1466,21 @@ struct volume_options options[] = { .min = 0000, .max = 0777, .default_value = "0000", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Mode bit permission that will always be set on a file."}, {.key = {"force-directory-mode"}, .type = GF_OPTION_TYPE_INT, .min = 0000, .max = 0777, .default_value = "0000", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Mode bit permission that will be always set on directory"}, {.key = {"create-mask"}, .type = GF_OPTION_TYPE_INT, .min = 0000, .max = 0777, .default_value = "0777", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Any bit not set here will be removed from the" "modes set on a file when it is created"}, {.key = {"create-directory-mask"}, @@ -1293,8 +1488,7 @@ struct volume_options options[] = { .min = 0000, .max = 0777, .default_value = "0777", - .validate = GF_OPT_VALIDATE_MIN, - .validate = GF_OPT_VALIDATE_MAX, + .validate = GF_OPT_VALIDATE_BOTH, .description = "Any bit not set here will be removed from the" "modes set on a directory when it is created"}, {.key = {"max-hardlinks"}, @@ -1317,7 +1511,7 @@ struct volume_options options[] = { "SHA256 checksum. MD5 otherwise."}, {.key = {"ctime"}, .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", + .default_value = "on", .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC, .op_version = {GD_OP_VERSION_4_1_0}, .tags = {"ctime"}, @@ -1326,4 +1520,5 @@ struct volume_options options[] = { "are stored in xattr to keep it consistent across replica and " "distribute set. The time attributes stored at the backend are " "not considered "}, - {.key = {NULL}}}; + {.key = {NULL}}, +}; |
