diff options
-rw-r--r-- | doc/admin-guide/en-US/markdown/admin_managing_volumes.md | 2 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 4 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix-helpers.c | 114 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix.c | 20 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix.h | 6 |
5 files changed, 146 insertions, 0 deletions
diff --git a/doc/admin-guide/en-US/markdown/admin_managing_volumes.md b/doc/admin-guide/en-US/markdown/admin_managing_volumes.md index 6375bf5257e..dd8ed471015 100644 --- a/doc/admin-guide/en-US/markdown/admin_managing_volumes.md +++ b/doc/admin-guide/en-US/markdown/admin_managing_volumes.md @@ -153,6 +153,8 @@ To tune volume options server.grace-timeout Specifies the duration for the lock state to be maintained on the server after a network disconnection. 10 10 - 1800 secs server.statedump-path Location of the state dump file. /tmp directory of the brick New directory path + + storage.health-check-interval Number of seconds between health-checks done on the filesystem that is used for the brick(s). Defaults to 30 seconds, set to 0 to disable. /tmp directory of the brick New directory path ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- You can view the changed volume options using diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index f57f49f42e9..8d02ac028a5 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -1215,6 +1215,10 @@ struct volopt_map_entry glusterd_volopt_map[] = { 
.voltype = "storage/posix", .op_version = 2 }, + { .key = "storage.health-check-interval", + .voltype = "storage/posix", + .op_version = 3 + }, { .key = "config.memory-accounting", .voltype = "configuration", .option = "!config", diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index f8284d3359f..a13e0207808 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -22,6 +22,7 @@ #include <pthread.h> #include <ftw.h> #include <sys/stat.h> +#include <signal.h> #ifndef GF_BSD_HOST_OS #include <alloca.h> @@ -1063,3 +1064,138 @@ posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd)
 
         return ret;
 }
+
+/*
+ * Body of the detached health-check thread; data is the posix xlator.
+ *
+ * The interval is read once at start (0 makes the thread exit at once).
+ * After each sleep of that many seconds priv->base_path is stat()ed. When
+ * stat() fails, GF_EVENT_CHILD_DOWN is sent to the parent xlator and the
+ * thread sleeps 30 seconds twice; after each uninterrupted sleep the
+ * process signals itself, first with SIGTERM and then with SIGKILL.
+ * On a regular exit health_check_active is cleared under priv->lock.
+ * Always returns NULL.
+ */
+static void *
+posix_health_check_thread_proc (void *data)
+{
+        xlator_t *this = NULL;
+        struct posix_private *priv = NULL;
+        uint32_t interval = 0;
+        int ret = -1;
+        struct stat sb = {0, };
+
+        this = data;
+        priv = this->private;
+
+        /* prevent races when the interval is updated */
+        interval = priv->health_check_interval;
+        if (interval == 0)
+                goto out;
+
+        /* interval is unsigned, hence %u rather than %d */
+        gf_log (this->name, GF_LOG_DEBUG, "health-check thread started, "
+                "interval = %u seconds", interval);
+
+        while (1) {
+                /* aborting sleep() is a request to exit this thread, sleep()
+                 * will normally not return when cancelled */
+                ret = sleep (interval);
+                if (ret > 0)
+                        break;
+
+                /* prevent thread errors while doing the health-check(s) */
+                pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL);
+
+                /* Do the health-check, it should be moved to its own function
+                 * in case it gets more complex. */
+                ret = stat (priv->base_path, &sb);
+                if (ret < 0) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "stat() on %s returned: %s", priv->base_path,
+                                strerror (errno));
+                        goto abort;
+                }
+
+                pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL);
+        }
+
+out:
+        gf_log (this->name, GF_LOG_DEBUG, "health-check thread exiting");
+
+        LOCK (&priv->lock);
+        {
+                priv->health_check_active = _gf_false;
+        }
+        UNLOCK (&priv->lock);
+
+        return NULL;
+
+abort:
+        /* health-check failed */
+        gf_log (this->name, GF_LOG_EMERG, "health-check failed, going down");
+        xlator_notify (this->parents->xlator, GF_EVENT_CHILD_DOWN, this);
+
+        ret = sleep (30);
+        if (ret == 0) {
+                gf_log (this->name, GF_LOG_EMERG, "still alive! -> SIGTERM");
+                kill (getpid(), SIGTERM);
+        }
+
+        ret = sleep (30);
+        if (ret == 0) {
+                gf_log (this->name, GF_LOG_EMERG, "still alive! -> SIGKILL");
+                kill (getpid(), SIGKILL);
+        }
+
+        return NULL;
+}
+
+/*
+ * (Re)start the health-check thread of the posix xlator xl.
+ *
+ * With priv->lock held: a thread marked active is cancelled, and unless
+ * health_check_interval is 0 a new detached thread running
+ * posix_health_check_thread_proc() is created. When creation fails the
+ * error is logged and health_check_interval is reset to 0.
+ */
+void
+posix_spawn_health_check_thread (xlator_t *xl)
+{
+        struct posix_private *priv = NULL;
+        int ret = -1;
+
+        priv = xl->private;
+
+        LOCK (&priv->lock);
+        {
+                /* cancel the running thread */
+                if (priv->health_check_active == _gf_true) {
+                        pthread_cancel (priv->health_check);
+                        priv->health_check_active = _gf_false;
+                }
+
+                /* prevent scheduling a check in a tight loop */
+                if (priv->health_check_interval == 0)
+                        goto unlock;
+
+                /* pthread_create() returns 0 on success and a positive error
+                 * number on failure; it does not set errno */
+                ret = pthread_create (&priv->health_check, NULL,
+                                      posix_health_check_thread_proc, xl);
+                if (ret != 0) {
+                        priv->health_check_interval = 0;
+                        priv->health_check_active = _gf_false;
+                        gf_log (xl->name, GF_LOG_ERROR,
+                                "unable to setup health-check thread: %s",
+                                strerror (ret));
+                        goto unlock;
+                }
+
+                /* run the thread detached, resources will be freed on exit */
+                pthread_detach (priv->health_check);
+                priv->health_check_active = _gf_true;
+        }
+unlock:
+        UNLOCK (&priv->lock);
+}
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 05101fd3177..399c94bf095 100644 --- a/xlators/storage/posix/src/posix.c +++ 
b/xlators/storage/posix/src/posix.c @@ -23,6 +23,7 @@ #include <pthread.h> #include <ftw.h> #include <sys/stat.h> +#include <signal.h> #ifndef GF_BSD_HOST_OS #include <alloca.h> @@ -4320,6 +4321,10 @@ reconfigure (xlator_t *this, dict_t *options) " fallback to <hostname>:<export>"); } + GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval, + options, uint32, out); + posix_spawn_health_check_thread (this); + ret = 0; out: return ret; @@ -4690,6 +4695,12 @@ init (xlator_t *this) " fallback to <hostname>:<export>"); } + _private->health_check_active = _gf_false; + GF_OPTION_INIT ("health-check-interval", + _private->health_check_interval, uint32, out); + if (_private->health_check_interval) + posix_spawn_health_check_thread (this); + pthread_mutex_init (&_private->janitor_lock, NULL); pthread_cond_init (&_private->janitor_cond, NULL); INIT_LIST_HEAD (&_private->janitor_fds); @@ -4815,5 +4826,14 @@ struct volume_options options[] = { .description = "return glusterd's node-uuid in pathinfo xattr" " string instead of hostname" }, + { + .key = {"health-check-interval"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "30", + .validate = GF_OPT_VALIDATE_MIN, + .description = "Interval in seconds for a filesystem health check, " + "set to 0 to disable" + }, { .key = {NULL} } }; diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index 2cee1905f78..22340370e2c 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -127,6 +127,11 @@ struct posix_private { /* node-uuid in pathinfo xattr */ gf_boolean_t node_uuid_pathinfo; + + /* seconds to sleep between health checks */ + uint32_t health_check_interval; + pthread_t health_check; + gf_boolean_t health_check_active; }; typedef struct { @@ -178,4 +183,5 @@ gf_boolean_t posix_special_xattr (char **pattern, char *key); void __posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags, off_t offset, size_t size); +void 
posix_spawn_health_check_thread (xlator_t *this); #endif /* _POSIX_H */ |