summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--doc/admin-guide/en-US/markdown/admin_managing_volumes.md2
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c4
-rw-r--r--xlators/storage/posix/src/posix-helpers.c114
-rw-r--r--xlators/storage/posix/src/posix.c20
-rw-r--r--xlators/storage/posix/src/posix.h6
5 files changed, 146 insertions, 0 deletions
diff --git a/doc/admin-guide/en-US/markdown/admin_managing_volumes.md b/doc/admin-guide/en-US/markdown/admin_managing_volumes.md
index 6375bf5257e..dd8ed471015 100644
--- a/doc/admin-guide/en-US/markdown/admin_managing_volumes.md
+++ b/doc/admin-guide/en-US/markdown/admin_managing_volumes.md
@@ -153,6 +153,8 @@ To tune volume options
server.grace-timeout Specifies the duration for the lock state to be maintained on the server after a network disconnection. 10 10 - 1800 secs
server.statedump-path Location of the state dump file. /tmp directory of the brick New directory path
+
+ storage.health-check-interval Number of seconds between health-checks done on the filesystem that is used for the brick(s). Defaults to 30 seconds, set to 0 to disable. /tmp directory of the brick New directory path
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
You can view the changed volume options using
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index f57f49f42e9..8d02ac028a5 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -1215,6 +1215,10 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.voltype = "storage/posix",
.op_version = 2
},
+ { .key = "storage.health-check-interval",
+ .voltype = "storage/posix",
+ .op_version = 3
+ },
{ .key = "config.memory-accounting",
.voltype = "configuration",
.option = "!config",
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index f8284d3359f..a13e0207808 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -22,6 +22,7 @@
#include <pthread.h>
#include <ftw.h>
#include <sys/stat.h>
+#include <signal.h>
#ifndef GF_BSD_HOST_OS
#include <alloca.h>
@@ -1063,3 +1064,116 @@ posix_fd_ctx_get (fd_t *fd, xlator_t *this, struct posix_fd **pfd)
return ret;
}
+
+static void *
+posix_health_check_thread_proc (void *data)
+{
+ xlator_t *this = NULL;
+ struct posix_private *priv = NULL;
+ uint32_t interval = 0;
+ int ret = -1;
+ struct stat sb = {0, };
+
+ this = data;
+ priv = this->private;
+
+ /* prevent races when the interval is updated */
+ interval = priv->health_check_interval;
+ if (interval == 0)
+ goto out;
+
+ gf_log (this->name, GF_LOG_DEBUG, "health-check thread started, "
+ "interval = %d seconds", interval);
+
+ while (1) {
+ /* aborting sleep() is a request to exit this thread, sleep()
+ * will normally not return when cancelled */
+ ret = sleep (interval);
+ if (ret > 0)
+ break;
+
+ /* prevent thread errors while doing the health-check(s) */
+ pthread_setcancelstate (PTHREAD_CANCEL_DISABLE, NULL);
+
+ /* Do the health-check, it should be moved to its own function
+ * in case it gets more complex. */
+ ret = stat (priv->base_path, &sb);
+ if (ret < 0) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "stat() on %s returned: %s", priv->base_path,
+ strerror (errno));
+ goto abort;
+ }
+
+ pthread_setcancelstate (PTHREAD_CANCEL_ENABLE, NULL);
+ }
+
+out:
+ gf_log (this->name, GF_LOG_DEBUG, "health-check thread exiting");
+
+ LOCK (&priv->lock);
+ {
+ priv->health_check_active = _gf_false;
+ }
+ UNLOCK (&priv->lock);
+
+ return NULL;
+
+abort:
+ /* health-check failed */
+ gf_log (this->name, GF_LOG_EMERG, "health-check failed, going down");
+ xlator_notify (this->parents->xlator, GF_EVENT_CHILD_DOWN, this);
+
+ ret = sleep (30);
+ if (ret == 0) {
+ gf_log (this->name, GF_LOG_EMERG, "still alive! -> SIGTERM");
+ kill (getpid(), SIGTERM);
+ }
+
+ ret = sleep (30);
+ if (ret == 0) {
+ gf_log (this->name, GF_LOG_EMERG, "still alive! -> SIGKILL");
+ kill (getpid(), SIGKILL);
+ }
+
+ return NULL;
+}
+
+void
+posix_spawn_health_check_thread (xlator_t *xl)
+{
+ struct posix_private *priv = NULL;
+ int ret = -1;
+
+ priv = xl->private;
+
+ LOCK (&priv->lock);
+ {
+ /* cancel the running thread */
+ if (priv->health_check_active == _gf_true) {
+ pthread_cancel (priv->health_check);
+ priv->health_check_active = _gf_false;
+ }
+
+ /* prevent scheduling a check in a tight loop */
+ if (priv->health_check_interval == 0)
+ goto unlock;
+
+ ret = pthread_create (&priv->health_check, NULL,
+ posix_health_check_thread_proc, xl);
+ if (ret < 0) {
+ priv->health_check_interval = 0;
+ priv->health_check_active = _gf_false;
+ gf_log (xl->name, GF_LOG_ERROR,
+ "unable to setup health-check thread: %s",
+ strerror (errno));
+ goto unlock;
+ }
+
+ /* run the thread detached, resources will be freed on exit */
+ pthread_detach (priv->health_check);
+ priv->health_check_active = _gf_true;
+ }
+unlock:
+ UNLOCK (&priv->lock);
+}
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index 05101fd3177..399c94bf095 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -23,6 +23,7 @@
#include <pthread.h>
#include <ftw.h>
#include <sys/stat.h>
+#include <signal.h>
#ifndef GF_BSD_HOST_OS
#include <alloca.h>
@@ -4320,6 +4321,10 @@ reconfigure (xlator_t *this, dict_t *options)
" fallback to <hostname>:<export>");
}
+ GF_OPTION_RECONF ("health-check-interval", priv->health_check_interval,
+ options, uint32, out);
+ posix_spawn_health_check_thread (this);
+
ret = 0;
out:
return ret;
@@ -4690,6 +4695,12 @@ init (xlator_t *this)
" fallback to <hostname>:<export>");
}
+ _private->health_check_active = _gf_false;
+ GF_OPTION_INIT ("health-check-interval",
+ _private->health_check_interval, uint32, out);
+ if (_private->health_check_interval)
+ posix_spawn_health_check_thread (this);
+
pthread_mutex_init (&_private->janitor_lock, NULL);
pthread_cond_init (&_private->janitor_cond, NULL);
INIT_LIST_HEAD (&_private->janitor_fds);
@@ -4815,5 +4826,14 @@ struct volume_options options[] = {
.description = "return glusterd's node-uuid in pathinfo xattr"
" string instead of hostname"
},
+ {
+ .key = {"health-check-interval"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .default_value = "30",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Interval in seconds for a filesystem health check, "
+ "set to 0 to disable"
+ },
{ .key = {NULL} }
};
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index 2cee1905f78..22340370e2c 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -127,6 +127,11 @@ struct posix_private {
/* node-uuid in pathinfo xattr */
gf_boolean_t node_uuid_pathinfo;
+
+ /* seconds to sleep between health checks */
+ uint32_t health_check_interval;
+ pthread_t health_check;
+ gf_boolean_t health_check_active;
};
typedef struct {
@@ -178,4 +183,5 @@ gf_boolean_t posix_special_xattr (char **pattern, char *key);
void
__posix_fd_set_odirect (fd_t *fd, struct posix_fd *pfd, int opflags,
off_t offset, size_t size);
+void posix_spawn_health_check_thread (xlator_t *this);
#endif /* _POSIX_H */