diff options
author | Anand Avati <avati@gluster.com> | 2010-01-23 05:14:31 +0000 |
---|---|---|
committer | Anand V. Avati <avati@dev.gluster.com> | 2010-01-23 02:58:34 -0800 |
commit | a0b148ea4e2a0163548eeb89b7580be4adbb8070 (patch) | |
tree | 54ecf54695c9a891bb13d9542bf80fe443b4c725 /xlators/storage/posix/src/posix.c | |
parent | 9c53d5daf403f6fbfde76dec121295a4c156b32e (diff) |
Server backend storage hang should not cause the mount point to hang.
Submitted-by: Krishna Srinivas <krishna@gluster.com>
NOTE: fixed compilation issues in posix.c introduced while merging
storage/posix periodically checks that the backend FS/kernel is functional
by issuing a statvfs() call. If statvfs() fails, or does not return before
the fsping timeout timer expires, storage/posix sends CHILD_DOWN to the
upper translator. Ultimately this causes protocol/server to disconnect all
connected clients and clean up the data structures. Hence, if a soft lockup
or other kernel bug causes the backend FS to hang, the clients will not hang.
Signed-off-by: Krishna Srinivas <krishna@gluster.com>
Signed-off-by: Anand V. Avati <avati@blackhole.gluster.com>
Signed-off-by: Anand V. Avati <avati@dev.gluster.com>
BUG: 272 (Server backend storage hang should not cause the mount point to hang)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=272
Diffstat (limited to 'xlators/storage/posix/src/posix.c')
-rw-r--r-- | xlators/storage/posix/src/posix.c | 141 |
1 files changed, 136 insertions, 5 deletions
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 1ff9a06f9fe..c9342ac4d69 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -4703,6 +4703,119 @@ posix_inode (xlator_t *this) return 0; } +void +posix_fsping_timer_expired (void *data) +{ + xlator_t *this = NULL; + struct posix_private *priv = NULL; + + this = data; + priv = this->private; + + pthread_mutex_lock (&priv->mutex); + { + if (priv->fsping_timer) { + gf_timer_call_cancel (this->ctx, + priv->fsping_timer); + priv->fsping_timer = NULL; + } + + if (priv->fs_state) { + priv->fs_state = 0; + default_notify (this, GF_EVENT_CHILD_DOWN, NULL); + } + } + pthread_mutex_unlock (&priv->mutex); +} + +void +posix_fsping (void *arg); + +void * +posix_fsping_statvfs (void *arg) +{ + int ret = -1; + xlator_t *this = NULL; + char *root_path = NULL; + struct statvfs buf = {0, }; + struct posix_private *priv = NULL; + struct timeval delta = {0, }; + + this = arg; + priv = this->private; + root_path = POSIX_BASE_PATH (this); + + ret = statvfs (root_path, &buf); + + pthread_mutex_lock (&priv->mutex); + { + if (priv->fsping_timer) { + gf_timer_call_cancel (this->ctx, + priv->fsping_timer); + priv->fsping_timer = NULL; + } + if (ret == 0) { + if (priv->fs_state == 0) { + priv->fs_state = 1; + default_notify (this, GF_EVENT_CHILD_UP, + NULL); + } + } else { + if (priv->fs_state) { + priv->fs_state = 0; + default_notify (this, GF_EVENT_CHILD_DOWN, + NULL); + } + } + } + pthread_mutex_unlock (&priv->mutex); + + delta.tv_sec = POSIX_FSPING_SLEEP_TIME; + priv->fsping_timer = + gf_timer_call_after (this->ctx, + delta, + posix_fsping, + (void *) this); + if (priv->fsping_timer == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "unable to register timer"); + } + return NULL; +} + +void +posix_fsping (void *arg) +{ + xlator_t *this = NULL; + struct posix_private *priv = NULL; + struct timeval delta = {0, }; + + this = arg; + priv = this->private; + + delta.tv_sec 
= priv->fsping_timeout; + delta.tv_usec = 0; + + if (priv->fsping_timer) { + gf_timer_call_cancel (this->ctx, + priv->fsping_timer); + } + priv->fsping_timer = + gf_timer_call_after (this->ctx, + delta, + posix_fsping_timer_expired, + (void *) this); + + if (priv->fsping_timer == NULL) { + gf_log (this->name, GF_LOG_ERROR, + "unable to register timer"); + /*FIXME: handle error*/ + } + pthread_create (&priv->fsping, + NULL, + posix_fsping_statvfs, + this); +} int32_t posix_rchecksum (call_frame_t *frame, xlator_t *this, @@ -4780,12 +4893,15 @@ notify (xlator_t *this, void *data, ...) { + struct posix_private *priv = NULL; + + priv = this->private; + switch (event) { case GF_EVENT_PARENT_UP: { - /* Tell the parent that posix xlator is up */ - default_notify (this, GF_EVENT_CHILD_UP, data); + posix_fsping ((void *)this); } break; default: @@ -4809,9 +4925,9 @@ init (xlator_t *this) data_t * dir_data = NULL; data_t * tmp_data = NULL; uint64_t time64 = 0; - - int dict_ret = 0; - int32_t janitor_sleep; + int dict_ret = -1; + int fsping_timeout = -1; + int32_t janitor_sleep; dir_data = dict_get (this->options, "directory"); @@ -4908,6 +5024,7 @@ init (xlator_t *this) strcat (_private->trash_path, "/" GF_REPLICATE_TRASH_DIR); LOCK_INIT (&_private->lock); + pthread_mutex_init (&_private->mutex, NULL); ret = gethostname (_private->hostname, 256); if (ret < 0) { @@ -4923,6 +5040,17 @@ init (xlator_t *this) _private->max_write = 1; } + _private->fsping_timeout = POSIX_FSPING_TIMEOUT; + dict_ret = dict_get_int32 (this->options, + "fsping-timeout", + &fsping_timeout); + + if (dict_ret == 0) { + _private->fsping_timeout = fsping_timeout; + } + gf_log (this->name, GF_LOG_DEBUG, + "fsping-timeout set to %d", _private->fsping_timeout); + _private->export_statfs = 1; tmp_data = dict_get (this->options, "export-statfs-size"); if (tmp_data) { @@ -5056,6 +5184,7 @@ fini (xlator_t *this) { struct posix_private *priv = this->private; sys_lremovexattr (priv->base_path, 
"trusted.glusterfs.test"); + pthread_mutex_destroy (&priv->mutex); FREE (priv); return; } @@ -5132,6 +5261,8 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_BOOL }, { .key = {"span-devices"}, .type = GF_OPTION_TYPE_INT }, + { .key = {"fsping-timeout"}, + .type = GF_OPTION_TYPE_INT }, { .key = {"background-unlink"}, .type = GF_OPTION_TYPE_BOOL }, { .key = {"janitor-sleep-duration"}, |