summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMohit Agrawal <moagrawal@redhat.com>2019-07-16 20:36:57 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2019-08-20 05:28:39 +0000
commitf138d3fa2237e7fa940ecf17153fd700350c4138 (patch)
tree388452149858a9178c553c617d53fc0e6531ff2c
parenta4d073cda22787552b46b186b898304b438db0c2 (diff)
posix: In brick_mux brick is crashed while start/stop volume in loop
Problem: In brick_mux environment sometime brick is crashed while volume stop/start in a loop.Brick is crashed in janitor task at the time of accessing priv.If posix priv is cleaned up before call janitor task then janitor task is crashed. Solution: To avoid the crash in brick_mux environment introduce a new flag janitor_task_stop in posix_private and before send CHILD_DOWN event wait for update the flag by janitor_task_done Change-Id: Id9fa5d183a463b2b682774ab5cb9868357d139a4 fixes: bz#1730409 Signed-off-by: Mohit Agrawal <moagrawal@redhat.com>
-rw-r--r--libglusterfs/src/glusterfs/xlator.h3
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c5
-rw-r--r--xlators/protocol/server/src/server.c6
-rw-r--r--xlators/storage/posix/src/posix-common.c40
-rw-r--r--xlators/storage/posix/src/posix-helpers.c16
-rw-r--r--xlators/storage/posix/src/posix.h3
6 files changed, 69 insertions, 4 deletions
diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
index cdf9d4e8a5d..6449e59f484 100644
--- a/libglusterfs/src/glusterfs/xlator.h
+++ b/libglusterfs/src/glusterfs/xlator.h
@@ -858,6 +858,9 @@ struct _xlator {
/* Flag to notify got CHILD_DOWN event for detach brick */
uint32_t notify_down;
+
+ /* Flag to avoid throw duplicate PARENT_DOWN event */
+ uint32_t parent_down;
};
/* This would be the only structure which needs to be exported by
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index 59b9cbec4a2..d91b672e47e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -4015,8 +4015,9 @@ out:
if (msg[0]) {
gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_IMPORT_FAIL, "%s",
msg);
- gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s",
- new_brickinfo->hostname, new_brickinfo->path);
+ if (new_brickinfo)
+ gf_event(EVENT_IMPORT_BRICK_FAILED, "peer=%s;brick=%s",
+ new_brickinfo->hostname, new_brickinfo->path);
}
gf_msg_debug("glusterd", 0, "Returning with %d", ret);
return ret;
diff --git a/xlators/protocol/server/src/server.c b/xlators/protocol/server/src/server.c
index 1e91ede0d58..74453edecd7 100644
--- a/xlators/protocol/server/src/server.c
+++ b/xlators/protocol/server/src/server.c
@@ -582,6 +582,7 @@ server_graph_janitor_threads(void *data)
gf_boolean_t victim_found = _gf_false;
xlator_list_t **trav_p = NULL;
xlator_t *top = NULL;
+ uint32_t parent_down = 0;
GF_ASSERT(data);
@@ -600,7 +601,10 @@ server_graph_janitor_threads(void *data)
victim = (*trav_p)->xlator;
if (victim->cleanup_starting &&
strcmp(victim->name, victim_name) == 0) {
- victim_found = _gf_true;
+ parent_down = victim->parent_down;
+ victim->parent_down = 1;
+ if (!parent_down)
+ victim_found = _gf_true;
break;
}
}
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c
index 2b39b5bde53..eee39fd000f 100644
--- a/xlators/storage/posix/src/posix-common.c
+++ b/xlators/storage/posix/src/posix-common.c
@@ -136,10 +136,15 @@ int32_t
posix_notify(xlator_t *this, int32_t event, void *data, ...)
{
xlator_t *victim = data;
+ struct posix_private *priv = this->private;
+ int ret = 0;
+ struct timespec sleep_till = {
+ 0,
+ };
switch (event) {
case GF_EVENT_PARENT_UP: {
- /* Tell the parent that posix xlator is up */
+ /* the parent that posix xlator is up */
default_notify(this, GF_EVENT_CHILD_UP, data);
} break;
@@ -148,6 +153,31 @@ posix_notify(xlator_t *this, int32_t event, void *data, ...)
break;
gf_log(this->name, GF_LOG_INFO, "Sending CHILD_DOWN for brick %s",
victim->name);
+
+ if (priv->janitor) {
+ pthread_mutex_lock(&priv->janitor_mutex);
+ {
+ priv->janitor_task_stop = _gf_true;
+ ret = gf_tw_del_timer(this->ctx->tw->timer_wheel,
+ priv->janitor);
+ if (!ret) {
+ clock_gettime(CLOCK_REALTIME, &sleep_till);
+ sleep_till.tv_sec += 1;
+ /* Wait to set janitor_task flag to _gf_false by
+ * janitor_task_done */
+ while (priv->janitor_task_stop) {
+ (void)pthread_cond_timedwait(&priv->janitor_cond,
+ &priv->janitor_mutex,
+ &sleep_till);
+ clock_gettime(CLOCK_REALTIME, &sleep_till);
+ sleep_till.tv_sec += 1;
+ }
+ }
+ }
+ pthread_mutex_unlock(&priv->janitor_mutex);
+ GF_FREE(priv->janitor);
+ }
+ priv->janitor = NULL;
default_notify(this->parents->xlator, GF_EVENT_CHILD_DOWN, data);
} break;
default:
@@ -997,6 +1027,8 @@ posix_init(xlator_t *this)
pthread_mutex_init(&_private->fsync_mutex, NULL);
pthread_cond_init(&_private->fsync_cond, NULL);
+ pthread_mutex_init(&_private->janitor_mutex, NULL);
+ pthread_cond_init(&_private->janitor_cond, NULL);
INIT_LIST_HEAD(&_private->fsyncs);
ret = posix_spawn_ctx_janitor_thread(this);
if (ret)
@@ -1117,6 +1149,7 @@ posix_fini(xlator_t *this)
(void)gf_thread_cleanup_xint(priv->disk_space_check);
priv->disk_space_check = 0;
}
+
if (priv->janitor) {
/*TODO: Make sure the synctask is also complete */
ret = gf_tw_del_timer(this->ctx->tw->timer_wheel, priv->janitor);
@@ -1124,8 +1157,10 @@ posix_fini(xlator_t *this)
gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_TIMER_DELETE_FAILED,
"Failed to delete janitor timer");
}
+ GF_FREE(priv->janitor);
priv->janitor = NULL;
}
+
if (priv->fsyncer) {
(void)gf_thread_cleanup_xint(priv->fsyncer);
priv->fsyncer = 0;
@@ -1137,6 +1172,9 @@ posix_fini(xlator_t *this)
GF_FREE(priv->base_path);
LOCK_DESTROY(&priv->lock);
pthread_mutex_destroy(&priv->fsync_mutex);
+ pthread_cond_destroy(&priv->fsync_cond);
+ pthread_mutex_destroy(&priv->janitor_mutex);
+ pthread_cond_destroy(&priv->janitor_cond);
GF_FREE(priv->hostname);
GF_FREE(priv->trash_path);
GF_FREE(priv);
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index 828aa289a2f..9893058f29e 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -1427,12 +1427,24 @@ posix_janitor_task_done(int ret, call_frame_t *frame, void *data)
this = data;
priv = this->private;
+ pthread_mutex_lock(&priv->janitor_mutex);
+ {
+ if (priv->janitor_task_stop) {
+ priv->janitor_task_stop = _gf_false;
+ pthread_cond_signal(&priv->janitor_cond);
+ pthread_mutex_unlock(&priv->janitor_mutex);
+ goto out;
+ }
+ }
+ pthread_mutex_unlock(&priv->janitor_mutex);
+
LOCK(&priv->lock);
{
__posix_janitor_timer_start(this);
}
UNLOCK(&priv->lock);
+out:
return 0;
}
@@ -1451,6 +1463,9 @@ posix_janitor_task(void *data)
old_this = THIS;
THIS = this;
+ if (!priv)
+ goto out;
+
time(&now);
if ((now - priv->last_landfill_check) > priv->janitor_sleep_duration) {
if (priv->disable_landfill_purge) {
@@ -1470,6 +1485,7 @@ posix_janitor_task(void *data)
THIS = old_this;
+out:
return 0;
}
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index 963daf3f148..9c04c55db02 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -200,6 +200,8 @@ struct posix_private {
struct list_head fsyncs;
pthread_mutex_t fsync_mutex;
pthread_cond_t fsync_cond;
+ pthread_mutex_t janitor_mutex;
+ pthread_cond_t janitor_cond;
int fsync_queue_count;
enum {
@@ -254,6 +256,7 @@ struct posix_private {
gf_boolean_t fips_mode_rchecksum;
gf_boolean_t ctime;
+ gf_boolean_t janitor_task_stop;
};
typedef struct {