diff options
author | Mohit Agrawal <moagrawa@redhat.com> | 2018-03-12 19:43:15 +0530 |
---|---|---|
committer | Raghavendra G <rgowdapp@redhat.com> | 2018-04-19 04:31:51 +0000 |
commit | 0043c63f70776444f69667a4ef9596217ecb42b7 (patch) | |
tree | e6c239e4b27198d40bca329edcce317ded59de09 /xlators/storage | |
parent | be26b0da2f1a7fe336400de6a1c016716983bd38 (diff) |
gluster: Brick process sometimes crashes while a brick is being stopped
Problem: The brick process sometimes crashes when a brick is stopped
while brick mux is enabled.
Solution: The brick process was crashing because rpc connections were
not being cleaned up properly while brick mux is enabled. With this patch,
after the GF_EVENT_CLEANUP notification is sent to the xlator (server), the
cleanup waits for all rpc client connections of that specific xlator to be
destroyed. Once the rpc connections of all clients associated with that brick
have been destroyed in server_rpc_notify, xlator_mem_cleanup is called for the
brick xlator as well as all of its child xlators. To avoid races during cleanup,
two new flags are introduced on each xlator: cleanup_starting and call_cleanup.
BUG: 1544090
Signed-off-by: Mohit Agrawal <moagrawa@redhat.com>
Note: All test cases were run in a separate build (https://review.gluster.org/#/c/19700/)
with the same patch and brick mux forcibly enabled; all test cases
passed.
Change-Id: Ic4ab9c128df282d146cf1135640281fcb31997bf
updates: bz#1544090
Diffstat (limited to 'xlators/storage')
-rw-r--r-- | xlators/storage/posix/src/posix-common.c | 6 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix-helpers.c | 14 |
2 files changed, 14 insertions, 6 deletions
diff --git a/xlators/storage/posix/src/posix-common.c b/xlators/storage/posix/src/posix-common.c index 507bfc20991..bcaad2703e9 100644 --- a/xlators/storage/posix/src/posix-common.c +++ b/xlators/storage/posix/src/posix-common.c @@ -1105,12 +1105,13 @@ posix_fini (xlator_t *this) struct posix_private *priv = this->private; if (!priv) return; - this->private = NULL; - if (priv->health_check) { + LOCK (&priv->lock); + if (priv->health_check_active) { priv->health_check_active = _gf_false; pthread_cancel (priv->health_check); priv->health_check = 0; } + UNLOCK (&priv->lock); if (priv->disk_space_check) { priv->disk_space_check_active = _gf_false; pthread_cancel (priv->disk_space_check); @@ -1135,6 +1136,7 @@ posix_fini (xlator_t *this) GF_FREE (priv->hostname); GF_FREE (priv->trash_path); GF_FREE (priv); + this->private = NULL; return; } diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index 0ff94df944e..e9d379fda07 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -2001,6 +2001,12 @@ out: return NULL; abort: + LOCK (&priv->lock); + { + priv->health_check_active = _gf_false; + } + UNLOCK (&priv->lock); + /* health-check failed */ gf_msg (this->name, GF_LOG_EMERG, 0, P_MSG_HEALTHCHECK_FAILED, "health-check failed, going down"); @@ -2041,18 +2047,18 @@ abort: for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) { victim = (*trav_p)->xlator; - if (victim && - strcmp (victim->name, priv->base_path) == 0) { + if (!victim->call_cleanup && + strcmp (victim->name, priv->base_path) == 0) { victim_found = _gf_true; break; } } UNLOCK (&ctx->volfile_lock); - if (victim_found) { + if (victim_found && !victim->cleanup_starting) { gf_log (THIS->name, GF_LOG_INFO, "detaching not-only " " child %s", priv->base_path); + victim->cleanup_starting = 1; top->notify (top, GF_EVENT_CLEANUP, victim); - xlator_mem_cleanup (victim); } } |