diff options
author | Pranith Kumar K <pkarampu@redhat.com> | 2015-07-15 06:16:54 +0530 |
---|---|---|
committer | Xavier Hernandez <xhernandez@datalab.es> | 2015-07-23 06:30:41 -0700 |
commit | 332cb6a6901c68f0c79070d7103cc7a728ea6d26 (patch) | |
tree | b71016a0efd8d775382e8416e738280a1faf5c0f /xlators/cluster/ec/src/ec-common.c | |
parent | 94372373ee355e42dfe1660a50315adb4f019d64 (diff) |
cluster/ec: Handle race between unlock-timer, new lock
Problem:
New lock could come at the time timer is on the way to unlock. This was leading
to crash in timer thread because thread executing new lock can free up the
timer_link->fop and then timer thread will try to access structures already
freed.
Fix:
If the timer event is fired, set lock->release to true and wait for unlock to
complete.
Thanks to Xavi and Bhaskar for helping in confirming that this race is the RC.
Thanks to Kritika for pointing out and explaining how Avati's patch can be used
to fix this bug.
Change-Id: I45fa5470bbc1f03b5f3d133e26d1e0ab24303378
BUG: 1243187
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/11670
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Xavier Hernandez <xhernandez@datalab.es>
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
Diffstat (limited to 'xlators/cluster/ec/src/ec-common.c')
-rw-r--r-- | xlators/cluster/ec/src/ec-common.c | 33 |
1 files changed, 14 insertions, 19 deletions
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index c54e043f5bb..6381239901c 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -1361,13 +1361,20 @@ void ec_lock(ec_fop_data_t *fop) if (lock->timer != NULL) { GF_ASSERT (lock->release == _gf_false); timer_link = lock->timer->data; - ec_trace("UNLOCK_CANCELLED", timer_link->fop, "lock=%p", lock); - gf_timer_call_cancel(fop->xl->ctx, lock->timer); - lock->timer = NULL; - - lock->refs--; - /* There should remain at least 1 ref, the current one. */ - GF_ASSERT(lock->refs > 0); + if (gf_timer_call_cancel(fop->xl->ctx, lock->timer) == 0) { + ec_trace("UNLOCK_CANCELLED", timer_link->fop, + "lock=%p", lock); + lock->timer = NULL; + lock->refs--; + /* There should remain at least 1 ref, the current one. */ + GF_ASSERT(lock->refs > 0); + } else { + /* Timer expired and on the way to unlock. + * Set lock->release to _gf_true, so that this + * lock will be put in frozen list*/ + timer_link = NULL; + lock->release = _gf_true; + } } GF_ASSERT(list_empty(&link->wait_list)); @@ -1818,18 +1825,6 @@ void ec_unlock(ec_fop_data_t *fop) } } -void -ec_unlock_force(ec_fop_data_t *fop) -{ - int32_t i; - - for (i = 0; i < fop->lock_count; i++) { - ec_trace("UNLOCK_FORCED", fop, "lock=%p", &fop->locks[i]); - - ec_unlock_timer_del(&fop->locks[i]); - } -} - void ec_flush_size_version(ec_fop_data_t * fop) { GF_ASSERT(fop->lock_count == 1); |