diff options
author | Anand Avati <avati@redhat.com> | 2013-01-25 19:22:55 -0800 |
---|---|---|
committer | Anand Avati <avati@redhat.com> | 2013-01-26 00:20:03 -0800 |
commit | 1939519979b6c4da110f08434fe63d0b03332d52 (patch) | |
tree | bf5a91507513474fdfcde7e2e3bfe2760d0d8166 | |
parent | d8d138300b18fc2291a8a73059ac1d888141d6db (diff) |
replicate: fix lock counting in blocking lock path
As of http://review.gluster.org/2828, the blocking lock code
path's condition for checking completion of locking atempt is
broken. The condition -
if ((child_index == priv->child_count) || ...)
and
if ((child_index == priv->child_count) && ...)
which is retained to check completion of blocking lock attempts
for DATA/METADATA transaction will _always_ fail because a few
lines above we have -
child_index = cookie % priv->child_count;
So child_index will never equal priv->child_count. This leaves
the correctness at the mercy of the next part of the
conditional -
.. (int_lock->lock_count == int_lock->lk_expected_count) ..
This "works" as long as no server went down during the transaction.
If a server goes down in the middle of the transaction, then this
condition also fails, and the code wraps around and starts a
blocking lock attempt loop all the way again from from the first server.
This results in double locks getting acquired on those servers, and
eventually the second condition gets hit (first condition is _never_
hit) and we come out of locking phase.
During unlock phase we perform only one unlock per server leaving the
other lock "leaked" forever.
Change-Id: I7189cdf3f70901b04647516fe1d1e189f36cc8dd
BUG: 765564
Signed-off-by: Anand Avati <avati@redhat.com>
Reviewed-on: http://review.gluster.org/4433
Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
-rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 9 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 1 |
2 files changed, 5 insertions, 5 deletions
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index ef183d985cb..802f91f5fa7 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -913,6 +913,8 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->op_errno = op_errno; int_lock->lock_op_errno = op_errno; } + + int_lock->lk_attempted_count++; } UNLOCK (&frame->lock); @@ -1072,7 +1074,7 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie) child_index++; } - if ((child_index == priv->child_count) && + if ((int_lock->lk_expected_count == int_lock->lk_attempted_count) && ((afr_is_entrylk (int_lock, local->transaction.type) && int_lock->entrylk_lock_count == 0) || (int_lock->lock_count == 0))){ @@ -1091,10 +1093,7 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie) } - if ((child_index == priv->child_count) || - (int_lock->entrylk_lock_count == int_lock->lk_expected_count) || - (int_lock->lock_count == int_lock->lk_expected_count)) { - + if (int_lock->lk_expected_count == int_lock->lk_attempted_count) { /* we're done locking */ gf_log (this->name, GF_LOG_DEBUG, diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index d31290fbf40..9405ff29512 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -382,6 +382,7 @@ typedef struct { uint64_t lock_number; int32_t lk_call_count; int32_t lk_expected_count; + int32_t lk_attempted_count; int32_t lock_op_ret; int32_t lock_op_errno; |