summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnand Avati <avati@redhat.com>2013-01-25 19:22:55 -0800
committerAnand Avati <avati@redhat.com>2013-01-26 00:20:03 -0800
commit1939519979b6c4da110f08434fe63d0b03332d52 (patch)
treebf5a91507513474fdfcde7e2e3bfe2760d0d8166
parentd8d138300b18fc2291a8a73059ac1d888141d6db (diff)
replicate: fix lock counting in blocking lock path
As of http://review.gluster.org/2828, the blocking lock code path's condition for checking completion of locking atempt is broken. The condition - if ((child_index == priv->child_count) || ...) and if ((child_index == priv->child_count) && ...) which is retained to check completion of blocking lock attempts for DATA/METADATA transaction will _always_ fail because a few lines above we have - child_index = cookie % priv->child_count; So child_index will never equal priv->child_count. This leaves the correctness at the mercy of the next part of the conditional - .. (int_lock->lock_count == int_lock->lk_expected_count) .. This "works" as long as no server went down during the transaction. If a server goes down in the middle of the transaction, then this condition also fails, and the code wraps around and starts a blocking lock attempt loop all the way again from from the first server. This results in double locks getting acquired on those servers, and eventually the second condition gets hit (first condition is _never_ hit) and we come out of locking phase. During unlock phase we perform only one unlock per server leaving the other lock "leaked" forever. Change-Id: I7189cdf3f70901b04647516fe1d1e189f36cc8dd BUG: 765564 Signed-off-by: Anand Avati <avati@redhat.com> Reviewed-on: http://review.gluster.org/4433 Reviewed-by: Krishnan Parthasarathi <kparthas@redhat.com> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com>
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c9
-rw-r--r--xlators/cluster/afr/src/afr.h1
2 files changed, 5 insertions, 5 deletions
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index ef183d985cb..802f91f5fa7 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -913,6 +913,8 @@ afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_errno = op_errno;
int_lock->lock_op_errno = op_errno;
}
+
+ int_lock->lk_attempted_count++;
}
UNLOCK (&frame->lock);
@@ -1072,7 +1074,7 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
child_index++;
}
- if ((child_index == priv->child_count) &&
+ if ((int_lock->lk_expected_count == int_lock->lk_attempted_count) &&
((afr_is_entrylk (int_lock, local->transaction.type) &&
int_lock->entrylk_lock_count == 0) ||
(int_lock->lock_count == 0))){
@@ -1091,10 +1093,7 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
}
- if ((child_index == priv->child_count) ||
- (int_lock->entrylk_lock_count == int_lock->lk_expected_count) ||
- (int_lock->lock_count == int_lock->lk_expected_count)) {
-
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
/* we're done locking */
gf_log (this->name, GF_LOG_DEBUG,
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index d31290fbf40..9405ff29512 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -382,6 +382,7 @@ typedef struct {
uint64_t lock_number;
int32_t lk_call_count;
int32_t lk_expected_count;
+ int32_t lk_attempted_count;
int32_t lock_op_ret;
int32_t lock_op_errno;