From dc8a3490e437d25ac2ee94a74778cd16c778514d Mon Sep 17 00:00:00 2001 From: Pranith Kumar K Date: Mon, 15 Sep 2014 14:22:44 +0530 Subject: cluster/afr: Handle EAGAIN properly in inodelk Backport of http://review.gluster.org/8739 Problem: When one of the bricks is taken down and brought back up in a replica pair, locks on that brick will be allowed. Afr returns inodelk success even when one of the bricks already has the lock taken. Fix: If any brick returns EAGAIN return failure to parent xlator. Note: This change only works for non-blocking inodelks. This patch addresses dht-synchronization which uses non-blocking locks for rename. Blocking lock is issued by only one of the rebalance processes. So for now there is no possibility of deadlock. BUG: 1151308 Change-Id: I72f15d8789442c29b5c7be2d5dabf7bae6bfa845 Signed-off-by: Pranith Kumar K Reviewed-on: http://review.gluster.org/8923 Tested-by: Gluster Build System Reviewed-by: Niels de Vos Reviewed-by: Kaleb KEITHLEY --- xlators/cluster/afr/src/afr-common.c | 164 ++++++++++++++++++++++++++++++++--- xlators/cluster/afr/src/afr.h | 7 ++ 2 files changed, 157 insertions(+), 14 deletions(-) diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 537b9c2062a..1f39e98d06c 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -2971,6 +2971,72 @@ out: /* }}} */ +int32_t +afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) + +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = -1; + int child_index = (long)cookie; + uuid_t gfid = {0}; + + local = frame->local; + priv = this->private; + + if (op_ret < 0 && op_errno != ENOTCONN) { + loc_gfid (&local->loc, gfid); + gf_log (this->name, GF_LOG_ERROR, "%s: Failed to unlock %s " + "with lk_owner: %s (%s)", uuid_utoa (gfid), + priv->children[child_index]->name, + lkowner_utoa (&frame->root->lk_owner), + 
strerror (op_errno)); + } + + call_count = afr_frame_return (frame); + if (call_count == 0) { + AFR_STACK_UNWIND (inodelk, frame, local->op_ret, + local->op_errno, local->xdata_rsp); + } + + return 0; +} + +int32_t +afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this, + int call_count) +{ + int i = 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + local = frame->local; + priv = this->private; + local->call_count = call_count; + local->cont.inodelk.flock.l_type = F_UNLCK; + + for (i = 0; i < priv->child_count; i++) { + if (!local->child_up[i]) + continue; + + if (local->child_errno[i]) + continue; + + STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk, + (void*) (long) i, + priv->children[i], + priv->children[i]->fops->inodelk, + local->cont.inodelk.volume, + &local->loc, local->cont.inodelk.cmd, + &local->cont.inodelk.flock, 0); + + if (!--call_count) + break; + } + + return 0; +} int32_t afr_inodelk_cbk (call_frame_t *frame, void *cookie, @@ -2978,24 +3044,88 @@ afr_inodelk_cbk (call_frame_t *frame, void *cookie, { afr_local_t *local = NULL; + afr_private_t *priv = NULL; int call_count = -1; + int child_index = (long)cookie; + int i = 0; + int lock_count = 0; local = frame->local; + priv = this->private; - LOCK (&frame->lock); - { - if (op_ret == 0) - local->op_ret = 0; - - local->op_errno = op_errno; + if (op_ret < 0) + local->child_errno[child_index] = op_errno; + if (op_ret == 0 && xdata) { + LOCK (&frame->lock); + { + if (!local->xdata_rsp) + local->xdata_rsp = dict_ref (xdata); + } + UNLOCK (&frame->lock); } - UNLOCK (&frame->lock); call_count = afr_frame_return (frame); - if (call_count == 0) - AFR_STACK_UNWIND (inodelk, frame, local->op_ret, - local->op_errno, xdata); + if (call_count == 0) { + for (i = 0; i < priv->child_count; i++) { + /* + * The idea is to not allow lock even if at least one of + * the bricks already have a competing lock granted. If + * there is a competing lock the errno returned is + * EAGAIN. 
so in this loop the following criteria + * should be met. + * 1) If the errno is anything other than EAGAIN + * on some of the subvols but there is at least one + * success, the fop should be considered success. + * 2) If the errno is EAGAIN on at least one of the + * subvols the fop should fail with -1, EAGAIN. + */ + if (!local->child_up[i]) + continue; + + if (local->child_errno[i] == 0) + lock_count++; + + if (local->op_ret == -1 && local->op_errno == EAGAIN) + continue; + /* + * For meeting '2)' we set op_ret to -1, op_errno to + * EAGAIN if any of the bricks give that error. Check + * above prevents any more modifications to + * local->op_ret, local->op_errno + * (i.e. final status of the fop). + */ + if (local->child_errno[i] == EAGAIN) { + local->op_ret = -1; + local->op_errno = EAGAIN; + continue; + } + + /* + * For meeting '1)' + * Here we set the op_ret to 0 if the fop succeeds on + * any of the bricks provided we haven't witnessed + * any -1, EAGAIN from other bricks. So if the bricks + * fail with some other reason other than EAGAIN but + * succeed on at least one of the bricks the final + * result is SUCCESS for the fop. 
+ */ + + if (local->child_errno[i] == 0) + local->op_ret = 0; + + local->op_errno = local->child_errno[i]; + } + + if (lock_count && local->cont.inodelk.flock.l_type != F_UNLCK && + (local->op_ret == -1 && local->op_errno == EAGAIN)) { + afr_unlock_inodelks_and_unwind (frame, this, + lock_count); + } else { + AFR_STACK_UNWIND (inodelk, frame, local->op_ret, + local->op_errno, local->xdata_rsp); + } + } return 0; } @@ -3026,14 +3156,20 @@ afr_inodelk (call_frame_t *frame, xlator_t *this, if (ret < 0) goto out; + loc_copy (&local->loc, loc); + local->cont.inodelk.volume = volume; + local->cont.inodelk.cmd = cmd; + local->cont.inodelk.flock = *flock; + call_count = local->call_count; for (i = 0; i < priv->child_count; i++) { if (local->child_up[i]) { - STACK_WIND (frame, afr_inodelk_cbk, - priv->children[i], - priv->children[i]->fops->inodelk, - volume, loc, cmd, flock, xdata); + STACK_WIND_COOKIE (frame, afr_inodelk_cbk, + (void*) (long) i, + priv->children[i], + priv->children[i]->fops->inodelk, + volume, loc, cmd, flock, xdata); if (!--call_count) break; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 0a2b00168ea..63f658da09b 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -646,6 +646,13 @@ typedef struct _afr_local { dict_t *params; char *linkpath; } symlink; + + struct { + const char *volume; + int32_t cmd; + struct gf_flock flock; + } inodelk; + } cont; struct { -- cgit