author     Ravishankar N <ravishankar@redhat.com>           2016-05-18 14:37:46 +0530
committer  Pranith Kumar Karampuri <pkarampu@redhat.com>    2016-05-24 01:23:46 -0700
commit     86a87a2ec0984f450b36ae6414c2d6d66870af73
tree       d70f8dc5f37f252c9f28eba63ac58646e34ca1fe
parent     b01fb8d3bb9772d94073aaa52b2d8210ac4fabb8
cluster/afr: Check for required number of entrylks
Problem:
Parallel rmdir operations on the same directory result in ENOTCONN messages
even though there was no network disconnect.
While taking blocking entry locks during rmdir, AFR takes two sets of locks
on all its children: one on (parent dir, name of the dir to be deleted), the
other a full lock on the dir being deleted. We proceed to the pre-op stage
even if only a single lock (but not all of the needed locks) was obtained,
only to fail it with ENOTCONN because afr_locked_nodes_get() returns zero
nodes in afr_changelog_pre_op().
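
The race is easy to trigger from a client. Below is a minimal repro sketch,
not part of the patch: it assumes a replica volume mounted at /mnt/r2 (a
hypothetical path) and races two rmdir(2) calls on the same directory. Before
this fix, the losing thread could see ENOTCONN instead of the expected ENOENT,
despite all bricks being up.

    /* Race two rmdir() calls on one directory of a replica mount.
     * Build with: cc -pthread repro.c -o repro
     * The mount path and directory name are illustrative. */
    #include <errno.h>
    #include <pthread.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/stat.h>
    #include <sys/types.h>
    #include <unistd.h>

    static void *
    remove_dir (void *path)
    {
            /* One of the two callers must fail; the bug is *which errno*
             * it fails with, not that it fails. */
            if (rmdir ((const char *)path) < 0)
                    fprintf (stderr, "rmdir: %s\n", strerror (errno));
            return NULL;
    }

    int
    main (void)
    {
            pthread_t   t1, t2;
            const char *dir = "/mnt/r2/testdir"; /* hypothetical mount */

            mkdir (dir, 0755);
            pthread_create (&t1, NULL, remove_dir, (void *)dir);
            pthread_create (&t2, NULL, remove_dir, (void *)dir);
            pthread_join (t1, NULL);
            pthread_join (t2, NULL);
            return 0;
    }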
Fix:
After we get replies for all blocking lock requests, if we don't have
the minimum number of locks to carry out the FOP, unlock and fail the
FOP. The op_errno will be that of the last failed reply we got, i.e.
whatever is set in afr_lock_cbk().
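
The core of the new check can be modeled in isolation as follows. This is a
simplified sketch: enough_entry_locks, LOCK_SETS, and CHILD_COUNT are
illustrative names, not GlusterFS symbols; the real logic lives in
is_blocking_locks_count_sufficient() in the diff below. locked[set][child]
records whether lock set 'set' succeeded on brick 'child', and the FOP may
proceed only if at least one brick holds the locks from every set.

    #include <stdbool.h>

    #define LOCK_SETS   2   /* e.g. (parent, name) lock + full lock on dir */
    #define CHILD_COUNT 3   /* replica count; illustrative */

    static bool
    enough_entry_locks (bool locked[LOCK_SETS][CHILD_COUNT])
    {
            for (int child = 0; child < CHILD_COUNT; child++) {
                    bool all_sets = true;

                    for (int set = 0; set < LOCK_SETS; set++) {
                            if (!locked[set][child])
                                    all_sets = false;
                    }
                    if (all_sets)
                            return true;  /* this brick holds every set */
            }
            return false;                 /* unlock and fail the FOP */
    }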
Change-Id: Ibef25e65b468ebb5ea6ae1f5121a5f1201072293
BUG: 1336381
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/14358
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
-rw-r--r--  xlators/cluster/afr/src/afr-lk-common.c | 88
1 file changed, 83 insertions(+), 5 deletions(-)
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 53bb7920089..c2a5f526c08 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -1029,6 +1029,88 @@ _is_lock_wind_needed (afr_local_t *local, int child_index)
         return _gf_true;
 }
 
+static void
+afr_log_entry_locks_failure (xlator_t *this, afr_local_t *local,
+                             afr_internal_lock_t *int_lock)
+{
+        const char *fop     = NULL;
+        char       *pargfid = NULL;
+        const char *name    = NULL;
+
+        fop = gf_fop_list[local->op];
+
+        switch (local->op) {
+        case GF_FOP_LINK:
+                pargfid = uuid_utoa (local->newloc.pargfid);
+                name = local->newloc.name;
+                break;
+        default:
+                pargfid = uuid_utoa (local->loc.pargfid);
+                name = local->loc.name;
+                break;
+        }
+
+        gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_BLOCKING_LKS_FAILED,
+                "Unable to obtain sufficient blocking entry locks on at least "
+                "one child while attempting %s on {pgfid:%s, name:%s}.", fop,
+                pargfid, name);
+}
+
+static gf_boolean_t
+is_blocking_locks_count_sufficient (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t         *local        = NULL;
+        afr_private_t       *priv         = NULL;
+        afr_internal_lock_t *int_lock     = NULL;
+        gf_boolean_t         is_entrylk   = _gf_false;
+        int                  child        = 0;
+        int                  nlockee      = 0;
+        int                  lockee_count = 0;
+        gf_boolean_t         ret          = _gf_true;
+
+        local = frame->local;
+        priv = this->private;
+        int_lock = &local->internal_lock;
+        lockee_count = int_lock->lockee_count;
+        is_entrylk = afr_is_entrylk (int_lock, local->transaction.type);
+
+        if (!is_entrylk) {
+                if (int_lock->lock_count == 0) {
+                        gf_msg (this->name, GF_LOG_WARNING, 0,
+                                AFR_MSG_BLOCKING_LKS_FAILED, "Unable to obtain "
+                                "blocking inode lock on even one child for "
+                                "gfid:%s.", uuid_utoa (local->inode->gfid));
+                        return _gf_false;
+                } else {
+                        /* inodelk succeeded on at least one child. */
+                        return _gf_true;
+                }
+        } else {
+                if (int_lock->entrylk_lock_count == 0) {
+                        afr_log_entry_locks_failure (this, local, int_lock);
+                        return _gf_false;
+                }
+                /* For FOPs that take multiple sets of locks (mkdir, rename),
+                 * there must be at least one brick on which the locks from
+                 * all lock sets were successful. */
+                for (child = 0; child < priv->child_count; child++) {
+                        ret = _gf_true;
+                        for (nlockee = 0; nlockee < lockee_count; nlockee++) {
+                                if (!(int_lock->lockee[nlockee].locked_nodes[child] &
+                                      LOCKED_YES))
+                                        ret = _gf_false;
+                        }
+                        if (ret)
+                                return ret;
+                }
+                if (!ret)
+                        afr_log_entry_locks_failure (this, local, int_lock);
+        }
+
+        return ret;
+}
+
 int
 afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
 {
@@ -1079,11 +1161,7 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
         }
 
         if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
-                if ((is_entrylk && int_lock->entrylk_lock_count == 0) ||
-                    (!is_entrylk && int_lock->lock_count == 0)) {
-                        gf_msg (this->name, GF_LOG_INFO, 0,
-                                AFR_MSG_BLOCKING_LKS_FAILED,
-                                "unable to lock on even one child");
+                if (!is_blocking_locks_count_sufficient (frame, this)) {
                         local->op_ret = -1;
                         int_lock->lock_op_ret = -1;