author    Ravishankar N <ravishankar@redhat.com>    2016-05-18 14:37:46 +0530
committer Pranith Kumar Karampuri <pkarampu@redhat.com>    2016-05-24 01:23:46 -0700
commit    86a87a2ec0984f450b36ae6414c2d6d66870af73 (patch)
tree      d70f8dc5f37f252c9f28eba63ac58646e34ca1fe /xlators/cluster
parent    b01fb8d3bb9772d94073aaa52b2d8210ac4fabb8 (diff)
cluster/afr: Check for required number of entrylks
Problem: Parallel rmdir operations on the same directory result in
ENOTCONN messages even though there was no network disconnect. While
taking blocking entry locks during rmdir, AFR takes two sets of locks
on all its children: one on (parent dir, name of the dir to be
deleted), the other a full lock on the dir being deleted. We proceed
to the pre-op stage even if only a single lock (but not all the
needed locks) was obtained, only to fail it with ENOTCONN because
afr_locked_nodes_get() returns zero nodes in afr_changelog_pre_op().

Fix: After we get replies for all blocking lock requests, if we don't
have the minimum number of locks needed to carry out the FOP, unlock
and fail the FOP. The op_errno will be that of the last failed reply
we got, i.e. whatever is set in afr_lock_cbk().

Change-Id: Ibef25e65b468ebb5ea6ae1f5121a5f1201072293
BUG: 1336381
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/14358
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
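The core of the fix is easiest to see in isolation. Below is a minimal,
self-contained C sketch of the sufficiency check the patch introduces: a
FOP's blocking locks are sufficient only if at least one child (brick)
holds the lock from every lock set. The names here
(blocking_locks_sufficient, LOCK_SETS, CHILD_COUNT, the locked matrix)
are hypothetical stand-ins for AFR's int_lock->lockee[n].locked_nodes[]
bookkeeping, not the committed code shown in the diff below.

#include <stdbool.h>
#include <stdio.h>

#define LOCK_SETS   2  /* e.g. rmdir: (parent, name) lock + full lock on the dir */
#define CHILD_COUNT 3  /* hypothetical replica count */

/* Sufficient iff some child holds the lock from every lock set. */
static bool
blocking_locks_sufficient (bool locked[LOCK_SETS][CHILD_COUNT])
{
        int  child    = 0;
        int  set      = 0;
        bool all_sets = false;

        for (child = 0; child < CHILD_COUNT; child++) {
                all_sets = true;
                for (set = 0; set < LOCK_SETS; set++) {
                        if (!locked[set][child])
                                all_sets = false;
                }
                if (all_sets)
                        return true;
        }
        return false;
}

int
main (void)
{
        /* Lock set 0 succeeded only on child 0, set 1 only on child 1:
         * no single child holds both sets, so the FOP must be failed
         * (and the partial locks released) instead of entering pre-op. */
        bool locked[LOCK_SETS][CHILD_COUNT] = {
                { true,  false, false },
                { false, true,  false },
        };

        printf ("sufficient: %s\n",
                blocking_locks_sufficient (locked) ? "yes" : "no");
        return 0;
}

Compiled with any C99 compiler, this prints "sufficient: no"; flipping
locked[1][0] to true makes it print "sufficient: yes", mirroring the
per-child loop in is_blocking_locks_count_sufficient() in the diff.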
Diffstat (limited to 'xlators/cluster')
-rw-r--r--    xlators/cluster/afr/src/afr-lk-common.c    88
1 file changed, 83 insertions(+), 5 deletions(-)
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 53bb7920089..c2a5f526c08 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -1029,6 +1029,88 @@ _is_lock_wind_needed (afr_local_t *local, int child_index)
return _gf_true;
}
+static void
+afr_log_entry_locks_failure(xlator_t *this, afr_local_t *local,
+ afr_internal_lock_t *int_lock)
+{
+ const char *fop = NULL;
+ char *pargfid = NULL;
+ const char *name = NULL;
+
+ fop = gf_fop_list[local->op];
+
+ switch (local->op) {
+ case GF_FOP_LINK:
+ pargfid = uuid_utoa(local->newloc.pargfid);
+ name = local->newloc.name;
+ break;
+ default:
+ pargfid = uuid_utoa(local->loc.pargfid);
+ name = local->loc.name;
+ break;
+ }
+
+ gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_BLOCKING_LKS_FAILED,
+ "Unable to obtain sufficient blocking entry locks on at least "
+ "one child while attempting %s on {pgfid:%s, name:%s}.", fop,
+ pargfid, name);
+}
+
+static gf_boolean_t
+is_blocking_locks_count_sufficient (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ gf_boolean_t is_entrylk = _gf_false;
+ int child = 0;
+ int nlockee = 0;
+ int lockee_count = 0;
+ gf_boolean_t ret = _gf_true;
+
+ local = frame->local;
+ priv = this->private;
+ int_lock = &local->internal_lock;
+ lockee_count = int_lock->lockee_count;
+ is_entrylk = afr_is_entrylk (int_lock, local->transaction.type);
+
+ if (!is_entrylk) {
+ if (int_lock->lock_count == 0) {
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_BLOCKING_LKS_FAILED, "Unable to obtain "
+ "blocking inode lock on even one child for "
+ "gfid:%s.", uuid_utoa (local->inode->gfid));
+ return _gf_false;
+ } else {
+ /* inodelk succeeded on at least one child. */
+ return _gf_true;
+ }
+
+ } else {
+ if (int_lock->entrylk_lock_count == 0) {
+ afr_log_entry_locks_failure (this, local, int_lock);
+ return _gf_false;
+ }
+ /* For FOPs that take multiple sets of locks (mkdir, rename),
+ * there must be at least one brick on which the locks from
+ * all lock sets were successful. */
+ for (child = 0; child < priv->child_count; child++) {
+ ret = _gf_true;
+ for (nlockee = 0; nlockee < lockee_count; nlockee++) {
+ if (!(int_lock->lockee[nlockee].locked_nodes[child] & LOCKED_YES))
+ ret = _gf_false;
+ }
+ if (ret)
+ return ret;
+ }
+ if (!ret)
+ afr_log_entry_locks_failure (this, local, int_lock);
+ }
+
+ return ret;
+}
+
int
afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
{
@@ -1079,11 +1161,7 @@ afr_lock_blocking (call_frame_t *frame, xlator_t *this, int cookie)
}
if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
- if ((is_entrylk && int_lock->entrylk_lock_count == 0) ||
- (!is_entrylk && int_lock->lock_count == 0)) {
- gf_msg (this->name, GF_LOG_INFO, 0,
- AFR_MSG_BLOCKING_LKS_FAILED,
- "unable to lock on even one child");
+ if (!is_blocking_locks_count_sufficient (frame, this)) {
local->op_ret = -1;
int_lock->lock_op_ret = -1;