diff options
author | Pranith Kumar K <pkarampu@redhat.com> | 2016-11-07 14:47:34 +0530 |
---|---|---|
committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2016-12-07 22:56:55 -0800 |
commit | 953917924a6298fb1deedf76feec354ee21dc373 (patch) | |
tree | f95ba8853cda5c843fe34589f0533d5b6366fc29 | |
parent | 15e424144228d3d769f0afc7ee753ad7612c32d4 (diff) |
cluster/afr: Fix bugs in [f]inodelk/[f]entrylk
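Problems:
1) inodelk does not take quorum into account.
2) finodelk and [f]entrylk are not implemented correctly: they report success as soon as any single brick grants the lock and never release partially acquired locks.
3) afr does not try non-blocking parallel locks first; inodelk lock requests are always wound to the bricks one at a time.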
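Fix:
Implemented a common framework shared by [f]inodelk/[f]entrylk: it first tries
non-blocking locks on all up bricks in parallel, falls back to serialized locking if a conflicting lock is found, and, when quorum is configured but not met, releases the locks already taken and fails the fop.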
>Change-Id: I239f13875a065298630d266941df10cfa3addc85
>BUG: 1369077
>Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
>Reviewed-on: http://review.gluster.org/15802
>Tested-by: Krutika Dhananjay <kdhananj@redhat.com>
>Reviewed-by: Krutika Dhananjay <kdhananj@redhat.com>
>Smoke: Gluster Build System <jenkins@build.gluster.org>
>Reviewed-by: Ravishankar N <ravishankar@redhat.com>
>CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
>NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
BUG: 1402482
Change-Id: I0c5fed6ca87c6432bb20d00f76cdf5c328a52a85
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/16056
Smoke: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Ravishankar N <ravishankar@redhat.com>
-rw-r--r-- | tests/basic/afr/inodelk.t | 87 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 686 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 2 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-messages.h | 4 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 8 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 17 |
6 files changed, 468 insertions, 336 deletions
diff --git a/tests/basic/afr/inodelk.t b/tests/basic/afr/inodelk.t new file mode 100644 index 00000000000..a32aa8531b5 --- /dev/null +++ b/tests/basic/afr/inodelk.t @@ -0,0 +1,87 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +cleanup; + +#This test tests that inodelk fails when quorum is not met. Also tests the +#success case where inodelk is obtained and unlocks are done correctly. + +TEST glusterd; +TEST pidof glusterd + +TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0..5} +TEST $CLI volume start $V0 +TEST $GFS -s $H0 --volfile-id=$V0 $M0 + +#Test success case +TEST mkdir $M0/dir1 +TEST mv $M0/dir1 $M0/dir2 + +#If there is a problem with inodelk unlocking the following would hang. +TEST mv $M0/dir2 $M0/dir1 + +#Test failure case by bringing two of the bricks down +#Test that the directory is not moved partially on some bricks but successful +#on other subvol where quorum meets. Do that for both set of bricks + +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST ! mv $M0/dir1 $M0/dir2 + +TEST stat $B0/${V0}0/dir1 +TEST stat $B0/${V0}1/dir1 +TEST stat $B0/${V0}2/dir1 +TEST stat $B0/${V0}3/dir1 +TEST stat $B0/${V0}4/dir1 +TEST stat $B0/${V0}5/dir1 +TEST ! stat $B0/${V0}0/dir2 +TEST ! stat $B0/${V0}1/dir2 +TEST ! stat $B0/${V0}2/dir2 +TEST ! stat $B0/${V0}3/dir2 +TEST ! stat $B0/${V0}4/dir2 +TEST ! stat $B0/${V0}5/dir2 + +TEST $CLI volume start $V0 force +TEST kill_brick $V0 $H0 $B0/${V0}3 +TEST kill_brick $V0 $H0 $B0/${V0}4 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST ! mv $M0/dir1 $M0/dir2 +TEST stat $B0/${V0}0/dir1 +TEST stat $B0/${V0}1/dir1 +TEST stat $B0/${V0}2/dir1 +TEST stat $B0/${V0}3/dir1 +TEST stat $B0/${V0}4/dir1 +TEST stat $B0/${V0}5/dir1 +TEST ! stat $B0/${V0}0/dir2 +TEST ! stat $B0/${V0}1/dir2 +TEST ! stat $B0/${V0}2/dir2 +TEST ! stat $B0/${V0}3/dir2 +TEST ! 
stat $B0/${V0}4/dir2 +TEST ! stat $B0/${V0}5/dir2 + +#Bring the bricks back up and try mv once more, it should succeed. +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 4 +TEST mv $M0/dir1 $M0/dir2 +cleanup; +#Do similar tests on replica 2 +TEST glusterd; +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..3} +TEST $CLI volume start $V0 +TEST $GFS -s $H0 --volfile-id=$V0 $M0 +TEST mkdir $M0/dir1 +TEST mv $M0/dir1 $M0/dir2 +#Because we don't know hashed subvol, do the same test twice bringing 1 brick +#from each down, quorum calculation should allow it. +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST mv $M0/dir2 $M0/dir1 +TEST $CLI volume start $V0 force +TEST kill_brick $V0 $H0 $B0/${V0}2 +TEST mv $M0/dir1 $M0/dir2 +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST mv $M0/dir2 $M0/dir1 +cleanup diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 072c663c5c0..781b108702d 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -45,6 +45,14 @@ #include "afr-messages.h" #include "compound-fop-utils.h" +int32_t +afr_quorum_errno (afr_private_t *priv) +{ + if (priv->quorum_reads) + return ENOTCONN; + return EROFS; +} + gf_boolean_t afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv, int32_t *op_errno) @@ -1576,20 +1584,20 @@ afr_remove_eager_lock_stub (afr_local_t *local) } static gf_boolean_t -afr_entrylk_is_unlock (entrylk_cmd cmd) +afr_fop_lock_is_unlock (call_frame_t *frame) { - if (ENTRYLK_UNLOCK == cmd) - return _gf_true; - return _gf_false; -} - -static gf_boolean_t -afr_inodelk_is_unlock (int32_t cmd, struct gf_flock *flock) -{ - switch (cmd) { - case F_SETLKW: - case F_SETLK: - if (F_UNLCK == flock->l_type) + afr_local_t *local = frame->local; + switch (local->op) { + case GF_FOP_INODELK: + case GF_FOP_FINODELK: + if ((F_UNLCK == 
local->cont.inodelk.in_flock.l_type) && + (local->cont.inodelk.in_cmd == F_SETLKW || + local->cont.inodelk.in_cmd == F_SETLK)) + return _gf_true; + break; + case GF_FOP_ENTRYLK: + case GF_FOP_FENTRYLK: + if (ENTRYLK_UNLOCK == local->cont.entrylk.in_cmd) return _gf_true; break; default: @@ -1792,6 +1800,15 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this) { /* inodelk */ GF_FREE (local->cont.inodelk.volume); + if (local->cont.inodelk.xdata) + dict_unref (local->cont.inodelk.xdata); + } + + { /* entrylk */ + GF_FREE (local->cont.entrylk.volume); + GF_FREE (local->cont.entrylk.basename); + if (local->cont.entrylk.xdata) + dict_unref (local->cont.entrylk.xdata); } if (local->xdata_req) @@ -3350,10 +3367,96 @@ out: /* }}} */ -int32_t -afr_unlock_partial_inodelk_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) +static int +afr_serialized_lock_wind (call_frame_t *frame, xlator_t *this); + +static gf_boolean_t +afr_is_conflicting_lock_present (int32_t op_ret, int32_t op_errno) +{ + if (op_ret == -1 && op_errno == EAGAIN) + return _gf_true; + return _gf_false; +} + +static void +afr_fop_lock_unwind (call_frame_t *frame, glusterfs_fop_t op, int32_t op_ret, + int32_t op_errno, dict_t *xdata) +{ + switch (op) { + case GF_FOP_INODELK: + AFR_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata); + break; + case GF_FOP_FINODELK: + AFR_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata); + break; + case GF_FOP_ENTRYLK: + AFR_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata); + break; + case GF_FOP_FENTRYLK: + AFR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, xdata); + break; + default: + break; + } +} + +static void +afr_fop_lock_wind (call_frame_t *frame, xlator_t *this, int child_index, + int32_t (*lock_cbk) (call_frame_t *, void *, xlator_t *, + int32_t, int32_t, dict_t *)) +{ + afr_local_t *local = frame->local; + afr_private_t *priv = this->private; + int i = child_index; + + switch (local->op) { 
+ case GF_FOP_INODELK: + STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i, + priv->children[i], + priv->children[i]->fops->inodelk, + (const char *)local->cont.inodelk.volume, + &local->loc, local->cont.inodelk.cmd, + &local->cont.inodelk.flock, + local->cont.inodelk.xdata); + break; + case GF_FOP_FINODELK: + STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i, + priv->children[i], + priv->children[i]->fops->finodelk, + (const char *)local->cont.inodelk.volume, + local->fd, local->cont.inodelk.cmd, + &local->cont.inodelk.flock, + local->cont.inodelk.xdata); + break; + case GF_FOP_ENTRYLK: + STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i, + priv->children[i], + priv->children[i]->fops->entrylk, + local->cont.entrylk.volume, &local->loc, + local->cont.entrylk.basename, + local->cont.entrylk.cmd, + local->cont.entrylk.type, + local->cont.entrylk.xdata); + break; + case GF_FOP_FENTRYLK: + STACK_WIND_COOKIE (frame, lock_cbk, (void *) (long) i, + priv->children[i], + priv->children[i]->fops->fentrylk, + local->cont.entrylk.volume, local->fd, + local->cont.entrylk.basename, + local->cont.entrylk.cmd, + local->cont.entrylk.type, + local->cont.entrylk.xdata); + break; + default: + break; + } +} + +static int32_t +afr_unlock_partial_lock_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, dict_t *xdata) { afr_local_t *local = NULL; @@ -3366,28 +3469,78 @@ afr_unlock_partial_inodelk_cbk (call_frame_t *frame, void *cookie, priv = this->private; if (op_ret < 0 && op_errno != ENOTCONN) { - loc_gfid (&local->loc, gfid); - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_INODE_UNLOCK_FAIL, - "%s: Failed to unlock %s " - "with lk_owner: %s (%s)", uuid_utoa (gfid), + if (local->fd) + gf_uuid_copy (gfid, local->fd->inode->gfid); + else + loc_gfid (&local->loc, gfid); + gf_msg (this->name, GF_LOG_ERROR, op_errno, + AFR_MSG_UNLOCK_FAIL, + "%s: Failed to unlock %s on %s " + "with lk_owner: %s", uuid_utoa (gfid), + 
gf_fop_list[local->op], priv->children[child_index]->name, - lkowner_utoa (&frame->root->lk_owner), - strerror (op_errno)); + lkowner_utoa (&frame->root->lk_owner)); } call_count = afr_frame_return (frame); - if (call_count == 0) { - AFR_STACK_UNWIND (inodelk, frame, local->op_ret, - local->op_errno, local->xdata_rsp); - } + if (call_count) + goto out; + if (local->fop_lock_state != AFR_FOP_LOCK_PARALLEL) { + afr_fop_lock_unwind (frame, local->op, local->op_ret, + local->op_errno, local->xdata_rsp); + goto out; + } + /* At least one child is up */ + /* + * Non-blocking locks also need to be serialized. Otherwise there is + * a chance that both the mounts which issued same non-blocking inodelk + * may endup not acquiring the lock on any-brick. + * Ex: Mount1 and Mount2 + * request for full length lock on file f1. Mount1 afr may acquire the + * partial lock on brick-1 and may not acquire the lock on brick-2 + * because Mount2 already got the lock on brick-2, vice versa. Since + * both the mounts only got partial locks, afr treats them as failure in + * gaining the locks and unwinds with EAGAIN errno. 
+ */ + local->op_ret = -1; + local->op_ret = EUCLEAN; + local->fop_lock_state = AFR_FOP_LOCK_SERIAL; + afr_local_replies_wipe (local, priv); + if (local->xdata_rsp) + dict_unref (local->xdata_rsp); + local->xdata_rsp = NULL; + switch (local->op) { + case GF_FOP_INODELK: + case GF_FOP_FINODELK: + local->cont.inodelk.cmd = local->cont.inodelk.in_cmd; + local->cont.inodelk.flock = local->cont.inodelk.in_flock; + if (local->cont.inodelk.xdata) + dict_unref (local->cont.inodelk.xdata); + local->cont.inodelk.xdata = NULL; + if (local->xdata_req) + local->cont.inodelk.xdata = dict_ref (local->xdata_req); + break; + case GF_FOP_ENTRYLK: + case GF_FOP_FENTRYLK: + local->cont.entrylk.cmd = local->cont.entrylk.in_cmd; + if (local->cont.entrylk.xdata) + dict_unref (local->cont.entrylk.xdata); + local->cont.entrylk.xdata = NULL; + if (local->xdata_req) + local->cont.entrylk.xdata = dict_ref (local->xdata_req); + break; + default: + break; + } + afr_serialized_lock_wind (frame, this); +out: return 0; } -int32_t -afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this, - int call_count) +static int32_t +afr_unlock_locks_and_proceed (call_frame_t *frame, xlator_t *this, + int call_count) { int i = 0; afr_private_t *priv = NULL; @@ -3396,7 +3549,25 @@ afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this, local = frame->local; priv = this->private; local->call_count = call_count; - local->cont.inodelk.flock.l_type = F_UNLCK; + switch (local->op) { + case GF_FOP_INODELK: + case GF_FOP_FINODELK: + local->cont.inodelk.flock.l_type = F_UNLCK; + local->cont.inodelk.cmd = F_SETLK; + if (local->cont.inodelk.xdata) + dict_unref (local->cont.inodelk.xdata); + local->cont.inodelk.xdata = NULL; + break; + case GF_FOP_ENTRYLK: + case GF_FOP_FENTRYLK: + local->cont.entrylk.cmd = ENTRYLK_UNLOCK; + if (local->cont.entrylk.xdata) + dict_unref (local->cont.entrylk.xdata); + local->cont.entrylk.xdata = NULL; + break; + default: + break; + } for (i = 0; i < 
priv->child_count; i++) { if (!local->replies[i].valid) @@ -3405,13 +3576,7 @@ afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this, if (local->replies[i].op_ret == -1) continue; - STACK_WIND_COOKIE (frame, afr_unlock_partial_inodelk_cbk, - (void*) (long) i, - priv->children[i], - priv->children[i]->fops->inodelk, - local->cont.inodelk.volume, - &local->loc, local->cont.inodelk.cmd, - &local->cont.inodelk.flock, 0); + afr_fop_lock_wind (frame, this, i, afr_unlock_partial_lock_cbk); if (!--call_count) break; @@ -3421,23 +3586,27 @@ afr_unlock_inodelks_and_unwind (call_frame_t *frame, xlator_t *this, } int32_t -afr_inodelk_done (call_frame_t *frame, xlator_t *this) +afr_fop_lock_done (call_frame_t *frame, xlator_t *this) { int i = 0; int lock_count = 0; + unsigned char *success = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; local = frame->local; priv = this->private; + success = alloca0(priv->child_count); for (i = 0; i < priv->child_count; i++) { if (!local->replies[i].valid) continue; - if (local->replies[i].op_ret == 0) + if (local->replies[i].op_ret == 0) { lock_count++; + success[i] = 1; + } if (local->op_ret == -1 && local->op_errno == EAGAIN) continue; @@ -3455,20 +3624,29 @@ afr_inodelk_done (call_frame_t *frame, xlator_t *this) local->op_errno = local->replies[i].op_errno; } - if (lock_count && local->cont.inodelk.flock.l_type != F_UNLCK && - (local->op_ret == -1 && local->op_errno == EAGAIN)) { - afr_unlock_inodelks_and_unwind (frame, this, - lock_count); + if (afr_fop_lock_is_unlock (frame) || (lock_count == 0)) + goto unwind; + + if (afr_is_conflicting_lock_present (local->op_ret, local->op_errno)) { + afr_unlock_locks_and_proceed (frame, this, lock_count); + } else if (priv->quorum_count && !afr_has_quorum (success, this)) { + local->fop_lock_state = AFR_FOP_LOCK_QUORUM_FAILED; + local->op_ret = -1; + local->op_errno = afr_quorum_errno (priv); + afr_unlock_locks_and_proceed (frame, this, lock_count); } else { - 
AFR_STACK_UNWIND (inodelk, frame, local->op_ret, - local->op_errno, local->xdata_rsp); + goto unwind; } return 0; +unwind: + afr_fop_lock_unwind (frame, local->op, local->op_ret, + local->op_errno, local->xdata_rsp); + return 0; } -int -afr_common_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +static int +afr_common_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { afr_local_t *local = NULL; @@ -3492,32 +3670,8 @@ afr_common_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } static int32_t -afr_parallel_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) - -{ - int call_count = 0; - - afr_common_inodelk_cbk (frame, cookie, this, op_ret, op_errno, xdata); - - call_count = afr_frame_return (frame); - if (call_count == 0) - afr_inodelk_done (frame, this); - - return 0; -} - -static gf_boolean_t -afr_is_conflicting_lock_present (int32_t op_ret, int32_t op_errno) -{ - if (op_ret == -1 && op_errno == EAGAIN) - return _gf_true; - return _gf_false; -} - -static int32_t -afr_serialized_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +afr_serialized_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { afr_local_t *local = NULL; @@ -3528,7 +3682,7 @@ afr_serialized_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; priv = this->private; - afr_common_inodelk_cbk (frame, cookie, this, op_ret, op_errno, xdata); + afr_common_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata); for (next_child = child_index + 1; next_child < priv->child_count; next_child++) { @@ -3538,80 +3692,123 @@ afr_serialized_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (afr_is_conflicting_lock_present (op_ret, op_errno) || (next_child == priv->child_count)) { - 
afr_inodelk_done (frame, this); + afr_fop_lock_done (frame, this); } else { - STACK_WIND_COOKIE (frame, afr_serialized_inodelk_cbk, - (void *) (long) next_child, - priv->children[next_child], - priv->children[next_child]->fops->inodelk, - (const char *)local->cont.inodelk.volume, - &local->loc, local->cont.inodelk.cmd, - &local->cont.inodelk.flock, - local->xdata_req); + afr_fop_lock_wind (frame, this, next_child, + afr_serialized_lock_cbk); } return 0; } static int -afr_parallel_inodelk_wind (call_frame_t *frame, xlator_t *this) +afr_serialized_lock_wind (call_frame_t *frame, xlator_t *this) { afr_private_t *priv = NULL; afr_local_t *local = NULL; - int call_count = 0; int i = 0; priv = this->private; local = frame->local; - call_count = local->call_count; for (i = 0; i < priv->child_count; i++) { - if (!local->child_up[i]) - continue; - STACK_WIND_COOKIE (frame, afr_parallel_inodelk_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->inodelk, - (const char *)local->cont.inodelk.volume, - &local->loc, local->cont.inodelk.cmd, - &local->cont.inodelk.flock, - local->xdata_req); - if (!--call_count) + if (local->child_up[i]) { + afr_fop_lock_wind (frame, this, i, + afr_serialized_lock_cbk); break; + } } return 0; } +static int32_t +afr_parallel_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) + +{ + int call_count = 0; + + afr_common_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata); + + call_count = afr_frame_return (frame); + if (call_count == 0) + afr_fop_lock_done (frame, this); + + return 0; +} + static int -afr_serialized_inodelk_wind (call_frame_t *frame, xlator_t *this) +afr_parallel_lock_wind (call_frame_t *frame, xlator_t *this) { afr_private_t *priv = NULL; afr_local_t *local = NULL; + int call_count = 0; int i = 0; priv = this->private; local = frame->local; + call_count = local->call_count; for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - 
STACK_WIND_COOKIE (frame, afr_serialized_inodelk_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->inodelk, - (const char *)local->cont.inodelk.volume, - &local->loc, local->cont.inodelk.cmd, - &local->cont.inodelk.flock, - local->xdata_req); + if (!local->child_up[i]) + continue; + afr_fop_lock_wind (frame, this, i, afr_parallel_lock_cbk); + if (!--call_count) break; - } } return 0; } -int32_t -afr_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, - struct gf_flock *flock, dict_t *xdata) +static int +afr_fop_handle_lock (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = frame->local; + int op_errno = 0; + + if (!afr_fop_lock_is_unlock (frame)) { + if (!afr_is_consistent_io_possible (local, this->private, + &op_errno)) + goto out; + + switch (local->op) { + case GF_FOP_INODELK: + case GF_FOP_FINODELK: + local->cont.inodelk.cmd = F_SETLK; + break; + case GF_FOP_ENTRYLK: + case GF_FOP_FENTRYLK: + local->cont.entrylk.cmd = ENTRYLK_LOCK_NB; + break; + default: + break; + } + } + + if (local->xdata_req) { + switch (local->op) { + case GF_FOP_INODELK: + case GF_FOP_FINODELK: + local->cont.inodelk.xdata = dict_ref (local->xdata_req); + break; + case GF_FOP_ENTRYLK: + case GF_FOP_FENTRYLK: + local->cont.entrylk.xdata = dict_ref (local->xdata_req); + break; + default: + break; + } + } + + local->fop_lock_state = AFR_FOP_LOCK_PARALLEL; + afr_parallel_lock_wind (frame, this); +out: + return -op_errno; +} + +static int32_t +afr_handle_inodelk (call_frame_t *frame, glusterfs_fop_t fop, + const char *volume, loc_t *loc, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) { afr_local_t *local = NULL; int32_t op_errno = ENOMEM; @@ -3620,154 +3817,90 @@ afr_inodelk (call_frame_t *frame, xlator_t *this, if (!local) goto out; - local->op = GF_FOP_INODELK; - if (!afr_inodelk_is_unlock (cmd, flock) && - !afr_is_consistent_io_possible (local, this->private, &op_errno)) - goto out; + local->op = 
fop; + if (loc) + loc_copy (&local->loc, loc); + if (fd) + local->fd = fd_ref (fd); - loc_copy (&local->loc, loc); local->cont.inodelk.volume = gf_strdup (volume); if (!local->cont.inodelk.volume) { op_errno = ENOMEM; goto out; } + local->cont.inodelk.in_cmd = cmd; local->cont.inodelk.cmd = cmd; + local->cont.inodelk.in_flock = *flock; local->cont.inodelk.flock = *flock; if (xdata) local->xdata_req = dict_ref (xdata); - /* At least one child is up */ - /* - * Non-blocking locks also need to be serialized. Otherwise there is - * a chance that both the mounts which issued same non-blocking inodelk - * may endup not acquiring the lock on any-brick. - * Ex: Mount1 and Mount2 - * request for full length lock on file f1. Mount1 afr may acquire the - * partial lock on brick-1 and may not acquire the lock on brick-2 - * because Mount2 already got the lock on brick-2, vice versa. Since - * both the mounts only got partial locks, afr treats them as failure in - * gaining the locks and unwinds with EAGAIN errno. 
- */ - if (flock->l_type == F_UNLCK) { - afr_parallel_inodelk_wind (frame, this); - } else { - afr_serialized_inodelk_wind (frame, this); - } - - return 0; + op_errno = -afr_fop_handle_lock (frame, frame->this); + if (op_errno) + goto out; + return 0; out: - AFR_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL); + afr_fop_lock_unwind (frame, fop, -1, op_errno, NULL); return 0; } - int32_t -afr_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) - +afr_inodelk (call_frame_t *frame, xlator_t *this, + const char *volume, loc_t *loc, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) { - afr_local_t *local = NULL; - int call_count = -1; - - local = frame->local; - - LOCK (&frame->lock); - { - if (op_ret == 0) - local->op_ret = 0; - - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) - AFR_STACK_UNWIND (finodelk, frame, local->op_ret, - local->op_errno, xdata); - + afr_handle_inodelk (frame, GF_FOP_INODELK, volume, loc, NULL, cmd, + flock, xdata); return 0; } - int32_t afr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata) { - afr_private_t *priv = NULL; + afr_handle_inodelk (frame, GF_FOP_FINODELK, volume, NULL, fd, cmd, + flock, xdata); + return 0; +} + +static int +afr_handle_entrylk (call_frame_t *frame, glusterfs_fop_t fop, + const char *volume, loc_t *loc, fd_t *fd, + const char *basename, entrylk_cmd cmd, + entrylk_type type, dict_t *xdata) +{ afr_local_t *local = NULL; - int i = 0; - int32_t call_count = 0; int32_t op_errno = ENOMEM; - priv = this->private; - - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; - - local->op = GF_FOP_FINODELK; - if (!afr_inodelk_is_unlock (cmd, flock) && - !afr_is_consistent_io_possible (local, this->private, &op_errno)) + local = AFR_FRAME_INIT (frame, op_errno); + if (!local) goto out; - 
local->cont.inodelk.volume = gf_strdup (volume); - if (!local->cont.inodelk.volume) { + local->op = fop; + if (loc) + loc_copy (&local->loc, loc); + if (fd) + local->fd = fd_ref (fd); + local->cont.entrylk.cmd = cmd; + local->cont.entrylk.in_cmd = cmd; + local->cont.entrylk.type = type; + local->cont.entrylk.volume = gf_strdup (volume); + local->cont.entrylk.basename = gf_strdup (basename); + if (!local->cont.entrylk.volume || !local->cont.entrylk.basename) { op_errno = ENOMEM; goto out; } - - local->fd = fd_ref (fd); - local->cont.inodelk.cmd = cmd; - local->cont.inodelk.flock = *flock; if (xdata) local->xdata_req = dict_ref (xdata); - call_count = local->call_count; - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND (frame, afr_finodelk_cbk, - priv->children[i], - priv->children[i]->fops->finodelk, - volume, fd, cmd, flock, xdata); - - if (!--call_count) - break; - } - } + op_errno = -afr_fop_handle_lock (frame, frame->this); + if (op_errno) + goto out; - return 0; -out: - AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL); return 0; -} - - -int32_t -afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - afr_local_t *local = NULL; - int call_count = -1; - - local = frame->local; - - LOCK (&frame->lock); - { - if (op_ret == 0) - local->op_ret = 0; - - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) - AFR_STACK_UNWIND (entrylk, frame, local->op_ret, - local->op_errno, xdata); - +out: + afr_fop_lock_unwind (frame, fop, -1, op_errno, NULL); return 0; } @@ -3776,115 +3909,18 @@ afr_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, const char *basename, entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int i = 0; - int32_t call_count = 0; - int32_t op_errno = 0; - - priv = this->private; - - local 
= AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; - - local->op = GF_FOP_ENTRYLK; - if (!afr_entrylk_is_unlock (cmd) && - !afr_is_consistent_io_possible (local, priv, &op_errno)) - goto out; - - local->cont.entrylk.cmd = cmd; - call_count = local->call_count; - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND (frame, afr_entrylk_cbk, - priv->children[i], - priv->children[i]->fops->entrylk, - volume, loc, basename, cmd, type, xdata); - - if (!--call_count) - break; - } - } - - return 0; -out: - AFR_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL); - + afr_handle_entrylk (frame, GF_FOP_ENTRYLK, volume, loc, NULL, basename, + cmd, type, xdata); return 0; } - - -int -afr_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) - -{ - afr_local_t *local = NULL; - int call_count = -1; - - local = frame->local; - - LOCK (&frame->lock); - { - if (op_ret == 0) - local->op_ret = 0; - - local->op_errno = op_errno; - } - UNLOCK (&frame->lock); - - call_count = afr_frame_return (frame); - - if (call_count == 0) - AFR_STACK_UNWIND (fentrylk, frame, local->op_ret, - local->op_errno, xdata); - - return 0; -} - - int afr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { - afr_private_t *priv = NULL; - afr_local_t *local = NULL; - int i = 0; - int32_t call_count = 0; - int32_t op_errno = ENOMEM; - - priv = this->private; - - local = AFR_FRAME_INIT (frame, op_errno); - if (!local) - goto out; - - local->op = GF_FOP_FENTRYLK; - if (!afr_entrylk_is_unlock (cmd) && - !afr_is_consistent_io_possible (local, priv, &op_errno)) - goto out; - - local->cont.entrylk.cmd = cmd; - call_count = local->call_count; - for (i = 0; i < priv->child_count; i++) { - if (local->child_up[i]) { - STACK_WIND (frame, afr_fentrylk_cbk, - priv->children[i], - priv->children[i]->fops->fentrylk, - volume, fd, 
basename, cmd, type, xdata); - - if (!--call_count) - break; - } - } - - return 0; -out: - AFR_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL); - + afr_handle_entrylk (frame, GF_FOP_FENTRYLK, volume, NULL, fd, basename, + cmd, type, xdata); return 0; } diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index 233672898f1..1f2a11755bf 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -669,7 +669,7 @@ afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) { gf_msg (this->name, GF_LOG_ERROR, op_errno, - AFR_MSG_INODE_UNLOCK_FAIL, + AFR_MSG_UNLOCK_FAIL, "path=%s gfid=%s: unlock failed on subvolume %s " "with lock owner %s", local->loc.path, loc_gfid_utoa (&(local->loc)), diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h index 5fb81c696d8..02eb206fd08 100644 --- a/xlators/cluster/afr/src/afr-messages.h +++ b/xlators/cluster/afr/src/afr-messages.h @@ -130,11 +130,11 @@ /*! * @messageid 108010 - * @diagnosis Inode unlocks failed on a brick. + * @diagnosis unlocks failed on a brick. * @recommendedaction Error number in the log should give the reason why it * failed. Also observe brick logs for more information. */ -#define AFR_MSG_INODE_UNLOCK_FAIL (GLFS_COMP_BASE_AFR + 10) +#define AFR_MSG_UNLOCK_FAIL (GLFS_COMP_BASE_AFR + 10) /*! 
* @messageid 108011 diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index cd666b8b690..bcb1100a660 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -136,14 +136,6 @@ afr_needs_changelog_update (afr_local_t *local) return _gf_false; } -static int32_t -afr_quorum_errno (afr_private_t *priv) -{ - if (priv->quorum_reads) - return ENOTCONN; - return EROFS; -} - int __afr_txn_write_fop (call_frame_t *frame, xlator_t *this) { diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 93f4ba3dddc..9bfb5812d8a 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -347,6 +347,11 @@ typedef struct { int readdir_subvol; } afr_fd_ctx_t; +typedef enum { + AFR_FOP_LOCK_PARALLEL, + AFR_FOP_LOCK_SERIAL, + AFR_FOP_LOCK_QUORUM_FAILED, +} afr_fop_lock_state_t; typedef struct _afr_local { glusterfs_fop_t op; @@ -664,11 +669,19 @@ typedef struct _afr_local { struct { char *volume; int32_t cmd; + int32_t in_cmd; + struct gf_flock in_flock; struct gf_flock flock; + void *xdata; } inodelk; struct { + char *volume; + char *basename; + entrylk_cmd in_cmd; entrylk_cmd cmd; + entrylk_type type; + void *xdata; } entrylk; struct { @@ -791,6 +804,7 @@ typedef struct _afr_local { gf_boolean_t need_full_crawl; gf_boolean_t compound; + afr_fop_lock_state_t fop_lock_state; } afr_local_t; @@ -1180,6 +1194,9 @@ int afr_set_in_flight_sb_status (xlator_t *this, afr_local_t *local, inode_t *inode); +int32_t +afr_quorum_errno (afr_private_t *priv); + gf_boolean_t afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv, int32_t *op_errno); |