From 8250b0bf29d5423bf6014b45b43a22c6aace5576 Mon Sep 17 00:00:00 2001
From: Richard Wareing
Date: Tue, 22 Sep 2015 22:55:40 -0700
Subject: cluster/afr: AFR2 Discovery entry heal flow should only happen on root gfid

Summary:
- Prevents entry self-heal flow from happening on non-root GFIDs

Test Plan:
- Run prove -v tests/bugs/fb8149516.t

Reviewers: dph, moox, sshreyas

Reviewed By: sshreyas

Differential Revision: https://phabricator.fb.com/D2470622

Change-Id: Id8559f2cfeb6e1e5c26dc1571854c0fbc0b59e08
Signed-off-by: Jeff Darcy
Reviewed-on: https://review.gluster.org/18250
Reviewed-by: Jeff Darcy
Tested-by: Jeff Darcy
CentOS-regression: Gluster Build System
Smoke: Gluster Build System
---
 tests/bugs/fb8149516.t               | 17 +++++++++++++----
 xlators/cluster/afr/src/afr-common.c |  7 ++++---
 2 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/tests/bugs/fb8149516.t b/tests/bugs/fb8149516.t
index 54372794c6f..3679ab8f93a 100644
--- a/tests/bugs/fb8149516.t
+++ b/tests/bugs/fb8149516.t
@@ -16,6 +16,8 @@ TEST $CLI volume set $V0 cluster.entry-self-heal off
 TEST $CLI volume set $V0 cluster.data-self-heal off
 TEST $CLI volume set $V0 cluster.metadata-self-heal off
 TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 nfs.write-size 524288
+TEST $CLI volume set $V0 nfs.read-size 524288
 TEST $CLI volume start $V0
 TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
 cd $M0
@@ -28,13 +30,20 @@ TEST kill_brick $V0 $H0 $B0/${V0}2
 TEST rm -rf $B0/${V0}2/testfile*
 TEST rm -rf $B0/${V0}2/.glusterfs
 
-TEST $CLI volume start $V0 force
+TEST $CLI volume stop $V0
+sleep 5
+TEST $CLI volume start $V0
 EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN 30 "0" get_pending_heal_count $V0
 
 # Verify we see all ten files when ls'ing, without the patch this should
 # return no files and fail.
-FILE_LIST=($(\ls $M0))
-TEST "((${#FILE_LIST[@]} == 10))"
-EXPECT_WITHIN 30 "0" get_pending_heal_count $V0
+how_many_files () {
+        \ls $M0 2> /dev/null | wc -l
+}
+# Once it's triggered, entry self-heal happens asynchronously and might take
+# a long-ish time before the missing entries appear, but it does complete.
+# To accommodate that, retry for a little while.
+EXPECT_WITHIN 20 "10" how_many_files
 
 cleanup
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 06860448fc5..94f43f008a5 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1100,15 +1100,16 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this, int error)
         }
 
         local = frame->local;
-        priv = this->private;
+        priv = this->private;
         ret = afr_replies_interpret (frame, this, local->refreshinode,
                                      &start_heal);
 
         err = afr_inode_refresh_err (frame, this);
 
-        if (priv->did_discovery == _gf_false ||
-            (afr_selfheal_enabled (this) && start_heal)) {
+        if ((ret && afr_selfheal_enabled (this)) ||
+            (priv->did_discovery == _gf_false &&
+             AFR_IS_ROOT_GFID (local->refreshinode->gfid))) {
                 heal_frame = copy_frame (frame);
                 if (!heal_frame)
                         goto refresh_done;
 
-- 
cgit