diff options
-rw-r--r-- | tests/bugs/replicate/bug-1180545.t | 48 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 20 |
2 files changed, 66 insertions, 2 deletions
diff --git a/tests/bugs/replicate/bug-1180545.t b/tests/bugs/replicate/bug-1180545.t new file mode 100644 index 00000000000..748d5defa91 --- /dev/null +++ b/tests/bugs/replicate/bug-1180545.t @@ -0,0 +1,48 @@ +#!/bin/bash + +#Create gfid split-brain of directory and check if conservative merge +#completes successfully. + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd + +TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1} +TEST $CLI volume set $V0 cluster.heal-timeout 60 +TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; + +#Create files with alternate brick down. One file has gfid mismatch. +TEST mkdir $M0/DIR + +TEST kill_brick $V0 $H0 $B0/brick1 +TEST touch $M0/DIR/FILE +TEST touch $M0/DIR/file{1..5} +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +TEST kill_brick $V0 $H0 $B0/brick0 +TEST touch $M0/DIR/FILE +TEST touch $M0/DIR/file{6..10} +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +#Trigger heal and verify number of entries in backend +TEST $CLI volume set $V0 cluster.self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 + +EXPECT_WITHIN $HEAL_TIMEOUT '2' count_sh_entries $B0/brick0 +EXPECT_WITHIN $HEAL_TIMEOUT '2' count_sh_entries $B0/brick1 +#Two entries for DIR and two for FILE +EXPECT_WITHIN $HEAL_TIMEOUT "4" afr_get_pending_heal_count $V0 +TEST diff <(ls $B0/brick0/DIR) <(ls $B0/brick1/DIR) +cleanup diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index b78bfa99f20..28e332db740 100644 --- 
a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -503,6 +503,7 @@ afr_selfheal_entry_do_subvol (call_frame_t *frame, xlator_t *this, call_frame_t *iter_frame = NULL; xlator_t *subvol = NULL; afr_private_t *priv = NULL; + gf_boolean_t mismatch = _gf_false; priv = this->private; subvol = priv->children[child]; @@ -532,6 +533,11 @@ afr_selfheal_entry_do_subvol (call_frame_t *frame, xlator_t *this, entry->d_name); AFR_STACK_RESET (iter_frame); + if (ret == -1) { + /* gfid or type mismatch. */ + mismatch = _gf_true; + ret = 0; + } if (ret) break; } @@ -542,6 +548,9 @@ afr_selfheal_entry_do_subvol (call_frame_t *frame, xlator_t *this, } AFR_STACK_DESTROY (iter_frame); + if (mismatch == _gf_true) + /* undo pending will be skipped */ + ret = -1; return ret; } @@ -552,6 +561,7 @@ afr_selfheal_entry_do (call_frame_t *frame, xlator_t *this, fd_t *fd, { int i = 0; afr_private_t *priv = NULL; + gf_boolean_t mismatch = _gf_false; int ret = 0; priv = this->private; @@ -563,14 +573,20 @@ afr_selfheal_entry_do (call_frame_t *frame, xlator_t *this, fd_t *fd, if (i != source && !healed_sinks[i]) continue; ret = afr_selfheal_entry_do_subvol (frame, this, fd, i); + if (ret == -1) { + /* gfid or type mismatch. */ + mismatch = _gf_true; + continue; + } if (ret) break; } + if (mismatch == _gf_true) + /* undo pending will be skipped */ + ret = -1; return ret; } - - static int __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd, unsigned char *locked_on) |