diff options
-rw-r--r-- | tests/afr.rc | 11 | ||||
-rwxr-xr-x | tests/bugs/replicate/bug-1015990-rep.t | 14 | ||||
-rwxr-xr-x | tests/bugs/replicate/bug-1015990.t | 30 | ||||
-rw-r--r-- | tests/bugs/replicate/bug-1101647.t | 8 | ||||
-rw-r--r-- | tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t | 57 | ||||
-rw-r--r-- | tests/bugs/replicate/bug-957877.t | 2 | ||||
-rw-r--r-- | tests/bugs/unclassified/bug-874498.t | 2 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 2 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 14 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 54 |
10 files changed, 137 insertions, 57 deletions
diff --git a/tests/afr.rc b/tests/afr.rc index 721f24545d5..2aae292314a 100644 --- a/tests/afr.rc +++ b/tests/afr.rc @@ -73,11 +73,10 @@ function is_file_heal_done { function count_sh_entries() { - val1=0 - for g in $(ls $1/.glusterfs/indices/xattrop) - do - val1=$(( val1 + 1 )) - done + ls $1/.glusterfs/indices/xattrop | grep -v "xattrop-" | wc -l +} - echo $val1; +function count_index_entries() +{ + ls $1/.glusterfs/indices/xattrop | wc -l } diff --git a/tests/bugs/replicate/bug-1015990-rep.t b/tests/bugs/replicate/bug-1015990-rep.t index 4e959e6e70e..1b104969d10 100755 --- a/tests/bugs/replicate/bug-1015990-rep.t +++ b/tests/bugs/replicate/bug-1015990-rep.t @@ -32,8 +32,6 @@ sleep 5 for i in {1..100}; do echo "STRING" > $M0/File$i; done -brick_2_sh_entries=$(count_sh_entries $B0/$V0"2") -brick_4_sh_entries=$(count_sh_entries $B0/$V0"4") command_output=$(gluster volume heal $V0 statistics heal-count replica $H0:$B0/$V0"1") @@ -58,19 +56,9 @@ brick_2_entries_count=$count xattrop_count_brick_2=$(count_sh_entries $B0/$V0"2") -##Remove the count of the xattrop-gfid entry count as it does not contribute -##to the number of files to be healed -sub_val=1 -xattrop_count_brick_2=$(($xattrop_count_brick_2-$sub_val)) +EXPECT $brick_2_entries_count echo $xattrop_count_brick_2 -ret=0 -if [ "$xattrop_count_brick_2" -eq "$brick_2_entries_count" ] - then - ret=$(($ret + $sub_val)) -fi - -EXPECT "1" echo $ret ## Finish up TEST $CLI volume stop $V0; EXPECT 'Stopped' volinfo_field $V0 'Status'; diff --git a/tests/bugs/replicate/bug-1015990.t b/tests/bugs/replicate/bug-1015990.t index 48181c00329..a8b12f2c202 100755 --- a/tests/bugs/replicate/bug-1015990.t +++ b/tests/bugs/replicate/bug-1015990.t @@ -54,36 +54,10 @@ done <<< "$command_output" brick_2_entries_count=$(($count-$value)) brick_4_entries_count=$value - xattrop_count_brick_2=$(count_sh_entries $B0/$V0"2") -##Remove the count of the xattrop-gfid entry count as it does not contribute -##to the number of files to be healed - 
-sub_val=1 -xattrop_count_brick_2=$(($xattrop_count_brick_2-$sub_val)) - xattrop_count_brick_4=$(count_sh_entries $B0/$V0"4") -##Remove xattrop-gfid entry count - -xattrop_count_brick_4=$(($xattrop_count_brick_4-$sub_val)) - - -ret=0 -if [ "$xattrop_count_brick_2" -eq "$brick_2_entries_count" ] - then - ret=$(($ret + $sub_val)) -fi - -EXPECT "1" echo $ret - - -ret=0 -if [ "$xattrop_count_brick_4" -eq "$brick_4_entries_count" ] - then - ret=$(($ret + $sub_val)) -fi - -EXPECT "1" echo $ret +EXPECT $brick_2_entries_count echo $xattrop_count_brick_2 +EXPECT $brick_4_entries_count echo $xattrop_count_brick_4 ## Finish up TEST $CLI volume stop $V0; diff --git a/tests/bugs/replicate/bug-1101647.t b/tests/bugs/replicate/bug-1101647.t index 148af987f20..14e98c88c6b 100644 --- a/tests/bugs/replicate/bug-1101647.t +++ b/tests/bugs/replicate/bug-1101647.t @@ -13,13 +13,13 @@ TEST $CLI volume start $V0; TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 EXPECT_WITHIN 20 "Y" glustershd_up_status -#Create base entry in indices/xattrop and indices/base_indices_holder +#Create base entry in indices/xattrop echo "Data">$M0/file TEST $CLI volume heal $V0 -#Entries from indices/xattrop and indices/base_indices_holder should not be cleared after a heal. -EXPECT 1 count_sh_entries $B0/$V0"1" -EXPECT 1 count_sh_entries $B0/$V0"2" +#Entries from indices/xattrop should not be cleared after a heal. +EXPECT 1 count_index_entries $B0/$V0"1" +EXPECT 1 count_index_entries $B0/$V0"2" TEST kill_brick $V0 $H0 $B0/${V0}2 echo "More data">>$M0/file diff --git a/tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t b/tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t new file mode 100644 index 00000000000..a476563e31e --- /dev/null +++ b/tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t @@ -0,0 +1,57 @@ +#!/bin/bash +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. 
$(dirname $0)/../../afr.rc +cleanup; + +#Stale entries in xattrop folder for files which do not need heal must be removed during the next index heal crawl. + +TEST glusterd; +TEST pidof glusterd; +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}; +TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST $CLI volume start $V0; +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +TEST `echo hello>$M0/datafile` +TEST touch $M0/mdatafile + +#Create split-brain and reset the afr xattrs, so that we have only the entry inside xattrop folder. +#This is to simulate the case where the brick crashed just before pre-op happened, but index xlator created the entry inside xattrop folder. + +#Create data, metadata SB. +TEST kill_brick $V0 $H0 $B0/$V0"1" +TEST stat $M0/datafile +TEST `echo append>>$M0/datafile` +TEST chmod +x $M0/mdatafile +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 1 +TEST kill_brick $V0 $H0 $B0/$V0"0" +TEST stat $M0/datafile +TEST `echo append>>$M0/datafile` +TEST chmod +x $M0/mdatafile +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 0 +TEST ! cat $M0/datafile + +TEST $CLI volume set $V0 cluster.self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_in_shd $V0 1 + +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT '2' echo $(count_sh_entries $B0/$V0"0") +EXPECT_WITHIN $HEAL_TIMEOUT '2' echo $(count_sh_entries $B0/$V0"1") + +#Reset xattrs and trigger heal. 
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}0/datafile +TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000000 $B0/${V0}1/datafile + +TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}0/mdatafile +TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000000 $B0/${V0}1/mdatafile + +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "0" afr_get_pending_heal_count $V0 +EXPECT_WITHIN $HEAL_TIMEOUT '0' echo $(count_sh_entries $B0/$V0"0") +EXPECT_WITHIN $HEAL_TIMEOUT '0' echo $(count_sh_entries $B0/$V0"1") + +cleanup diff --git a/tests/bugs/replicate/bug-957877.t b/tests/bugs/replicate/bug-957877.t index 12901723880..bcce7e3c9e7 100644 --- a/tests/bugs/replicate/bug-957877.t +++ b/tests/bugs/replicate/bug-957877.t @@ -23,7 +23,7 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 TEST $CLI volume heal $V0 # Wait for self-heal to complete -EXPECT_WITHIN $HEAL_TIMEOUT '1' count_sh_entries $BRICK; +EXPECT_WITHIN $HEAL_TIMEOUT '0' count_sh_entries $BRICK; TEST getfattr -n "user.foo" $B0/${V0}0/f1; diff --git a/tests/bugs/unclassified/bug-874498.t b/tests/bugs/unclassified/bug-874498.t index 8d409d033f7..f362fd601eb 100644 --- a/tests/bugs/unclassified/bug-874498.t +++ b/tests/bugs/unclassified/bug-874498.t @@ -56,7 +56,7 @@ TEST $CLI volume heal $V0 ##Expected number of entries are 0 in the .glusterfs/indices/xattrop directory -EXPECT_WITHIN $HEAL_TIMEOUT '1' count_sh_entries $FILEN; +EXPECT_WITHIN $HEAL_TIMEOUT '0' count_sh_entries $FILEN; TEST $CLI volume stop $V0; TEST $CLI volume delete $V0; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 92ecb7fb99b..f7cc202d4d1 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -4574,7 +4574,7 @@ afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc) ret = afr_selfheal_do (frame, this, loc->gfid); - if (ret 
== 1) { + if (ret == 1 || ret == 2) { ret = dict_set_str (dict, "sh-fail-msg", "File not in split-brain"); if (ret) diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index fd450be0890..2441f413f3e 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1330,6 +1330,11 @@ afr_selfheal_do (call_frame_t *frame, xlator_t *this, uuid_t gfid) if (ret) goto out; + if (!(data_selfheal || metadata_selfheal || entry_selfheal)) { + ret = 2; + goto out; + } + if (data_selfheal) data_ret = afr_selfheal_data (frame, this, inode); @@ -1358,9 +1363,12 @@ out: return ret; } /* - * This is the entry point for healing a given GFID - * The function returns 0 if self-heal was successful, appropriate errno - * in case of a failure and 1 in case self-heal was never needed on the gfid. + * This is the entry point for healing a given GFID. The return values for this + * function are as follows: + * '0' if the self-heal is successful + * '1' if the afr-xattrs are non-zero (due to on-going IO) and no heal is needed + * '2' if the afr-xattrs are all-zero and no heal is needed + * $errno if the heal on the gfid failed. 
*/ int diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index cb5bf6ce197..c38440b4c9b 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -238,6 +238,53 @@ afr_shd_index_purge (xlator_t *subvol, inode_t *inode, char *name) return ret; } +void +afr_shd_zero_xattrop (xlator_t *this, uuid_t gfid) +{ + + call_frame_t *frame = NULL; + inode_t *inode = NULL; + afr_private_t *priv = NULL; + dict_t *xattr = NULL; + int ret = 0; + int i = 0; + int raw[AFR_NUM_CHANGE_LOGS] = {0}; + + priv = this->private; + frame = afr_frame_create (this); + if (!frame) + goto out; + inode = afr_inode_find (this, gfid); + if (!inode) + goto out; + xattr = dict_new(); + if (!xattr) + goto out; + ret = dict_set_static_bin (xattr, AFR_DIRTY, raw, + sizeof(int) * AFR_NUM_CHANGE_LOGS); + if (ret) + goto out; + for (i = 0; i < priv->child_count; i++) { + ret = dict_set_static_bin (xattr, priv->pending_key[i], raw, + sizeof(int) * AFR_NUM_CHANGE_LOGS); + if (ret) + goto out; + } + + /*Send xattrop to all bricks. Doing a lookup to see if bricks are up or + * have valid replies for this gfid seems a bit of an overkill.*/ + for (i = 0; i < priv->child_count; i++) + afr_selfheal_post_op (frame, this, inode, i, xattr); + +out: + if (frame) + AFR_STACK_DESTROY (frame); + if (inode) + inode_unref (inode); + if (xattr) + dict_unref (xattr); + return; +} int afr_shd_selfheal_name (struct subvol_healer *healer, int child, uuid_t parent, @@ -369,6 +416,13 @@ afr_shd_index_heal (xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, if (ret == -ENOENT || ret == -ESTALE) afr_shd_index_purge (subvol, parent->inode, entry->d_name); + if (ret == 2) + /* If bricks crashed in pre-op after creating indices/xattrop + * link but before setting afr changelogs, we end up with stale + * xattrop links but zero changelogs. Remove such entries by + * sending a post-op with zero changelogs. 
+ */ + afr_shd_zero_xattrop (healer->this, gfid); return 0; } |