diff options
-rw-r--r-- | tests/afr.rc | 6 | ||||
-rw-r--r-- | tests/basic/afr/afr-anon-inode-no-quorum.t | 63 | ||||
-rw-r--r-- | tests/basic/afr/afr-anon-inode.t | 114 | ||||
-rw-r--r-- | tests/basic/afr/entry-self-heal-anon-dir-off.t | 459 | ||||
-rw-r--r-- | tests/basic/afr/rename-data-loss.t | 72 | ||||
-rw-r--r-- | tests/bugs/replicate/bug-1744548-heal-timeout.t | 6 | ||||
-rwxr-xr-x | tests/features/trash.t | 74 | ||||
-rw-r--r-- | tests/volume.rc | 2 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 46 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-dir-read.c | 12 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 182 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 205 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-name.c | 33 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 5 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 178 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.h | 2 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.c | 41 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 11 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 39 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 5 |
20 files changed, 1427 insertions, 128 deletions
diff --git a/tests/afr.rc b/tests/afr.rc index 5fc7fa1898d..241789903ba 100644 --- a/tests/afr.rc +++ b/tests/afr.rc @@ -115,3 +115,9 @@ function afr_private_key_value() #xargs at the end will strip leading spaces grep -E "^${key} = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'=' | xargs } + +function afr_anon_entry_count() +{ + local b=$1 + ls $b/.glusterfs-anonymous-inode* | wc -l +} diff --git a/tests/basic/afr/afr-anon-inode-no-quorum.t b/tests/basic/afr/afr-anon-inode-no-quorum.t new file mode 100644 index 00000000000..896ba0c9b2c --- /dev/null +++ b/tests/basic/afr/afr-anon-inode-no-quorum.t @@ -0,0 +1,63 @@ +#!/bin/bash + +#Test that anon-inode entry is not cleaned up as long as there exists at least +#one valid entry +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume heal $V0 disable +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume set $V0 performance.readdir-ahead off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume start $V0 + +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 + +TEST touch $M0/a $M0/b + +gfid_a=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/a)) +gfid_b=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/b)) +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST mv $M0/a $M0/a-new +TEST mv $M0/b $M0/b-new + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST ! ls $M0/a +TEST ! ls $M0/b +anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode) +TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a +TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b +#Make sure index heal doesn't happen after enabling heal +TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1 +TEST rm -f $B0/${V0}1/.glusterfs/indices/xattrop/* +TEST $CLI volume heal $V0 enable +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 +#Allow time for a scan +sleep 5 +TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a +TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b +inum_b=$(STAT_INO $B0/${V0}0/$anon_inode_name/$gfid_b) +TEST rm -f $M0/a-new +TEST stat $M0/b-new + +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1 +EXPECT "$inum_b" STAT_INO $B0/${V0}0/b-new + +cleanup diff --git a/tests/basic/afr/afr-anon-inode.t b/tests/basic/afr/afr-anon-inode.t new file mode 100644 index 00000000000..f4cf37a2fa0 --- /dev/null +++ b/tests/basic/afr/afr-anon-inode.t @@ -0,0 +1,114 @@ +#!/bin/bash +#Tests that afr-anon-inode test cases work fine as expected +#These are cases where in entry-heal/name-heal we dont know entry for an inode +#so these inodes are kept in a special directory + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2} +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; +EXPECT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" +TEST $CLI volume set $V0 cluster.use-anonymous-inode no +EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" +TEST $CLI volume set $V0 cluster.use-anonymous-inode yes +EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode" +TEST mkdir -p $M0/d1/b $M0/d2/a +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST mv $M0/d2/a $M0/d1 +TEST mv $M0/d1/b $M0/d2 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode) +TEST [[ -d $B0/${V0}1/$anon_inode_name ]] +TEST [[ -d $B0/${V0}2/$anon_inode_name ]] +anon_gfid=$(gf_get_gfid_xattr $B0/${V0}0/$anon_inode_name) +EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}1/$anon_inode_name +EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}2/$anon_inode_name + +TEST ! ls $M0/$anon_inode_name +EXPECT "^4$" echo $(ls -a $M0 | wc -l) + +#Test purging code path by shd +TEST $CLI volume heal $V0 disable +TEST mkdir $M0/l0 $M0/l1 $M0/l2 +TEST touch $M0/del-file $M0/del-file-nolink $M0/l0/file +TEST ln $M0/del-file $M0/del-file-link +TEST ln $M0/l0/file $M0/l1/file-link1 +TEST ln $M0/l0/file $M0/l2/file-link2 +TEST mkdir -p $M0/del-recursive-dir/d1 + +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST rm -f $M0/del-file $M0/del-file-nolink +TEST rm -rf $M0/del-recursive-dir +TEST mv $M0/d1/a $M0/d2 +TEST mv $M0/l0/file $M0/l0/renamed-file +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 0 + +nolink_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file-nolink)) +link_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file)) +dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-recursive-dir)) +rename_dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/d1/a)) +rename_file_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/l0/file)) +TEST ! stat $M0/del-file +TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid +TEST ! stat $M0/del-file-nolink +TEST ! stat $B0/${V0}0/$anon_inode_name/$nolink_gfid +TEST ! stat $M0/del-recursive-dir +TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid +TEST ! stat $M0/d1/a +TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid +TEST ! stat $M0/l0/file +TEST stat $B0/${V0}0/$anon_inode_name/$rename_file_gfid + +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST mv $M0/l1/file-link1 $M0/l1/renamed-file-link1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1 +TEST ! stat $M0/l1/file-link1 +TEST stat $B0/${V0}1/$anon_inode_name/$rename_file_gfid + +TEST kill_brick $V0 $H0 $B0/${V0}2 +TEST mv $M0/l2/file-link2 $M0/l2/renamed-file-link2 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 2 +TEST ! stat $M0/l2/file-link2 +TEST stat $B0/${V0}2/$anon_inode_name/$rename_file_gfid + +#Simulate only anon-inodes present in all bricks +TEST rm -f $M0/l0/renamed-file $M0/l1/renamed-file-link1 $M0/l2/renamed-file-link2 + +#Test that shd doesn't cleanup anon-inodes when some bricks are down +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST $CLI volume heal $V0 enable +$CLI volume heal $V0 +sleep 5 #Allow time for completion of one scan +TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid +TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid +TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid +rename_dir_inum=$(STAT_INO $B0/${V0}0/$anon_inode_name/$rename_dir_gfid) + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}2 + +#Test that rename indeed happened instead of rmdir/mkdir +renamed_dir_inum=$(STAT_INO $B0/${V0}0/d2/a) +EXPECT "$rename_dir_inum" echo $renamed_dir_inum +cleanup; diff --git a/tests/basic/afr/entry-self-heal-anon-dir-off.t b/tests/basic/afr/entry-self-heal-anon-dir-off.t new file mode 100644 index 00000000000..7bb6ee14193 --- /dev/null +++ b/tests/basic/afr/entry-self-heal-anon-dir-off.t @@ -0,0 +1,459 @@ +#!/bin/bash + +#This file checks if missing entry self-heal and entry self-heal are working +#as expected. +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +function get_file_type { + stat -c "%a:%F:%g:%t:%T:%u" $1 +} + +function diff_dirs { + diff <(ls $1 | sort) <(ls $2 | sort) +} + +function heal_status { + local f1_path="${1}/${3}" + local f2_path="${2}/${3}" + local insync="" + diff_dirs $f1_path $f2_path + if [ $? -eq 0 ]; + then + insync="Y" + else + insync="N" + fi + local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path) + local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path) + local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path) + local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path) + local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path) + local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path) + if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi + if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi + if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi + if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi + if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi + if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi + echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2} +} + +function is_heal_done { + local zero_xattr="000000000000000000000000" + if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ]; + then + echo "Y" + else + echo "N" + fi +} + +function print_pending_heals { + local result=":" + for i in "$@"; + do + if [ "N" == $(is_heal_done $B0/${V0}0 $B0/${V0}1 $i) ]; + then + result="$result:$i" + fi + done +#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names + if [ $result == ":" ]; then result="~"; fi + echo $result +} + +zero_xattr="000000000000000000000000" +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume heal $V0 disable +TEST $CLI volume set $V0 cluster.use-anonymous-inode off +TEST $CLI volume set $V0 performance.write-behind off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume set $V0 performance.readdir-ahead off +TEST $CLI volume set $V0 performance.open-behind off +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.io-cache off +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 cluster.data-self-heal on +TEST $CLI volume set $V0 cluster.metadata-self-heal on +TEST $CLI volume set $V0 cluster.entry-self-heal on +TEST $CLI volume start $V0 + +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0 +cd $M0 +#_me_ is dir on which missing entry self-heal happens, _heal is where dir self-heal happens +#spb is split-brain, fool is all fool + +#source_self_accusing means there exists source and a sink which self-accuses. +#This simulates failures where fops failed on the bricks without it going down. +#Something like EACCESS/EDQUOT etc + +TEST mkdir spb_heal spb spb_me_heal spb_me fool_heal fool_me v1_fool_heal v1_fool_me source_creations_heal source_deletions_heal source_creations_me source_deletions_me v1_dirty_me v1_dirty_heal source_self_accusing +TEST mkfifo source_deletions_heal/fifo +TEST mknod source_deletions_heal/block b 4 5 +TEST mknod source_deletions_heal/char c 1 5 +TEST touch source_deletions_heal/file +TEST ln -s source_deletions_heal/file source_deletions_heal/slink +TEST mkdir source_deletions_heal/dir1 +TEST mkdir source_deletions_heal/dir1/dir2 + +TEST mkfifo source_deletions_me/fifo +TEST mknod source_deletions_me/block b 4 5 +TEST mknod source_deletions_me/char c 1 5 +TEST touch source_deletions_me/file +TEST ln -s source_deletions_me/file source_deletions_me/slink +TEST mkdir source_deletions_me/dir1 +TEST mkdir source_deletions_me/dir1/dir2 + +TEST mkfifo source_self_accusing/fifo +TEST mknod source_self_accusing/block b 4 5 +TEST mknod source_self_accusing/char c 1 5 +TEST touch source_self_accusing/file +TEST ln -s source_self_accusing/file source_self_accusing/slink +TEST mkdir source_self_accusing/dir1 +TEST mkdir source_self_accusing/dir1/dir2 + +TEST kill_brick $V0 $H0 $B0/${V0}0 + +TEST touch spb_heal/0 spb/0 spb_me_heal/0 spb_me/0 fool_heal/0 fool_me/0 v1_fool_heal/0 v1_fool_me/0 v1_dirty_heal/0 v1_dirty_me/0 +TEST rm -rf source_deletions_heal/fifo source_deletions_heal/block source_deletions_heal/char source_deletions_heal/file source_deletions_heal/slink source_deletions_heal/dir1 +TEST rm -rf source_deletions_me/fifo source_deletions_me/block source_deletions_me/char source_deletions_me/file source_deletions_me/slink source_deletions_me/dir1 +TEST rm -rf source_self_accusing/fifo source_self_accusing/block source_self_accusing/char source_self_accusing/file source_self_accusing/slink source_self_accusing/dir1 + +#Test that the files are deleted +TEST ! stat $B0/${V0}1/source_deletions_heal/fifo +TEST ! stat $B0/${V0}1/source_deletions_heal/block +TEST ! stat $B0/${V0}1/source_deletions_heal/char +TEST ! stat $B0/${V0}1/source_deletions_heal/file +TEST ! stat $B0/${V0}1/source_deletions_heal/slink +TEST ! stat $B0/${V0}1/source_deletions_heal/dir1 +TEST ! stat $B0/${V0}1/source_deletions_me/fifo +TEST ! stat $B0/${V0}1/source_deletions_me/block +TEST ! stat $B0/${V0}1/source_deletions_me/char +TEST ! stat $B0/${V0}1/source_deletions_me/file +TEST ! stat $B0/${V0}1/source_deletions_me/slink +TEST ! stat $B0/${V0}1/source_deletions_me/dir1 +TEST ! stat $B0/${V0}1/source_self_accusing/fifo +TEST ! stat $B0/${V0}1/source_self_accusing/block +TEST ! stat $B0/${V0}1/source_self_accusing/char +TEST ! stat $B0/${V0}1/source_self_accusing/file +TEST ! stat $B0/${V0}1/source_self_accusing/slink +TEST ! stat $B0/${V0}1/source_self_accusing/dir1 + + +TEST mkfifo source_creations_heal/fifo +TEST mknod source_creations_heal/block b 4 5 +TEST mknod source_creations_heal/char c 1 5 +TEST touch source_creations_heal/file +TEST ln -s source_creations_heal/file source_creations_heal/slink +TEST mkdir source_creations_heal/dir1 +TEST mkdir source_creations_heal/dir1/dir2 + +TEST mkfifo source_creations_me/fifo +TEST mknod source_creations_me/block b 4 5 +TEST mknod source_creations_me/char c 1 5 +TEST touch source_creations_me/file +TEST ln -s source_creations_me/file source_creations_me/slink +TEST mkdir source_creations_me/dir1 +TEST mkdir source_creations_me/dir1/dir2 + +$CLI volume stop $V0 + +#simulate fool fool scenario for fool_* dirs +setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/{fool_heal,fool_me} +setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me} +setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me} + +#Simulate v1-dirty(self-accusing but no pending ops on others) scenario for v1-dirty +setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/v1_dirty_{heal,me} +setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/v1_dirty_{heal,me} + +$CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST kill_brick $V0 $H0 $B0/${V0}1 + +TEST touch spb_heal/1 spb/0 spb_me_heal/1 spb_me/0 fool_heal/1 fool_me/1 v1_fool_heal/1 v1_fool_me/1 + +$CLI volume stop $V0 + +#simulate fool fool scenario for fool_* dirs +setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0/{fool_heal,fool_me} +setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me} +setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me} + +#simulate self-accusing for source_self_accusing +TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000006 $B0/${V0}0/source_self_accusing + +$CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +# Check if conservative merges happened correctly on _me_ dirs +TEST stat spb_me_heal/1 +TEST stat $B0/${V0}0/spb_me_heal/1 +TEST stat $B0/${V0}1/spb_me_heal/1 + +TEST stat spb_me_heal/0 +TEST stat $B0/${V0}0/spb_me_heal/0 +TEST stat $B0/${V0}1/spb_me_heal/0 + +TEST stat fool_me/1 +TEST stat $B0/${V0}0/fool_me/1 +TEST stat $B0/${V0}1/fool_me/1 + +TEST stat fool_me/0 +TEST stat $B0/${V0}0/fool_me/0 +TEST stat $B0/${V0}1/fool_me/0 + +TEST stat v1_fool_me/0 +TEST stat $B0/${V0}0/v1_fool_me/0 +TEST stat $B0/${V0}1/v1_fool_me/0 + +TEST stat v1_fool_me/1 +TEST stat $B0/${V0}0/v1_fool_me/1 +TEST stat $B0/${V0}1/v1_fool_me/1 + +TEST stat v1_dirty_me/0 +TEST stat $B0/${V0}0/v1_dirty_me/0 +TEST stat $B0/${V0}1/v1_dirty_me/0 + +#Check if files that have gfid-mismatches in _me_ are giving EIO +TEST ! stat spb_me/0 + +#Check if stale files are deleted on access +TEST ! stat source_deletions_me/fifo +TEST ! stat $B0/${V0}0/source_deletions_me/fifo +TEST ! stat $B0/${V0}1/source_deletions_me/fifo +TEST ! stat source_deletions_me/block +TEST ! stat $B0/${V0}0/source_deletions_me/block +TEST ! stat $B0/${V0}1/source_deletions_me/block +TEST ! stat source_deletions_me/char +TEST ! stat $B0/${V0}0/source_deletions_me/char +TEST ! stat $B0/${V0}1/source_deletions_me/char +TEST ! stat source_deletions_me/file +TEST ! stat $B0/${V0}0/source_deletions_me/file +TEST ! stat $B0/${V0}1/source_deletions_me/file +TEST ! stat source_deletions_me/file +TEST ! stat $B0/${V0}0/source_deletions_me/file +TEST ! stat $B0/${V0}1/source_deletions_me/file +TEST ! stat source_deletions_me/dir1/dir2 +TEST ! stat $B0/${V0}0/source_deletions_me/dir1/dir2 +TEST ! stat $B0/${V0}1/source_deletions_me/dir1/dir2 +TEST ! stat source_deletions_me/dir1 +TEST ! stat $B0/${V0}0/source_deletions_me/dir1 +TEST ! stat $B0/${V0}1/source_deletions_me/dir1 + +#Test if the files created as part of access are healed correctly +r=$(get_file_type source_creations_me/fifo) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/fifo +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/fifo +TEST [ -p source_creations_me/fifo ] + +r=$(get_file_type source_creations_me/block) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/block +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/block +EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/block +EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/block +TEST [ -b source_creations_me/block ] + +r=$(get_file_type source_creations_me/char) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/char +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/char +EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/char +EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/char +TEST [ -c source_creations_me/char ] + +r=$(get_file_type source_creations_me/file) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/file +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/file +TEST [ -f source_creations_me/file ] + +r=$(get_file_type source_creations_me/slink) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/slink +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/slink +TEST [ -h source_creations_me/slink ] + +r=$(get_file_type source_creations_me/dir1/dir2) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1/dir2 +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1/dir2 +TEST [ -d source_creations_me/dir1/dir2 ] + +r=$(get_file_type source_creations_me/dir1) +EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1 +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1 +TEST [ -d source_creations_me/dir1 ] + +#Trigger heal and check _heal dirs are healed properly +#Trigger change in event generation number. That way inodes would get refreshed during lookup +TEST kill_brick $V0 $H0 $B0/${V0}1 +$CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +TEST stat spb_heal +TEST stat spb_me_heal +TEST stat fool_heal +TEST stat fool_me +TEST stat v1_fool_heal +TEST stat v1_fool_me +TEST stat source_deletions_heal +TEST stat source_deletions_me +TEST stat source_self_accusing +TEST stat source_creations_heal +TEST stat source_creations_me +TEST stat v1_dirty_heal +TEST stat v1_dirty_me +TEST $CLI volume stop $V0 +TEST rm -rf $B0/${V0}{0,1}/.glusterfs/indices/xattrop/* + +$CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +#Create base entry in indices/xattrop +echo "Data" > $M0/FILE +rm -f $M0/FILE +EXPECT "1" count_index_entries $B0/${V0}0 +EXPECT "1" count_index_entries $B0/${V0}1 + +TEST $CLI volume stop $V0; + +#Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal +create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1 + +$CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 + +$CLI volume heal $V0 enable +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 + +TEST $CLI volume heal $V0; +EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals spb_heal spb_me_heal fool_heal fool_me v1_fool_heal v1_fool_me source_deletions_heal source_deletions_me source_creations_heal source_creations_me v1_dirty_heal v1_dirty_me source_self_accusing + +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_me_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_me +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_me +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_me +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_self_accusing +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_me +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_heal +EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_me + +#Don't access the files/dirs from mount point as that may cause self-heals +# Check if conservative merges happened correctly on heal dirs +TEST stat $B0/${V0}0/spb_heal/1 +TEST stat $B0/${V0}1/spb_heal/1 + +TEST stat $B0/${V0}0/spb_heal/0 +TEST stat $B0/${V0}1/spb_heal/0 + +TEST stat $B0/${V0}0/fool_heal/1 +TEST stat $B0/${V0}1/fool_heal/1 + +TEST stat $B0/${V0}0/fool_heal/0 +TEST stat $B0/${V0}1/fool_heal/0 + +TEST stat $B0/${V0}0/v1_fool_heal/0 +TEST stat $B0/${V0}1/v1_fool_heal/0 + +TEST stat $B0/${V0}0/v1_fool_heal/1 +TEST stat $B0/${V0}1/v1_fool_heal/1 + +TEST stat $B0/${V0}0/v1_dirty_heal/0 +TEST stat $B0/${V0}1/v1_dirty_heal/0 + +#Check if files that have gfid-mismatches in spb are giving EIO +TEST ! stat spb/0 + +#Check if stale files are deleted on access +TEST ! stat $B0/${V0}0/source_deletions_heal/fifo +TEST ! stat $B0/${V0}1/source_deletions_heal/fifo +TEST ! stat $B0/${V0}0/source_deletions_heal/block +TEST ! stat $B0/${V0}1/source_deletions_heal/block +TEST ! stat $B0/${V0}0/source_deletions_heal/char +TEST ! stat $B0/${V0}1/source_deletions_heal/char +TEST ! stat $B0/${V0}0/source_deletions_heal/file +TEST ! stat $B0/${V0}1/source_deletions_heal/file +TEST ! stat $B0/${V0}0/source_deletions_heal/file +TEST ! stat $B0/${V0}1/source_deletions_heal/file +TEST ! stat $B0/${V0}0/source_deletions_heal/dir1/dir2 +TEST ! stat $B0/${V0}1/source_deletions_heal/dir1/dir2 +TEST ! stat $B0/${V0}0/source_deletions_heal/dir1 +TEST ! stat $B0/${V0}1/source_deletions_heal/dir1 + +#Check if stale files are deleted on access +TEST ! stat $B0/${V0}0/source_self_accusing/fifo +TEST ! stat $B0/${V0}1/source_self_accusing/fifo +TEST ! stat $B0/${V0}0/source_self_accusing/block +TEST ! stat $B0/${V0}1/source_self_accusing/block +TEST ! stat $B0/${V0}0/source_self_accusing/char +TEST ! stat $B0/${V0}1/source_self_accusing/char +TEST ! stat $B0/${V0}0/source_self_accusing/file +TEST ! stat $B0/${V0}1/source_self_accusing/file +TEST ! stat $B0/${V0}0/source_self_accusing/file +TEST ! stat $B0/${V0}1/source_self_accusing/file +TEST ! stat $B0/${V0}0/source_self_accusing/dir1/dir2 +TEST ! stat $B0/${V0}1/source_self_accusing/dir1/dir2 +TEST ! stat $B0/${V0}0/source_self_accusing/dir1 +TEST ! stat $B0/${V0}1/source_self_accusing/dir1 + +#Test if the files created as part of full self-heal correctly +r=$(get_file_type $B0/${V0}0/source_creations_heal/fifo) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/fifo +TEST [ -p $B0/${V0}0/source_creations_heal/fifo ] +EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/block +EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/block + +r=$(get_file_type $B0/${V0}0/source_creations_heal/block) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/block + +r=$(get_file_type $B0/${V0}0/source_creations_heal/char) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/char +EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/char +EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/char + +r=$(get_file_type $B0/${V0}0/source_creations_heal/file) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file +TEST [ -f $B0/${V0}0/source_creations_heal/file ] + +r=$(get_file_type source_creations_heal/file $B0/${V0}0/slink) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file slink +TEST [ -h $B0/${V0}0/source_creations_heal/slink ] + +r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1/dir2) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1/dir2 +TEST [ -d $B0/${V0}0/source_creations_heal/dir1/dir2 ] + +r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1) +EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1 +TEST [ -d $B0/${V0}0/source_creations_heal/dir1 ] + +cd - + +#Anonymous directory shouldn't be created +TEST mkdir $M0/rename-dir +before_rename=$(STAT_INO $B0/${V0}1/rename-dir) +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST mv $M0/rename-dir $M0/new-name +TEST $CLI volume start $V0 force +#'spb' is in split-brain so pending-heal-count will be 2 +EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0 +after_rename=$(STAT_INO $B0/${V0}1/new-name) +EXPECT "0" echo $(ls -a $B0/${V0}0/ | grep anonymous-inode | wc -l) +EXPECT "0" echo $(ls -a $B0/${V0}1/ | grep anonymous-inode | wc -l) +EXPECT_NOT "$before_rename" echo $after_rename +cleanup diff --git a/tests/basic/afr/rename-data-loss.t b/tests/basic/afr/rename-data-loss.t new file mode 100644 index 00000000000..256ee2aafce --- /dev/null +++ b/tests/basic/afr/rename-data-loss.t @@ -0,0 +1,72 @@ +#!/bin/bash +#Self-heal tests +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1} +TEST $CLI volume set $V0 write-behind off +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume set $V0 data-self-heal off +TEST $CLI volume set $V0 metadata-self-heal off +TEST $CLI volume set $V0 entry-self-heal off +TEST $CLI volume start $V0 +EXPECT 'Started' volinfo_field $V0 'Status' +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; + +cd $M0 +TEST `echo "line1" >> file1` +TEST mkdir dir1 +TEST mkdir dir2 +TEST mkdir -p dir1/dira/dirb +TEST `echo "line1">>dir1/dira/dirb/file1` +TEST mkdir delete_me +TEST `echo "line1" >> delete_me/file1` + +#brick0 has witnessed the second write while brick1 is down. +TEST kill_brick $V0 $H0 $B0/brick1 +TEST `echo "line2" >> file1` +TEST `echo "line2" >> dir1/dira/dirb/file1` +TEST `echo "line2" >> delete_me/file1` + +#Toggle the bricks that are up/down. +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 +TEST kill_brick $V0 $H0 $B0/brick0 + +#Rename when the 'source' brick0 for data-selfheals is down. +mv file1 file2 +mv dir1/dira dir2 + +#Delete a dir when brick0 is down. +rm -rf delete_me +cd - + +#Bring everything up and trigger heal +TEST $CLI volume set $V0 self-heal-daemon on +TEST $CLI volume start $V0 force +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick1 + +#Remount to avoid reading from caches +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; +EXPECT "line2" tail -1 $M0/file2 +EXPECT "line2" tail -1 $M0/dir2/dira/dirb/file1 +TEST ! stat $M0/delete_me/file1 +TEST ! stat $M0/delete_me + +anon_inode_name=$(ls -a $B0/brick0 | grep glusterfs-anonymous-inode) +TEST [[ -d $B0/brick0/$anon_inode_name ]] +TEST [[ -d $B0/brick1/$anon_inode_name ]] +cleanup diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t index c208112c8b0..011535066f9 100644 --- a/tests/bugs/replicate/bug-1744548-heal-timeout.t +++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t @@ -25,14 +25,14 @@ TEST ! $CLI volume heal $V0 TEST $CLI volume profile $V0 start TEST $CLI volume profile $V0 info clear TEST $CLI volume heal $V0 enable -# Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes -EXPECT_WITHIN $HEAL_TIMEOUT "^333$" get_cumulative_opendir_count +# Each brick does 4 opendirs, corresponding to dirty, xattrop and entry-changes, anonymous-inode +EXPECT_WITHIN 4 "^444$" get_cumulative_opendir_count # Check that a change in heal-timeout is honoured immediately. TEST $CLI volume set $V0 cluster.heal-timeout 5 sleep 10 # Two crawls must have happened. -EXPECT_WITHIN $HEAL_TIMEOUT "^999$" get_cumulative_opendir_count +EXPECT_WITHIN $CHILD_UP_TIMEOUT "^121212$" get_cumulative_opendir_count # shd must not heal if it is disabled and heal-timeout is changed. TEST $CLI volume heal $V0 disable diff --git a/tests/features/trash.t b/tests/features/trash.t index 472e909e567..da5b50bc85a 100755 --- a/tests/features/trash.t +++ b/tests/features/trash.t @@ -94,105 +94,105 @@ wildcard_not_exists() { if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi } -# testing glusterd [1-3] +# testing glusterd TEST glusterd TEST pidof glusterd TEST $CLI volume info -# creating distributed volume [4] +# creating distributed volume TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2} -# checking volume status [5-7] +# checking volume status EXPECT "$V0" volinfo_field $V0 'Volume Name' EXPECT 'Created' volinfo_field $V0 'Status' EXPECT '2' brick_count $V0 -# test without enabling trash translator [8] +# test without enabling trash translator TEST start_vol $V0 $M0 -# test on enabling trash translator [9-10] +# test on enabling trash translator TEST $CLI volume set $V0 features.trash on EXPECT 'on' volinfo_field $V0 'features.trash' -# files directly under mount point [11] +# files directly under mount point create_files $M0/file1 $M0/file2 TEST file_exists $V0 file1 file2 -# perform unlink [12] +# perform unlink TEST unlink_op file1 -# perform truncate [13] +# perform truncate TEST truncate_op file2 4 -# create files directory hierarchy and check [14] +# create files directory hierarchy and check mkdir -p $M0/1/2/3 create_files $M0/1/2/3/foo1 $M0/1/2/3/foo2 TEST file_exists $V0 1/2/3/foo1 1/2/3/foo2 -# perform unlink [15] +# perform unlink TEST unlink_op 1/2/3/foo1 -# perform truncate [16] +# perform truncate TEST truncate_op 1/2/3/foo2 4 # create a directory for eliminate pattern mkdir $M0/a -# set the eliminate pattern [17-18] +# set the eliminate pattern TEST $CLI volume set $V0 features.trash-eliminate-path /a EXPECT '/a' volinfo_field $V0 'features.trash-eliminate-path' -# create two files and check [19] +# create two files and check create_files $M0/a/test1 $M0/a/test2 TEST file_exists $V0 a/test1 a/test2 -# remove from eliminate pattern [20] +# remove from eliminate pattern rm -f $M0/a/test1 EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test1* -# truncate from eliminate path [21-23] +# truncate from eliminate path truncate -s 2 $M0/a/test2 TEST [ -e $M0/a/test2 ] TEST [ `ls -l $M0/a/test2 | awk '{print $5}'` -eq 2 ] EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test2* -# set internal op on [24-25] +# set internal op on TEST $CLI volume set $V0 features.trash-internal-op on EXPECT 'on' volinfo_field $V0 'features.trash-internal-op' -# again create two files and check [26] +# again create two files and check create_files $M0/inop1 $M0/inop2 TEST file_exists $V0 inop1 inop2 -# perform unlink [27] +# perform unlink TEST unlink_op inop1 -# perform truncate [28] +# perform truncate TEST truncate_op inop2 4 -# remove one brick and restart the volume [28-31] +# remove one brick and restart the volume TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 TEST $CLI volume stop $V0 TEST start_vol $V0 $M0 $M0/.trashcan -# again create two files and check [33] +# again create two files and check create_files $M0/rebal1 $M0/rebal2 TEST file_exists $V0 rebal1 rebal2 -# add one brick [34-35] +# add one brick TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3 TEST [ -d $B0/${V0}3 ] -# perform rebalance [36] +# perform rebalance TEST $CLI volume rebalance $V0 start force EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed #Find out which file was migrated to the new brick file_name=$(ls $B0/${V0}3/rebal*| xargs basename) -# check whether rebalance was succesful [37-40] +# check whether rebalance was succesful EXPECT "Y" wildcard_exists $B0/${V0}3/$file_name* EXPECT "Y" wildcard_exists $B0/${V0}1/.trashcan/internal_op/$file_name* @@ -201,52 +201,42 @@ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 # force required in case rebalance is not over TEST $CLI volume stop $V0 force -# create a replicated volume [41] +# create a replicated volume TEST $CLI volume create $V1 replica 2 $H0:$B0/${V1}{1,2} -# checking volume status [42-45] +# checking volume status EXPECT "$V1" volinfo_field $V1 'Volume Name' EXPECT 'Replicate' volinfo_field $V1 'Type' EXPECT 'Created' volinfo_field $V1 'Status' EXPECT '2' brick_count $V1 -# enable trash with options and start the replicate volume by disabling automatic self-heal [46-50] +# enable trash with options and start the replicate volume by disabling automatic self-heal TEST $CLI volume set $V1 features.trash on TEST $CLI volume set $V1 features.trash-internal-op on EXPECT 'on' volinfo_field $V1 'features.trash' EXPECT 'on' volinfo_field $V1 'features.trash-internal-op' TEST start_vol $V1 $M1 $M1/.trashcan -# mount and check for trash directory [51] +# mount and check for trash directory TEST [ -d $M1/.trashcan/internal_op ] -# create a file and check [52] +# create a file and check touch $M1/self TEST [ -e $B0/${V1}1/self -a -e $B0/${V1}2/self ] -# kill one brick and delete the file from mount point [53-54] +# kill one brick and delete the file from mount point kill_brick $V1 $H0 $B0/${V1}1 EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count rm -f $M1/self EXPECT "Y" wildcard_exists $B0/${V1}2/.trashcan/self* -# force start the volume and trigger the self-heal manually [55-57] -TEST $CLI volume start $V1 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status -# Since we created the file under root of the volume, it will be -# healed automatically - -# check for the removed file in trashcan [58] -EXPECT_WITHIN $HEAL_TIMEOUT "Y" wildcard_exists $B0/${V1}1/.trashcan/internal_op/self* - -# check renaming of trash directory through cli [59-62] +# check renaming of trash directory through cli TEST $CLI volume set $V0 trash-dir abc TEST start_vol $V0 $M0 $M0/abc TEST [ -e $M0/abc -a ! -e $M0/.trashcan ] EXPECT "Y" wildcard_exists $B0/${V0}1/abc/internal_op/rebal* -# ensure that rename and delete operation on trash directory fails [63-65] +# ensure that rename and delete operation on trash directory fails rm -rf $M0/abc/internal_op TEST [ -e $M0/abc/internal_op ] rm -rf $M0/abc/ diff --git a/tests/volume.rc b/tests/volume.rc index 3924baeb7fc..b38848c0e52 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -999,4 +999,4 @@ function logging_time_check() local logfile=`echo ${0##*/}`_glusterd1.log cat $logdir/1/$logfile | tail -n 2 | head -n 1 | grep $(date +%H:%M) | wc -l -}
\ No newline at end of file +} diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 4c8fa31b679..1a4341a66d9 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -45,6 +45,41 @@ afr_quorum_errno(afr_private_t *priv) return ENOTCONN; } +gf_boolean_t +afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, + pid_t pid) +{ + if (!__is_root_gfid(pargfid)) { + return _gf_false; + } + + if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) { + /*For backward compatibility /.landfill is private*/ + return _gf_true; + } + + if (pid == GF_CLIENT_PID_GSYNCD) { + /*geo-rep needs to create/sync private directory on slave because + * it appears in changelog*/ + return _gf_false; + } + + if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) { + if (strcmp(name, priv->anon_inode_name) == 0) { + /* anonymous-inode dir is private*/ + return _gf_true; + } + } else { + if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) == + 0) { + /* anonymous-inode dir prefix is private for geo-rep to work*/ + return _gf_true; + } + } + + return _gf_false; +} + void afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, unsigned char *replies) @@ -3978,11 +4013,10 @@ afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) return 0; } - if (__is_root_gfid(loc->parent->gfid)) { - if (!strcmp(loc->name, GF_REPLICATE_TRASH_DIR)) { - op_errno = EPERM; - goto out; - } + if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name, + frame->root->pid)) { + op_errno = EPERM; + goto out; } local = AFR_FRAME_INIT(frame, op_errno); @@ -5660,6 +5694,7 @@ afr_priv_dump(xlator_t *this) priv->background_self_heal_count); gf_proc_dump_write("healers", "%d", priv->healers); gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode); + gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode); if (priv->quorum_count == AFR_QUORUM_AUTO) { gf_proc_dump_write("quorum-type", "auto"); } else if (priv->quorum_count == 0) { @@ -6653,6 +6688,7 @@ afr_priv_destroy(afr_private_t *priv) GF_FREE(priv->local); GF_FREE(priv->pending_key); GF_FREE(priv->children); + GF_FREE(priv->anon_inode); GF_FREE(priv->child_up); GF_FREE(priv->halo_child_up); GF_FREE(priv->child_latency); diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index f69013f3e0a..f8bf8340dab 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -164,8 +164,8 @@ afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol) } static void -afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, - gf_dirent_t *entries, fd_t *fd) +afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries, + int subvol, gf_dirent_t *entries, fd_t *fd) { int ret = -1; gf_dirent_t *entry = NULL; @@ -183,8 +183,8 @@ afr_readdir_transform_entries(gf_dirent_t *subvol_entries, int subvol, list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list) { - if (__is_root_gfid(fd->inode->gfid) && - !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) { + if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name, + frame->root->pid)) { continue; } @@ -228,8 +228,8 @@ afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, } if (op_ret >= 0) - afr_readdir_transform_entries(subvol_entries, (long)cookie, &entries, - local->fd); + afr_readdir_transform_entries(frame, subvol_entries, (long)cookie, + &entries, local->fd); AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata); diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index f35c41df274..a580a1584cc 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -2750,3 +2750,185 @@ afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources, out: return source; } + +static int +afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + afr_local_t *local = frame->local; + int i = (long)cookie; + + local->replies[i].valid = 1; + local->replies[i].op_ret = op_ret; + local->replies[i].op_errno = op_errno; + if (op_ret == 0) { + local->op_ret = 0; + local->replies[i].poststat = *buf; + local->replies[i].preparent = *preparent; + local->replies[i].postparent = *postparent; + } + if (xdata) { + local->replies[i].xdata = dict_ref(xdata); + } + + syncbarrier_wake(&local->barrier); + return 0; +} + +int +afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode) +{ + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = this->private; + unsigned char *mkdir_on = alloca0(priv->child_count); + unsigned char *lookup_on = alloca0(priv->child_count); + loc_t loc = {0}; + int32_t op_errno = 0; + int32_t child_op_errno = 0; + struct iatt iatt = {0}; + dict_t *xdata = NULL; + uuid_t anon_inode_gfid = {0}; + int mkdir_count = 0; + int i = 0; + + /*Try to mkdir everywhere and return success if the dir exists on 'child' + */ + + if (!priv->use_anon_inode) { + op_errno = EINVAL; + goto out; + } + + frame = afr_frame_create(this, &op_errno); + if (op_errno) { + goto out; + } + local = frame->local; + if (!local->child_up[child]) { + /*Other bricks may need mkdir so don't error out yet*/ + child_op_errno = ENOTCONN; + } + gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid); + for (i = 0; i < priv->child_count; i++) { + if (!local->child_up[i]) + continue; + + if (priv->anon_inode[i]) { + mkdir_on[i] = 0; + } else { + mkdir_on[i] = 1; + mkdir_count++; + } + } + + if (mkdir_count == 0) { + *linked_inode = inode_find(this->itable, anon_inode_gfid); + if (*linked_inode) { + op_errno = 0; + goto out; + } + } + + loc.parent = inode_ref(this->itable->root); + loc.name = priv->anon_inode_name; + loc.inode = inode_new(this->itable); + if (!loc.inode) { + op_errno = ENOMEM; + goto out; + } + + xdata = dict_new(); + if (!xdata) { + op_errno = ENOMEM; + goto out; + } + + op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true); + if (op_errno) { + goto out; + } + + if (mkdir_count == 0) { + memcpy(lookup_on, local->child_up, priv->child_count); + goto lookup; + } + + AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0, + xdata); + + for (i = 0; i < priv->child_count; i++) { + if (!mkdir_on[i]) { + continue; + } + + if (local->replies[i].op_ret == 0) { + priv->anon_inode[i] = 1; + iatt = local->replies[i].poststat; + } else if (local->replies[i].op_ret < 0 && + local->replies[i].op_errno == EEXIST) { + lookup_on[i] = 1; + } else if (i == child) { + child_op_errno = local->replies[i].op_errno; + } + } + + if (AFR_COUNT(lookup_on, priv->child_count) == 0) { + goto link; + } + +lookup: + AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc, + xdata); + for (i = 0; i < priv->child_count; i++) { + if (!lookup_on[i]) { + continue; + } + + if (local->replies[i].op_ret == 0) { + if (gf_uuid_compare(anon_inode_gfid, + local->replies[i].poststat.ia_gfid) == 0) { + priv->anon_inode[i] = 1; + iatt = local->replies[i].poststat; + } else { + if (i == child) + child_op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA, + "%s has gfid: %s", priv->anon_inode_name, + uuid_utoa(local->replies[i].poststat.ia_gfid)); + } + } else if (i == child) { + child_op_errno = local->replies[i].op_errno; + } + } +link: + if (!gf_uuid_is_null(iatt.ia_gfid)) { + *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt); + if (*linked_inode) { + op_errno = 0; + inode_lookup(*linked_inode); + } else { + op_errno = ENOMEM; + } + goto out; + } + +out: + if (xdata) + dict_unref(xdata); + loc_wipe(&loc); + /*child_op_errno takes precedence*/ + if (child_op_errno == 0) { + child_op_errno = op_errno; + } + + if (child_op_errno && *linked_inode) { + inode_unref(*linked_inode); + *linked_inode = NULL; + } + if (frame) + AFR_STACK_DESTROY(frame); + return -child_op_errno; +} diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index ac31751997f..64893f441e3 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -16,54 +16,170 @@ #include <glusterfs/syncop-utils.h> #include <glusterfs/events.h> -static int -afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, - inode_t *inode, int child, struct afr_reply *replies) +int +afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, + struct afr_reply *replies, + gf_boolean_t *anon_inode) { afr_private_t *priv = NULL; + afr_local_t *local = NULL; xlator_t *subvol = NULL; int ret = 0; + int i = 0; + char g[64] = {0}; + unsigned char *lookup_success = NULL; + call_frame_t *frame = NULL; + loc_t loc2 = { + 0, + }; loc_t loc = { 0, }; - char g[64]; priv = this->private; - subvol = priv->children[child]; + lookup_success = alloca0(priv->child_count); + uuid_utoa_r(replies[child].poststat.ia_gfid, g); + loc.inode = inode_new(inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + + if (replies[child].poststat.ia_type == IA_IFDIR) { + /* This directory may have sub-directory hierarchy which may need to + * be preserved for subsequent heals. So unconditionally move the + * directory to anonymous-inode directory*/ + *anon_inode = _gf_true; + goto anon_inode; + } + + frame = afr_frame_create(this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + local = frame->local; + gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid); + AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc, + NULL); + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == 0) { + lookup_success[i] = 1; + } else if (local->replies[i].op_errno != ENOENT && + local->replies[i].op_errno != ESTALE) { + ret = -local->replies[i].op_errno; + } + } + + if (priv->quorum_count) { + if (afr_has_quorum(lookup_success, this, NULL)) { + *anon_inode = _gf_true; + } + } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) { + *anon_inode = _gf_true; + } else if (ret) { + goto out; + } + +anon_inode: + if (!*anon_inode) { + ret = 0; + goto out; + } loc.parent = inode_ref(dir); gf_uuid_copy(loc.pargfid, dir->gfid); loc.name = name; - loc.inode = inode_ref(inode); - if (replies[child].valid && replies[child].op_ret == 0) { - switch (replies[child].poststat.ia_type) { - case IA_IFDIR: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), - name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), - subvol->name); - ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), - name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), - subvol->name); - ret = syncop_unlink(subvol, &loc, NULL, NULL); - break; - } + ret = afr_anon_inode_create(this, child, &loc2.parent); + if (ret < 0) + goto out; + + loc2.name = g; + ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "Rename to %s dir %s/%s (%s) on %s failed", + priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, + subvol->name); + } else { + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "Rename to %s dir %s/%s (%s) on %s successful", + priv->anon_inode_name, uuid_utoa(dir->gfid), name, g, + subvol->name); } +out: loc_wipe(&loc); + loc_wipe(&loc2); + if (frame) { + AFR_STACK_DESTROY(frame); + } return ret; } int +afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, struct afr_reply *replies) +{ + char g[64] = {0}; + afr_private_t *priv = NULL; + xlator_t *subvol = NULL; + int ret = 0; + loc_t loc = { + 0, + }; + gf_boolean_t anon_inode = _gf_false; + + priv = this->private; + subvol = priv->children[child]; + + if ((!replies[child].valid) || (replies[child].op_ret < 0)) { + /*Nothing to do*/ + ret = 0; + goto out; + } + + if (priv->use_anon_inode) { + ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child, + replies, &anon_inode); + if (ret < 0 || anon_inode) + goto out; + } + + loc.parent = inode_ref(dir); + loc.inode = inode_new(inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + loc.name = name; + switch (replies[child].poststat.ia_type) { + case IA_IFDIR: + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name, + uuid_utoa_r(replies[child].poststat.ia_gfid, g), + subvol->name); + ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL); + break; + default: + gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid), + name, uuid_utoa_r(replies[child].poststat.ia_gfid, g), + subvol->name); + ret = syncop_unlink(subvol, &loc, NULL, NULL); + break; + } + +out: + loc_wipe(&loc); + return ret; +} + +int afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, unsigned char *sources, inode_t *dir, const char *name, inode_t *inode, @@ -76,6 +192,9 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, loc_t srcloc = { 0, }; + loc_t anonloc = { + 0, + }; xlator_t *this = frame->this; afr_private_t *priv = NULL; dict_t *xdata = NULL; @@ -86,15 +205,17 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, 0, }; unsigned char *newentry = NULL; - char dir_uuid_str[64] = {0}, iatt_uuid_str[64] = {0}; + char iatt_uuid_str[64] = {0}; + char dir_uuid_str[64] = {0}; priv = this->private; iatt = &replies[source].poststat; + uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str); if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) { gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED, "Invalid ia_type (%d) or gfid(%s). source brick=%d, " "pargfid=%s, name=%s", - iatt->ia_type, uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str), source, + iatt->ia_type, iatt_uuid_str, source, uuid_utoa_r(dir->gfid, dir_uuid_str), name); ret = -EINVAL; goto out; @@ -120,14 +241,24 @@ afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source, srcloc.inode = inode_ref(inode); gf_uuid_copy(srcloc.gfid, iatt->ia_gfid); - if (iatt->ia_type != IA_IFDIR) - ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); - if (iatt->ia_type == IA_IFDIR || ret == -ENOENT || ret == -ESTALE) { + ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0); + if (ret == -ENOENT || ret == -ESTALE) { newentry[dst] = 1; ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies, sources, newentry); if (ret) goto out; + } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) { + // Try rename from hidden directory + ret = afr_anon_inode_create(this, dst, &anonloc.parent); + if (ret < 0) + goto out; + anonloc.inode = inode_ref(inode); + anonloc.name = iatt_uuid_str; + ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL); + if (ret == -ENOENT || ret == -ESTALE) + ret = -1; /*This sets 'mismatch' to true*/ + goto out; } mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type); @@ -166,6 +297,7 @@ out: GF_FREE(linkname); loc_wipe(&loc); loc_wipe(&srcloc); + loc_wipe(&anonloc); return ret; } @@ -578,6 +710,11 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, priv = this->private; + if (afr_is_private_directory(priv, fd->inode->gfid, name, + GF_CLIENT_PID_SELF_HEALD)) { + return 0; + } + xattr = dict_new(); if (!xattr) return -ENOMEM; @@ -626,7 +763,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, replies); if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) { - ret = afr_shd_index_purge(subvol, parent_idx_inode, name, + ret = afr_shd_entry_purge(subvol, parent_idx_inode, name, inode->ia_type); /* Why is ret force-set to 0? We do not care about * index purge failing for full heal as it is quite @@ -756,10 +893,6 @@ afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd, if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) continue; - if (__is_root_gfid(fd->inode->gfid) && - !strcmp(entry->d_name, GF_REPLICATE_TRASH_DIR)) - continue; - ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name, loc.inode, subvol, local->need_full_crawl); @@ -822,7 +955,7 @@ afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry, /* The name indices under the pgfid index dir are guaranteed * to be regular files. Hence the hardcoding. */ - afr_shd_index_purge(subvol, parent->inode, entry->d_name, IA_IFREG); + afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG); ret = 0; goto out; } diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index dd40c57ab12..834aac86d48 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -98,21 +98,12 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, const char *bname, inode_t *inode, struct afr_reply *replies) { - loc_t loc = { - 0, - }; int i = 0; afr_private_t *priv = NULL; - char g[64]; int ret = 0; priv = this->private; - loc.parent = inode_ref(parent); - gf_uuid_copy(loc.pargfid, pargfid); - loc.name = bname; - loc.inode = inode_ref(inode); - for (i = 0; i < priv->child_count; i++) { if (!replies[i].valid) continue; @@ -120,30 +111,10 @@ __afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid, if (replies[i].op_ret) continue; - switch (replies[i].poststat.ia_type) { - case IA_IFDIR: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging dir %s/%s (%s) on %s", uuid_utoa(pargfid), - bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), - priv->children[i]->name); - - ret |= syncop_rmdir(priv->children[i], &loc, 1, NULL, NULL); - break; - default: - gf_msg(this->name, GF_LOG_WARNING, 0, - AFR_MSG_EXPUNGING_FILE_OR_DIR, - "expunging file %s/%s (%s) on %s", uuid_utoa(pargfid), - bname, uuid_utoa_r(replies[i].poststat.ia_gfid, g), - priv->children[i]->name); - - ret |= syncop_unlink(priv->children[i], &loc, NULL, NULL); - break; - } + ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i, + replies); } - loc_wipe(&loc); - return ret; } diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 7a038fa7fe3..48e6dbcfb18 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -369,4 +369,9 @@ gf_boolean_t afr_is_file_empty_on_all_children(afr_private_t *priv, struct afr_reply *replies); +int +afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name, + inode_t *inode, int child, struct afr_reply *replies); +int +afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode); #endif /* !_AFR_SELFHEAL_H */ diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index f2e08908c24..109fd4b7421 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -222,7 +222,7 @@ out: } int -afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name, +afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name, ia_type_t type) { int ret = 0; @@ -424,7 +424,7 @@ afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, ret = afr_shd_selfheal(healer, healer->subvol, gfid); if (ret == -ENOENT || ret == -ESTALE) - afr_shd_index_purge(subvol, parent->inode, entry->d_name, val); + afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val); if (ret == 2) /* If bricks crashed in pre-op after creating indices/xattrop @@ -843,6 +843,176 @@ out: return need_heal; } +static int +afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, + void *data) +{ + struct subvol_healer *healer = data; + afr_private_t *priv = healer->this->private; + call_frame_t *frame = NULL; + afr_local_t *local = NULL; + int ret = 0; + loc_t loc = {0}; + int count = 0; + int i = 0; + int op_errno = 0; + struct iatt *iatt = NULL; + gf_boolean_t multiple_links = _gf_false; + unsigned char *gfid_present = alloca0(priv->child_count); + unsigned char *entry_present = alloca0(priv->child_count); + char *type = "file"; + + frame = afr_frame_create(healer->this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + local = frame->local; + if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) { + gf_msg_debug(healer->this->name, 0, + "Not all bricks are up. Skipping " + "cleanup of %s on %s", + entry->d_name, subvol->name); + ret = 0; + goto out; + } + + loc.inode = inode_new(parent->inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + ret = gf_uuid_parse(entry->d_name, loc.gfid); + if (ret) { + ret = 0; + goto out; + } + AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc, + NULL); + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == 0) { + count++; + gfid_present[i] = 1; + iatt = &local->replies[i].poststat; + if (iatt->ia_type == IA_IFDIR) { + type = "dir"; + } + + if (i == healer->subvol) { + if (local->replies[i].poststat.ia_nlink > 1) { + multiple_links = _gf_true; + } + } + } else if (local->replies[i].op_errno != ENOENT && + local->replies[i].op_errno != ESTALE) { + /*We don't have complete view. Skip the entry*/ + gf_msg_debug(healer->this->name, local->replies[i].op_errno, + "Skipping cleanup of %s on %s", entry->d_name, + subvol->name); + ret = 0; + goto out; + } + } + + /*Inode is deleted from subvol*/ + if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) { + gf_msg(healer->this->name, GF_LOG_WARNING, 0, + AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type, + priv->anon_inode_name, entry->d_name, subvol->name); + ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name, + iatt->ia_type); + if (ret == -ENOENT || ret == -ESTALE) + ret = 0; + } else if (count > 1) { + loc_wipe(&loc); + loc.parent = inode_ref(parent->inode); + loc.name = entry->d_name; + loc.inode = inode_new(parent->inode->table); + if (!loc.inode) { + ret = -ENOMEM; + goto out; + } + AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, + &loc, NULL); + count = 0; + for (i = 0; i < priv->child_count; i++) { + if (local->replies[i].op_ret == 0) { + count++; + entry_present[i] = 1; + iatt = &local->replies[i].poststat; + } else if (local->replies[i].op_errno != ENOENT && + local->replies[i].op_errno != ESTALE) { + /*We don't have complete view. Skip the entry*/ + gf_msg_debug(healer->this->name, local->replies[i].op_errno, + "Skipping cleanup of %s on %s", entry->d_name, + subvol->name); + ret = 0; + goto out; + } + } + for (i = 0; i < priv->child_count; i++) { + if (gfid_present[i] && !entry_present[i]) { + /*Entry is not anonymous on at least one subvol*/ + gf_msg_debug(healer->this->name, 0, + "Valid entry present on %s " + "Skipping cleanup of %s on %s", + priv->children[i]->name, entry->d_name, + subvol->name); + ret = 0; + goto out; + } + } + + gf_msg(healer->this->name, GF_LOG_WARNING, 0, + AFR_MSG_EXPUNGING_FILE_OR_DIR, + "expunging %s %s/%s on all subvols", type, priv->anon_inode_name, + entry->d_name); + ret = 0; + for (i = 0; i < priv->child_count; i++) { + op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent, + entry->d_name, iatt->ia_type); + if (op_errno != ENOENT && op_errno != ESTALE) { + ret |= -op_errno; + } + } + } + +out: + if (frame) + AFR_STACK_DESTROY(frame); + loc_wipe(&loc); + return ret; +} + +static void +afr_cleanup_anon_inode_dir(struct subvol_healer *healer) +{ + int ret = 0; + call_frame_t *frame = NULL; + afr_private_t *priv = healer->this->private; + loc_t loc = {0}; + + ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode); + if (ret) + goto out; + + frame = afr_frame_create(healer->this, &ret); + if (!frame) { + ret = -ret; + goto out; + } + + ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc, + GF_CLIENT_PID_SELF_HEALD, healer, + afr_shd_anon_inode_cleaner, NULL, + priv->shd.max_threads, priv->shd.wait_qlength); +out: + if (frame) + AFR_STACK_DESTROY(frame); + loc_wipe(&loc); + return; +} + void * afr_shd_index_healer(void *data) { @@ -900,6 +1070,10 @@ afr_shd_index_healer(void *data) sleep(1); } while (ret > 0); + if (ret == 0) { + afr_cleanup_anon_inode_dir(healer); + } + if (ret == 0 && pre_crawl_xdata && !healer->crawl_event.heal_failed_count) { afr_shd_ta_check_and_unset_xattrs(this, &loc, healer, diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index 687c28e6472..18db728ea7b 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -70,6 +70,6 @@ afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid, char **path_p); int -afr_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name, +afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name, ia_type_t type); #endif /* !_AFR_SELF_HEALD_H */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 67e0a4d10be..df7366f0a65 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -135,6 +135,27 @@ set_data_self_heal_algorithm(afr_private_t *priv, char *algo) } } +void +afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options) +{ + char *volfile_id_str = NULL; + uuid_t anon_inode_gfid = {0}; + + /*If volume id is not present don't enable anything*/ + if (dict_get_str(options, "volume-id", &volfile_id_str)) + return; + GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX); + /*anon_inode_name is not supposed to change once assigned*/ + if (!priv->anon_inode_name[0]) { + snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s", + AFR_ANON_DIR_PREFIX, volfile_id_str); + gf_uuid_parse(volfile_id_str, anon_inode_gfid); + /*Flip a bit to make sure volfile-id and anon-gfid are not same*/ + anon_inode_gfid[0] ^= 1; + uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str); + } +} + int reconfigure(xlator_t *this, dict_t *options) { @@ -290,6 +311,10 @@ reconfigure(xlator_t *this, dict_t *options) consistent_io = _gf_false; priv->consistent_io = consistent_io; + afr_handle_anon_inode_options(priv, options); + + GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool, + out); if (priv->shd.enabled) { if ((priv->shd.enabled != enabled_old) || (timeout_old != priv->shd.timeout)) @@ -541,7 +566,9 @@ init(xlator_t *this) GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out); GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out); + afr_handle_anon_inode_options(priv, this->options); + GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out); if (priv->quorum_count != 0) priv->consistent_io = _gf_false; @@ -553,6 +580,9 @@ init(xlator_t *this) goto out; } + priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count, + gf_afr_mt_char); + priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count, gf_afr_mt_char); @@ -561,7 +591,8 @@ init(xlator_t *this) priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count, gf_afr_mt_char); - if (!priv->child_up || !priv->child_latency || !priv->halo_child_up) { + if (!priv->child_up || !priv->child_latency || !priv->halo_child_up || + !priv->anon_inode) { ret = -ENOMEM; goto out; } @@ -1286,6 +1317,14 @@ struct volume_options options[] = { .tags = {"replicate"}, .description = "This option exists only for backward compatibility " "and configuring it doesn't have any effect"}, + {.key = {"use-anonymous-inode"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "no", + .op_version = {GD_OP_VERSION_8_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE, + .tags = {"replicate"}, + .description = "Setting this option heals directory renames efficiently"}, + {.key = {NULL}}, }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 2e60708accf..d62f9a9caf2 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -42,6 +42,7 @@ #define AFR_LK_HEAL_DOM "afr.lock-heal.domain" #define AFR_HALO_MAX_LATENCY 99999 +#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode" #define PFLAG_PENDING (1 << 0) #define PFLAG_SBRAIN (1 << 1) @@ -190,6 +191,7 @@ typedef struct _afr_private { struct list_head ta_waitq; struct list_head ta_onwireq; + unsigned char *anon_inode; unsigned char *child_up; unsigned char *halo_child_up; int64_t *child_latency; @@ -275,10 +277,15 @@ typedef struct _afr_private { gf_boolean_t esh_granular; gf_boolean_t consistent_io; gf_boolean_t data_self_heal; /* on/off */ + gf_boolean_t use_anon_inode; /*For lock healing.*/ struct list_head saved_locks; struct list_head lk_healq; + + /*For anon-inode handling */ + char anon_inode_name[NAME_MAX + 1]; + char anon_gfid_str[UUID_SIZE + 1]; } afr_private_t; typedef enum { @@ -1409,4 +1416,8 @@ afr_dom_lock_release(call_frame_t *frame); void afr_fill_success_replies(afr_local_t *local, afr_private_t *priv, unsigned char *replies); + +gf_boolean_t +afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name, + pid_t pid); #endif /* __AFR_H__ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 4e5712e6447..8d6fb5e0fac 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -3810,6 +3810,38 @@ out: } static int +set_volfile_id_option(volgen_graph_t *graph, glusterd_volinfo_t *volinfo, + int clusters) +{ + xlator_t *xlator = NULL; + int i = 0; + int ret = -1; + glusterd_conf_t *conf = NULL; + xlator_t *this = NULL; + + this = THIS; + GF_VALIDATE_OR_GOTO("glusterd", this, out); + conf = this->private; + GF_VALIDATE_OR_GOTO(this->name, conf, out); + + if (conf->op_version < GD_OP_VERSION_9_0) + return 0; + xlator = first_of(graph); + + for (i = 0; i < clusters; i++) { + ret = xlator_set_fixed_option(xlator, "volume-id", + uuid_utoa(volinfo->volume_id)); + if (ret) + goto out; + + xlator = xlator->next; + } + +out: + return ret; +} + +static int volgen_graph_build_afr_clusters(volgen_graph_t *graph, glusterd_volinfo_t *volinfo) { @@ -3851,6 +3883,13 @@ volgen_graph_build_afr_clusters(volgen_graph_t *graph, clusters = -1; goto out; } + + ret = set_volfile_id_option(graph, volinfo, clusters); + if (ret) { + clusters = -1; + goto out; + } + if (!volinfo->arbiter_count && !volinfo->thin_arbiter_count) goto out; diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index f74876eec9f..398b4d76f52 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -3138,4 +3138,9 @@ struct volopt_map_entry glusterd_volopt_map[] = { .type = NO_DOC, }, + {.key = "cluster.use-anonymous-inode", + .voltype = "cluster/replicate", + .op_version = GD_OP_VERSION_9_0, + .value = "yes", + .flags = VOLOPT_FLAG_CLIENT_OPT}, {.key = NULL}}; |