diff options
-rwxr-xr-x | tests/bugs/bug-1088231.t | 161 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 23 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 6 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-layout.c | 1 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-selfheal.c | 197 |
5 files changed, 387 insertions, 1 deletions
diff --git a/tests/bugs/bug-1088231.t b/tests/bugs/bug-1088231.t new file mode 100755 index 00000000000..da6adaeb173 --- /dev/null +++ b/tests/bugs/bug-1088231.t @@ -0,0 +1,161 @@ +#!/bin/bash +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc +. $(dirname $0)/../fileio.rc +. $(dirname $0)/../dht.rc + + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1} +TEST $CLI volume set $V0 cluster.randomize-hash-range-by-gfid on +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=/$V0 --aux-gfid-mount --volfile-server=$H0 $M0 +TEST mkdir $M0/a + + +## Bug Description: In case of dht_discover code path, which is triggered +## when lookup done is nameless lookup, at the end of the lookup, even if +## it finds that self-heal is needed to fix-the layout it wont heal because +## healing code path is not added under nameless lookup. + +## What to test: With Patch, Even in case of nameless lookup, if layout +## needs to be fixed, the it will be fixed wherever lookup is successfull +## and it will not create any directory for subvols having ENOENT as it is +## nameless lookup. + +gfid_with_hyphen=`getfattr -n glusterfs.gfid.string $M0/a 2>/dev/null \ + | grep glusterfs.gfid.string | cut -d '"' -f 2` + +TEST setfattr -x trusted.glusterfs.dht $B0/$V0"0"/a + +TEST stat $M0/.gfid/$gfid_with_hyphen + +## Assuming that we have two bricks, we can have two permutations of layout +## Case 1: Brick - A Brick - B +## 0 - 50 51-100 +## +## Case 2: Brick - A Brick - B +## 51 - 100 0 - 50 +## +## To ensure layout is assigned properly, the following tests should be +## performed. +## +## Case 1: Layout_b0_s = 0; Layout_b0_e = 50, Layout_b1_s=51, +## Layout_b1_e = 100; +## +## layout_b1_s = layout_b0_e + 1; +## layout_b0_s = layout_b1_e + 1; but b0_s is 0, so change to 101 +## then compare +## Case 2: Layout_b0_s = 51, Layout_b0_e = 100, Layout_b1_s=0, +## Layout_b1_e = 51 +## +## layout_b0_s = Layout_b1_e + 1; +## layout_b1_s = Layout_b0_e + 1; but b1_s is 0, so chage to 101. + + +##Extract Layout +layout_b0_s=`get_layout $B0/$V0"0"/a | cut -c19-26` +layout_b0_e=`get_layout $B0/$V0"0"/a | cut -c27-34` +layout_b1_s=`get_layout $B0/$V0"1"/a | cut -c19-26` +layout_b1_e=`get_layout $B0/$V0"1"/a | cut -c27-34` + + +##Add 0X to perform Hex arithematic +layout_b0_s="0x"$layout_b0_s +layout_b0_e="0x"$layout_b0_e +layout_b1_s="0x"$layout_b1_s +layout_b1_e="0x"$layout_b1_e + + + +## Logic of converting starting layout "0" to "Max_value of layout + 1" +comp1=$(($layout_b0_s + 0)) +if [ "$comp1" == "0" ];then + comp1=4294967296 +fi + +comp2=$(($layout_b1_s + 0)) +if [ "$comp2" == "0" ];then + comp2=4294967296 +fi + +diff1=$(($layout_b0_e + 1)) +diff2=$(($layout_b1_e + 1)) + + +healed=0 + +if [ "$comp1" == "$diff1" ] && [ "$comp2" == "$diff2" ]; then + healed=$(($healed + 1)) +fi + +if [ "$comp1" == "$diff2" ] && [ "$comp2" == "$diff1" ]; then + healed=$(($healed + 1)) +fi + +TEST [ $healed == 1 ] + +cleanup + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1} +TEST $CLI volume set $V0 cluster.randomize-hash-range-by-gfid on +TEST $CLI volume start $V0 +TEST glusterfs --volfile-id=/$V0 --aux-gfid-mount --volfile-server=$H0 $M0 +TEST mkdir $M0/a + +gfid_with_hyphen=`getfattr -n glusterfs.gfid.string $M0/a 2>/dev/null \ + | grep glusterfs.gfid.string | cut -d '"' -f 2` + +TEST setfattr -x trusted.glusterfs.dht $B0/$V0"0"/a +TEST setfattr -x trusted.glusterfs.dht $B0/$V0"1"/a + +TEST stat $M0/.gfid/$gfid_with_hyphen + +##Extract Layout +layout_b0_s=`get_layout $B0/$V0"0"/a | cut -c19-26` +layout_b0_e=`get_layout $B0/$V0"0"/a | cut -c27-34` +layout_b1_s=`get_layout $B0/$V0"1"/a | cut -c19-26` +layout_b1_e=`get_layout $B0/$V0"1"/a | cut -c27-34` + + +##Add 0X to perform Hex arithematic +layout_b0_s="0x"$layout_b0_s +layout_b0_e="0x"$layout_b0_e +layout_b1_s="0x"$layout_b1_s +layout_b1_e="0x"$layout_b1_e + + + +## Logic of converting starting layout "0" to "Max_value of layout + 1" +comp1=$(($layout_b0_s + 0)) +if [ "$comp1" == "0" ];then + comp1=4294967296 +fi + +comp2=$(($layout_b1_s + 0)) +if [ "$comp2" == "0" ];then + comp2=4294967296 +fi + +diff1=$(($layout_b0_e + 1)) +diff2=$(($layout_b1_e + 1)) + + +healed=0 + +if [ "$comp1" == "$diff1" ] && [ "$comp2" == "$diff2" ]; then + healed=$(($healed + 1)) +fi + +if [ "$comp1" == "$diff2" ] && [ "$comp2" == "$diff1" ]; then + healed=$(($healed + 1)) +fi + +TEST [ $healed == 1 ] +cleanup + diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 5c4fe2d4ca4..7eb40faa9a3 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -212,6 +212,17 @@ dht_discover_complete (xlator_t *this, call_frame_t *discover_frame) op_errno = ESTALE; goto out; } + + /* For fixing the directory layout, we need to choose + * the subvolume on which layout will be set first. + * Because in nameless lookup, we have gfid only, + * we are dependent on gfid. Therefore if conf-> + * randomize_by_gfid is set, then only we proceed for + * healing layout of directory otherwise we don't heal. + */ + + if (local->inode && conf->randomize_by_gfid) + goto selfheal; } if (local->inode) @@ -227,6 +238,18 @@ out: NULL); return ret; + +selfheal: + + main_frame->local = local; + discover_frame->local = NULL; + FRAME_SU_DO (main_frame, dht_local_t); + uuid_copy (local->loc.gfid, local->gfid); + ret = dht_selfheal_directory_for_nameless_lookup (main_frame, + dht_lookup_selfheal_cbk, + &local->loc, layout); + return ret; + } diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index e1e3a5a4e38..ba66306dd13 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -456,6 +456,12 @@ int dht_lookup_everywhere (call_frame_t *f int dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, loc_t *loc, dht_layout_t *layout); + +int +dht_selfheal_directory_for_nameless_lookup (call_frame_t *frame, + dht_selfheal_dir_cbk_t cbk, + loc_t *loc, dht_layout_t *layout); + int dht_selfheal_new_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk, dht_layout_t *layout); diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c index 82127adf6e5..34892983a55 100644 --- a/xlators/cluster/dht/src/dht-layout.c +++ b/xlators/cluster/dht/src/dht-layout.c @@ -442,7 +442,6 @@ dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j) layout->list[j].xlator->name)); } - gf_boolean_t dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator) { diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c index 04e58903147..1e2d7438350 100644 --- a/xlators/cluster/dht/src/dht-selfheal.c +++ b/xlators/cluster/dht/src/dht-selfheal.c @@ -332,6 +332,144 @@ out: return 0; } +gf_boolean_t +dht_is_subvol_part_of_layout (dht_layout_t *layout, xlator_t *xlator) +{ + int i = 0; + gf_boolean_t ret = _gf_false; + + for (i = 0; i < layout->cnt; i++) { + if (!strcmp (layout->list[i].xlator->name, xlator->name)) { + ret = _gf_true; + break; + + } + } + + return ret; +} + +int +dht_layout_index_from_conf (dht_layout_t *layout, xlator_t *xlator) +{ + int i = -1; + int j = 0; + + for (j = 0; j < layout->cnt; j++) { + if (!strcmp (layout->list[j].xlator->name, xlator->name)) { + i = j; + break; + } + } + + return i; +} + + +static int +dht_selfheal_dir_xattr_for_nameless_lookup (call_frame_t *frame, loc_t *loc, + dht_layout_t *layout) +{ + dht_local_t *local = NULL; + int missing_xattr = 0; + int i = 0; + xlator_t *this = NULL; + dht_conf_t *conf = NULL; + dht_layout_t *dummy = NULL; + int j = 0; + + local = frame->local; + this = frame->this; + conf = this->private; + + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err != -1 || !layout->list[i].stop) { + /* err != -1 would mean xattr present on the directory + or the directory is non existent. + !layout->list[i].stop would mean layout absent + */ + + continue; + } + missing_xattr++; + } + + /* Also account for subvolumes with no-layout. Used for zero'ing out + the layouts and for setting quota key's if present */ + + /* Send where either the subvol is not part of layout, + * or it is part of the layout but error is non-zero but error + * is not equal to -1 or ENOENT. + */ + + for (i = 0; i < conf->subvolume_cnt; i++) { + if (dht_is_subvol_part_of_layout (layout, conf->subvolumes[i]) + == _gf_false) { + missing_xattr++; + continue; + } + + j = dht_layout_index_from_conf (layout, conf->subvolumes[i]); + + if ((j != -1) && (layout->list[j].err != -1) && + (layout->list[j].err != 0) && + (layout->list[j].err != ENOENT)) { + missing_xattr++; + } + + } + + + gf_log (this->name, GF_LOG_TRACE, + "%d subvolumes missing xattr for %s", + missing_xattr, loc->path); + + if (missing_xattr == 0) { + dht_selfheal_dir_finish (frame, this, 0); + return 0; + } + + local->call_cnt = missing_xattr; + for (i = 0; i < layout->cnt; i++) { + if (layout->list[i].err != -1 || !layout->list[i].stop) + continue; + + dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i, NULL); + + if (--missing_xattr == 0) + break; + } + + dummy = dht_layout_new (this, 1); + if (!dummy) + goto out; + + for (i = 0; i < conf->subvolume_cnt && missing_xattr; i++) { + if (dht_is_subvol_part_of_layout (layout, conf->subvolumes[i]) + == _gf_false) { + dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0, + conf->subvolumes[i]); + missing_xattr--; + continue; + } + + j = dht_layout_index_from_conf (layout, conf->subvolumes[i]); + + if ((j != -1) && (layout->list[j].err != -1) && + (layout->list[j].err != ENOENT) && + (layout->list[j].err != 0)) { + dht_selfheal_dir_xattr_persubvol (frame, loc, dummy, 0, + conf->subvolumes[i]); + missing_xattr--; + } + } + + dht_layout_unref (this, dummy); +out: + return 0; + +} + int dht_selfheal_dir_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *statpre, @@ -1030,6 +1168,65 @@ sorry_no_fix: return 0; } +int +dht_selfheal_directory_for_nameless_lookup (call_frame_t *frame, + dht_selfheal_dir_cbk_t dir_cbk, + loc_t *loc, dht_layout_t *layout) +{ + dht_local_t *local = NULL; + uint32_t down = 0; + uint32_t misc = 0; + int ret = 0; + xlator_t *this = NULL; + + local = frame->local; + this = frame->this; + dht_layout_anomalies (this, loc, layout, + &local->selfheal.hole_cnt, + &local->selfheal.overlaps_cnt, + NULL, &local->selfheal.down, + &local->selfheal.misc, NULL); + + down = local->selfheal.down; + misc = local->selfheal.misc; + + local->selfheal.dir_cbk = dir_cbk; + local->selfheal.layout = dht_layout_ref (this, layout); + + if (down) { + gf_log (this->name, GF_LOG_WARNING, + "%d subvolumes down -- not fixing", down); + ret = 0; + goto sorry_no_fix; + } + + if (misc) { + gf_log (this->name, GF_LOG_WARNING, + "%d subvolumes have unrecoverable errors", misc); + ret = 0; + goto sorry_no_fix; + } + + dht_layout_sort_volname (layout); + ret = dht_selfheal_dir_getafix (frame, loc, layout); + + if (ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "not able to form layout for the directory"); + goto sorry_no_fix; + } + + dht_selfheal_dir_xattr_for_nameless_lookup (frame, &local->loc, layout); + return 0; + +sorry_no_fix: + /* TODO: need to put appropriate local->op_errno */ + dht_selfheal_dir_finish (frame, this, ret); + + return 0; + + +} int dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk, |