diff options
-rw-r--r-- | cli/src/cli-cmd-volume.c | 8 | ||||
-rw-r--r-- | heal/src/glfs-heal.c | 79 | ||||
-rw-r--r-- | tests/basic/afr/split-brain-heal-info.t | 60 | ||||
-rw-r--r-- | tests/volume.rc | 5 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 13 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 4 |
6 files changed, 149 insertions, 20 deletions
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c index 3035ad4d566..68755630d87 100644 --- a/cli/src/cli-cmd-volume.c +++ b/cli/src/cli-cmd-volume.c @@ -1881,7 +1881,8 @@ cli_print_brick_status (cli_volume_status_t *status) #define NEEDS_GLFS_HEAL(op) ((op == GF_AFR_OP_SBRAIN_HEAL_FROM_BIGGER_FILE) || \ (op == GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK) || \ - (op == GF_AFR_OP_INDEX_SUMMARY)) + (op == GF_AFR_OP_INDEX_SUMMARY) || \ + (op == GF_AFR_OP_SPLIT_BRAIN_FILES)) int cli_launch_glfs_heal (int heal_op, dict_t *options) @@ -1907,7 +1908,7 @@ cli_launch_glfs_heal (int heal_op, dict_t *options) ret = dict_get_str (options, "file", &filename); runner_add_args (&runner, "bigger-file", filename, NULL); break; - case GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK: + case GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK: ret = dict_get_str (options, "heal-source-hostname", &hostname); ret = dict_get_str (options, "heal-source-brickpath", @@ -1917,6 +1918,9 @@ cli_launch_glfs_heal (int heal_op, dict_t *options) if (dict_get_str (options, "file", &filename) == 0) runner_argprintf (&runner, filename); break; + case GF_AFR_OP_SPLIT_BRAIN_FILES: + runner_add_args (&runner, "split-brain-info", NULL); + break; default: ret = -1; } diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c index f49f3a58afc..a6208fa052f 100644 --- a/heal/src/glfs-heal.c +++ b/heal/src/glfs-heal.c @@ -21,7 +21,10 @@ #define DEFAULT_HEAL_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs" #define USAGE_STR "Usage: %s <VOLNAME> [bigger-file <FILE> | "\ - "source-brick <HOSTNAME:BRICKNAME> [<FILE>]]\n" + "source-brick <HOSTNAME:BRICKNAME> [<FILE>] | "\ + "split-brain-info]\n" + +typedef void (*print_status) (dict_t *, char *, uuid_t, uint64_t *); int glfsh_heal_splitbrain_file (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, char *file, dict_t *xattr_req); @@ -190,6 +193,25 @@ out: } void +glfsh_print_spb_status (dict_t *dict, char *path, uuid_t gfid, + uint64_t *num_entries) +{ + char *value = NULL; + int ret = 0; + + ret = 
dict_get_str (dict, "heal-info", &value); + if (ret) + return; + + if (!strcmp (value, "split-brain")) { + (*num_entries)++; + printf ("%s\n", + path ? path : uuid_utoa (gfid)); + } + return; +} + +void glfsh_print_heal_status (dict_t *dict, char *path, uuid_t gfid, uint64_t *num_entries) { @@ -250,7 +272,8 @@ glfsh_heal_entries (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, static int glfsh_process_entries (xlator_t *xl, fd_t *fd, gf_dirent_t *entries, - uint64_t *offset, uint64_t *num_entries) + uint64_t *offset, uint64_t *num_entries, + print_status glfsh_print_status) { gf_dirent_t *entry = NULL; gf_dirent_t *tmp = NULL; @@ -291,8 +314,8 @@ glfsh_process_entries (xlator_t *xl, fd_t *fd, gf_dirent_t *entries, continue; } if (dict) - glfsh_print_heal_status (dict, path, gfid, - num_entries); + glfsh_print_status (dict, path, gfid, + num_entries); } ret = 0; GF_FREE (path); @@ -331,8 +354,17 @@ glfsh_crawl_directory (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, goto out; if (heal_op == GF_AFR_OP_INDEX_SUMMARY) { - ret = glfsh_process_entries (readdir_xl, fd, &entries, - &offset, &num_entries); + ret = glfsh_process_entries (readdir_xl, fd, + &entries, &offset, + &num_entries, + glfsh_print_heal_status); + if (ret < 0) + goto out; + } else if (heal_op == GF_AFR_OP_SPLIT_BRAIN_FILES) { + ret = glfsh_process_entries (readdir_xl, fd, + &entries, &offset, + &num_entries, + glfsh_print_spb_status); if (ret < 0) goto out; } else if (heal_op == GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK) { @@ -353,6 +385,9 @@ out: } else { if (heal_op == GF_AFR_OP_INDEX_SUMMARY) printf ("Number of entries: %"PRIu64"\n", num_entries); + else if (heal_op == GF_AFR_OP_SPLIT_BRAIN_FILES) + printf ("Number of entries in split-brain: %"PRIu64"\n" + , num_entries); else if (heal_op == GF_AFR_OP_SBRAIN_HEAL_FROM_BRICK) printf ("Number of healed entries: %"PRIu64"\n", num_entries); @@ -412,7 +447,7 @@ out: void glfsh_print_pending_heals (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, - xlator_t 
*xl) + xlator_t *xl, gf_xl_afr_op_t heal_op) { int ret = 0; loc_t dirloc = {0}; @@ -424,7 +459,7 @@ glfsh_print_pending_heals (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, if (!xattr_req) goto out; - ret = dict_set_int32 (xattr_req, "heal-op", GF_AFR_OP_INDEX_SUMMARY); + ret = dict_set_int32 (xattr_req, "heal-op", heal_op); if (ret) goto out; ret = glfsh_print_brick (xl, rootloc); @@ -453,8 +488,13 @@ glfsh_print_pending_heals (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, fd_unref (fd); if (xattr_req) dict_unref (xattr_req); - if (ret < 0) - printf ("Failed to find entries with pending self-heal\n"); + if (ret < 0) { + if (heal_op == GF_AFR_OP_INDEX_SUMMARY) + printf ("Failed to find entries with pending" + " self-heal\n"); + if (heal_op == GF_AFR_OP_SPLIT_BRAIN_FILES) + printf ("Failed to find entries in split-brain\n"); + } out: loc_wipe (&dirloc); return; @@ -521,7 +561,8 @@ out: int -glfsh_gather_heal_info (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc) +glfsh_gather_heal_info (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, + gf_xl_afr_op_t heal_op) { xlator_t *xl = NULL; xlator_t *afr_xl = NULL; @@ -537,7 +578,8 @@ glfsh_gather_heal_info (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc) old_THIS = THIS; THIS = afr_xl; glfsh_print_pending_heals (fs, top_subvol, - rootloc, xl); + rootloc, xl, + heal_op); THIS = old_THIS; printf ("\n"); } @@ -711,6 +753,15 @@ main (int argc, char **argv) case 2: heal_op = GF_AFR_OP_INDEX_SUMMARY; break; + case 3: + if (!strcmp (argv[2], "split-brain-info")) { + heal_op = GF_AFR_OP_SPLIT_BRAIN_FILES; + } else { + printf (USAGE_STR, argv[0]); + ret = -1; + goto out; + } + break; case 4: if (!strcmp (argv[2], "bigger-file")) { heal_op = GF_AFR_OP_SBRAIN_HEAL_FROM_BIGGER_FILE; @@ -799,7 +850,9 @@ main (int argc, char **argv) switch (heal_op) { case GF_AFR_OP_INDEX_SUMMARY: - ret = glfsh_gather_heal_info (fs, top_subvol, &rootloc); + case GF_AFR_OP_SPLIT_BRAIN_FILES: + ret = glfsh_gather_heal_info (fs, top_subvol, 
&rootloc, + heal_op); break; case GF_AFR_OP_SBRAIN_HEAL_FROM_BIGGER_FILE: ret = glfsh_heal_from_bigger_file (fs, top_subvol, diff --git a/tests/basic/afr/split-brain-heal-info.t b/tests/basic/afr/split-brain-heal-info.t new file mode 100644 index 00000000000..eabfbd0880a --- /dev/null +++ b/tests/basic/afr/split-brain-heal-info.t @@ -0,0 +1,60 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; + +function volume_start_force() +{ + local vol=$1 + TEST $CLI volume start $vol force + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $vol 0 + EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $vol 1 +} + +TESTS_EXPECTED_IN_LOOP=15 +SPB_FILES=0 +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume start $V0 +TEST $CLI volume set $V0 cluster.self-heal-daemon off +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 + +TEST mkdir $M0/dspb +TEST mkdir $M0/mspb +TEST mkdir $M0/espb +TEST touch $M0/dspb/file + +#### Simulate data-split-brain +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST `echo "abc" > $M0/dspb/file` +volume_start_force $V0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST `echo "def" > $M0/dspb/file` +volume_start_force $V0 +SPB_FILES=$(($SPB_FILES + 1)) + +### Simulate metadata-split-brain +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST chmod 757 $M0/mspb +volume_start_force $V0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST chmod 747 $M0/mspb +volume_start_force $V0 +SPB_FILES=$(($SPB_FILES + 1)) + +#### Simulate entry-split-brain +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST touch $M0/espb/a +volume_start_force $V0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST mkdir $M0/espb/a +volume_start_force $V0 +SPB_FILES=$(($SPB_FILES + 1)) + +#Multiply by 2, for each brick in replica pair +SPB_FILES=$(($SPB_FILES * 2)) +EXPECT "$SPB_FILES" afr_get_split_brain_count $V0 +cleanup; diff --git a/tests/volume.rc b/tests/volume.rc index 
2fd07cd8745..36f1350b9bc 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -210,6 +210,11 @@ function afr_get_pending_heal_count { gluster volume heal $vol info | grep "Number of entries" | awk '{ sum+=$4} END {print sum}' } +function afr_get_split_brain_count { + local vol=$1 + gluster volume heal $vol info split-brain | grep "Number of entries in split-brain" | awk '{ sum+=$6} END {print sum}' +} + function afr_get_index_path { local brick_path=$1 echo "$brick_path/.glusterfs/indices/xattrop" diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index e6d45add4e8..533a7b5d5a1 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -4452,7 +4452,11 @@ afr_get_heal_info (call_frame_t *frame, xlator_t *this, loc_t *loc, dict = afr_set_heal_info ("split-brain"); } else if (ret == -EAGAIN) { dict = afr_set_heal_info ("possibly-healing"); - } else if (ret == 0) { + } else if (ret >= 0) { + /* value of ret = source index + * so ret >= 0 and at least one of the 3 booleans set to + * true means a source is identified; heal is required. + */ if (!data_selfheal && !entry_selfheal && !metadata_selfheal) { dict = afr_set_heal_info ("no-heal"); @@ -4460,6 +4464,13 @@ afr_get_heal_info (call_frame_t *frame, xlator_t *this, loc_t *loc, dict = afr_set_heal_info ("heal"); } } else if (ret < 0) { + /* Apart from above checked -ve ret values, there are + * other possible ret values like ENOTCONN + * (returned when the number of valid replies received is + * less than 2) + * in which case heal is required when one of the + * selfheal booleans is set. 
+ */ if (data_selfheal || entry_selfheal || metadata_selfheal) { dict = afr_set_heal_info ("heal"); diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 05d9f2b4917..cd8bb688a11 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -210,12 +210,8 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, struct iatt first = {0, }; int source = -1; int sources_count = 0; - dict_t *xdata_req = NULL; - afr_local_t *local = NULL; priv = this->private; - local = frame->local; - xdata_req = local->xdata_req; sources_count = AFR_COUNT (sources, priv->child_count); |