diff options
author | karthik-us <ksubrahm@redhat.com> | 2017-06-07 15:56:13 +0530 |
---|---|---|
committer | Jeff Darcy <jeff@pl.atyp.us> | 2017-07-18 15:24:54 +0000 |
commit | 657d78dbad118e511e1fca8b1badb9f8ae7a6f60 (patch) | |
tree | 5c2001d6dab7536d8ffeccb1dafc6e5585d7d07c | |
parent | ae14513eb82929662b11e4c304877030a7d685cd (diff) |
cluster/afr: GFID split-brain resolution with existing CLI
Problem:
Currently the admin has no way to resolve a gfid split-brain from the
CLI based on a policy such as choice of brick, mtime or size.
Fix:
With the existing CLI options based on size, mtime, and choice of
brick, we first resolve the parent directory of the specified file
and then do a lookup on the file under that parent. As part of the
lookup, if we find a gfid mismatch, we resolve it based on the
policy and return. If the file is not in gfid split-brain, we then
check for data and metadata split-brain in the getxattr code path,
and resolve them if any.
This works only when the absolute path of the file is given to the
CLI, not when the file is specified by its gfid. Hence the
source-brick policy without any file path will not resolve gfid
split-brain either, since it uses the gfids of the files. It can
still resolve the other types of split-brain, and it skips the gfid
mismatch resolution with the usual error message.
Reverting the change https://review.gluster.org/17290. This patch
resolves the issue.
Fixes gluster/glusterfs#135
Change-Id: Iaeba6fc32f184a34255d03be87cda02773130a09
BUG: 1459530
Signed-off-by: karthik-us <ksubrahm@redhat.com>
Reviewed-on: https://review.gluster.org/17485
Reviewed-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
-rw-r--r-- | heal/src/glfs-heal.c | 114 | ||||
-rw-r--r-- | tests/basic/afr/gfid-mismatch-resolution-with-cli.t | 168 | ||||
-rw-r--r-- | tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t | 3 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 18 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 250 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 107 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-name.c | 65 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 8 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 2 |
9 files changed, 584 insertions, 151 deletions
diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c index fb997948f57..27115f3ca6c 100644 --- a/heal/src/glfs-heal.c +++ b/heal/src/glfs-heal.c @@ -21,6 +21,7 @@ #include <string.h> #include <time.h> #include "glusterfs.h" +#include <libgen.h> #if (HAVE_LIB_XML) #include <libxml/encoding.h> @@ -1031,26 +1032,43 @@ _validate_directory (dict_t *xattr_req, char *file) int glfsh_heal_splitbrain_file (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, - char *file, dict_t *xattr_req) + char *file, dict_t *xattr_req) { - int ret = -1; - int reval = 0; - loc_t loc = {0, }; - char *path = NULL; - char *filename = NULL; - struct iatt iatt = {0, }; - xlator_t *xl = top_subvol; - dict_t *xattr_rsp = NULL; - char *sh_fail_msg = NULL; - int32_t op_errno = 0; + int ret = -1; + int reval = 0; + loc_t loc = {0, }; + char *path = NULL; + char *path1 = NULL; + char *path2 = NULL; + char *filename = NULL; + char *filename1 = NULL; + struct iatt iatt = {0, }; + xlator_t *xl = top_subvol; + dict_t *xattr_rsp = NULL; + char *sh_fail_msg = NULL; + char *gfid_heal_msg = NULL; + int32_t op_errno = 0; + gf_boolean_t flag = _gf_false; if (!strncmp (file, "gfid:", 5)) { filename = gf_strdup(file); + if (!filename) { + printf ("Error allocating memory to filename\n"); + goto out; + } path = strtok (filename, ":"); path = strtok (NULL, ";"); gf_uuid_parse (path, loc.gfid); loc.path = gf_strdup (uuid_utoa (loc.gfid)); + if (!loc.path) { + printf ("Error allocating memory to path\n"); + goto out; + } loc.inode = inode_new (rootloc->inode->table); + if (!loc.inode) { + printf ("Error getting inode\n"); + goto out; + } ret = syncop_lookup (xl, &loc, &iatt, 0, xattr_req, &xattr_rsp); if (ret) { op_errno = -ret; @@ -1065,9 +1083,72 @@ glfsh_heal_splitbrain_file (glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc, ret = -1; goto out; } -retry: + path1 = gf_strdup (file); + if (!path1) { + printf ("Error allocating memory to path\n"); + ret = -1; + goto out; + } + path2 = gf_strdup (file); + if 
(!path2) { + printf ("Error allocating memory to path\n"); + ret = -1; + goto out; + } + path = dirname (path1); + filename1 = basename (path2); +retry1: + ret = glfs_resolve (fs, xl, path, &loc, &iatt, reval); + ESTALE_RETRY (ret, errno, reval, &loc, retry1); + if (ret) { + printf("Lookup failed on %s:%s\n", + path, strerror (errno)); + goto out; + } + GF_FREE ((char *)loc.path); + loc.path = gf_strdup (file); + if (!loc.path) { + printf ("Error allocating memory for path\n"); + ret = -1; + goto out; + } + loc.parent = inode_unref (loc.parent); + loc.parent = inode_ref (loc.inode); + loc.inode = inode_unref (loc.inode); + loc.inode = inode_new (rootloc->inode->table); + if (!loc.inode) { + printf ("Error getting inode\n"); + ret = -1; + goto out; + } + loc.name = filename1; + gf_uuid_copy (loc.pargfid, loc.gfid); + gf_uuid_clear (loc.gfid); + + ret = syncop_lookup (xl, &loc, &iatt, 0, xattr_req, &xattr_rsp); + if (ret) { + op_errno = -ret; + printf ("Lookup failed on %s:%s.\n", file, + strerror(op_errno)); + flag = _gf_true; + } + + ret = dict_get_str (xattr_rsp, "gfid-heal-msg", &gfid_heal_msg); + if (!ret) { + printf ("%s for file %s\n", gfid_heal_msg, file); + loc_wipe (&loc); + goto out; + } + if (flag) + goto out; + + reval = 0; + loc_wipe (&loc); + memset (&iatt, 0, sizeof(iatt)); + +retry2: ret = glfs_resolve (fs, xl, file, &loc, &iatt, reval); - ESTALE_RETRY (ret, errno, reval, &loc, retry); + ESTALE_RETRY (ret, errno, reval, &loc, retry2); if (ret) { printf("Lookup failed on %s:%s\n", file, strerror (errno)); @@ -1098,6 +1179,13 @@ retry: out: if (xattr_rsp) dict_unref (xattr_rsp); + if (path1) + GF_FREE (path1); + if (path2) + GF_FREE (path2); + if (filename) + GF_FREE (filename); + loc_wipe (&loc); return ret; } diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-cli.t b/tests/basic/afr/gfid-mismatch-resolution-with-cli.t new file mode 100644 index 00000000000..b739ddc49cc --- /dev/null +++ b/tests/basic/afr/gfid-mismatch-resolution-with-cli.t @@ 
-0,0 +1,168 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume set $V0 cluster.entry-self-heal off +TEST $CLI volume set $V0 cluster.metadata-self-heal off +TEST $CLI volume set $V0 cluster.data-self-heal off +cd $M0 + +##### Healing from latest mtime ###### + +TEST kill_brick $V0 $H0 $B0/${V0}0 +echo "Sink based on mtime" > f1 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +echo "Source based on mtime" > f1 + +gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f1) +gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f1) +TEST [ "$gfid_0" != "$gfid_1" ] + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +#We know that first brick has the latest mtime +LATEST_MTIME_MD5=$(md5sum $B0/${V0}0/f1 | awk '{print $1}') + +TEST $CLI volume heal $V0 split-brain latest-mtime /f1 + +#gfid split-brain should be resolved +gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f1) +TEST [ "$gfid_0" == "$gfid_1" ] + +#Heal the data and check the md5sum +TEST $CLI volume set $V0 self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +HEALED_MD5=$(md5sum $B0/${V0}1/f1 | awk '{print $1}') +TEST [ "$LATEST_MTIME_MD5" == "$HEALED_MD5" ] + + +##### Healing from bigger file ###### + +TEST mkdir test +TEST $CLI volume set 
$V0 self-heal-daemon off +TEST kill_brick $V0 $H0 $B0/${V0}0 +echo "Bigger file" > test/f2 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +echo "Small file" > test/f2 + +gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/test/f2) +gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/test/f2) +TEST [ "$gfid_0" != "$gfid_1" ] + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +#We know that second brick has the bigger file +BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/test/f2 | awk '{print $1}') + +TEST $CLI volume heal $V0 split-brain bigger-file /test/f2 + +#gfid split-brain should be resolved +gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/test/f2) +TEST [ "$gfid_0" == "$gfid_1" ] + +#Heal the data and check the md5sum +TEST $CLI volume set $V0 self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +HEALED_MD5=$(md5sum $B0/${V0}0/test/f2 | awk '{print $1}') +TEST [ "$BIGGER_FILE_MD5" == "$HEALED_MD5" ] + + +#Add one more brick, and heal. 
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 + +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + + +##### Healing from source brick ###### + +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume set $V0 cluster.quorum-type none +TEST kill_brick $V0 $H0 $B0/${V0}0 +echo "We will consider these as sinks" > test/f3 +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST kill_brick $V0 $H0 $B0/${V0}2 +echo "We will take this as source" > test/f3 + +gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/test/f3) +gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/test/f3) +gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/test/f3) +TEST [ "$gfid_0" != "$gfid_1" ] +TEST [ "$gfid_1" == "$gfid_2" ] + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +#We will try to heal the split-brain with bigger file option. +#It should fail, since we have same file size in bricks 1 & 2. 
+EXPECT "No bigger file for file /test/f3" $CLI volume heal $V0 split-brain bigger-file /test/f3 + +#Now heal from taking the brick 0 as the source +SOURCE_MD5=$(md5sum $B0/${V0}0/test/f3 | awk '{print $1}') + +TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}0 /test/f3 + +#gfid split-brain should be resolved +gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/test/f3) +gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/test/f3) +TEST [ "$gfid_0" == "$gfid_1" ] +TEST [ "$gfid_0" == "$gfid_2" ] + +#Heal the data and check the md5sum +TEST $CLI volume set $V0 self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2 +TEST $CLI volume heal $V0 +EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +HEALED_MD5_1=$(md5sum $B0/${V0}1/test/f3 | awk '{print $1}') +HEALED_MD5_2=$(md5sum $B0/${V0}2/test/f3 | awk '{print $1}') +TEST [ "$SOURCE_MD5" == "$HEALED_MD5_1" ] +TEST [ "$SOURCE_MD5" == "$HEALED_MD5_2" ] + +cd - +cleanup; diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t index 9f1347bbb44..2f14f838e49 100644 --- a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t +++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t @@ -226,6 +226,3 @@ HEALED_MD5=$(md5sum $B0/${V0}2/f4 | cut -d\ -f1) TEST [ "$MAJORITY_MD5" == "$HEALED_MD5" ] cleanup; - -#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1450730 -#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1450730 diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 064320441b7..cba18b2ff8f 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -2114,6 +2114,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t 
*this) int op_errno = 0; int read_subvol = 0; int par_read_subvol = 0; + int ret = -1; unsigned char *readable = NULL; int event = 0; struct afr_reply *replies = NULL; @@ -2124,6 +2125,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) int spb_choice = -1; ia_type_t ia_type = IA_INVAL; afr_read_subvol_args_t args = {0,}; + char *gfid_heal_msg = NULL; priv = this->private; local = frame->local; @@ -2258,6 +2260,19 @@ unwind: local->op_errno = ENOTCONN; } + ret = dict_get_str (local->xattr_req, "gfid-heal-msg", &gfid_heal_msg); + if (!ret) { + ret = dict_set_str (local->replies[read_subvol].xdata, + "gfid-heal-msg", gfid_heal_msg); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_DICT_SET_FAILED, + "Error setting gfid-heal-msg dict"); + local->op_ret = -1; + local->op_errno = ENOMEM; + } + } + AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno, local->inode, &local->replies[read_subvol].poststat, local->replies[read_subvol].xdata, @@ -2520,7 +2535,7 @@ afr_lookup_selfheal_wrap (void *opaque) loc_pargfid (&local->loc, pargfid); ret = afr_selfheal_name (frame->this, pargfid, local->loc.name, - &local->cont.lookup.gfid_req); + &local->cont.lookup.gfid_req, local->xattr_req); if (ret == -EIO) goto unwind; @@ -2581,6 +2596,7 @@ afr_lookup_entry_heal (call_frame_t *frame, xlator_t *this) } if (need_heal) { + heal = copy_frame (frame); if (heal) heal->root->pid = GF_CLIENT_PID_SELF_HEALD; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 6b5e50d6c56..9ecd63ce10c 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -20,6 +20,256 @@ void afr_heal_synctask (xlator_t *this, afr_local_t *local); int +afr_gfid_sbrain_source_from_src_brick (xlator_t *this, + struct afr_reply *replies, + char *src_brick) +{ + int i = 0; + afr_private_t *priv = NULL; + + priv = this->private; + for (i = 0; i < priv->child_count; i++) { + if 
(!replies[i].valid || replies[i].op_ret == -1) + continue; + if (strcmp (priv->children[i]->name, src_brick) == 0) + return i; + } + return -1; +} + +int +afr_selfheal_gfid_mismatch_by_majority (struct afr_reply *replies, + int child_count) +{ + int j = 0; + int i = 0; + int src = -1; + int votes[child_count]; + + for (i = 0; i < child_count; i++) { + if (!replies[i].valid || replies[i].op_ret == -1) + continue; + + votes[i] = 1; + for (j = i+1; j < child_count; j++) { + if ((!gf_uuid_compare (replies[i].poststat.ia_gfid, + replies[j].poststat.ia_gfid))) + votes[i]++; + if (votes[i] > child_count / 2) { + src = i; + goto out; + } + } + } + +out: + return src; +} + +int afr_gfid_sbrain_source_from_bigger_file (struct afr_reply *replies, + int child_count) +{ + int i = 0; + int src = -1; + uint64_t size = 0; + + for (i = 0; i < child_count; i++) { + if (!replies[i].valid || replies[i].op_ret == -1) + continue; + if (size < replies[i].poststat.ia_size) { + src = i; + size = replies[i].poststat.ia_size; + } else if (replies[i].poststat.ia_size == size) { + src = -1; + } + } + return src; +} + +int afr_gfid_sbrain_source_from_latest_mtime (struct afr_reply *replies, + int child_count) +{ + int i = 0; + int src = -1; + uint32_t mtime = 0; + uint32_t mtime_nsec = 0; + + for (i = 0; i < child_count; i++) { + if (!replies[i].valid || replies[i].op_ret != 0) + continue; + if ((mtime < replies[i].poststat.ia_mtime) || + ((mtime == replies[i].poststat.ia_mtime) && + (mtime_nsec < replies[i].poststat.ia_mtime_nsec))) { + src = i; + mtime = replies[i].poststat.ia_mtime; + mtime_nsec = replies[i].poststat.ia_mtime_nsec; + } else if ((mtime == replies[i].poststat.ia_mtime) && + (mtime_nsec == replies[i].poststat.ia_mtime_nsec)) { + src = -1; + } + } + return src; +} + +int +afr_gfid_split_brain_source (xlator_t *this, struct afr_reply *replies, + inode_t *inode, uuid_t pargfid, const char *bname, + int src_idx, int child_idx, + unsigned char *locked_on, int *src, dict_t *xdata) +{ 
+ afr_private_t *priv = NULL; + char g1[64] = {0,}; + char g2[64] = {0,}; + int up_count = 0; + int heal_op = -1; + int ret = -1; + char *src_brick = NULL; + + *src = -1; + priv = this->private; + up_count = AFR_COUNT (locked_on, priv->child_count); + if (up_count != priv->child_count) { + gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "All the bricks should be up to resolve the gfid split " + "barin"); + if (xdata) { + ret = dict_set_str (xdata, "gfid-heal-msg", "All the " + "bricks should be up to resolve the" + " gfid split barin"); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_DICT_SET_FAILED, "Error setting" + " gfid-heal-msg dict"); + } + goto out; + } + + if (xdata) { + ret = dict_get_int32 (xdata, "heal-op", &heal_op); + if (ret) + goto fav_child; + } else { + goto fav_child; + } + + switch (heal_op) { + case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE: + *src = afr_gfid_sbrain_source_from_bigger_file (replies, + priv->child_count); + if (*src == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_SPLIT_BRAIN, "No bigger file"); + if (xdata) { + ret = dict_set_str (xdata, "gfid-heal-msg", + "No bigger file"); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_DICT_SET_FAILED, "Error" + " setting gfid-heal-msg dict"); + } + } + break; + + case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME: + *src = afr_gfid_sbrain_source_from_latest_mtime (replies, + priv->child_count); + if (*src == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_SPLIT_BRAIN, "No difference in mtime"); + if (xdata) { + ret = dict_set_str (xdata, "gfid-heal-msg", + "No difference in mtime"); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_DICT_SET_FAILED, "Error" + "setting gfid-heal-msg dict"); + } + } + break; + + case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK: + ret = dict_get_str (xdata, "child-name", &src_brick); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_SPLIT_BRAIN, "Error getting the source " + "brick"); + break; + } + *src = 
afr_gfid_sbrain_source_from_src_brick (this, replies, + src_brick); + if (*src == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_SPLIT_BRAIN, "Error getting the source " + "brick"); + if (xdata) { + ret = dict_set_str (xdata, "gfid-heal-msg", + "Error getting the source " + "brick"); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_DICT_SET_FAILED, "Error" + " setting gfid-heal-msg dict"); + } + } + break; + + default: + break; + } + goto out; + +fav_child: + switch (priv->fav_child_policy) { + case AFR_FAV_CHILD_BY_SIZE: + *src = afr_sh_fav_by_size (this, replies, inode); + break; + case AFR_FAV_CHILD_BY_MTIME: + *src = afr_sh_fav_by_mtime (this, replies, inode); + break; + case AFR_FAV_CHILD_BY_CTIME: + *src = afr_sh_fav_by_ctime(this, replies, inode); + break; + case AFR_FAV_CHILD_BY_MAJORITY: + if (priv->child_count != 2) + *src = afr_selfheal_gfid_mismatch_by_majority (replies, + priv->child_count); + else + *src = -1; + + if (*src == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_SPLIT_BRAIN, "No majority to resolve " + "gfid split brain"); + } + break; + default: + break; + } + +out: + if (*src == -1) { + gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, + "Gfid mismatch detected for <gfid:%s>/%s>, %s on %s and" + " %s on %s.", uuid_utoa (pargfid), bname, + uuid_utoa_r (replies[child_idx].poststat.ia_gfid, g1), + priv->children[child_idx]->name, + uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2), + priv->children[src_idx]->name); + gf_event (EVENT_AFR_SPLIT_BRAIN, "subvol=%s;type=gfid;file=" + "<gfid:%s>/%s>;count=2;child-%d=%s;gfid-%d=%s;" + "child-%d=%s;gfid-%d=%s", this->name, + uuid_utoa (pargfid), bname, child_idx, + priv->children[child_idx]->name, child_idx, + uuid_utoa_r (replies[child_idx].poststat.ia_gfid, g1), + src_idx, priv->children[src_idx]->name, src_idx, + uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2)); + return -1; + } + return 0; +} + + +int afr_selfheal_post_op_cbk (call_frame_t *frame, void 
*cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) { diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 82ae6432d7d..d7e9e60a7bf 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -17,105 +17,6 @@ #include "syncop-utils.h" #include "events.h" -int -afr_selfheal_gfid_mismatch_by_majority (struct afr_reply *replies, - int child_count) -{ - int j = 0; - int i = 0; - int src = -1; - int votes[child_count]; - - for (i = 0; i < child_count; i++) { - if (!replies[i].valid || replies[i].op_ret == -1) - continue; - - votes[i] = 1; - for (j = i+1; j < child_count; j++) { - if ((!gf_uuid_compare (replies[i].poststat.ia_gfid, - replies[j].poststat.ia_gfid))) - votes[i]++; - if (votes[i] > child_count / 2) { - src = i; - goto out; - } - } - } - -out: - return src; -} - -int -afr_gfid_split_brain_source (xlator_t *this, struct afr_reply *replies, - inode_t *inode, uuid_t pargfid, char *bname, - int src_idx, int child_idx, - unsigned char *locked_on, int *src) -{ - afr_private_t *priv = NULL; - char g1[64] = {0,}; - char g2[64] = {0,}; - int up_count = 0; - - priv = this->private; - up_count = AFR_COUNT (locked_on, priv->child_count); - if (up_count != priv->child_count) { - gf_msg (this->name, GF_LOG_ERROR, 0, - AFR_MSG_SPLIT_BRAIN, - "All the bricks should be up to resolve the gfid split " - "brain"); - goto out; - } - switch (priv->fav_child_policy) { - case AFR_FAV_CHILD_BY_SIZE: - *src = afr_sh_fav_by_size (this, replies, inode); - break; - case AFR_FAV_CHILD_BY_MTIME: - *src = afr_sh_fav_by_mtime (this, replies, inode); - break; - case AFR_FAV_CHILD_BY_CTIME: - *src = afr_sh_fav_by_ctime(this, replies, inode); - break; - case AFR_FAV_CHILD_BY_MAJORITY: - if (priv->child_count != 2) - *src = afr_selfheal_gfid_mismatch_by_majority (replies, - priv->child_count); - else - *src = -1; - - if (*src == -1) { - gf_msg 
(this->name, GF_LOG_ERROR, 0, - AFR_MSG_SPLIT_BRAIN, "No majority to resolve " - "gfid split brain"); - } - break; - default: - break; - } - -out: - if (*src == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN, - "Gfid mismatch detected for <gfid:%s>/%s>, %s on %s and" - " %s on %s. Skipping conservative merge on the file.", - uuid_utoa (pargfid), bname, - uuid_utoa_r (replies[child_idx].poststat.ia_gfid, g1), - priv->children[child_idx]->name, - uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2), - priv->children[src_idx]->name); - gf_event (EVENT_AFR_SPLIT_BRAIN, "subvol=%s;type=gfid;file=" - "<gfid:%s>/%s>;count=2;child-%d=%s;gfid-%d=%s;" - "child-%d=%s;gfid-%d=%s", this->name, - uuid_utoa (pargfid), bname, child_idx, - priv->children[child_idx]->name, child_idx, - uuid_utoa_r (replies[child_idx].poststat.ia_gfid, g1), - src_idx, priv->children[src_idx]->name, src_idx, - uuid_utoa_r (replies[src_idx].poststat.ia_gfid, g2)); - return -1; - } - return 0; -} - static int afr_selfheal_entry_delete (xlator_t *this, inode_t *dir, const char *name, inode_t *inode, int child, struct afr_reply *replies) @@ -332,7 +233,13 @@ afr_selfheal_detect_gfid_and_type_mismatch (xlator_t *this, ret = afr_gfid_split_brain_source (this, replies, inode, pargfid, bname, src_idx, i, - locked_on, src); + locked_on, src, + NULL); + if (ret) + gf_msg (this->name, GF_LOG_ERROR, 0, + AFR_MSG_SPLIT_BRAIN, + "Skipping conservative merge on the " + "file."); return ret; } diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index 8372cb6e376..1d198a8883e 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -330,14 +330,15 @@ static int afr_selfheal_name_gfid_mismatch_check (xlator_t *this, struct afr_reply *replies, int source, unsigned char *sources, int *gfid_idx, uuid_t pargfid, - const char *bname) + const char *bname, inode_t *inode, + unsigned char *locked_on, 
dict_t *xdata) { int i = 0; int gfid_idx_iter = -1; + int ret = -1; void *gfid = NULL; void *gfid1 = NULL; afr_private_t *priv = NULL; - char g1[64], g2[64]; priv = this->private; @@ -358,31 +359,29 @@ afr_selfheal_name_gfid_mismatch_check (xlator_t *this, struct afr_reply *replies if (sources[i] || source == -1) { if ((sources[gfid_idx_iter] || source == -1) && gf_uuid_compare (gfid, gfid1)) { - gf_msg (this->name, GF_LOG_WARNING, 0, - AFR_MSG_SPLIT_BRAIN, - "GFID mismatch for <gfid:%s>/%s " - "%s on %s and %s on %s", - uuid_utoa (pargfid), bname, - uuid_utoa_r (gfid1, g1), - priv->children[i]->name, - uuid_utoa_r (gfid, g2), - priv->children[gfid_idx_iter]->name); - gf_event (EVENT_AFR_SPLIT_BRAIN, - "subvol=%s;type=gfid;" - "file=<gfid:%s>/%s;count=2;" - "child-%d=%s;gfid-%d=%s;child-%d=%s;" - "gfid-%d=%s", this->name, - uuid_utoa (pargfid), bname, i, - priv->children[i]->name, i, - uuid_utoa_r (gfid1, g1), - gfid_idx_iter, - priv->children[gfid_idx_iter]->name, - gfid_idx_iter, - uuid_utoa_r (gfid, g2)); - - return -EIO; + ret = afr_gfid_split_brain_source (this, + replies, + inode, + pargfid, + bname, + gfid_idx_iter, + i, locked_on, + gfid_idx, + xdata); + if (!ret && *gfid_idx >= 0) { + ret = dict_set_str (xdata, + "gfid-heal-msg", + "GFID split-brain " + "resolved"); + if (ret) + gf_msg (this->name, + GF_LOG_ERROR, 0, + AFR_MSG_DICT_SET_FAILED, + "Error setting gfid-" + "heal-msg dict"); + } + return ret; } - gfid = &replies[i].poststat.ia_gfid; gfid_idx_iter = i; } @@ -427,7 +426,7 @@ __afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, int source, unsigned char *locked_on, struct afr_reply *replies, - void *gfid_req) + void *gfid_req, dict_t *xdata) { int gfid_idx = -1; int ret = -1; @@ -458,7 +457,8 @@ __afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, ret = afr_selfheal_name_gfid_mismatch_check (this, replies, source, sources, 
&gfid_idx, - pargfid, bname); + pargfid, bname, inode, + locked_on, xdata); if (ret) return ret; @@ -583,7 +583,8 @@ out: int afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, - uuid_t pargfid, const char *bname, void *gfid_req) + uuid_t pargfid, const char *bname, void *gfid_req, + dict_t *xdata) { afr_private_t *priv = NULL; unsigned char *sources = NULL; @@ -640,7 +641,7 @@ afr_selfheal_name_do (call_frame_t *frame, xlator_t *this, inode_t *parent, ret = __afr_selfheal_name_do (frame, this, parent, pargfid, bname, inode, sources, sinks, healed_sinks, source, locked_on, - replies, gfid_req); + replies, gfid_req, xdata); } unlock: afr_selfheal_unentrylk (frame, this, parent, this->name, bname, @@ -707,7 +708,7 @@ afr_selfheal_name_unlocked_inspect (call_frame_t *frame, xlator_t *this, int afr_selfheal_name (xlator_t *this, uuid_t pargfid, const char *bname, - void *gfid_req) + void *gfid_req, dict_t *xdata) { inode_t *parent = NULL; call_frame_t *frame = NULL; @@ -729,7 +730,7 @@ afr_selfheal_name (xlator_t *this, uuid_t pargfid, const char *bname, if (need_heal) { ret = afr_selfheal_name_do (frame, this, parent, pargfid, bname, - gfid_req); + gfid_req, xdata); if (ret) goto out; } diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 2e22ac2d7a1..36f081ec354 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -99,7 +99,7 @@ afr_throttled_selfheal (call_frame_t *frame, xlator_t *this); int afr_selfheal_name (xlator_t *this, uuid_t gfid, const char *name, - void *gfid_req); + void *gfid_req, dict_t *xdata); int afr_selfheal_data (call_frame_t *frame, xlator_t *this, inode_t *inode); @@ -330,4 +330,10 @@ int afr_sh_fav_by_ctime (xlator_t *this, struct afr_reply *replies, inode_t *inode); +int +afr_gfid_split_brain_source (xlator_t *this, struct afr_reply *replies, + inode_t *inode, uuid_t pargfid, const char *bname, + int src_idx, int child_idx, 
+ unsigned char *locked_on, int *src, dict_t *xdata); + #endif /* !_AFR_SELFHEAL_H */ diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index e1a40521709..08817202b33 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -301,7 +301,7 @@ afr_shd_selfheal_name (struct subvol_healer *healer, int child, uuid_t parent, { int ret = -1; - ret = afr_selfheal_name (THIS, parent, bname, NULL); + ret = afr_selfheal_name (THIS, parent, bname, NULL, NULL); return ret; } |