From 34e8058b7c8e906c889d6d0e155ea63037148eea Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Thu, 11 Oct 2018 07:22:09 +0530 Subject: afr: fix incorrect reporting of directory split-brain Backport of https://review.gluster.org/#/c/glusterfs/+/21135/ Problem: When a directory has dirty xattrs due to failed post-ops or when replace/reset brick is performed, AFR does a conservative merge as expected, but heal-info reports it as split-brain because there are no clear sources. Fix: Modify pending flag to contain information about pending heals and split-brains. For directories, if spit-brain flag is not set,just show them as needing heal and not being in split-brain. Change-Id: I09ef821f6887c87d315ae99e6b1de05103cd9383 fixes: bz#1638163 Signed-off-by: Ravishankar N --- tests/afr.rc | 2 +- .../bugs/replicate/bug-1626994-info-split-brain.t | 62 ++++++++++++++++++++++ xlators/cluster/afr/src/afr-common.c | 14 ++--- xlators/cluster/afr/src/afr-self-heal-common.c | 6 ++- xlators/cluster/afr/src/afr-self-heal-data.c | 2 +- xlators/cluster/afr/src/afr-self-heal-entry.c | 2 +- xlators/cluster/afr/src/afr-self-heal-metadata.c | 2 +- xlators/cluster/afr/src/afr-self-heal.h | 8 +-- xlators/cluster/afr/src/afr.h | 4 ++ 9 files changed, 85 insertions(+), 17 deletions(-) create mode 100644 tests/bugs/replicate/bug-1626994-info-split-brain.t diff --git a/tests/afr.rc b/tests/afr.rc index f253e77561e..35f352df78f 100644 --- a/tests/afr.rc +++ b/tests/afr.rc @@ -2,7 +2,7 @@ function create_brick_xattrop_entry { local xattrop_dir=$(afr_get_index_path $1) - local base_entry=`ls $xattrop_dir` + local base_entry=`ls $xattrop_dir|grep xattrop` local gfid_str local params=`echo "$@" | cut -d' ' -f2-` echo $params diff --git a/tests/bugs/replicate/bug-1626994-info-split-brain.t b/tests/bugs/replicate/bug-1626994-info-split-brain.t new file mode 100644 index 00000000000..86bfecb1a9e --- /dev/null +++ b/tests/bugs/replicate/bug-1626994-info-split-brain.t @@ -0,0 +1,62 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +# Test to check dirs having dirty xattr do not show up in info split-brain. + +TEST glusterd; +TEST pidof glusterd; +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +# Create base entry in indices/xattrop +echo "Data" > $M0/FILE +rm -f $M0/FILE +EXPECT "1" count_index_entries $B0/${V0}0 +EXPECT "1" count_index_entries $B0/${V0}1 +EXPECT "1" count_index_entries $B0/${V0}2 + +TEST mkdir $M0/dirty_dir +TEST mkdir $M0/pending_dir + +# Set dirty xattrs on all bricks to simulate the case where entry transaction +# succeeded only the pre-op phase. +TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}0/dirty_dir +TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/dirty_dir +TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}2/dirty_dir +create_brick_xattrop_entry $B0/${V0}0 dirty_dir +# Should not show up as split-brain. +EXPECT "0" afr_get_split_brain_count $V0 + +# replace/reset brick case where the new brick has dirty and the other 2 bricks +# blame it should not be reported as split-brain. +TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}0 +TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}1 +TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}2 +create_brick_xattrop_entry $B0/${V0}0 "/" +# Should not show up as split-brain. +EXPECT "0" afr_get_split_brain_count $V0 + +# Set pending xattrs on all bricks blaming each other to simulate the case of +# entry split-brain. +TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/pending_dir +TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}1/pending_dir +TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/pending_dir +create_brick_xattrop_entry $B0/${V0}0 pending_dir +# Should show up as split-brain. +EXPECT "1" afr_get_split_brain_count $V0 + +cleanup; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index eb0e7330a91..0fdbde42779 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -5814,7 +5814,7 @@ out: int afr_selfheal_locked_metadata_inspect(call_frame_t *frame, xlator_t *this, inode_t *inode, gf_boolean_t *msh, - gf_boolean_t *pending) + unsigned char *pending) { int ret = -1; unsigned char *locked_on = NULL; @@ -5859,7 +5859,7 @@ out: int afr_selfheal_locked_data_inspect(call_frame_t *frame, xlator_t *this, fd_t *fd, - gf_boolean_t *dsh, gf_boolean_t *pflag) + gf_boolean_t *dsh, unsigned char *pflag) { int ret = -1; unsigned char *data_lock = NULL; @@ -5903,7 +5903,7 @@ out: int afr_selfheal_locked_entry_inspect(call_frame_t *frame, xlator_t *this, inode_t *inode, gf_boolean_t *esh, - gf_boolean_t *pflag) + unsigned char *pflag) { int ret = -1; int source = -1; @@ -5952,7 +5952,7 @@ afr_selfheal_locked_entry_inspect(call_frame_t *frame, xlator_t *this, ret = __afr_selfheal_entry_prepare(frame, this, inode, data_lock, sources, sinks, healed_sinks, locked_replies, &source, pflag); - if ((ret == 0) && source < 0) + if ((ret == 0) && (*pflag & PFLAG_SBRAIN)) ret = -EIO; *esh = afr_decide_heal_info(priv, sources, ret); } @@ -5974,7 +5974,7 @@ afr_selfheal_locked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, inode_t **inode, gf_boolean_t *entry_selfheal, gf_boolean_t *data_selfheal, gf_boolean_t *metadata_selfheal, - gf_boolean_t *pending) + unsigned char *pending) { int ret = -1; @@ -6059,7 +6059,7 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc) gf_boolean_t data_selfheal = _gf_false; gf_boolean_t metadata_selfheal = _gf_false; gf_boolean_t entry_selfheal = _gf_false; - gf_boolean_t pending = _gf_false; + unsigned char pending = _gf_false; dict_t *dict = NULL; int ret = -1; int op_errno = 0; @@ -6077,7 +6077,7 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc) goto out; } - if (pending) { + if (pending & PFLAG_PENDING) { gf_asprintf(&substr, "-pending"); if (!substr) goto out; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index c48c47683c3..402f5ea5888 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1513,7 +1513,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, struct afr_reply *replies, afr_transaction_type type, unsigned char *locked_on, unsigned char *sources, unsigned char *sinks, - uint64_t *witness, gf_boolean_t *pflag) + uint64_t *witness, unsigned char *pflag) { afr_private_t *priv = NULL; int i = 0; @@ -1541,7 +1541,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, for (i = 0; i < priv->child_count; i++) { for (j = 0; j < priv->child_count; j++) if (matrix[i][j]) - *pflag = _gf_true; + *pflag |= PFLAG_PENDING; if (*pflag) break; } @@ -1623,6 +1623,8 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, if (locked_on[i]) sinks[i] = 1; } + if (pflag) + *pflag |= PFLAG_SBRAIN; } /* One more class of witness similar to dirty in v2 is where no pending diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index be4e693b3a4..9b296b9ad23 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -597,7 +597,7 @@ __afr_selfheal_data_prepare(call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *locked_on, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, unsigned char *undid_pending, - struct afr_reply *replies, gf_boolean_t *pflag) + struct afr_reply *replies, unsigned char *pflag) { int ret = -1; int source = -1; diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index bf7a6b9d1e8..619558e94b7 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -487,7 +487,7 @@ __afr_selfheal_entry_prepare(call_frame_t *frame, xlator_t *this, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, struct afr_reply *replies, int *source_p, - gf_boolean_t *pflag) + unsigned char *pflag) { int ret = -1; int source = -1; diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index be6e574b6ca..ea2a7bfd52f 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -310,7 +310,7 @@ __afr_selfheal_metadata_prepare(call_frame_t *frame, xlator_t *this, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, unsigned char *undid_pending, - struct afr_reply *replies, gf_boolean_t *pflag) + struct afr_reply *replies, unsigned char *pflag) { int ret = -1; int source = -1; diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 545e67f774d..9c7418c7169 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -177,7 +177,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, struct afr_reply *replies, afr_transaction_type type, unsigned char *locked_on, unsigned char *sources, unsigned char *sinks, - uint64_t *witness, gf_boolean_t *flag); + uint64_t *witness, unsigned char *flag); int afr_selfheal_fill_matrix(xlator_t *this, int **matrix, int subvol, int idx, dict_t *xdata); @@ -284,7 +284,7 @@ __afr_selfheal_data_prepare(call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *locked_on, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, unsigned char *undid_pending, - struct afr_reply *replies, gf_boolean_t *flag); + struct afr_reply *replies, unsigned char *flag); int __afr_selfheal_metadata_prepare(call_frame_t *frame, xlator_t *this, @@ -292,14 +292,14 @@ __afr_selfheal_metadata_prepare(call_frame_t *frame, xlator_t *this, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, unsigned char *undid_pending, - struct afr_reply *replies, gf_boolean_t *flag); + struct afr_reply *replies, unsigned char *flag); int __afr_selfheal_entry_prepare(call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *locked_on, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, struct afr_reply *replies, int *source_p, - gf_boolean_t *flag); + unsigned char *flag); int afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 6252f91736a..cc4bceef521 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -39,6 +39,10 @@ #define AFR_TA_DOM_MODIFY "afr.ta.dom-modify" #define AFR_HALO_MAX_LATENCY 99999 + +#define PFLAG_PENDING (1 << 0) +#define PFLAG_SBRAIN (1 << 1) + typedef int (*afr_lock_cbk_t)(call_frame_t *frame, xlator_t *this); typedef int (*afr_read_txn_wind_t)(call_frame_t *frame, xlator_t *this, -- cgit