diff options
| author | Ravishankar N <ravishankar@redhat.com> | 2018-09-20 10:14:26 +0530 | 
|---|---|---|
| committer | Shyamsundar Ranganathan <srangana@redhat.com> | 2018-09-21 16:35:26 +0000 | 
| commit | 9d8f5a34c1ff4ae512f80fe9651508ae201c273e (patch) | |
| tree | 32c54faf293bc707280820a11c8bd8c7cbfe5a9f | |
| parent | d901308a06c38fa53a3b07ed1641522a3248378d (diff) | |
afr: fix incorrect reporting of directory split-brain
Problem:
When a directory has dirty xattrs due to failed post-ops or when
replace/reset brick is performed, AFR does a conservative merge as
expected, but heal-info reports it as split-brain because there are no
clear sources.
Fix:
Modify pending flag to contain information about pending heals and
split-brains. For directories, if spit-brain flag is not set,just show
them as needing heal and not being in split-brain.
Fixes: bz#1626994
Change-Id: I09ef821f6887c87d315ae99e6b1de05103cd9383
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
| -rw-r--r-- | tests/afr.rc | 2 | ||||
| -rw-r--r-- | tests/bugs/replicate/bug-1626994-info-split-brain.t | 62 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 14 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 6 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 8 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 4 | 
9 files changed, 85 insertions, 17 deletions
diff --git a/tests/afr.rc b/tests/afr.rc index f253e77561e..35f352df78f 100644 --- a/tests/afr.rc +++ b/tests/afr.rc @@ -2,7 +2,7 @@  function create_brick_xattrop_entry {          local xattrop_dir=$(afr_get_index_path $1) -        local base_entry=`ls $xattrop_dir` +        local base_entry=`ls $xattrop_dir|grep xattrop`          local gfid_str          local params=`echo "$@" | cut -d' ' -f2-`          echo $params diff --git a/tests/bugs/replicate/bug-1626994-info-split-brain.t b/tests/bugs/replicate/bug-1626994-info-split-brain.t new file mode 100644 index 00000000000..86bfecb1a9e --- /dev/null +++ b/tests/bugs/replicate/bug-1626994-info-split-brain.t @@ -0,0 +1,62 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +# Test to check dirs having dirty xattr do not show up in info split-brain. + +TEST glusterd; +TEST pidof glusterd; +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2 + +# Create base entry in indices/xattrop +echo "Data" > $M0/FILE +rm -f $M0/FILE +EXPECT "1" count_index_entries $B0/${V0}0 +EXPECT "1" count_index_entries $B0/${V0}1 +EXPECT "1" count_index_entries $B0/${V0}2 + +TEST mkdir $M0/dirty_dir +TEST mkdir $M0/pending_dir + +# Set dirty xattrs on all bricks to simulate the case where entry transaction +# succeeded only the pre-op phase. +TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}0/dirty_dir +TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/dirty_dir +TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}2/dirty_dir +create_brick_xattrop_entry $B0/${V0}0  dirty_dir +# Should not show up as split-brain. +EXPECT "0" afr_get_split_brain_count $V0 + +# replace/reset brick case where the new brick has dirty and the other 2 bricks +# blame it should not be reported as split-brain. +TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}0 +TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}1 +TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}2 +create_brick_xattrop_entry $B0/${V0}0 "/" +# Should not show up as split-brain. +EXPECT "0" afr_get_split_brain_count $V0 + +# Set pending xattrs on all bricks blaming each other to simulate the case of +# entry split-brain. +TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/pending_dir +TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}1/pending_dir +TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/pending_dir +create_brick_xattrop_entry $B0/${V0}0 pending_dir +# Should show up as split-brain. +EXPECT "1" afr_get_split_brain_count $V0 + +cleanup; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 73f1d728809..a0ff6acd9dd 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -5814,7 +5814,7 @@ out:  int  afr_selfheal_locked_metadata_inspect(call_frame_t *frame, xlator_t *this,                                       inode_t *inode, gf_boolean_t *msh, -                                     gf_boolean_t *pending) +                                     unsigned char *pending)  {      int ret = -1;      unsigned char *locked_on = NULL; @@ -5859,7 +5859,7 @@ out:  int  afr_selfheal_locked_data_inspect(call_frame_t *frame, xlator_t *this, fd_t *fd, -                                 gf_boolean_t *dsh, gf_boolean_t *pflag) +                                 gf_boolean_t *dsh, unsigned char *pflag)  {      int ret = -1;      unsigned char *data_lock = NULL; @@ -5903,7 +5903,7 @@ out:  int  afr_selfheal_locked_entry_inspect(call_frame_t *frame, xlator_t *this,                                    inode_t *inode, gf_boolean_t *esh, -                                  gf_boolean_t *pflag) +                                  unsigned char *pflag)  {      int ret = -1;      int source = -1; @@ -5952,7 +5952,7 @@ afr_selfheal_locked_entry_inspect(call_frame_t *frame, xlator_t *this,              ret = __afr_selfheal_entry_prepare(frame, this, inode, data_lock,                                                 sources, sinks, healed_sinks,                                                 locked_replies, &source, pflag); -            if ((ret == 0) && source < 0) +            if ((ret == 0) && (*pflag & PFLAG_SBRAIN))                  ret = -EIO;              *esh = afr_decide_heal_info(priv, sources, ret);          } @@ -5974,7 +5974,7 @@ afr_selfheal_locked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,                              inode_t **inode, gf_boolean_t *entry_selfheal,                              gf_boolean_t *data_selfheal,                              gf_boolean_t *metadata_selfheal, -                            gf_boolean_t *pending) +                            unsigned char *pending)  {      int ret = -1; @@ -6059,7 +6059,7 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)      gf_boolean_t data_selfheal = _gf_false;      gf_boolean_t metadata_selfheal = _gf_false;      gf_boolean_t entry_selfheal = _gf_false; -    gf_boolean_t pending = _gf_false; +    unsigned char pending = 0;      dict_t *dict = NULL;      int ret = -1;      int op_errno = 0; @@ -6077,7 +6077,7 @@ afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)          goto out;      } -    if (pending) { +    if (pending & PFLAG_PENDING) {          gf_asprintf(&substr, "-pending");          if (!substr)              goto out; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index c48c47683c3..402f5ea5888 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1513,7 +1513,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,                              struct afr_reply *replies,                              afr_transaction_type type, unsigned char *locked_on,                              unsigned char *sources, unsigned char *sinks, -                            uint64_t *witness, gf_boolean_t *pflag) +                            uint64_t *witness, unsigned char *pflag)  {      afr_private_t *priv = NULL;      int i = 0; @@ -1541,7 +1541,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,          for (i = 0; i < priv->child_count; i++) {              for (j = 0; j < priv->child_count; j++)                  if (matrix[i][j]) -                    *pflag = _gf_true; +                    *pflag |= PFLAG_PENDING;              if (*pflag)                  break;          } @@ -1623,6 +1623,8 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,              if (locked_on[i])                  sinks[i] = 1;          } +        if (pflag) +            *pflag |= PFLAG_SBRAIN;      }      /* One more class of witness similar to dirty in v2 is where no pending diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index a477fae8039..a3650a3b22d 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -597,7 +597,7 @@ __afr_selfheal_data_prepare(call_frame_t *frame, xlator_t *this, inode_t *inode,                              unsigned char *locked_on, unsigned char *sources,                              unsigned char *sinks, unsigned char *healed_sinks,                              unsigned char *undid_pending, -                            struct afr_reply *replies, gf_boolean_t *pflag) +                            struct afr_reply *replies, unsigned char *pflag)  {      int ret = -1;      int source = -1; diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index bf7a6b9d1e8..619558e94b7 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -487,7 +487,7 @@ __afr_selfheal_entry_prepare(call_frame_t *frame, xlator_t *this,                               unsigned char *sources, unsigned char *sinks,                               unsigned char *healed_sinks,                               struct afr_reply *replies, int *source_p, -                             gf_boolean_t *pflag) +                             unsigned char *pflag)  {      int ret = -1;      int source = -1; diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index be6e574b6ca..ea2a7bfd52f 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -310,7 +310,7 @@ __afr_selfheal_metadata_prepare(call_frame_t *frame, xlator_t *this,                                  unsigned char *sources, unsigned char *sinks,                                  unsigned char *healed_sinks,                                  unsigned char *undid_pending, -                                struct afr_reply *replies, gf_boolean_t *pflag) +                                struct afr_reply *replies, unsigned char *pflag)  {      int ret = -1;      int source = -1; diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 545e67f774d..9c7418c7169 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -177,7 +177,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,                              struct afr_reply *replies,                              afr_transaction_type type, unsigned char *locked_on,                              unsigned char *sources, unsigned char *sinks, -                            uint64_t *witness, gf_boolean_t *flag); +                            uint64_t *witness, unsigned char *flag);  int  afr_selfheal_fill_matrix(xlator_t *this, int **matrix, int subvol, int idx,                           dict_t *xdata); @@ -284,7 +284,7 @@ __afr_selfheal_data_prepare(call_frame_t *frame, xlator_t *this, inode_t *inode,                              unsigned char *locked_on, unsigned char *sources,                              unsigned char *sinks, unsigned char *healed_sinks,                              unsigned char *undid_pending, -                            struct afr_reply *replies, gf_boolean_t *flag); +                            struct afr_reply *replies, unsigned char *flag);  int  __afr_selfheal_metadata_prepare(call_frame_t *frame, xlator_t *this, @@ -292,14 +292,14 @@ __afr_selfheal_metadata_prepare(call_frame_t *frame, xlator_t *this,                                  unsigned char *sources, unsigned char *sinks,                                  unsigned char *healed_sinks,                                  unsigned char *undid_pending, -                                struct afr_reply *replies, gf_boolean_t *flag); +                                struct afr_reply *replies, unsigned char *flag);  int  __afr_selfheal_entry_prepare(call_frame_t *frame, xlator_t *this,                               inode_t *inode, unsigned char *locked_on,                               unsigned char *sources, unsigned char *sinks,                               unsigned char *healed_sinks,                               struct afr_reply *replies, int *source_p, -                             gf_boolean_t *flag); +                             unsigned char *flag);  int  afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid, diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 6252f91736a..cc4bceef521 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -39,6 +39,10 @@  #define AFR_TA_DOM_MODIFY "afr.ta.dom-modify"  #define AFR_HALO_MAX_LATENCY 99999 + +#define PFLAG_PENDING (1 << 0) +#define PFLAG_SBRAIN (1 << 1) +  typedef int (*afr_lock_cbk_t)(call_frame_t *frame, xlator_t *this);  typedef int (*afr_read_txn_wind_t)(call_frame_t *frame, xlator_t *this,  | 
