diff options
| author | Pranith Kumar K <pranithk@gluster.com> | 2012-06-27 16:42:35 +0530 | 
|---|---|---|
| committer | Anand Avati <avati@redhat.com> | 2012-07-26 10:14:54 -0700 | 
| commit | f153c835807ac31006ba690b1deb47b20b51bc83 (patch) | |
| tree | 563b61874ec5304116d92f39bb6e1f749b1c6d3f /xlators/cluster/afr/src | |
| parent | c2a7a22bfe18316eab441d49e515726e53f74582 (diff) | |
cluster/afr: Modified split-brain handling
RCA
The bug is observed because the decision to mark
a file in split-brain is taken outside appropriate locks.
Lookup gathers xattrs outside any lock. The xattrs being
in split-brain in lookup should only be taken as a hint.
Appropriate inodelks should be taken before confirming
a split-brain. Self-heal confirms this at the moment.
If data/metadata self-heal is turned off, inspecting of
xattrs could not be performed so split-brain behavior
does not work correctly if the self-heal options are turned off.
Fix
Self-heals are launched to inspect xattrs even when the
data/metadata self-heal options are turned off. The decision
to heal data/metadata after the xattrs are inspected is based
on whether the options are turned on/off. So decision to set/reset
split-brain flag is taken inside appropriate locks.
Testcases:
tests 33-36 in
https://github.com/pranithk/gluster-tests/blob/master/afr/self-heal.sh
Change-Id: Ia8aeab08208b50c06609ad35a9d72f3d553ee343
BUG: 833727
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Reviewed-on: http://review.gluster.com/3626
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
Diffstat (limited to 'xlators/cluster/afr/src')
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 81 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 3 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 63 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 50 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 6 | 
5 files changed, 146 insertions, 57 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index b6e16074364..70a9cd35463 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -908,6 +908,8 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)                  GF_FREE (local->cont.lookup.success_children);                  GF_FREE (local->cont.lookup.sources); +                afr_matrix_cleanup (local->cont.lookup.pending_matrix, +                                    priv->child_count);          }          { /* getxattr */ @@ -1181,6 +1183,36 @@ afr_lookup_set_self_heal_params_by_xattr (afr_local_t *local, xlator_t *this,          }  } +void +afr_lookup_check_set_metadata_split_brain (afr_local_t *local, xlator_t *this) +{ +        int32_t                  *sources = NULL; +        afr_private_t            *priv = NULL; +        int32_t                  subvol_status = 0; +        int32_t                  *success_children   = NULL; +        dict_t                   **xattrs = NULL; +        struct iatt              *bufs = NULL; +        int32_t                  **pending_matrix = NULL; + +        priv = this->private; + +        sources = GF_CALLOC (priv->child_count, sizeof (*sources), +                             gf_afr_mt_int32_t); +        if (NULL == sources) +                goto out; +        success_children = local->cont.lookup.success_children; +        xattrs = local->cont.lookup.xattrs; +        bufs = local->cont.lookup.bufs; +        pending_matrix = local->cont.lookup.pending_matrix; +        afr_build_sources (this, xattrs, bufs, pending_matrix, +                           sources, success_children, AFR_METADATA_TRANSACTION, +                           &subvol_status, _gf_false); +        if (subvol_status & SPLIT_BRAIN) +                local->cont.lookup.possible_spb = _gf_true; +out: +        GF_FREE (sources); +} +  static void  afr_detect_self_heal_by_iatt (afr_local_t *local, xlator_t *this,                              struct iatt *buf, struct iatt *lookup_buf) @@ -1214,8 +1246,26 @@ afr_detect_self_heal_by_iatt (afr_local_t *local, xlator_t *this,  }  static void -afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this, -                                       gf_boolean_t split_brain) +afr_detect_self_heal_by_split_brain_status (afr_local_t *local, xlator_t *this) +{ +        gf_boolean_t split_brain = _gf_false; +        afr_self_heal_t *sh = NULL; + +        sh = &local->self_heal; + +        split_brain = afr_is_split_brain (this, local->cont.lookup.inode); +        split_brain = split_brain || local->cont.lookup.possible_spb; +        if ((local->success_count > 0) && split_brain && +            IA_ISREG (local->cont.lookup.inode->ia_type)) { +                sh->force_confirm_spb = _gf_true; +                gf_log (this->name, GF_LOG_WARNING, +                        "split brain detected during lookup of %s.", +                        local->loc.path); +        } +} + +static void +afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this)  {          GF_ASSERT (local);          GF_ASSERT (this); @@ -1233,15 +1283,6 @@ afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this,                  goto out;          } -        if ((local->success_count > 0) && split_brain && -            IA_ISREG (local->cont.lookup.inode->ia_type)) { -                local->self_heal.do_data_self_heal = _gf_true; -                local->self_heal.do_gfid_self_heal    = _gf_true; -                gf_log (this->name, GF_LOG_WARNING, -                        "split brain detected during lookup of %s.", -                        local->loc.path); -        } -  out:          return;  } @@ -1252,6 +1293,8 @@ afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv)          GF_ASSERT (sh);          GF_ASSERT (priv); +        if (sh->force_confirm_spb) +                return _gf_true;          return (sh->do_gfid_self_heal                  || sh->do_missing_entry_self_heal                  || (afr_data_self_heal_enabled (priv->data_self_heal) && @@ -1516,13 +1559,11 @@ afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)          int32_t                 child1 = -1;          int32_t                 child2 = -1;          afr_self_heal_t         *sh = NULL; -        gf_boolean_t            split_brain = _gf_false;          priv  = this->private;          sh = &local->self_heal; -        split_brain = afr_is_split_brain (this, local->cont.lookup.inode); -        afr_detect_self_heal_by_lookup_status (local, this, split_brain); +        afr_detect_self_heal_by_lookup_status (local, this);          if (afr_lookup_gfid_missing_count (local, this))                  local->self_heal.do_gfid_self_heal    = _gf_true; @@ -1549,9 +1590,11 @@ afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)                  afr_lookup_set_self_heal_params_by_xattr (local, this,                                                            xattr[child1]);          } -        if (afr_open_only_data_self_heal (priv->data_self_heal) -            && !split_brain) +        if (afr_open_only_data_self_heal (priv->data_self_heal))                  sh->do_data_self_heal = _gf_false; +        if (sh->do_metadata_self_heal) +                afr_lookup_check_set_metadata_split_brain (local, this); +        afr_detect_self_heal_by_split_brain_status (local, this);  }  int @@ -2143,6 +2186,7 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)          struct iatt       *iatts         = NULL;          int32_t           *success_children = NULL;          int32_t           *sources       = NULL; +        int32_t           **pending_matrix = NULL;          GF_ASSERT (local);          local->cont.lookup.xattrs = GF_CALLOC (child_count, @@ -2175,6 +2219,11 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)                  goto out;          local->cont.lookup.sources = sources; +        pending_matrix = afr_matrix_create (child_count, child_count); +        if (NULL == pending_matrix) +                goto out; +        local->cont.lookup.pending_matrix = pending_matrix; +          ret = 0;  out:          return ret; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index f082685e207..48fa31d8689 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -2095,6 +2095,7 @@ afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)          shc->do_data_self_heal = sh->do_data_self_heal;          shc->do_metadata_self_heal = sh->do_metadata_self_heal;          shc->do_entry_self_heal = sh->do_entry_self_heal; +        shc->force_confirm_spb = sh->force_confirm_spb;          shc->forced_merge = sh->forced_merge;          shc->background = sh->background;          shc->type = sh->type; @@ -2162,7 +2163,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)          local = bgsh_frame->local;          sh    = &local->self_heal; -        if (local->govinda_gOvinda) +        if (local->govinda_gOvinda || sh->mdata_spb || sh->data_spb)                  split_brain = _gf_true;          afr_set_split_brain (this, sh->inode, split_brain); diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 1672b4f282e..6619c1353c7 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -688,16 +688,6 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)          nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix,                                        sh->sources, sh->success_children,                                        AFR_DATA_TRANSACTION, NULL, _gf_true); -        if ((nsources == 0) && !sh->sync_done) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "No self-heal needed for %s", -                        local->loc.path); - -                local->govinda_gOvinda = 0; -                afr_sh_data_finish (frame, this); -                return 0; -        } -          if ((nsources == -1)              && (priv->favorite_child != -1)              && (sh->child_errno[priv->favorite_child] == 0)) { @@ -720,12 +710,12 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)                          "split-brain). Please delete the file from all but "                          "the preferred subvolume.", local->loc.path); -                local->govinda_gOvinda = 1; +                sh->data_spb = _gf_true;                  afr_sh_data_fail (frame, this);                  return 0;          } -        local->govinda_gOvinda = 0; +        sh->data_spb = _gf_false;          ret = afr_sh_inode_set_read_ctx (sh, this);          if (ret) {                  gf_log (this->name, GF_LOG_DEBUG, @@ -738,7 +728,20 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)          if (sh->sync_done) {                  afr_sh_data_setattr (frame, this);          } else { -                afr_sh_data_fix (frame, this); +                if (nsources == 0) { +                        gf_log (this->name, GF_LOG_DEBUG, +                                "No self-heal needed for %s", +                                local->loc.path); + +                        afr_sh_data_finish (frame, this); +                        return 0; +                } + +                if (sh->do_data_self_heal && +                    afr_data_self_heal_enabled (priv->data_self_heal)) +                        afr_sh_data_fix (frame, this); +                else +                        afr_sh_data_finish (frame, this);          }          return 0;  } @@ -764,11 +767,7 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,          bufs = local->cont.lookup.bufs;          success_children = local->cont.lookup.success_children; -        pending_matrix = afr_matrix_create (priv->child_count, -                                            priv->child_count); -        if (NULL == pending_matrix) -                goto out; - +        pending_matrix = local->cont.lookup.pending_matrix;          sources = local->cont.lookup.sources;          memset (sources, 0, sizeof (*sources) * priv->child_count); @@ -780,9 +779,7 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,                          local->loc.path);                  switch (txn_type) {                  case AFR_DATA_TRANSACTION: -                        afr_set_split_brain (this, -                                             local->cont.lookup.inode, -                                             _gf_true); +                        local->cont.lookup.possible_spb = _gf_true;                          nsources = 1;                          sources[success_children[0]] = 1;                          break; @@ -809,7 +806,6 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,                                                          sources,                                                          priv->hash_mode, gfid);  out: -        afr_matrix_cleanup (pending_matrix, priv->child_count);          gf_log (this->name, GF_LOG_DEBUG, "returning read_child: %d",                  read_child);          return read_child; @@ -1351,6 +1347,17 @@ afr_sh_non_reg_lock_success (call_frame_t *frame, xlator_t *this)          return 0;  } +gf_boolean_t +afr_can_start_data_self_heal (afr_self_heal_t *sh, afr_private_t *priv) +{ +        if (sh->force_confirm_spb) +                return _gf_true; +        if (sh->do_data_self_heal && +            afr_data_self_heal_enabled (priv->data_self_heal)) +                return _gf_true; +        return _gf_false; +} +  int  afr_self_heal_data (call_frame_t *frame, xlator_t *this)  { @@ -1361,10 +1368,14 @@ afr_self_heal_data (call_frame_t *frame, xlator_t *this)          local = frame->local;          sh = &local->self_heal; -        local->govinda_gOvinda = afr_is_split_brain (this, sh->inode); - -        if (sh->do_data_self_heal && -            afr_data_self_heal_enabled (priv->data_self_heal)) { +        /* Self-heal completion cbk changes inode split-brain status based on +         * govinda_gOvinda, mdata_spb, data_spb value. +         * Initialize data_spb with current split-brain status. +         * If for some reason self-heal fails(locking phase etc), it makes sure +         * we retain the split-brain status before this self-heal started. +         */ +        sh->data_spb = afr_is_split_brain (this, sh->inode); +        if (afr_can_start_data_self_heal (sh, priv)) {                  if (IA_ISREG (sh->type)) {                          afr_sh_data_open (frame, this);                  } else { diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 147ab98d409..79ce07d0039 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -50,7 +50,7 @@ afr_sh_metadata_done (call_frame_t *frame, xlator_t *this)          sh = &local->self_heal;          afr_sh_reset (frame, this); -        if (local->govinda_gOvinda) { +        if (sh->mdata_spb) {                  gf_log (this->name, GF_LOG_INFO,                          "split-brain detected, aborting selfheal of %s",                          local->loc.path); @@ -383,15 +383,6 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,                                        sh->pending_matrix, sh->sources,                                        sh->success_children,                                        AFR_METADATA_TRANSACTION, NULL, _gf_false); -        if (nsources == 0) { -                gf_log (this->name, GF_LOG_TRACE, -                        "No self-heal needed for %s", -                        local->loc.path); - -                afr_sh_metadata_finish (frame, this); -                goto out; -        } -          if ((nsources == -1)              && (priv->favorite_child != -1)              && (sh->child_errno[priv->favorite_child] == 0)) { @@ -413,7 +404,17 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,                          "(possible split-brain). Please fix the file on "                          "all backend volumes", local->loc.path); -                local->govinda_gOvinda = 1; +                sh->mdata_spb = _gf_true; + +                afr_sh_metadata_finish (frame, this); +                goto out; +        } + +        sh->mdata_spb = _gf_false; +        if (nsources == 0) { +                gf_log (this->name, GF_LOG_TRACE, +                        "No self-heal needed for %s", +                        local->loc.path);                  afr_sh_metadata_finish (frame, this);                  goto out; @@ -452,7 +453,10 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,                                          sh->fresh_children);          } -        afr_sh_metadata_sync_prepare (frame, this); +        if (sh->do_metadata_self_heal && priv->metadata_self_heal) +                afr_sh_metadata_sync_prepare (frame, this); +        else +                afr_sh_metadata_finish (frame, this);  out:          return;  } @@ -512,17 +516,35 @@ afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)          return 0;  } +gf_boolean_t +afr_can_start_metadata_self_heal (afr_self_heal_t *sh, afr_private_t *priv) +{ +        if (sh->force_confirm_spb) +                return _gf_true; +        if (sh->do_metadata_self_heal && priv->metadata_self_heal) +                return _gf_true; +        return _gf_false; +}  int  afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)  {          afr_local_t   *local = NULL;          afr_private_t *priv = this->private; - +        afr_self_heal_t *sh = &local->self_heal;          local = frame->local; +        sh = &local->self_heal; + +        /* Self-heal completion cbk changes inode split-brain status based on +         * govinda_gOvinda, mdata_spb, data_spb value. +         * Initialize mdata_spb with current split-brain status. +         * If for some reason self-heal fails(locking phase etc), it makes sure +         * we retain the split-brain status before this self-heal started. +         */ +        sh->mdata_spb = afr_is_split_brain (this, sh->inode); -        if (local->self_heal.do_metadata_self_heal && priv->metadata_self_heal) { +        if (afr_can_start_metadata_self_heal (sh, priv)) {                  afr_sh_metadata_lock (frame, this);          } else {                  afr_sh_metadata_done (frame, this); diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index abb95533449..5ed478c4a2f 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -170,6 +170,8 @@ typedef struct {          gf_boolean_t do_entry_self_heal;          gf_boolean_t do_gfid_self_heal;          gf_boolean_t do_missing_entry_self_heal; +        gf_boolean_t force_confirm_spb; /* Check for split-brains even when +                                           self-heal is turned off */          gf_boolean_t forced_merge;        /* Is this a self-heal triggered to                                               forcibly merge the directories? */ @@ -266,6 +268,8 @@ typedef struct {          int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this);          int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this);          void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this); +        gf_boolean_t    mdata_spb; +        gf_boolean_t    data_spb;          call_frame_t *sh_frame;  } afr_self_heal_t; @@ -444,7 +448,9 @@ typedef struct _afr_local {                          int32_t read_child;                          int32_t *sources;                          int32_t *success_children; +                        int32_t **pending_matrix;                          gf_boolean_t fresh_lookup; +                        gf_boolean_t possible_spb;                  } lookup;                  struct {  | 
