diff options
| author | Ravishankar N <ravishankar@redhat.com> | 2016-12-09 07:14:17 +0000 | 
|---|---|---|
| committer | Niels de Vos <ndevos@redhat.com> | 2017-01-08 02:48:49 -0800 | 
| commit | c539e23023abe743770287439ebe81989a732728 (patch) | |
| tree | 97db92a2ae4e6110871927b478a9cfbdd6899503 /xlators/cluster/afr/src/afr-common.c | |
| parent | cb8bc3396d16e777d9a2683886fefd43e747e8a3 (diff) | |
afr: allow I/O when favorite-child-policy is enabled
Problem:
Currently, I/O on a split-brained file fails until the self-heal is
complete, even when the favorite-child-policy is set.
Fix:
If a valid 'source' is found using the set favorite-child-policy, inspect
and reset the afr pending xattrs on the 'sinks' (inside appropriate locks),
refresh the inode and then proceed with the read or write transaction.
The resetting itself happens in the self-heal code and hence can also
happen in the client-side background-heal or in the shd's index-heal, in
addition to the txn code path explained above. When it happens via
heal, we also add checks in undo-pending to not reset the sink xattrs
again.
> Reviewed-on: http://review.gluster.org/15673
> Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
> Smoke: Gluster Build System <jenkins@build.gluster.org>
> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Change-Id: Ic8c1317720cb26bd114b6fe6af4e58c73b864626
BUG: 1378547
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reported-by: Simon Turcotte-Langevin <simon.turcotte-langevin@ubisoft.com>
Reviewed-on: http://review.gluster.org/16091
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Niels de Vos <ndevos@redhat.com>
Diffstat (limited to 'xlators/cluster/afr/src/afr-common.c')
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 205 | 
1 files changed, 202 insertions, 3 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index f4d1d63c3d5..2d21f2e426c 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -972,6 +972,13 @@ ret:  	return -err;  } +int +afr_fav_child_reset_sink_xattrs (void *opaque); + +int +afr_fav_child_reset_sink_xattrs_cbk (int ret, call_frame_t *frame, +                                     void *opaque); +  gf_boolean_t  afr_selfheal_enabled (xlator_t *this)  { @@ -987,6 +994,82 @@ afr_selfheal_enabled (xlator_t *this)  	return data || priv->metadata_self_heal || priv->entry_self_heal;  } + +int +afr_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) +{ + +        call_frame_t *heal_frame = NULL; +        afr_local_t *heal_local = NULL; +        afr_local_t *local = NULL; +        afr_private_t *priv = NULL; +        inode_t *inode = NULL; +        int event_generation = 0; +        int read_subvol = -1; +        int op_errno = ENOMEM; +        int ret = 0; + +        local = frame->local; +        inode = local->inode; +        priv = this->private; + +        if (err) +                goto refresh_done; + +        if (local->op == GF_FOP_LOOKUP) +                goto refresh_done; + +        ret = afr_inode_get_readable (frame, inode, this, local->readable, +                                      &event_generation, +                                      local->transaction.type); + +        if (ret == -EIO || !event_generation) { +                /* No readable subvolume even after refresh ==> splitbrain.*/ +                if (!priv->fav_child_policy) { +                        err = -EIO; +                        goto refresh_done; +                } +                read_subvol = afr_sh_get_fav_by_policy (this, local->replies, +                                                        inode, NULL); +                if (read_subvol == -1) { +                        err = -EIO; +                        goto refresh_done; + 
               } + +                heal_frame = copy_frame (frame); +                if (!heal_frame) { +                        err = -EIO; +                        goto refresh_done; +                } +                heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD; +                heal_local = AFR_FRAME_INIT (heal_frame, op_errno); +                if (!heal_local) { +                        err = -EIO; +                        AFR_STACK_DESTROY (heal_frame); +                        goto refresh_done; +                } +                heal_local->xdata_req = dict_new(); +                if (!heal_local->xdata_req) { +                        err = -EIO; +                        AFR_STACK_DESTROY (heal_frame); +                        goto refresh_done; +                } +                heal_local->heal_frame = frame; +                ret = synctask_new (this->ctx->env, +                                    afr_fav_child_reset_sink_xattrs, +                                    afr_fav_child_reset_sink_xattrs_cbk, +                                    heal_frame, +                                    heal_frame); +                return 0; +        } + +refresh_done: +        afr_local_replies_wipe (local, this->private); +        local->refreshfn (frame, this, err); + +        return 0; +} +  int  afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)  { @@ -1005,8 +1088,6 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)  	err = afr_inode_refresh_err (frame, this); -        afr_local_replies_wipe (local, this->private); -  	if (ret && afr_selfheal_enabled (this) && start_heal) {                  heal_frame = copy_frame (frame);                  if (!heal_frame) @@ -1026,7 +1107,7 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)          }  refresh_done: -        local->refreshfn (frame, this, err); +        afr_txn_refresh_done (frame, this, err);  	return 0;  } @@ -4775,6 +4856,7 @@ afr_selfheal_locked_metadata_inspect 
(call_frame_t *frame, xlator_t *this,          unsigned char *sources = NULL;          unsigned char *sinks = NULL;          unsigned char *healed_sinks = NULL; +        unsigned char *undid_pending = NULL;          struct afr_reply *locked_replies = NULL;          afr_private_t *priv = this->private; @@ -4783,6 +4865,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,          sources = alloca0 (priv->child_count);          sinks = alloca0 (priv->child_count);          healed_sinks = alloca0 (priv->child_count); +        undid_pending = alloca0 (priv->child_count);          locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); @@ -4799,6 +4882,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,                  ret = __afr_selfheal_metadata_prepare (frame, this, inode,                                                         locked_on, sources,                                                         sinks, healed_sinks, +                                                       undid_pending,                                                         locked_replies,                                                         pending);                  *msh = afr_decide_heal_info (priv, sources, ret); @@ -4822,6 +4906,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,          unsigned char *sources = NULL;          unsigned char *sinks = NULL;          unsigned char *healed_sinks = NULL; +        unsigned char *undid_pending = NULL;          afr_private_t   *priv = NULL;          fd_t          *fd = NULL;          struct afr_reply *locked_replies = NULL; @@ -4835,6 +4920,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,          sources = alloca0 (priv->child_count);          sinks = alloca0 (priv->child_count);          healed_sinks = alloca0 (priv->child_count); +        undid_pending = alloca0 (priv->child_count);          /* Heal-info does an 
open() on the file being examined so that the           * current eager-lock holding client, if present, at some point sees @@ -4874,6 +4960,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,                          ret = __afr_selfheal_data_prepare (frame, this, inode,                                                             data_lock, sources,                                                             sinks, healed_sinks, +                                                           undid_pending,                                                             locked_replies,                                                             pflag);                          *dsh = afr_decide_heal_info (priv, sources, ret); @@ -5409,3 +5496,115 @@ afr_get_msg_id (char *op_type)                  return AFR_MSG_ADD_BRICK_STATUS;          return -1;  } + +int +afr_fav_child_reset_sink_xattrs_cbk (int ret, call_frame_t *heal_frame, +                                     void *opaque) +{ + +        call_frame_t *txn_frame = NULL; +        afr_local_t *local = NULL; +        afr_local_t *heal_local = NULL; +        xlator_t *this = NULL; + +        heal_local = heal_frame->local; +        txn_frame = heal_local->heal_frame; +        local = txn_frame->local; +        this = txn_frame->this; + +        /* Refresh the inode agan and proceed with the transaction.*/ +        afr_inode_refresh (txn_frame, this, local->inode, NULL, +                           local->refreshfn); + +        if (heal_frame) +                AFR_STACK_DESTROY (heal_frame); + +        return 0; +} + +int +afr_fav_child_reset_sink_xattrs (void *opaque) +{ +        call_frame_t *heal_frame = NULL; +        call_frame_t *txn_frame = NULL; +        xlator_t *this = NULL; +        gf_boolean_t d_spb = _gf_false; +        gf_boolean_t m_spb = _gf_false; +        afr_local_t *heal_local = NULL; +        afr_local_t *txn_local = NULL; +        afr_private_t *priv = NULL; +        inode_t 
*inode  = NULL; +        unsigned char *locked_on = NULL; +        unsigned char *sources = NULL; +        unsigned char *sinks = NULL; +        unsigned char *healed_sinks = NULL; +        unsigned char *undid_pending = NULL; +        struct afr_reply *locked_replies = NULL; +        int ret = 0; + +        heal_frame = (call_frame_t *) opaque; +        heal_local = heal_frame->local; +        txn_frame = heal_local->heal_frame; +        txn_local = txn_frame->local; +        this = txn_frame->this; +        inode = txn_local->inode; +        priv = this->private; +        locked_on = alloca0 (priv->child_count); +        sources = alloca0 (priv->child_count); +        sinks = alloca0 (priv->child_count); +        healed_sinks = alloca0 (priv->child_count); +        undid_pending = alloca0 (priv->child_count); +        locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); + +        ret = _afr_is_split_brain (txn_frame, this, txn_local->replies, +                                   AFR_DATA_TRANSACTION, &d_spb); + +        ret = _afr_is_split_brain (txn_frame, this, txn_local->replies, +                                   AFR_METADATA_TRANSACTION, &m_spb); + +        /* Take appropriate locks and reset sink xattrs. 
*/ +        if (d_spb) { +                ret = afr_selfheal_inodelk (heal_frame, this, inode, this->name, +                                            0, 0, locked_on); +                { +                        if (ret < AFR_SH_MIN_PARTICIPANTS) +                                goto data_unlock; +                        ret = __afr_selfheal_data_prepare (heal_frame, this, +                                                           inode, locked_on, +                                                           sources, sinks, +                                                           healed_sinks, +                                                           undid_pending, +                                                           locked_replies, +                                                           NULL); +                } +data_unlock: +                afr_selfheal_uninodelk (heal_frame, this, inode, this->name, +                                        0, 0, locked_on); +        } + +        if (m_spb) { +                memset (locked_on, 0, sizeof (*locked_on) * priv->child_count); +                memset (undid_pending, 0, +                        sizeof (*undid_pending) * priv->child_count); +                ret = afr_selfheal_inodelk (heal_frame, this, inode, this->name, +                                            LLONG_MAX-1, 0, locked_on); +                { +                        if (ret < AFR_SH_MIN_PARTICIPANTS) +                                goto mdata_unlock; +                        ret = __afr_selfheal_metadata_prepare (heal_frame, this, +                                                               inode, locked_on, +                                                               sources, sinks, +                                                               healed_sinks, +                                                               undid_pending, +                                                               locked_replies, +        
                                                       NULL); + +                } +mdata_unlock: +                afr_selfheal_uninodelk (heal_frame, this, inode, this->name, +                                        LLONG_MAX-1, 0, locked_on); +        } + +        return ret; + +}  | 
