summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src/afr-common.c
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2016-12-09 07:14:17 +0000
committerNiels de Vos <ndevos@redhat.com>2017-01-08 02:48:49 -0800
commitc539e23023abe743770287439ebe81989a732728 (patch)
tree97db92a2ae4e6110871927b478a9cfbdd6899503 /xlators/cluster/afr/src/afr-common.c
parentcb8bc3396d16e777d9a2683886fefd43e747e8a3 (diff)
afr: allow I/O when favorite-child-policy is enabled
Problem: Currently, I/O on a split-brained file fails even when the favorite-child-policy is set until the self-heal is complete. Fix: If a valid 'source' is found using the set favorite-child-policy, inspect and reset the afr pending xattrs on the 'sinks' (inside appropriate locks), refresh the inode and then proceed with the read or write transaction. The resetting itself happens in the self-heal code and hence can also happen in the client side background-heal or by the shd's index-heal in addition to the txn code path explained above. When it happens in via heal, we also add checks in undo-pending to not reset the sink xattrs again. > Reviewed-on: http://review.gluster.org/15673 > Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com> > Smoke: Gluster Build System <jenkins@build.gluster.org> > Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> > NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> > CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Change-Id: Ic8c1317720cb26bd114b6fe6af4e58c73b864626 BUG: 1378547 Signed-off-by: Ravishankar N <ravishankar@redhat.com> Reported-by: Simon Turcotte-Langevin <simon.turcotte-langevin@ubisoft.com> Reviewed-on: http://review.gluster.org/16091 NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Smoke: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Niels de Vos <ndevos@redhat.com>
Diffstat (limited to 'xlators/cluster/afr/src/afr-common.c')
-rw-r--r--xlators/cluster/afr/src/afr-common.c205
1 files changed, 202 insertions, 3 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index f4d1d63c3d5..2d21f2e426c 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -972,6 +972,13 @@ ret:
return -err;
}
+int
+afr_fav_child_reset_sink_xattrs (void *opaque);
+
+int
+afr_fav_child_reset_sink_xattrs_cbk (int ret, call_frame_t *frame,
+ void *opaque);
+
gf_boolean_t
afr_selfheal_enabled (xlator_t *this)
{
@@ -987,6 +994,82 @@ afr_selfheal_enabled (xlator_t *this)
return data || priv->metadata_self_heal || priv->entry_self_heal;
}
+
+int
+afr_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
+{
+
+ call_frame_t *heal_frame = NULL;
+ afr_local_t *heal_local = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
+ int event_generation = 0;
+ int read_subvol = -1;
+ int op_errno = ENOMEM;
+ int ret = 0;
+
+ local = frame->local;
+ inode = local->inode;
+ priv = this->private;
+
+ if (err)
+ goto refresh_done;
+
+ if (local->op == GF_FOP_LOOKUP)
+ goto refresh_done;
+
+ ret = afr_inode_get_readable (frame, inode, this, local->readable,
+ &event_generation,
+ local->transaction.type);
+
+ if (ret == -EIO || !event_generation) {
+ /* No readable subvolume even after refresh ==> splitbrain.*/
+ if (!priv->fav_child_policy) {
+ err = -EIO;
+ goto refresh_done;
+ }
+ read_subvol = afr_sh_get_fav_by_policy (this, local->replies,
+ inode, NULL);
+ if (read_subvol == -1) {
+ err = -EIO;
+ goto refresh_done;
+ }
+
+ heal_frame = copy_frame (frame);
+ if (!heal_frame) {
+ err = -EIO;
+ goto refresh_done;
+ }
+ heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
+ heal_local = AFR_FRAME_INIT (heal_frame, op_errno);
+ if (!heal_local) {
+ err = -EIO;
+ AFR_STACK_DESTROY (heal_frame);
+ goto refresh_done;
+ }
+ heal_local->xdata_req = dict_new();
+ if (!heal_local->xdata_req) {
+ err = -EIO;
+ AFR_STACK_DESTROY (heal_frame);
+ goto refresh_done;
+ }
+ heal_local->heal_frame = frame;
+ ret = synctask_new (this->ctx->env,
+ afr_fav_child_reset_sink_xattrs,
+ afr_fav_child_reset_sink_xattrs_cbk,
+ heal_frame,
+ heal_frame);
+ return 0;
+ }
+
+refresh_done:
+ afr_local_replies_wipe (local, this->private);
+ local->refreshfn (frame, this, err);
+
+ return 0;
+}
+
int
afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
{
@@ -1005,8 +1088,6 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
err = afr_inode_refresh_err (frame, this);
- afr_local_replies_wipe (local, this->private);
-
if (ret && afr_selfheal_enabled (this) && start_heal) {
heal_frame = copy_frame (frame);
if (!heal_frame)
@@ -1026,7 +1107,7 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
}
refresh_done:
- local->refreshfn (frame, this, err);
+ afr_txn_refresh_done (frame, this, err);
return 0;
}
@@ -4775,6 +4856,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,
unsigned char *sources = NULL;
unsigned char *sinks = NULL;
unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
struct afr_reply *locked_replies = NULL;
afr_private_t *priv = this->private;
@@ -4783,6 +4865,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,
sources = alloca0 (priv->child_count);
sinks = alloca0 (priv->child_count);
healed_sinks = alloca0 (priv->child_count);
+ undid_pending = alloca0 (priv->child_count);
locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
@@ -4799,6 +4882,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this,
ret = __afr_selfheal_metadata_prepare (frame, this, inode,
locked_on, sources,
sinks, healed_sinks,
+ undid_pending,
locked_replies,
pending);
*msh = afr_decide_heal_info (priv, sources, ret);
@@ -4822,6 +4906,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
unsigned char *sources = NULL;
unsigned char *sinks = NULL;
unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
afr_private_t *priv = NULL;
fd_t *fd = NULL;
struct afr_reply *locked_replies = NULL;
@@ -4835,6 +4920,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
sources = alloca0 (priv->child_count);
sinks = alloca0 (priv->child_count);
healed_sinks = alloca0 (priv->child_count);
+ undid_pending = alloca0 (priv->child_count);
/* Heal-info does an open() on the file being examined so that the
* current eager-lock holding client, if present, at some point sees
@@ -4874,6 +4960,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this,
ret = __afr_selfheal_data_prepare (frame, this, inode,
data_lock, sources,
sinks, healed_sinks,
+ undid_pending,
locked_replies,
pflag);
*dsh = afr_decide_heal_info (priv, sources, ret);
@@ -5409,3 +5496,115 @@ afr_get_msg_id (char *op_type)
return AFR_MSG_ADD_BRICK_STATUS;
return -1;
}
+
+int
+afr_fav_child_reset_sink_xattrs_cbk (int ret, call_frame_t *heal_frame,
+ void *opaque)
+{
+
+ call_frame_t *txn_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *heal_local = NULL;
+ xlator_t *this = NULL;
+
+ heal_local = heal_frame->local;
+ txn_frame = heal_local->heal_frame;
+ local = txn_frame->local;
+ this = txn_frame->this;
+
+ /* Refresh the inode agan and proceed with the transaction.*/
+ afr_inode_refresh (txn_frame, this, local->inode, NULL,
+ local->refreshfn);
+
+ if (heal_frame)
+ AFR_STACK_DESTROY (heal_frame);
+
+ return 0;
+}
+
+int
+afr_fav_child_reset_sink_xattrs (void *opaque)
+{
+ call_frame_t *heal_frame = NULL;
+ call_frame_t *txn_frame = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t d_spb = _gf_false;
+ gf_boolean_t m_spb = _gf_false;
+ afr_local_t *heal_local = NULL;
+ afr_local_t *txn_local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
+ unsigned char *locked_on = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
+ struct afr_reply *locked_replies = NULL;
+ int ret = 0;
+
+ heal_frame = (call_frame_t *) opaque;
+ heal_local = heal_frame->local;
+ txn_frame = heal_local->heal_frame;
+ txn_local = txn_frame->local;
+ this = txn_frame->this;
+ inode = txn_local->inode;
+ priv = this->private;
+ locked_on = alloca0 (priv->child_count);
+ sources = alloca0 (priv->child_count);
+ sinks = alloca0 (priv->child_count);
+ healed_sinks = alloca0 (priv->child_count);
+ undid_pending = alloca0 (priv->child_count);
+ locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count);
+
+ ret = _afr_is_split_brain (txn_frame, this, txn_local->replies,
+ AFR_DATA_TRANSACTION, &d_spb);
+
+ ret = _afr_is_split_brain (txn_frame, this, txn_local->replies,
+ AFR_METADATA_TRANSACTION, &m_spb);
+
+ /* Take appropriate locks and reset sink xattrs. */
+ if (d_spb) {
+ ret = afr_selfheal_inodelk (heal_frame, this, inode, this->name,
+ 0, 0, locked_on);
+ {
+ if (ret < AFR_SH_MIN_PARTICIPANTS)
+ goto data_unlock;
+ ret = __afr_selfheal_data_prepare (heal_frame, this,
+ inode, locked_on,
+ sources, sinks,
+ healed_sinks,
+ undid_pending,
+ locked_replies,
+ NULL);
+ }
+data_unlock:
+ afr_selfheal_uninodelk (heal_frame, this, inode, this->name,
+ 0, 0, locked_on);
+ }
+
+ if (m_spb) {
+ memset (locked_on, 0, sizeof (*locked_on) * priv->child_count);
+ memset (undid_pending, 0,
+ sizeof (*undid_pending) * priv->child_count);
+ ret = afr_selfheal_inodelk (heal_frame, this, inode, this->name,
+ LLONG_MAX-1, 0, locked_on);
+ {
+ if (ret < AFR_SH_MIN_PARTICIPANTS)
+ goto mdata_unlock;
+ ret = __afr_selfheal_metadata_prepare (heal_frame, this,
+ inode, locked_on,
+ sources, sinks,
+ healed_sinks,
+ undid_pending,
+ locked_replies,
+ NULL);
+
+ }
+mdata_unlock:
+ afr_selfheal_uninodelk (heal_frame, this, inode, this->name,
+ LLONG_MAX-1, 0, locked_on);
+ }
+
+ return ret;
+
+}