diff options
-rw-r--r-- | tests/bugs/replicate/bug-1386188-sbrain-fav-child.t | 82 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 205 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-read-txn.c | 13 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 159 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 20 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 3 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 15 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 24 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 9 |
9 files changed, 469 insertions, 61 deletions
diff --git a/tests/bugs/replicate/bug-1386188-sbrain-fav-child.t b/tests/bugs/replicate/bug-1386188-sbrain-fav-child.t new file mode 100644 index 00000000000..d049d95ef9a --- /dev/null +++ b/tests/bugs/replicate/bug-1386188-sbrain-fav-child.t @@ -0,0 +1,82 @@ +#!/bin/bash +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume set $V0 self-heal-daemon off +TEST $CLI volume set $V0 data-self-heal off +TEST $CLI volume set $V0 entry-self-heal off +TEST $CLI volume set $V0 metadata-self-heal off +TEST $CLI volume start $V0 + +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; +TEST touch $M0/data.txt +TEST touch $M0/mdata.txt + +#Create data and metadata split-brain +TEST kill_brick $V0 $H0 $B0/${V0}0 +TEST dd if=/dev/urandom of=$M0/data.txt bs=1024 count=1024 +TEST setfattr -n user.value -v value1 $M0/mdata.txt +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0 +TEST kill_brick $V0 $H0 $B0/${V0}1 +TEST dd if=/dev/urandom of=$M0/data.txt bs=1024 count=1024 +TEST setfattr -n user.value -v value2 $M0/mdata.txt + +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 + +## Check that the file is still in split-brain, + ## I/O fails + cat $M0/data.txt > /dev/null + EXPECT "1" echo $? + ## pending xattrs blame each other. + brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/data.txt) + brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/data.txt) + TEST [ $brick0_pending -ne "000000000000000000000000" ] + TEST [ $brick1_pending -ne "000000000000000000000000" ] + + ## I/O fails + getfattr -n user.value $M0/mdata.txt + EXPECT "1" echo $? 
+ brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/mdata.txt) + brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/mdata.txt) + TEST [ $brick0_pending -ne "000000000000000000000000" ] + TEST [ $brick1_pending -ne "000000000000000000000000" ] + +## Let us use mtime as fav-child policy. So brick0 will be source. + # Set dirty (data part) on the sink brick to check if it is reset later along with the pending xattr. + TEST setfattr -n trusted.afr.dirty -v 0x000000010000000000000000 $B0/${V0}1/data.txt + # Set dirty (metadata part) on the sink brick to check if it is reset later along with the pending xattr. + TEST setfattr -n trusted.afr.dirty -v 0x000000000000000100000000 $B0/${V0}1/mdata.txt + + TEST $CLI volume set $V0 favorite-child-policy mtime + + # Reading the file should be allowed and sink brick xattrs must be reset. + cat $M0/data.txt > /dev/null + EXPECT "0" echo $? + TEST brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/data.txt) + TEST brick1_dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}1/data.txt) + TEST [ $brick1_dirty -eq "000000000000000000000000" ] + TEST [ $brick1_pending -eq "000000000000000000000000" ] + + # Accessing the file should be allowed and sink brick xattrs must be reset. + EXPECT "value2" echo $(getfattr --only-values -n user.value $M0/mdata.txt) + TEST brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/data.txt) + TEST brick1_dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}1/data.txt) + TEST [ $brick1_dirty -eq "000000000000000000000000" ] + TEST [ $brick1_pending -eq "000000000000000000000000" ] + +#Enable shd and heal the file. 
+TEST $CLI volume set $V0 cluster.self-heal-daemon on +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 +TEST $CLI volume heal $V0 +EXPECT 0 get_pending_heal_count $V0 +cleanup; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 7173e8b032e..024f0f5f589 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -53,6 +53,13 @@ afr_quorum_errno (afr_private_t *priv) return EROFS; } +int +afr_fav_child_reset_sink_xattrs (void *opaque); + +int +afr_fav_child_reset_sink_xattrs_cbk (int ret, call_frame_t *frame, + void *opaque); + gf_boolean_t afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv, int32_t *op_errno) @@ -1011,6 +1018,82 @@ afr_selfheal_enabled (xlator_t *this) return data || priv->metadata_self_heal || priv->entry_self_heal; } + +int +afr_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) +{ + + call_frame_t *heal_frame = NULL; + afr_local_t *heal_local = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + inode_t *inode = NULL; + int event_generation = 0; + int read_subvol = -1; + int op_errno = ENOMEM; + int ret = 0; + + local = frame->local; + inode = local->inode; + priv = this->private; + + if (err) + goto refresh_done; + + if (local->op == GF_FOP_LOOKUP) + goto refresh_done; + + ret = afr_inode_get_readable (frame, inode, this, local->readable, + &event_generation, + local->transaction.type); + + if (ret == -EIO || !event_generation) { + /* No readable subvolume even after refresh ==> splitbrain.*/ + if (!priv->fav_child_policy) { + err = -EIO; + goto refresh_done; + } + read_subvol = afr_sh_get_fav_by_policy (this, local->replies, + inode, NULL); + if (read_subvol == -1) { + err = -EIO; + goto refresh_done; + } + + heal_frame = copy_frame (frame); + if (!heal_frame) { + err = 
-EIO; + goto refresh_done; + } + heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD; + heal_local = AFR_FRAME_INIT (heal_frame, op_errno); + if (!heal_local) { + err = -EIO; + AFR_STACK_DESTROY (heal_frame); + goto refresh_done; + } + heal_local->xdata_req = dict_new(); + if (!heal_local->xdata_req) { + err = -EIO; + AFR_STACK_DESTROY (heal_frame); + goto refresh_done; + } + heal_local->heal_frame = frame; + ret = synctask_new (this->ctx->env, + afr_fav_child_reset_sink_xattrs, + afr_fav_child_reset_sink_xattrs_cbk, + heal_frame, + heal_frame); + return 0; + } + +refresh_done: + afr_local_replies_wipe (local, this->private); + local->refreshfn (frame, this, err); + + return 0; +} + int afr_inode_refresh_done (call_frame_t *frame, xlator_t *this) { @@ -1029,8 +1112,6 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this) err = afr_inode_refresh_err (frame, this); - afr_local_replies_wipe (local, this->private); - if (ret && afr_selfheal_enabled (this) && start_heal) { heal_frame = copy_frame (frame); if (!heal_frame) @@ -1047,7 +1128,7 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this) } refresh_done: - local->refreshfn (frame, this, err); + afr_txn_refresh_done (frame, this, err); return 0; } @@ -5110,6 +5191,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this, unsigned char *sources = NULL; unsigned char *sinks = NULL; unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; struct afr_reply *locked_replies = NULL; afr_private_t *priv = this->private; @@ -5118,6 +5200,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this, sources = alloca0 (priv->child_count); sinks = alloca0 (priv->child_count); healed_sinks = alloca0 (priv->child_count); + undid_pending = alloca0 (priv->child_count); locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); @@ -5134,6 +5217,7 @@ afr_selfheal_locked_metadata_inspect (call_frame_t *frame, xlator_t *this, ret = 
__afr_selfheal_metadata_prepare (frame, this, inode, locked_on, sources, sinks, healed_sinks, + undid_pending, locked_replies, pending); *msh = afr_decide_heal_info (priv, sources, ret); @@ -5157,6 +5241,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this, unsigned char *sources = NULL; unsigned char *sinks = NULL; unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; afr_private_t *priv = NULL; fd_t *fd = NULL; struct afr_reply *locked_replies = NULL; @@ -5170,6 +5255,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this, sources = alloca0 (priv->child_count); sinks = alloca0 (priv->child_count); healed_sinks = alloca0 (priv->child_count); + undid_pending = alloca0 (priv->child_count); /* Heal-info does an open() on the file being examined so that the * current eager-lock holding client, if present, at some point sees @@ -5209,6 +5295,7 @@ afr_selfheal_locked_data_inspect (call_frame_t *frame, xlator_t *this, ret = __afr_selfheal_data_prepare (frame, this, inode, data_lock, sources, sinks, healed_sinks, + undid_pending, locked_replies, pflag); *dsh = afr_decide_heal_info (priv, sources, ret); @@ -5796,3 +5883,115 @@ afr_compound_cleanup (compound_args_t *args, dict_t *xdata, if (newloc_xdata) dict_unref (newloc_xdata); } + +int +afr_fav_child_reset_sink_xattrs_cbk (int ret, call_frame_t *heal_frame, + void *opaque) +{ + + call_frame_t *txn_frame = NULL; + afr_local_t *local = NULL; + afr_local_t *heal_local = NULL; + xlator_t *this = NULL; + + heal_local = heal_frame->local; + txn_frame = heal_local->heal_frame; + local = txn_frame->local; + this = txn_frame->this; + + /* Refresh the inode again and proceed with the transaction.*/ + afr_inode_refresh (txn_frame, this, local->inode, NULL, + local->refreshfn); + + if (heal_frame) + AFR_STACK_DESTROY (heal_frame); + + return 0; +} + +int +afr_fav_child_reset_sink_xattrs (void *opaque) +{ + call_frame_t *heal_frame = NULL; + call_frame_t *txn_frame = 
NULL; + xlator_t *this = NULL; + gf_boolean_t d_spb = _gf_false; + gf_boolean_t m_spb = _gf_false; + afr_local_t *heal_local = NULL; + afr_local_t *txn_local = NULL; + afr_private_t *priv = NULL; + inode_t *inode = NULL; + unsigned char *locked_on = NULL; + unsigned char *sources = NULL; + unsigned char *sinks = NULL; + unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; + struct afr_reply *locked_replies = NULL; + int ret = 0; + + heal_frame = (call_frame_t *) opaque; + heal_local = heal_frame->local; + txn_frame = heal_local->heal_frame; + txn_local = txn_frame->local; + this = txn_frame->this; + inode = txn_local->inode; + priv = this->private; + locked_on = alloca0 (priv->child_count); + sources = alloca0 (priv->child_count); + sinks = alloca0 (priv->child_count); + healed_sinks = alloca0 (priv->child_count); + undid_pending = alloca0 (priv->child_count); + locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); + + ret = _afr_is_split_brain (txn_frame, this, txn_local->replies, + AFR_DATA_TRANSACTION, &d_spb); + + ret = _afr_is_split_brain (txn_frame, this, txn_local->replies, + AFR_METADATA_TRANSACTION, &m_spb); + + /* Take appropriate locks and reset sink xattrs. 
*/ + if (d_spb) { + ret = afr_selfheal_inodelk (heal_frame, this, inode, this->name, + 0, 0, locked_on); + { + if (ret < AFR_SH_MIN_PARTICIPANTS) + goto data_unlock; + ret = __afr_selfheal_data_prepare (heal_frame, this, + inode, locked_on, + sources, sinks, + healed_sinks, + undid_pending, + locked_replies, + NULL); + } +data_unlock: + afr_selfheal_uninodelk (heal_frame, this, inode, this->name, + 0, 0, locked_on); + } + + if (m_spb) { + memset (locked_on, 0, sizeof (*locked_on) * priv->child_count); + memset (undid_pending, 0, + sizeof (*undid_pending) * priv->child_count); + ret = afr_selfheal_inodelk (heal_frame, this, inode, this->name, + LLONG_MAX-1, 0, locked_on); + { + if (ret < AFR_SH_MIN_PARTICIPANTS) + goto mdata_unlock; + ret = __afr_selfheal_metadata_prepare (heal_frame, this, + inode, locked_on, + sources, sinks, + healed_sinks, + undid_pending, + locked_replies, + NULL); + + } +mdata_unlock: + afr_selfheal_uninodelk (heal_frame, this, inode, this->name, + LLONG_MAX-1, 0, locked_on); + } + + return ret; + +} diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index cb81af42510..26b0f1c2a11 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -64,7 +64,6 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) { afr_local_t *local = NULL; int read_subvol = 0; - int event_generation = 0; inode_t *inode = NULL; int ret = -1; int spb_choice = -1; @@ -76,18 +75,12 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) local->op_errno = -err; local->op_ret = -1; read_subvol = -1; + gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN, + "Failing %s on gfid %s: split-brain observed.", + gf_fop_list[local->op], uuid_utoa (inode->gfid)); goto readfn; } - ret = afr_inode_get_readable (frame, inode, this, local->readable, - &event_generation, - local->transaction.type); - - if (ret == -EIO || !event_generation) - /* Even after 
refresh, we don't have a good - read subvolume. Time to bail */ - AFR_READ_TXN_SET_ERROR_AND_GOTO (-1, EIO, -1, readfn); - read_subvol = afr_read_subvol_select_by_policy (inode, this, local->readable, NULL); if (read_subvol == -1) diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index b6720ccfa5c..c3b62e3781a 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -145,8 +145,10 @@ err: int afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources, unsigned char *sinks, - unsigned char *healed_sinks, afr_transaction_type type, - struct afr_reply *replies, unsigned char *locked_on) + unsigned char *healed_sinks, + unsigned char *undid_pending, + afr_transaction_type type, struct afr_reply *replies, + unsigned char *locked_on) { afr_private_t *priv = NULL; afr_local_t *local = NULL; @@ -214,6 +216,10 @@ afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode, and inspected on. */ continue; + if (undid_pending[i]) + /* We already unset the pending xattrs in + * _afr_fav_child_reset_sink_xattrs(). 
*/ + continue; xattr = afr_selfheal_output_xattr (this, local->need_full_crawl, type, output_dirty, @@ -735,6 +741,42 @@ afr_sh_fav_by_size (xlator_t *this, struct afr_reply *replies, inode_t *inode) return fav_child; } +int +afr_sh_get_fav_by_policy (xlator_t *this, struct afr_reply *replies, + inode_t *inode, char **policy_str) +{ + afr_private_t *priv = NULL; + int fav_child = -1; + + priv = this->private; + switch (priv->fav_child_policy) { + case AFR_FAV_CHILD_BY_SIZE: + fav_child = afr_sh_fav_by_size (this, replies, inode); + if (policy_str && fav_child >= 0) + *policy_str = "SIZE"; + break; + case AFR_FAV_CHILD_BY_CTIME: + fav_child = afr_sh_fav_by_ctime (this, replies, inode); + if (policy_str && fav_child >= 0) + *policy_str = "CTIME"; + break; + case AFR_FAV_CHILD_BY_MTIME: + fav_child = afr_sh_fav_by_mtime (this, replies, inode); + if (policy_str && fav_child >= 0) + *policy_str = "MTIME"; + break; + case AFR_FAV_CHILD_BY_MAJORITY: + fav_child = afr_sh_fav_by_majority (this, replies, inode); + if (policy_str && fav_child >= 0) + *policy_str = "MAJORITY"; + break; + case AFR_FAV_CHILD_NONE: + default: + break; + } + + return fav_child; +} int afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame, @@ -756,24 +798,9 @@ afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame, time_t time; priv = this->private; - if (priv->fav_child_policy == AFR_FAV_CHILD_BY_MAJORITY) { - fav_child = afr_sh_fav_by_majority (this, replies, inode); - if (fav_child >= 0) - policy_str = "MAJORITY"; - } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_MTIME) { - fav_child = afr_sh_fav_by_mtime (this, replies, inode); - if (fav_child >= 0) - policy_str = "MTIME"; - } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_CTIME) { - fav_child = afr_sh_fav_by_ctime (this, replies, inode); - if (fav_child >= 0) - policy_str = "CTIME"; - } else if (priv->fav_child_policy == AFR_FAV_CHILD_BY_SIZE) { - fav_child = afr_sh_fav_by_size (this, replies, inode); - if 
(fav_child >= 0) - policy_str = "SIZE"; - } + fav_child = afr_sh_get_fav_by_policy (this, replies, inode, + &policy_str); if (fav_child > priv->child_count - 1) { gf_msg (this->name, GF_LOG_ERROR, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY, "Invalid child (%d) " @@ -829,6 +856,7 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this, dict_t *xdata_req = NULL; int heal_op = -1; int ret = -1; + int source = -1; local = frame->local; priv = this->private; @@ -838,27 +866,96 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this, if (ret) goto autoheal; - ret = afr_mark_split_brain_source_sinks_by_heal_op (frame, this, + source = afr_mark_split_brain_source_sinks_by_heal_op (frame, this, sources, sinks, healed_sinks, locked_on, replies, type, heal_op); - return ret; + return source; autoheal: /* Automatically heal if fav_child_policy is set. */ if (priv->fav_child_policy != AFR_FAV_CHILD_NONE) { - ret = afr_mark_split_brain_source_sinks_by_policy (frame, this, - inode, - sources, - sinks, + source = afr_mark_split_brain_source_sinks_by_policy (frame, + this, + inode, + sources, + sinks, healed_sinks, - locked_on, - replies, - type); + locked_on, + replies, + type); + if (source != -1) { + ret = dict_set_int32 (xdata_req, "fav-child-policy", 1); + if (ret) + return -1; + } } - return ret; + return source; +} + +int +_afr_fav_child_reset_sink_xattrs (call_frame_t *frame, xlator_t *this, + inode_t *inode, int source, + unsigned char *healed_sinks, + unsigned char *undid_pending, + afr_transaction_type type, + unsigned char *locked_on, + struct afr_reply *replies) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int *input_dirty = NULL; + int **input_matrix = NULL; + int *output_dirty = NULL; + int **output_matrix = NULL; + dict_t *xattr = NULL; + dict_t *xdata = NULL; + int i = 0; + + priv = this->private; + local = frame->local; + + if (!dict_get (local->xdata_req, "fav-child-policy")) + return 0; + + xdata = dict_new(); + if 
(!xdata) + return -1; + + input_dirty = alloca0 (priv->child_count * sizeof (int)); + input_matrix = ALLOC_MATRIX (priv->child_count, int); + output_dirty = alloca0 (priv->child_count * sizeof (int)); + output_matrix = ALLOC_MATRIX (priv->child_count, int); + + afr_selfheal_extract_xattr (this, replies, type, input_dirty, + input_matrix); + + for (i = 0; i < priv->child_count; i++) { + if (i == source || !healed_sinks[i]) + continue; + output_dirty[i] = -input_dirty[i]; + output_matrix[i][source] = -input_matrix[i][source]; + } + + for (i = 0; i < priv->child_count; i++) { + if (!healed_sinks[i] || !locked_on[i]) + continue; + xattr = afr_selfheal_output_xattr (this, _gf_false, type, + output_dirty, output_matrix, + i, NULL); + + afr_selfheal_post_op (frame, this, inode, i, xattr, xdata); + + undid_pending[i] = 1; + dict_unref (xattr); + } + + if (xdata) + dict_unref (xdata); + + return 0; } gf_boolean_t @@ -1906,11 +2003,15 @@ afr_selfheal (xlator_t *this, uuid_t gfid) { int ret = -1; call_frame_t *frame = NULL; + afr_local_t *local = NULL; frame = afr_frame_create (this); if (!frame) return ret; + local = frame->local; + local->xdata_req = dict_new(); + ret = afr_selfheal_do (frame, this, gfid); if (frame) diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index fbbbd192323..d032284926c 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -580,6 +580,7 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this, unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on, + unsigned char *undid_pending, struct afr_reply *replies, uint64_t *witness) { @@ -603,6 +604,11 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this, "file=%s", this->name, uuid_utoa(inode->gfid)); return -EIO; } + + _afr_fav_child_reset_sink_xattrs (frame, this, inode, source, + healed_sinks, undid_pending, + 
AFR_DATA_TRANSACTION, + locked_on, replies); return source; } @@ -642,6 +648,7 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *locked_on, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, + unsigned char *undid_pending, struct afr_reply *replies, gf_boolean_t *pflag) { int ret = -1; @@ -677,8 +684,8 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, source = __afr_selfheal_data_finalize_source (frame, this, inode, sources, sinks, healed_sinks, - locked_on, replies, - witness); + locked_on, undid_pending, + replies, witness); if (source < 0) return -EIO; @@ -696,6 +703,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, unsigned char *sinks = NULL; unsigned char *data_lock = NULL; unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; struct afr_reply *locked_replies = NULL; int source = -1; gf_boolean_t did_sh = _gf_true; @@ -707,6 +715,7 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, sinks = alloca0 (priv->child_count); healed_sinks = alloca0 (priv->child_count); data_lock = alloca0 (priv->child_count); + undid_pending = alloca0 (priv->child_count); locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); @@ -726,9 +735,8 @@ __afr_selfheal_data (call_frame_t *frame, xlator_t *this, fd_t *fd, ret = __afr_selfheal_data_prepare (frame, this, fd->inode, data_lock, sources, sinks, - healed_sinks, - locked_replies, - NULL); + healed_sinks, undid_pending, + locked_replies, NULL); if (ret < 0) goto unlock; @@ -787,7 +795,7 @@ restore_time: } ret = afr_selfheal_undo_pending (frame, this, fd->inode, sources, sinks, healed_sinks, - AFR_DATA_TRANSACTION, + undid_pending, AFR_DATA_TRANSACTION, locked_replies, data_lock); skip_undo_pending: afr_selfheal_uninodelk (frame, this, fd->inode, this->name, 0, 0, diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c 
b/xlators/cluster/afr/src/afr-self-heal-entry.c index a0e361ab987..d8fe5422372 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -913,6 +913,7 @@ __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd, unsigned char *data_lock = NULL; unsigned char *postop_lock = NULL; unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; struct afr_reply *locked_replies = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; @@ -924,6 +925,7 @@ __afr_selfheal_entry (call_frame_t *frame, xlator_t *this, fd_t *fd, sources = alloca0 (priv->child_count); sinks = alloca0 (priv->child_count); healed_sinks = alloca0 (priv->child_count); + undid_pending = alloca0 (priv->child_count); data_lock = alloca0 (priv->child_count); postop_lock = alloca0 (priv->child_count); @@ -996,6 +998,7 @@ unlock: ret = afr_selfheal_undo_pending (frame, this, fd->inode, sources, sinks, healed_sinks, + undid_pending, AFR_ENTRY_TRANSACTION, locked_replies, postop_lock); } diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index 9dfe4a14e8c..5839ddc2e0f 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -203,6 +203,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, + unsigned char *undid_pending, unsigned char *locked_on, struct afr_reply *replies) { @@ -224,8 +225,14 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, healed_sinks, locked_on, replies, AFR_METADATA_TRANSACTION); - if (source >= 0) + if (source >= 0) { + _afr_fav_child_reset_sink_xattrs (frame, this, inode, + source, healed_sinks, + undid_pending, + AFR_METADATA_TRANSACTION, + locked_on, replies); return source; + } /* If this is a directory mtime/ctime only split brain use the 
most recent */ @@ -308,6 +315,7 @@ int __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *locked_on, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, + unsigned char *undid_pending, struct afr_reply *replies, gf_boolean_t *pflag) { int ret = -1; @@ -362,6 +370,7 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, inode_t *i source = __afr_selfheal_metadata_finalize_source (frame, this, inode, sources, sinks, healed_sinks, + undid_pending, locked_on, replies); if (source < 0) @@ -379,6 +388,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode) unsigned char *sinks = NULL; unsigned char *data_lock = NULL; unsigned char *healed_sinks = NULL; + unsigned char *undid_pending = NULL; struct afr_reply *locked_replies = NULL; gf_boolean_t did_sh = _gf_true; int source = -1; @@ -388,6 +398,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode) sources = alloca0 (priv->child_count); sinks = alloca0 (priv->child_count); healed_sinks = alloca0 (priv->child_count); + undid_pending = alloca0 (priv->child_count); data_lock = alloca0 (priv->child_count); locked_replies = alloca0 (sizeof (*locked_replies) * priv->child_count); @@ -403,6 +414,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode) ret = __afr_selfheal_metadata_prepare (frame, this, inode, data_lock, sources, sinks, healed_sinks, + undid_pending, locked_replies, NULL); if (ret < 0) goto unlock; @@ -421,6 +433,7 @@ afr_selfheal_metadata (call_frame_t *frame, xlator_t *this, inode_t *inode) ret = afr_selfheal_undo_pending (frame, this, inode, sources, sinks, healed_sinks, + undid_pending, AFR_METADATA_TRANSACTION, locked_replies, data_lock); } diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index ec5337e60b2..80b7f3a125d 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ 
b/xlators/cluster/afr/src/afr-self-heal.h @@ -167,8 +167,10 @@ afr_selfheal_extract_xattr (xlator_t *this, struct afr_reply *replies, int afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources, unsigned char *sinks, - unsigned char *healed_sinks, afr_transaction_type type, - struct afr_reply *replies, unsigned char *locked_on); + unsigned char *healed_sinks, + unsigned char *undid_pending, + afr_transaction_type type, struct afr_reply *replies, + unsigned char *locked_on); int afr_selfheal_recreate_entry (xlator_t *this, int dst, int source, inode_t *dir, @@ -229,6 +231,19 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this, afr_transaction_type type); int +afr_sh_get_fav_by_policy (xlator_t *this, struct afr_reply *replies, + inode_t *inode, char **policy_str); + +int +_afr_fav_child_reset_sink_xattrs (call_frame_t *frame, xlator_t *this, + inode_t *inode, int source, + unsigned char *healed_sinks, + unsigned char *undid_pending, + afr_transaction_type type, + unsigned char *locked_on, + struct afr_reply *replies); + +int afr_get_child_index_from_name (xlator_t *this, char *name); gf_boolean_t @@ -239,8 +254,8 @@ __afr_selfheal_data_prepare (call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *locked_on, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, - struct afr_reply *replies, - gf_boolean_t *flag); + unsigned char *undid_pending, + struct afr_reply *replies, gf_boolean_t *flag); int __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, @@ -248,6 +263,7 @@ __afr_selfheal_metadata_prepare (call_frame_t *frame, xlator_t *this, unsigned char *sources, unsigned char *sinks, unsigned char *healed_sinks, + unsigned char *undid_pending, struct afr_reply *replies, gf_boolean_t *flag); int diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 4906921ca6a..e6878eb35ff 100644 --- 
a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -2527,19 +2527,12 @@ afr_write_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) if (err) { local->op_errno = -err; local->op_ret = -1; - goto fail; - } - ret = afr_inode_get_readable (frame, local->inode, this, - local->readable, NULL, - local->transaction.type); - if (ret < 0) { gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_SPLIT_BRAIN, "Failing %s on gfid %s: split-brain observed.", gf_fop_list[local->op], uuid_utoa (local->inode->gfid)); - local->op_ret = -1; - local->op_errno = -ret; goto fail; } + afr_transaction_start (frame, this); return 0; fail: |