summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src/afr-self-heal-common.c
diff options
context:
space:
mode:
author: Venkatesh Somyajulu <vsomyaju@redhat.com> 2013-06-28 19:11:47 +0530
committer: Vijay Bellur <vbellur@redhat.com> 2013-07-02 10:25:17 -0700
commit: ef8092fab7b6fa5a16cc0e22b75945758519d5a6 (patch)
tree: 65aa4fa06801b135d61a5e57637dad882793e73e /xlators/cluster/afr/src/afr-self-heal-common.c
parent: 7062eda1575214819f5c7411748b06be95e08ffa (diff)
cluster/afr: Allow data/entry self heal for metadata split-brain
Problem: Currently, whenever there is a metadata split-brain, the variable sh->op_failed is set to 1 to denote that self-heal failed. But if we then proceed to data self-heal, the data self-heal code path also relies on sh->op_failed: it checks that variable and consequently fails to perform the data self-heal. So a mechanism is needed to allow data self-heal even when metadata is in split-brain. Fix: Some data structure revamp was done in http://review.gluster.com/#/c/5106/ and this patch is based on that fix. We can now store which particular self-heal failed, i.e. GFID_OR_MISSING_ENTRY_SELF_HEAL, METADATA, DATA or ENTRY, and we can do two types of self-heal failure check. 1. Individual type check: we can check which among all four (metadata, data, gfid or missing entry, entry self-heal) failed. 2. In afr_self_heal_completion_cbk, the check is based on the rule that if any specific self-heal failed, the complete self-heal is treated as a failure, so that the corresponding circular buffer of the event history is populated accordingly. Change-Id: Icb91e513bcc752386fc8a78812405cfabe5cac2d BUG: 977797 Signed-off-by: Venkatesh Somyajulu <vsomyaju@redhat.com> Reviewed-on: http://review.gluster.org/5253 Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/cluster/afr/src/afr-self-heal-common.c')
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.c 167
1 files changed, 93 insertions, 74 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 5f985374f29..f0915b01d2e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1018,7 +1018,7 @@ afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
local->loc.path);
}
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
sh->completion_cbk (frame, this);
} else {
gf_log (this->name, GF_LOG_TRACE,
@@ -1250,7 +1250,7 @@ out:
if (ret) {
gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, "
"reason: %s", local->loc.path, strerror (-ret));
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
afr_sh_missing_entries_finish (frame, this);
}
@@ -1265,7 +1265,7 @@ afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this,
local = frame->local;
sh = &local->self_heal;
if (op_ret < 0)
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_missing_entries_finish (frame, this);
return 0;
}
@@ -1386,7 +1386,7 @@ afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,
}
return;
out:
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_missing_entries_finish (frame, this);
return;
@@ -1470,7 +1470,7 @@ afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
LOCK (&frame->lock);
{
afr_sh_set_error (sh, EIO);
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
UNLOCK (&frame->lock);
}
@@ -1552,7 +1552,7 @@ afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- if (is_self_heal_failed (sh)) {
+ if (is_self_heal_failed (sh, AFR_CHECK_SPECIFIC)) {
afr_sh_missing_entries_finish (frame, this);
} else {
if (afr_gfid_missing_count (this->name, sh->fresh_children,
@@ -1766,7 +1766,7 @@ afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,
priv->child_count, ENOENT);
if (fresh_child_enoents == fresh_parent_count) {
afr_sh_set_error (sh, ENOENT);
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_purge_entry (frame, this);
} else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children,
priv->child_count, local->loc.path,
@@ -1787,7 +1787,7 @@ afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,
return;
fail:
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_set_error (sh, op_errno);
afr_sh_missing_entries_finish (frame, this);
return;
@@ -1858,7 +1858,7 @@ afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,
out:
afr_sh_set_error (sh, op_errno);
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_missing_entries_finish (frame, this);
return;
}
@@ -1962,7 +1962,7 @@ afr_sh_post_nb_entrylk_missing_entry_sh_cbk (call_frame_t *frame,
if (int_lock->lock_op_ret < 0) {
gf_log (this->name, GF_LOG_INFO,
"Non blocking entrylks failed.");
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
afr_sh_missing_entries_done (frame, this);
} else {
@@ -2047,8 +2047,9 @@ afr_self_heal_missing_entries (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
- sh->afr_set_self_heal_status = afr_set_gfid_or_missing_entry_sh_status;
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
+ sh->sh_type_in_action = AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY;
+
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
afr_self_heal_parent_entrylk (frame, this,
afr_sh_post_nb_entrylk_missing_entry_sh_cbk);
@@ -2176,7 +2177,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
afr_self_heal_type_str_get (sh, sh_type_str,
sizeof(sh_type_str));
- if (is_self_heal_failed (sh) && !priv->shd.iamshd) {
+ if (is_self_heal_failed (sh, AFR_CHECK_ALL) && !priv->shd.iamshd) {
loglevel = GF_LOG_ERROR;
} else {
loglevel = GF_LOG_DEBUG;
@@ -2191,7 +2192,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
orig_frame_sh = &orig_frame_local->self_heal;
orig_frame_sh->actual_sh_started = _gf_true;
sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
- is_self_heal_failed (sh));
+ is_self_heal_failed (sh, AFR_CHECK_ALL));
}
if (sh->background) {
@@ -2305,6 +2306,8 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
sh->do_gfid_self_heal = _gf_false;
}
+ sh->sh_type_in_action = AFR_SELF_HEAL_INVALID;
+
FRAME_SU_DO (sh_frame, afr_local_t);
if (sh->do_missing_entry_self_heal || sh->do_gfid_self_heal) {
afr_self_heal_missing_entries (sh_frame, this);
@@ -2514,7 +2517,7 @@ out:
GF_FREE (erase_xattr);
if (ret < 0) {
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
finish (frame, this);
}
@@ -2522,59 +2525,39 @@ out:
}
void
-afr_set_data_sh_status (afr_self_heal_t *sh, afr_self_heal_status status)
-{
- xlator_t *this = NULL;
-
- this = THIS;
-
- if (sh)
- sh->afr_all_sh_status.data_self_heal = status;
- else
- gf_log_callingfn (this->name, GF_LOG_ERROR,
- "Null self heal struct");
-}
-
-void
-afr_set_metadata_sh_status (afr_self_heal_t *sh, afr_self_heal_status status)
-{
- xlator_t *this = NULL;
-
- this = THIS;
-
- if (sh)
- sh->afr_all_sh_status.metadata_self_heal = status;
- else
- gf_log_callingfn (this->name, GF_LOG_ERROR,
- "Null self heal struct");
-}
-
-void
-afr_set_entry_sh_status (afr_self_heal_t *sh, afr_self_heal_status status)
+afr_set_self_heal_status(afr_self_heal_t *sh, afr_self_heal_status status)
{
- xlator_t *this = NULL;
-
+ xlator_t *this = NULL;
+ afr_sh_status_for_all_type *sh_status = &(sh->afr_all_sh_status);
+ afr_self_heal_type sh_type_in_action = sh->sh_type_in_action;
this = THIS;
- if (sh)
- sh->afr_all_sh_status.entry_self_heal = status;
- else
- gf_log_callingfn (this->name, GF_LOG_ERROR,
- "Null self heal struct");
-}
-void
-afr_set_gfid_or_missing_entry_sh_status (afr_self_heal_t *sh,
- afr_self_heal_status status)
-{
- xlator_t *this = NULL;
-
- this = THIS;
+ if (!sh) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal"
+ "Structure");
+ goto out;
+ }
- if (sh)
- sh->afr_all_sh_status.gfid_or_missing_entry_self_heal = status;
- else
- gf_log_callingfn (this->name, GF_LOG_ERROR,
- "Null self heal struct");
+ switch (sh_type_in_action) {
+ case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY:
+ sh_status->gfid_or_missing_entry_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_METADATA:
+ sh_status->metadata_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_DATA:
+ sh_status->data_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_ENTRY:
+ sh_status->entry_self_heal = status;
+ break;
+ case AFR_SELF_HEAL_INVALID:
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Invalid"
+ "self heal type in action");
+ break;
+ }
+out:
+ return;
}
void
@@ -2585,22 +2568,58 @@ afr_set_local_for_unhealable (afr_local_t *local)
sh = &local->self_heal;
local->unhealable = 1;
- if (sh->afr_set_self_heal_status)
- sh->afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+ afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
}
int
-is_self_heal_failed (afr_self_heal_t *sh)
+is_self_heal_failed (afr_self_heal_t *sh, afr_sh_fail_check_type type)
{
- afr_sh_status_for_all_type sh_status = sh->afr_all_sh_status;
+ afr_sh_status_for_all_type sh_status = sh->afr_all_sh_status;
+ afr_self_heal_type sh_type_in_action = AFR_SELF_HEAL_INVALID;
+ afr_self_heal_status status = AFR_SELF_HEAL_FAILED;
+ xlator_t *this = NULL;
+ int sh_failed = 0;
+
+ this = THIS;
- int sh_failed = 0;
- if ((sh_status.gfid_or_missing_entry_self_heal == AFR_SELF_HEAL_FAILED)
- || (sh_status.metadata_self_heal == AFR_SELF_HEAL_FAILED)
- || (sh_status.data_self_heal == AFR_SELF_HEAL_FAILED)
- || (sh_status.entry_self_heal == AFR_SELF_HEAL_FAILED))
- sh_failed = 1;
+ if (!sh) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "Null self heal "
+ "structure");
+ sh_failed = 1;
+ goto out;
+ }
+ if (type == AFR_CHECK_ALL) {
+ if ((sh_status.gfid_or_missing_entry_self_heal == AFR_SELF_HEAL_FAILED)
+ || (sh_status.metadata_self_heal == AFR_SELF_HEAL_FAILED)
+ || (sh_status.data_self_heal == AFR_SELF_HEAL_FAILED)
+ || (sh_status.entry_self_heal == AFR_SELF_HEAL_FAILED))
+ sh_failed = 1;
+ } else if (type == AFR_CHECK_SPECIFIC) {
+ sh_type_in_action = sh->sh_type_in_action;
+ switch (sh_type_in_action) {
+ case AFR_SELF_HEAL_GFID_OR_MISSING_ENTRY:
+ status = sh_status.gfid_or_missing_entry_self_heal;
+ break;
+ case AFR_SELF_HEAL_METADATA:
+ status = sh_status.metadata_self_heal;
+ break;
+ case AFR_SELF_HEAL_ENTRY:
+ status = sh_status.entry_self_heal;
+ break;
+ case AFR_SELF_HEAL_DATA:
+ status = sh_status.data_self_heal;
+ break;
+ case AFR_SELF_HEAL_INVALID:
+ status = AFR_SELF_HEAL_NOT_ATTEMPTED;
+ break;
+ }
+ if (status == AFR_SELF_HEAL_FAILED)
+ sh_failed = 1;
+
+ }
+
+out:
return sh_failed;
}