summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPranith Kumar K <pranithk@gluster.com>2012-07-18 10:28:18 +0530
committerVijay Bellur <vbellur@redhat.com>2012-08-12 21:51:39 -0700
commit12aa31278c651f36c2ea1c0698ba789aa3f9262c (patch)
tree21642a29f8456553dea1c0ca26feda431c850ff4
parentaa4e7d04ff20e38182a62576595951d59524fff0 (diff)
cluster/afr: Avoid setting split-brain outside inode locks
RCA: The bug is observed because the decision to mark a file in split-brain is taken outside appropriate locks. Lookup gathers xattrs outside any lock. The xattrs being in split-brain in lookup should only be taken as a hint. Appropriate inodelks should be taken before confirming a split-brain. Self-heal confirms this at the moment. Fix: Self-heals are launched to inspect xattrs when the data/metadata self-heal options are turned on. Decision to set/reset split-brain flag is taken inside appropriate locks. Known Issue After fix: If data/metadata self-heal is turned off, inspecting of xattrs could not be performed so split-brain behavior does not work correctly if the self-heal options are turned off. This bug is handled only in upstream. Change-Id: I59a43d5ce7bf9ca35bff54a51bf4cfa55d717a9e BUG: 833727 Signed-off-by: Pranith Kumar K <pranithk@gluster.com> Reviewed-on: http://review.gluster.com/3691 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--xlators/cluster/afr/src/afr-common.c3
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c2
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c36
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c33
-rw-r--r--xlators/cluster/afr/src/afr.h6
5 files changed, 46 insertions, 34 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 7973dfdde04..f416ed921ce 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1220,7 +1220,7 @@ afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this,
if ((local->success_count > 0) && split_brain &&
IA_ISREG (local->cont.lookup.inode->ia_type)) {
local->self_heal.do_data_self_heal = _gf_true;
- local->self_heal.do_gfid_self_heal = _gf_true;
+ local->self_heal.do_metadata_self_heal = _gf_true;
gf_log (this->name, GF_LOG_WARNING,
"split brain detected during lookup of %s.",
local->loc.path);
@@ -1504,6 +1504,7 @@ afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)
sh = &local->self_heal;
split_brain = afr_is_split_brain (this, local->cont.lookup.inode);
+ split_brain = split_brain || local->cont.lookup.possible_spb;
afr_detect_self_heal_by_lookup_status (local, this, split_brain);
if (afr_lookup_gfid_missing_count (local, this))
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 9e493092e2a..be8809a5de7 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -2144,7 +2144,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
local = bgsh_frame->local;
sh = &local->self_heal;
- if (local->govinda_gOvinda)
+ if (local->govinda_gOvinda || sh->mdata_spb || sh->data_spb)
split_brain = _gf_true;
afr_set_split_brain (this, sh->inode, split_brain);
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 90a2af18b5a..72bf8d4b917 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -740,16 +740,6 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)
nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix,
sh->sources, sh->success_children,
AFR_DATA_TRANSACTION, NULL, _gf_true);
- if ((nsources == 0) && !sh->sync_done) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No self-heal needed for %s",
- local->loc.path);
-
- local->govinda_gOvinda = 0;
- afr_sh_data_finish (frame, this);
- return 0;
- }
-
if ((nsources == -1)
&& (priv->favorite_child != -1)
&& (sh->child_errno[priv->favorite_child] == 0)) {
@@ -772,12 +762,12 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)
"split-brain). Please delete the file from all but "
"the preferred subvolume.", local->loc.path);
- local->govinda_gOvinda = 1;
+ sh->data_spb = _gf_true;
afr_sh_data_fail (frame, this);
return 0;
}
- local->govinda_gOvinda = 0;
+ sh->data_spb = _gf_false;
ret = afr_sh_inode_set_read_ctx (sh, this);
if (ret) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -790,6 +780,15 @@ afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)
if (sh->sync_done) {
afr_sh_data_setattr (frame, this);
} else {
+ if (nsources == 0) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "No self-heal needed for %s",
+ local->loc.path);
+
+ afr_sh_data_finish (frame, this);
+ return 0;
+ }
+
afr_sh_data_fix (frame, this);
}
return 0;
@@ -831,9 +830,7 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
local->loc.path);
switch (txn_type) {
case AFR_DATA_TRANSACTION:
- afr_set_split_brain (this,
- local->cont.lookup.inode,
- _gf_true);
+ local->cont.lookup.possible_spb = _gf_true;
nsources = 1;
sources[success_children[0]] = 1;
break;
@@ -1393,8 +1390,13 @@ afr_self_heal_data (call_frame_t *frame, xlator_t *this)
local = frame->local;
sh = &local->self_heal;
- local->govinda_gOvinda = afr_is_split_brain (this, sh->inode);
-
+ /* Self-heal completion cbk changes inode split-brain status based on
+ * govinda_gOvinda, mdata_spb, data_spb values. Initialize data_spb
+ * with current split-brain status. If for some reason self-heal
+ * fails(locking phase etc), it makes sure we retain the split-brain
+ * status before this self-heal started.
+ */
+ sh->data_spb = afr_is_split_brain (this, sh->inode);
if (sh->do_data_self_heal &&
afr_data_self_heal_enabled (priv->data_self_heal)) {
afr_sh_data_open (frame, this);
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 79718315cbf..6c3989e8490 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -50,7 +50,7 @@ afr_sh_metadata_done (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
afr_sh_reset (frame, this);
- if (local->govinda_gOvinda) {
+ if (sh->mdata_spb) {
gf_log (this->name, GF_LOG_INFO,
"split-brain detected, aborting selfheal of %s",
local->loc.path);
@@ -450,15 +450,6 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
sh->pending_matrix, sh->sources,
sh->success_children,
AFR_METADATA_TRANSACTION, NULL, _gf_false);
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_metadata_finish (frame, this);
- goto out;
- }
-
if ((nsources == -1)
&& (priv->favorite_child != -1)
&& (sh->child_errno[priv->favorite_child] == 0)) {
@@ -480,7 +471,16 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
"(possible split-brain). Please fix the file on "
"all backend volumes", local->loc.path);
- local->govinda_gOvinda = 1;
+ sh->mdata_spb = _gf_true;
+
+ afr_sh_metadata_finish (frame, this);
+ goto out;
+ }
+ sh->mdata_spb = _gf_false;
+ if (nsources == 0) {
+ gf_log (this->name, GF_LOG_TRACE,
+ "No self-heal needed for %s",
+ local->loc.path);
afr_sh_metadata_finish (frame, this);
goto out;
@@ -584,10 +584,19 @@ int
afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
{
afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
afr_private_t *priv = this->private;
-
local = frame->local;
+ sh = &local->self_heal;
+
+ /* Self-heal completion cbk changes inode split-brain status based on
+ * govinda_gOvinda, mdata_spb, data_spb values. Initialize mdata_spb
+ * with current split-brain status. If for some reason self-heal
+ * fails(locking phase etc), it makes sure we retain the split-brain
+ * status before this self-heal started.
+ */
+ sh->mdata_spb = afr_is_split_brain (this, sh->inode);
if (local->self_heal.do_metadata_self_heal && priv->metadata_self_heal) {
afr_sh_metadata_lock (frame, this);
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 795db603528..fe0e2b2e6ed 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -261,6 +261,8 @@ typedef struct {
int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this);
int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this);
void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this);
+ gf_boolean_t mdata_spb;
+ gf_boolean_t data_spb;
call_frame_t *sh_frame;
} afr_self_heal_t;
@@ -368,10 +370,7 @@ typedef struct _afr_local {
unsigned int call_count;
unsigned int success_count;
unsigned int enoent_count;
-
-
unsigned int govinda_gOvinda;
-
unsigned int read_child_index;
unsigned char read_child_returned;
unsigned int first_up_child;
@@ -439,6 +438,7 @@ typedef struct _afr_local {
int32_t *sources;
int32_t *success_children;
gf_boolean_t fresh_lookup;
+ gf_boolean_t possible_spb;
} lookup;
struct {