summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Pranith Kumar K <pranithk@gluster.com> 2012-01-20 17:30:54 +0530
committer: Anand Avati <avati@redhat.com> 2012-03-12 19:37:26 -0700
commit: 154a59a6e0988194c6a6e17527a30cca47a697f9 (patch)
tree: 6b7e2bcd6acead9d64430847ef848f35daff0861
parent: fafd5c17c0d126e10b401199cd4f01f7786deef8 (diff)
cluster/afr: Handle split-brain/all-fool xattrs for directory
In case of split-brain/all-fool xattrs perform conservative merge.
Don't treat ignorant subvol as fool.

Change-Id: I3044d388d816d79268fec170d202ef23e7d5bf1c
BUG: 765528
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Reviewed-on: http://review.gluster.com/2674
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.c 223
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.h 17
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-data.c 40
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-entry.c 48
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-metadata.c 2
-rw-r--r-- xlators/cluster/afr/src/afr.h 4
6 files changed, 158 insertions, 176 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 377c72a88f2..db39512d227 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -157,6 +157,7 @@ afr_mark_ignorant_subvols_as_pending (int32_t **pending_matrix,
int
afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
+ unsigned char *ignorant_subvols,
dict_t *xattr[], afr_transaction_type type,
size_t child_count)
{
@@ -167,12 +168,6 @@ afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
int i = 0;
int j = 0;
int k = 0;
- unsigned char *ignorant_subvols = NULL;
-
- ignorant_subvols = GF_CALLOC (sizeof (*ignorant_subvols), child_count,
- gf_afr_mt_char);
- if (NULL == ignorant_subvols)
- goto out;
afr_init_pending_matrix (pending_matrix, child_count);
@@ -190,7 +185,8 @@ afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
* subvolume.
*/
- ignorant_subvols[i] = 1;
+ if (ignorant_subvols)
+ ignorant_subvols[i] = 1;
continue;
}
@@ -201,19 +197,14 @@ afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
}
}
- afr_mark_ignorant_subvols_as_pending (pending_matrix,
- ignorant_subvols,
- child_count);
- GF_FREE (ignorant_subvols);
-out:
return ret;
}
typedef enum {
+ AFR_NODE_INVALID,
AFR_NODE_INNOCENT,
AFR_NODE_FOOL,
AFR_NODE_WISE,
- AFR_NODE_INVALID = -1,
} afr_node_type;
typedef struct {
@@ -467,23 +458,18 @@ out:
int
afr_mark_child_as_source_by_uid (int32_t *sources, struct iatt *bufs,
- int32_t *valid_children, int child_count,
- uint32_t uid)
+ int32_t *success_children,
+ unsigned int child_count, uint32_t uid)
{
int i = 0;
int nsources = 0;
int child = 0;
- GF_ASSERT (bufs);
- GF_ASSERT (valid_children);
- GF_ASSERT (sources);
- GF_ASSERT (child_count > 0);
-
for (i = 0; i < child_count; i++) {
- if (-1 == valid_children[i])
- continue;
+ if (-1 == success_children[i])
+ break;
- child = valid_children[i];
+ child = success_children[i];
if (uid == bufs[child].ia_uid) {
sources[child] = 1;
nsources++;
@@ -493,21 +479,17 @@ afr_mark_child_as_source_by_uid (int32_t *sources, struct iatt *bufs,
}
int
-afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *valid_children,
- int child_count)
+afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *success_children,
+ unsigned int child_count)
{
int i = 0;
int smallest = -1;
int child = 0;
- GF_ASSERT (bufs);
- GF_ASSERT (valid_children);
- GF_ASSERT (child_count > 0);
-
for (i = 0; i < child_count; i++) {
- if (-1 == valid_children[i])
- continue;
- child = valid_children[i];
+ if (-1 == success_children[i])
+ break;
+ child = success_children[i];
if ((smallest == -1) ||
(bufs[child].ia_uid < bufs[smallest].ia_uid)) {
smallest = child;
@@ -517,20 +499,20 @@ afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *valid_children,
}
static int
-afr_sh_mark_lowest_uid_as_source (struct iatt *bufs, int32_t *valid_children,
+afr_sh_mark_lowest_uid_as_source (struct iatt *bufs, int32_t *success_children,
int child_count, int32_t *sources)
{
int nsources = 0;
int smallest = 0;
- smallest = afr_get_child_with_lowest_uid (bufs, valid_children,
+ smallest = afr_get_child_with_lowest_uid (bufs, success_children,
child_count);
if (smallest < 0) {
nsources = -1;
goto out;
}
nsources = afr_mark_child_as_source_by_uid (sources, bufs,
- valid_children, child_count,
+ success_children, child_count,
bufs[smallest].ia_uid);
out:
return nsources;
@@ -560,7 +542,7 @@ afr_get_character_str (afr_node_type type)
afr_node_type
afr_find_child_character_type (int32_t *pending_row, int32_t child,
- int32_t child_count, const char *xlator_name)
+ unsigned int child_count)
{
afr_node_type type = AFR_NODE_INVALID;
@@ -574,11 +556,6 @@ afr_find_child_character_type (int32_t *pending_row, int32_t child,
type = AFR_NODE_FOOL;
else if (afr_sh_is_wise (pending_row, child, child_count))
type = AFR_NODE_WISE;
- else
- GF_ASSERT (0);
-
- gf_log (xlator_name, GF_LOG_DEBUG, "child %d character %s",
- child, afr_get_character_str (type));
return type;
}
@@ -586,43 +563,76 @@ int
afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
int32_t **pending_matrix, int32_t *sources,
int32_t *success_children, afr_transaction_type type,
- afr_source_flags_t *flags)
+ int32_t *subvol_status, gf_boolean_t ignore_ignorant)
{
afr_private_t *priv = NULL;
afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
int nsources = -1;
+ unsigned char *ignorant_subvols = NULL;
+ unsigned int child_count = 0;
priv = this->private;
+ child_count = priv->child_count;
if (afr_get_children_count (success_children, priv->child_count) == 0)
goto out;
+ if (!ignore_ignorant) {
+ ignorant_subvols = GF_CALLOC (sizeof (*ignorant_subvols),
+ child_count, gf_afr_mt_char);
+ if (NULL == ignorant_subvols)
+ goto out;
+ }
+
afr_build_pending_matrix (priv->pending_key, pending_matrix,
- xattr, type, priv->child_count);
+ ignorant_subvols, xattr, type,
+ priv->child_count);
+ if (!ignore_ignorant)
+ afr_mark_ignorant_subvols_as_pending (pending_matrix,
+ ignorant_subvols,
+ priv->child_count);
sh_type = afr_self_heal_type_for_transaction (type);
if (AFR_SELF_HEAL_INVALID == sh_type)
goto out;
afr_sh_print_pending_matrix (pending_matrix, this);
- nsources = afr_mark_sources (sources, pending_matrix, bufs,
- priv->child_count, sh_type,
- success_children, this->name, flags);
+ nsources = afr_mark_sources (this, sources, pending_matrix, bufs,
+ sh_type, success_children, subvol_status);
out:
+ GF_FREE (ignorant_subvols);
return nsources;
}
void
-afr_mark_valid_children_sources (int32_t *sources, int32_t *valid_children,
- unsigned int child_count)
+afr_find_character_types (afr_node_character *characters,
+ int32_t **pending_matrix, int32_t *success_children,
+ unsigned int child_count)
+{
+ afr_node_type type = AFR_NODE_INVALID;
+ int child = 0;
+ int i = 0;
+
+ for (i = 0; i < child_count; i++) {
+ child = success_children[i];
+ if (child == -1)
+ break;
+ type = afr_find_child_character_type (pending_matrix[child],
+ child, child_count);
+ characters[child].type = type;
+ }
+}
+
+void
+afr_mark_success_children_sources (int32_t *sources, int32_t *success_children,
+ unsigned int child_count)
{
int i = 0;
- memset (sources, 0, sizeof (*sources) * child_count);
for (i = 0; i < child_count; i++) {
- if (valid_children[i] == -1)
+ if (success_children[i] == -1)
break;
- sources[valid_children[i]] = 1;
+ sources[success_children[i]] = 1;
}
}
@@ -646,23 +656,23 @@ afr_mark_valid_children_sources (int32_t *sources, int32_t *valid_children,
* a split-brain. If one wise node refers to the other but the other doesn't
* refer back, the referrer is a source.
*
- * All fools are sinks, unless there are no 'wise' nodes. if 'allfools' is NULL,
- * biggest fool(s) is/are marked as source.
+ * All fools are sinks, unless there are no 'wise' nodes. In that case,
+ * one of the fools is made a source.
*/
int
-afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,
- int32_t child_count, afr_self_heal_type type,
- int32_t *valid_children, const char *xlator_name,
- afr_source_flags_t *flags)
+afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
+ struct iatt *bufs, afr_self_heal_type type,
+ int32_t *success_children, int32_t *subvol_status)
{
/* stores the 'characters' (innocent, fool, wise) of the nodes */
-
afr_node_character *characters = NULL;
- int i = 0;
- int nsources = -1;
- xlator_t *this = NULL;
+ int nsources = -1;
+ unsigned int child_count = 0;
+ afr_private_t *priv = NULL;
+ priv = this->private;
+ child_count = priv->child_count;
characters = GF_CALLOC (sizeof (afr_node_character),
child_count, gf_afr_mt_afr_node_character);
if (!characters)
@@ -671,26 +681,14 @@ afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,
this = THIS;
/* start clean */
- for (i = 0; i < child_count; i++) {
- sources[i] = 0;
- }
-
+ memset (sources, 0, sizeof (*sources) * child_count);
nsources = 0;
- for (i = 0; i < child_count; i++) {
- characters[i].type =
- afr_find_child_character_type (pending_matrix[i], i,
- child_count,
- xlator_name);
- if (AFR_NODE_INVALID == characters[i].type)
- gf_log (xlator_name, GF_LOG_WARNING,
- "child %d had invalid xattrs", i);
- }
-
- if ((type == AFR_SELF_HEAL_METADATA)
- && afr_sh_all_nodes_innocent (characters, child_count)) {
-
- nsources = afr_sh_mark_lowest_uid_as_source (bufs,
- valid_children,
+ afr_find_character_types (characters, pending_matrix, success_children,
+ child_count);
+ if (afr_sh_all_nodes_innocent (characters, child_count)) {
+ if (type == AFR_SELF_HEAL_METADATA)
+ nsources = afr_sh_mark_lowest_uid_as_source (bufs,
+ success_children,
child_count,
sources);
goto out;
@@ -700,24 +698,17 @@ afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,
afr_sh_compute_wisdom (pending_matrix, characters, child_count);
if (afr_sh_wise_nodes_conflict (characters, child_count)) {
- /* split-brain */
- gf_log (this->name, GF_LOG_INFO,
- "split-brain possible, no source detected");
- if (flags)
- *flags |= AFR_SPLIT_BRAIN;
+ if (subvol_status)
+ *subvol_status |= SPLIT_BRAIN;
nsources = -1;
-
} else {
nsources = afr_sh_mark_wisest_as_sources (sources,
characters,
child_count);
}
} else {
- if (flags) {
- *flags |= AFR_ALL_FOOLS;
- nsources = -1;
- goto out;
- }
+ if (subvol_status)
+ *subvol_status |= ALL_FOOLS;
nsources = afr_mark_biggest_of_fools_as_source (sources,
pending_matrix,
characters,
@@ -726,10 +717,9 @@ afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,
out:
if (nsources == 0)
- afr_mark_valid_children_sources (sources, valid_children,
- child_count);
- if (characters)
- GF_FREE (characters);
+ afr_mark_success_children_sources (sources, success_children,
+ child_count);
+ GF_FREE (characters);
gf_log (this->name, GF_LOG_DEBUG, "Number of sources: %d", nsources);
return nsources;
@@ -1261,7 +1251,8 @@ afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,
nsources = afr_build_sources (this, sh->xattr, sh->buf,
sh->pending_matrix, sh->sources,
sh->child_success,
- afr_transaction_type_get (ia_type), NULL);
+ afr_transaction_type_get (ia_type),
+ NULL, _gf_false);
if (nsources < 0) {
gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s,"
" in missing entry self-heal, continuing with the rest"
@@ -1686,13 +1677,13 @@ static void
afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,
int32_t op_ret, int32_t op_errno)
{
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int enoent_count = 0;
- int nsources = 0;
- int source = -1;
- afr_source_flags_t flags = 0;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int enoent_count = 0;
+ int nsources = 0;
+ int source = -1;
+ int32_t subvol_status = 0;
local = frame->local;
sh = &local->self_heal;
@@ -1722,22 +1713,22 @@ afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,
nsources = afr_build_sources (this, sh->xattr, sh->buf,
sh->pending_matrix, sh->sources,
sh->child_success,
- AFR_ENTRY_TRANSACTION, &flags);
- if ((nsources < 0) && !flags) {
- gf_log (this->name, GF_LOG_ERROR, "No sources for dir of %s,"
- " in missing entry self-heal, aborting self-heal",
- local->loc.path);
+ AFR_ENTRY_TRANSACTION, &subvol_status,
+ _gf_true);
+ if ((subvol_status & ALL_FOOLS) ||
+ (subvol_status & SPLIT_BRAIN)) {
+ gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative "
+ "merge", sh->parent_loc.path);
+ afr_mark_success_children_sources (sh->sources,
+ sh->child_success,
+ priv->child_count);
+ } else if (nsources < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "No sources for dir "
+ "of %s, in missing entry self-heal, aborting "
+ "self-heal", local->loc.path);
goto out;
}
- //if allfools/split-brain give the behavior of missing entry creation
- if (flags) {
- gf_log (this->name, GF_LOG_DEBUG, "%s: All subvols pending so "
- "do missing entry creation", local->loc.path);
- afr_mark_valid_children_sources (sh->sources, sh->child_success,
- priv->child_count);
- }
-
source = afr_sh_select_source (sh->sources, priv->child_count);
if (source == -1) {
GF_ASSERT (0);
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h
index b313c17e984..1e325685d57 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.h
+++ b/xlators/cluster/afr/src/afr-self-heal-common.h
@@ -34,11 +34,6 @@ typedef enum {
AFR_LOOKUP_FAIL_MISSING_GFIDS = 2,
} afr_lookup_flags_t;
-typedef enum {
- AFR_SPLIT_BRAIN = 1,
- AFR_ALL_FOOLS =2
-} afr_source_flags_t;
-
int
afr_sh_select_source (int sources[], int child_count);
@@ -53,6 +48,7 @@ afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this);
int
afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
+ unsigned char *ignorant_subvols,
dict_t *xattr[], afr_transaction_type type,
size_t child_count);
@@ -62,10 +58,9 @@ afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
int child_count, afr_transaction_type type);
int
-afr_mark_sources (int32_t *sources, int32_t **pending_matrix, struct iatt *bufs,
- int32_t child_count, afr_self_heal_type type,
- int32_t *valid_children, const char *xlator_name,
- afr_source_flags_t *flags);
+afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
+ struct iatt *bufs, afr_self_heal_type type,
+ int32_t *success_children, int32_t *subvol_status);
int
afr_sh_delta_to_xattr (afr_private_t *priv,
@@ -83,10 +78,10 @@ afr_self_heal_type
afr_self_heal_type_for_transaction (afr_transaction_type type);
int
-afr_build_sources (xlator_t *xlator, dict_t **xattr, struct iatt *bufs,
+afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
int32_t **pending_matrix, int32_t *sources,
int32_t *success_children, afr_transaction_type type,
- afr_source_flags_t *flags);
+ int32_t *subvol_status, gf_boolean_t ignore_ignorant);
void
afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count);
int
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index a2bc3bb5827..4aa5b9e8644 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -594,15 +594,9 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
sh = &local->self_heal;
priv = this->private;
- afr_build_pending_matrix (priv->pending_key, sh->pending_matrix,
- sh->xattr, AFR_DATA_TRANSACTION,
- priv->child_count);
-
- afr_sh_print_pending_matrix (sh->pending_matrix, this);
-
- nsources = afr_mark_sources (sh->sources, sh->pending_matrix, sh->buf,
- priv->child_count, AFR_SELF_HEAL_DATA,
- sh->child_success, this->name, NULL);
+ nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix,
+ sh->sources, sh->child_success,
+ AFR_DATA_TRANSACTION, NULL, _gf_false);
if (nsources == 0) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -711,7 +705,7 @@ static int
afr_select_read_child_from_policy (int32_t *sources, int32_t child_count,
int32_t prev_read_child,
int32_t config_read_child,
- int32_t *valid_children)
+ int32_t *success_children)
{
int32_t read_child = -1;
int i = 0;
@@ -729,7 +723,7 @@ afr_select_read_child_from_policy (int32_t *sources, int32_t child_count,
goto out;
for (i = 0; i < child_count; i++) {
- read_child = valid_children[i];
+ read_child = success_children[i];
if (read_child < 0)
break;
if (_gf_true == afr_is_fresh_read_child (sources, child_count,
@@ -796,17 +790,17 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
int ret = -1;
int32_t **pending_matrix = NULL;
int32_t *sources = NULL;
- int32_t *valid_children = NULL;
+ int32_t *success_children = NULL;
struct iatt *bufs = NULL;
int32_t nsources = 0;
int32_t prev_read_child = -1;
int32_t config_read_child = -1;
+ int32_t subvol_status = 0;
afr_self_heal_t *sh = NULL;
- afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
priv = this->private;
bufs = local->cont.lookup.bufs;
- valid_children = local->cont.lookup.child_success;
+ success_children = local->cont.lookup.child_success;
sh = &local->self_heal;
pending_matrix = afr_create_pending_matrix (priv->child_count);
@@ -815,16 +809,12 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
sources = local->cont.lookup.sources;
memset (sources, 0, sizeof (*sources) * priv->child_count);
- afr_build_pending_matrix (priv->pending_key, pending_matrix,
- xattr, txn_type, priv->child_count);
-
- sh_type = afr_self_heal_type_for_transaction (txn_type);
- if (AFR_SELF_HEAL_INVALID == sh_type)
- goto out;
-
- nsources = afr_mark_sources (sources, pending_matrix, bufs,
- priv->child_count, sh_type,
- valid_children, this->name, NULL);
+ nsources = afr_build_sources (this, xattr, bufs, pending_matrix,
+ sources, success_children, txn_type,
+ &subvol_status, _gf_false);
+ if (subvol_status & SPLIT_BRAIN)
+ gf_log (this->name, GF_LOG_WARNING, "%s: Possible split-brain",
+ local->loc.path);
if (nsources < 0) {
ret = -1;
goto out;
@@ -836,7 +826,7 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
priv->child_count,
prev_read_child,
config_read_child,
- valid_children);
+ success_children);
ret = 0;
out:
afr_destroy_pending_matrix (pending_matrix, priv->child_count);
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 161b870f509..c7a4c2b2897 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -1602,7 +1602,7 @@ afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
}
gf_boolean_t
-afr_sh_need_recreate (afr_self_heal_t *impunge_sh, int *sources,
+afr_sh_need_recreate (afr_self_heal_t *impunge_sh,
unsigned int child, unsigned int child_count)
{
int32_t *success_children = NULL;
@@ -1610,7 +1610,6 @@ afr_sh_need_recreate (afr_self_heal_t *impunge_sh, int *sources,
GF_ASSERT (impunge_sh->impunging_entry_mode);
GF_ASSERT (impunge_sh->child_errno);
- GF_ASSERT (sources);
success_children = impunge_sh->child_success;
if (child == impunge_sh->active_source) {
@@ -1638,7 +1637,7 @@ afr_sh_recreate_count (afr_self_heal_t *impunge_sh, int *sources,
int i = 0;
for (i = 0; i < child_count; i++) {
- if (afr_sh_need_recreate (impunge_sh, sources, i, child_count))
+ if (afr_sh_need_recreate (impunge_sh, i, child_count))
count++;
}
@@ -1673,8 +1672,7 @@ afr_sh_entry_call_impunge_recreate (call_frame_t *impunge_frame,
GF_ASSERT (recreate_count);
impunge_local->call_count = recreate_count;
for (i = 0; i < priv->child_count; i++) {
- if (afr_sh_need_recreate (impunge_sh, sh->sources, i,
- priv->child_count)) {
+ if (afr_sh_need_recreate (impunge_sh, i, priv->child_count)) {
(void)afr_sh_entry_impunge_create (impunge_frame, this,
i, buf,
postparent);
@@ -2135,12 +2133,12 @@ void
afr_sh_entry_fix (call_frame_t *frame, xlator_t *this,
int32_t op_ret, int32_t op_errno)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- afr_source_flags_t flags = 0;
-
- int nsources = 0;
+ afr_local_t *local = NULL;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ int source = 0;
+ int nsources = 0;
+ int32_t subvol_status = 0;
local = frame->local;
sh = &local->self_heal;
@@ -2158,26 +2156,30 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this,
goto heal;
}
- afr_build_pending_matrix (priv->pending_key, sh->pending_matrix,
- sh->xattr, AFR_ENTRY_TRANSACTION,
- priv->child_count);
-
- afr_sh_print_pending_matrix (sh->pending_matrix, this);
-
- nsources = afr_mark_sources (sh->sources, sh->pending_matrix, sh->buf,
- priv->child_count, AFR_SELF_HEAL_ENTRY,
- sh->child_success, this->name, &flags);
-
- if (nsources == 0) {
+ nsources = afr_build_sources (this, sh->xattr, sh->buf,
+ sh->pending_matrix, sh->sources,
+ sh->child_success,
+ AFR_ENTRY_TRANSACTION, &subvol_status,
+ _gf_true);
+ if ((subvol_status & ALL_FOOLS) ||
+ (subvol_status & SPLIT_BRAIN)) {
+ gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative "
+ "merge", local->loc.path);
+ source = -1;
+ memset (sh->sources, 0,
+ sizeof (*sh->sources) * priv->child_count);
+ } else if (nsources == 0) {
gf_log (this->name, GF_LOG_TRACE,
"No self-heal needed for %s",
local->loc.path);
afr_sh_entry_finish (frame, this);
return;
+ } else {
+ source = afr_sh_select_source (sh->sources, priv->child_count);
}
- sh->source = afr_sh_select_source (sh->sources, priv->child_count);
+ sh->source = source;
heal:
afr_sh_entry_sync_prepare (frame, this);
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 9b920eb159f..0dc55593722 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -480,7 +480,7 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
nsources = afr_build_sources (this, sh->xattr, sh->buf,
sh->pending_matrix, sh->sources,
sh->child_success,
- AFR_METADATA_TRANSACTION, NULL);
+ AFR_METADATA_TRANSACTION, NULL, _gf_false);
if (nsources == 0) {
gf_log (this->name, GF_LOG_TRACE,
"No self-heal needed for %s",
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index f29dcae4121..1e3592f658b 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -221,6 +221,10 @@ typedef struct {
call_frame_t *sh_frame;
} afr_self_heal_t;
+typedef enum {
+ SPLIT_BRAIN = 1,
+ ALL_FOOLS = 2
+} afr_subvol_status_t;
typedef enum {
AFR_DATA_TRANSACTION, /* truncate, write, ... */