summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRichard Wareing <rwareing@fb.com>2015-09-25 13:42:40 -0700
committerJeff Darcy <jeff@pl.atyp.us>2017-07-05 16:04:43 +0000
commit8b35f108ca58901ef28bcc5b249890543ee71743 (patch)
treea75c8f2fd07ced9884817c614802c89e34c96b91
parentdb9d4408e9a14d20c9b2fb972310b391a54b0fd0 (diff)
cluster/afr: Fix metadata split-brain flow (HOTFIX)
Summary: - The metadata heal flow for some reason likes to tinker with the sink states prior to having the source finalized; this broke the policy-based unsplit flow. This patch fixes it by simply setting those children who aren't the favorite as sinks. Test Plan: - Tested against some reported instances Reviewers: moox, sshreyas, dph Reviewed By: sshreyas Differential Revision: https://phabricator.fb.com/D2481527 Signature: t1:2481527:1443215555:1165d8eb5f3dec216ec3ff0795d9837712906b1d Blame Revision: Change-Id: I56f96fdcef32dd4fc5d35958148d0e56d142d5e4 Change-Id: I16aa445a22c3bcd7b589954e2da513ed53822d5b Signed-off-by: Jeff Darcy <jdarcy@fb.com> Reviewed-on: https://review.gluster.org/17682 Smoke: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Jeff Darcy <jeff@pl.atyp.us> Tested-by: Jeff Darcy <jeff@pl.atyp.us> CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c11
1 files changed, 11 insertions, 0 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index d5d95ef8852..f3fa5d39506 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -216,6 +216,17 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
sources_count = AFR_COUNT (sources, priv->child_count);
+ /* __afr_selfheal_metadata_prepare tinkers with the state
* of healed_sinks prematurely (the source hasn't
+ * actually been finalized yet!), so reset the children
+ * which aren't our source to sinks so we can heal.
+ * I'll leave it to the AFR2 maintainer to fix that code
+ * in the future as they may have had a good reason.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i] && locked_on[i])
+ healed_sinks[i] = 1;
+ }
if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0)
|| !sources_count) {