summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src
diff options
context:
space:
mode:
authorRichard Wareing <rwareing@fb.com>2015-09-25 13:42:40 -0700
committerJeff Darcy <jdarcy@fb.com>2017-07-03 07:48:08 -0700
commit992a9f8494a358f828eeef34b46e9f5ccfca1d3b (patch)
tree353da3de0f4a72f5ddfaaa07778e85cb4a232ef0 /xlators/cluster/afr/src
parent32fc893c176965109d58a3398ad042683d159d77 (diff)
cluster/afr: Fix metadata split-brain flow (HOTFIX)
Summary: - The metadata heal flow for some reason likes to tinker with the sink states prior to having the source finalized, which broke the policy-based unsplit flow. This patch fixes it by simply setting those children who aren't the favorite as sinks. Test Plan: - Tested against some reported instances Reviewers: moox, sshreyas, dph Reviewed By: sshreyas Differential Revision: https://phabricator.fb.com/D2481527 Signature: t1:2481527:1443215555:1165d8eb5f3dec216ec3ff0795d9837712906b1d Blame Revision: Change-Id: I56f96fdcef32dd4fc5d35958148d0e56d142d5e4 Change-Id: I16aa445a22c3bcd7b589954e2da513ed53822d5b
Diffstat (limited to 'xlators/cluster/afr/src')
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c17
1 files changed, 17 insertions, 0 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index d5d95ef8852..44374d5c0ad 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -216,6 +216,23 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
sources_count = AFR_COUNT (sources, priv->child_count);
+ if (sources_count == 0) {
+ afr_sh_get_source_by_policy (this, sources, healed_sinks,
+ locked_on, replies, inode);
+ sources_count = AFR_COUNT (sources, priv->child_count);
+ /* __afr_selfheal_metadata_prepare tinkers with the state
+ * of healed_sinks prematurely (the source hasn't
+ * actually been finalized yet!), so reset the children
+ * which aren't our source to sinks so we can heal.
+ * I'll leave it to the AFR2 maintainer to fix that code
+ * in the future as they may have had a good reason.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i] && locked_on[i])
+ healed_sinks[i] = 1;
+ }
+ }
+
if ((AFR_CMP (locked_on, healed_sinks, priv->child_count) == 0)
|| !sources_count) {