From 353fb2e97bfc3a44cc5301fa8dfb015406565048 Mon Sep 17 00:00:00 2001 From: Anand Avati Date: Wed, 8 Dec 2010 09:58:27 +0000 Subject: replicate: propagate CHILD_DOWN upwards properly During initialization, if one subvolume returns CHILD_DOWN, then do not consider the other subvolume as down if we still haven't heard from it yet. Treating it as down causes DHT (on top) to prematurely (and wrongly) send CHILD_UP/DOWN upwards. Signed-off-by: Anand V. Avati Signed-off-by: Anand V. Avati BUG: 2200 (cp dies with "Invalid argument" after failover) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2200 --- xlators/cluster/afr/src/afr-common.c | 9 +++++---- xlators/cluster/afr/src/afr.c | 7 +++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 356f077e7..4fb33a565 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -2532,6 +2532,7 @@ afr_notify (xlator_t *this, int32_t event, int i = -1; int up_children = 0; + int down_children = 0; priv = this->private; @@ -2562,7 +2563,7 @@ afr_notify (xlator_t *this, int32_t event, */ for (i = 0; i < priv->child_count; i++) - if (child_up[i]) + if (child_up[i] == 1) up_children++; if (up_children == 1) { @@ -2592,10 +2593,10 @@ afr_notify (xlator_t *this, int32_t event, */ for (i = 0; i < priv->child_count; i++) - if (child_up[i]) - up_children++; + if (child_up[i] == 0) + down_children++; - if (up_children == 0) { + if (down_children == priv->child_count) { gf_log (this->name, GF_LOG_ERROR, "All subvolumes are down. Going offline " "until atleast one of them comes back up."); diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index f1b163f0b..5f586e201 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -874,6 +874,13 @@ init (xlator_t *this) goto out; } + for (i = 0; i < child_count; i++) + priv->child_up[i] = -1; /* start with unknown state. 
+ this initialization needed + for afr_notify() to work + reliably + */ + priv->children = GF_CALLOC (sizeof (xlator_t *), child_count, gf_afr_mt_xlator_t); if (!priv->children) { -- cgit