summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Pranith Kumar K <pranithk@gluster.com> 2011-05-31 01:55:57 +0000
committer Anand Avati <avati@gluster.com> 2011-05-31 09:12:17 -0700
commit bc7d07d8d5eab29741c7e28b7dcb38ce66c101cb (patch)
tree 43c0f38b79c704687b00f4640bb7504d98e6599d
parent 631fc71ef8f356333bd478d444f2d638c1f30aeb (diff)
cluster/afr: Send the first child up/down after all its children notify
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Signed-off-by: Anand Avati <avati@gluster.com>
BUG: 2870 (Inconsistent xattr values when creating bricks)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2870
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 161
-rw-r--r-- xlators/cluster/afr/src/afr.c 7
-rw-r--r-- xlators/cluster/afr/src/afr.h 1
3 files changed, 115 insertions, 54 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index f000aaf9217..c8b1ea96011 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -2470,92 +2470,145 @@ int32_t
afr_notify (xlator_t *this, int32_t event,
void *data, ...)
{
- afr_private_t * priv = NULL;
- unsigned char * child_up = NULL;
- int i = -1;
- int up_children = 0;
- int down_children = 0;
+ afr_private_t *priv = NULL;
+ int i = -1;
+ int up_children = 0;
+ int down_children = 0;
+ int propagate = 0;
+
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ int idx = -1;
+ int ret = -1;
priv = this->private;
if (!priv)
return 0;
- child_up = priv->child_up;
+ had_heard_from_all = 1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->last_event[i]) {
+ had_heard_from_all = 0;
+ }
+ }
+
+ /* parent xlators dont need to know about every child_up, child_down
+ * because of afr ha. If all subvolumes go down, child_down has
+ * to be triggered. In that state when 1 subvolume comes up child_up
+ * needs to be triggered. dht optimises revalidate lookup by sending
+ * it only to one of its subvolumes. When child up/down happens
+ * for afr's subvolumes dht should be notified by child_modified. The
+ * subsequent revalidate lookup happens on all the dht's subvolumes
+ * which triggers afr self-heals if any.
+ */
+ idx = find_child_index (this, data);
+ if (idx < 0) {
+ gf_log (this->name, GF_LOG_ERROR, "Received child_up "
+ "from invalid subvolume");
+ goto out;
+ }
switch (event) {
case GF_EVENT_CHILD_UP:
- i = find_child_index (this, data);
-
- /* temporarily
- afr_attempt_lock_recovery (this, i);
- */
-
- child_up[i] = 1;
-
LOCK (&priv->lock);
{
+ priv->child_up[idx] = 1;
priv->up_count++;
+
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 1)
+ up_children++;
+ if (up_children == 1) {
+ gf_log (this->name, GF_LOG_INFO,
+ "Subvolume '%s' came back up; "
+ "going online.", ((xlator_t *)data)->name);
+ } else {
+ event = GF_EVENT_CHILD_MODIFIED;
+ }
+
+ priv->last_event[idx] = event;
}
UNLOCK (&priv->lock);
- /*
- if all the children were down, and one child came up,
- send notify to parent
- */
+ break;
- for (i = 0; i < priv->child_count; i++)
- if (child_up[i] == 1)
- up_children++;
+ case GF_EVENT_CHILD_DOWN:
+ LOCK (&priv->lock);
+ {
+ priv->child_up[idx] = 0;
+ priv->down_count++;
- if (up_children == 1) {
- gf_log (this->name, GF_LOG_INFO,
- "Subvolume '%s' came back up; "
- "going online.", ((xlator_t *)data)->name);
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 0)
+ down_children++;
+ if (down_children == priv->child_count) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "All subvolumes are down. Going offline "
+ "until atleast one of them comes back up.");
+ } else {
+ event = GF_EVENT_CHILD_MODIFIED;
+ }
- default_notify (this, event, data);
- } else {
- default_notify (this, GF_EVENT_CHILD_MODIFIED, data);
+ priv->last_event[idx] = event;
}
+ UNLOCK (&priv->lock);
break;
- case GF_EVENT_CHILD_DOWN:
- i = find_child_index (this, data);
-
- child_up[i] = 0;
-
+ case GF_EVENT_CHILD_CONNECTING:
LOCK (&priv->lock);
{
- priv->down_count++;
+ priv->last_event[idx] = event;
}
UNLOCK (&priv->lock);
+ break;
+ default:
+ propagate = 1;
+ break;
+ }
- /*
- if all children are down, and this was the last to go down,
- send notify to parent
- */
-
- for (i = 0; i < priv->child_count; i++)
- if (child_up[i] == 0)
- down_children++;
+ /* have all subvolumes reported status once by now? */
+ have_heard_from_all = 1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->last_event[i])
+ have_heard_from_all = 0;
+ }
- if (down_children == priv->child_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "All subvolumes are down. Going offline "
- "until atleast one of them comes back up.");
+ /* if all subvols have reported status, no need to hide anything
+ or wait for anything else. Just propagate blindly */
+ if (have_heard_from_all)
+ propagate = 1;
- default_notify (this, event, data);
- } else {
- default_notify (this, GF_EVENT_CHILD_MODIFIED, data);
- }
+ if (!had_heard_from_all && have_heard_from_all) {
+ /* This is the first event which completes aggregation
+ of events from all subvolumes. If at least one subvol
+ had come up, propagate CHILD_UP, but only this time
+ */
+ event = GF_EVENT_CHILD_DOWN;
- break;
+ LOCK (&priv->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
+ event = GF_EVENT_CHILD_UP;
+ break;
+ }
- default:
- default_notify (this, event, data);
+ if (priv->last_event[i] ==
+ GF_EVENT_CHILD_CONNECTING) {
+ event = GF_EVENT_CHILD_CONNECTING;
+ /* continue to check other events for CHILD_UP */
+ }
+ }
+ }
+ UNLOCK (&priv->lock);
}
- return 0;
+ ret = 0;
+ if (propagate)
+ ret = default_notify (this, event, data);
+out:
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 79753c91b09..35dad50072d 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -706,6 +706,13 @@ init (xlator_t *this)
i++;
}
+ priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
+ gf_afr_mt_int32_t);
+ if (!priv->last_event) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
LOCK_INIT (&priv->root_inode_lk);
priv->first_lookup = 1;
priv->root_inode = NULL;
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index f1b0efbd22f..b806a524320 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -93,6 +93,7 @@ typedef struct _afr_private {
gf_boolean_t optimistic_change_log;
char vol_uuid[UUID_SIZE + 1];
+ int32_t *last_event;
} afr_private_t;
typedef struct {