| author | Pranith Kumar K <pranithk@gluster.com> | 2011-05-31 01:54:54 +0000 |
|---|---|---|
| committer | Anand Avati <avati@gluster.com> | 2011-05-31 09:11:00 -0700 |
| commit | a5105cdab107240465838ab5e40fae74baea4c5b | |
| tree | f8667264000517cf8a0a8689e82ac4b2c6869461 | |
| parent | 5f1efbc32d69097a00b56eec3a2ca2aa26ae084c | |
cluster/afr: Send the first child up/down after all its children notify
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Signed-off-by: Anand Avati <avati@gluster.com>
BUG: 2870 (Inconsistent xattr values when creating bricks)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2870
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 160 |
| -rw-r--r-- | xlators/cluster/afr/src/afr.c | 7 |
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 1 |
3 files changed, 113 insertions, 55 deletions
```diff
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index ffd2200066f..ce1eb60cde5 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -2507,93 +2507,143 @@ int32_t
 afr_notify (xlator_t *this, int32_t event,
             void *data, ...)
 {
-        afr_private_t *     priv     = NULL;
-        unsigned char *     child_up = NULL;
+        afr_private_t   *priv               = NULL;
+        int             i                   = -1;
+        int             up_children         = 0;
+        int             down_children       = 0;
+        int             propagate           = 0;
 
-        int i           = -1;
-        int up_children = 0;
-        int down_children = 0;
+        int             had_heard_from_all  = 0;
+        int             have_heard_from_all = 0;
+        int             idx                 = -1;
+        int             ret                 = -1;
 
         priv = this->private;
 
         if (!priv)
                 return 0;
 
-        child_up = priv->child_up;
+        had_heard_from_all = 1;
+        for (i = 0; i < priv->child_count; i++) {
+                if (!priv->last_event[i]) {
+                        had_heard_from_all = 0;
+                }
+        }
+
+        /* parent xlators dont need to know about every child_up, child_down
+         * because of afr ha. If all subvolumes go down, child_down has
+         * to be triggered. In that state when 1 subvolume comes up child_up
+         * needs to be triggered. dht optimises revalidate lookup by sending
+         * it only to one of its subvolumes. When child up/down happens
+         * for afr's subvolumes dht should be notified by child_modified. The
+         * subsequent revalidate lookup happens on all the dht's subvolumes
+         * which triggers afr self-heals if any.
+         */
+        idx = find_child_index (this, data);
+        if (idx < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "Received child_up "
+                        "from invalid subvolume");
+                goto out;
+        }
 
         switch (event) {
         case GF_EVENT_CHILD_UP:
-                i = find_child_index (this, data);
-
-                /* temporarily
-                afr_attempt_lock_recovery (this, i);
-                */
-
-                child_up[i] = 1;
-
                 LOCK (&priv->lock);
                 {
+                        priv->child_up[idx] = 1;
                         priv->up_count++;
-                }
-                UNLOCK (&priv->lock);
-
-                /*
-                   if all the children were down, and one child came up,
-                   send notify to parent
-                */
-
-                for (i = 0; i < priv->child_count; i++)
-                        if (child_up[i] == 1)
-                                up_children++;
-
-                if (up_children == 1) {
-                        gf_log (this->name, GF_LOG_NORMAL,
-                                "Subvolume '%s' came back up; "
-                                "going online.", ((xlator_t *)data)->name);
+                        for (i = 0; i < priv->child_count; i++)
+                                if (priv->child_up[i] == 1)
+                                        up_children++;
+                        if (up_children == 1) {
+                                gf_log (this->name, GF_LOG_INFO,
+                                        "Subvolume '%s' came back up; "
+                                        "going online.", ((xlator_t *)data)->name);
+                        } else {
+                                event = GF_EVENT_CHILD_MODIFIED;
+                        }
 
-                        default_notify (this, event, data);
-                } else {
-                        default_notify (this, GF_EVENT_CHILD_MODIFIED, data);
+                        priv->last_event[idx] = event;
                 }
+                UNLOCK (&priv->lock);
 
                 break;
 
         case GF_EVENT_CHILD_DOWN:
-                i = find_child_index (this, data);
-
-                child_up[i] = 0;
-
                 LOCK (&priv->lock);
                 {
+                        priv->child_up[idx] = 0;
                         priv->down_count++;
+                        for (i = 0; i < priv->child_count; i++)
+                                if (priv->child_up[i] == 0)
+                                        down_children++;
+                        if (down_children == priv->child_count) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "All subvolumes are down. Going offline "
+                                        "until atleast one of them comes back up.");
+                        } else {
+                                event = GF_EVENT_CHILD_MODIFIED;
+                        }
+                        priv->last_event[idx] = event;
                 }
                 UNLOCK (&priv->lock);
 
-                /*
-                   if all children are down, and this was the last to go down,
-                   send notify to parent
-                */
+                break;
 
-                for (i = 0; i < priv->child_count; i++)
-                        if (child_up[i] == 0)
-                                down_children++;
+        case GF_EVENT_CHILD_CONNECTING:
+                 LOCK (&priv->lock);
+                 {
+                        priv->last_event[idx] = event;
+                 }
+                 UNLOCK (&priv->lock);
+                break;
+        default:
+                propagate = 1;
+                break;
+        }
 
-                if (down_children == priv->child_count) {
-                        gf_log (this->name, GF_LOG_ERROR,
-                                "All subvolumes are down. Going offline "
-                                "until atleast one of them comes back up.");
+        /* have all subvolumes reported status once by now? */
+        have_heard_from_all = 1;
+        for (i = 0; i < priv->child_count; i++) {
+                if (!priv->last_event[i])
+                        have_heard_from_all = 0;
+        }
 
-                        default_notify (this, event, data);
-                } else {
-                        default_notify (this, GF_EVENT_CHILD_MODIFIED, data);
-                }
+        /* if all subvols have reported status, no need to hide anything
+           or wait for anything else. Just propagate blindly */
+        if (have_heard_from_all)
+                propagate = 1;
 
-                break;
+        if (!had_heard_from_all && have_heard_from_all) {
+                /* This is the first event which completes aggregation
+                   of events from all subvolumes. If at least one subvol
+                   had come up, propagate CHILD_UP, but only this time
+                */
+                event = GF_EVENT_CHILD_DOWN;
 
-        default:
-                default_notify (this, event, data);
+                LOCK (&priv->lock);
+                {
+                        for (i = 0; i < priv->child_count; i++) {
+                                if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
+                                        event = GF_EVENT_CHILD_UP;
+                                        break;
+                                }
+
+                                if (priv->last_event[i] ==
+                                                GF_EVENT_CHILD_CONNECTING) {
+                                        event = GF_EVENT_CHILD_CONNECTING;
+                                        /* continue to check other events for CHILD_UP */
+                                }
+                        }
+                }
+                UNLOCK (&priv->lock);
         }
 
-        return 0;
+        ret = 0;
+        if (propagate)
+                ret = default_notify (this, event, data);
+out:
+        return ret;
 }
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 3aa035c1712..91514861e8f 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -972,6 +972,13 @@ init (xlator_t *this)
 		i++;
 	}
 
+        priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
+                                      gf_afr_mt_int32_t);
+        if (!priv->last_event) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
         LOCK_INIT (&priv->root_inode_lk);
         priv->first_lookup = 1;
         priv->root_inode = NULL;
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index e6cd9bf4905..6af96f9db18 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -93,6 +93,7 @@ typedef struct _afr_private {
         gf_boolean_t     optimistic_change_log;
 
         char                   vol_uuid[UUID_SIZE + 1];
+        int32_t                *last_event;
 } afr_private_t;
 
 typedef struct {
```
