| author | Pranith Kumar K <pranithk@gluster.com> | 2011-05-31 01:55:57 +0000 | 
|---|---|---|
| committer | Anand Avati <avati@gluster.com> | 2011-05-31 09:12:17 -0700 | 
| commit | bc7d07d8d5eab29741c7e28b7dcb38ce66c101cb (patch) | |
| tree | 43c0f38b79c704687b00f4640bb7504d98e6599d | |
| parent | 631fc71ef8f356333bd478d444f2d638c1f30aeb (diff) | |
cluster/afr: Send the first child up/down after all its children notify
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Signed-off-by: Anand Avati <avati@gluster.com>
BUG: 2870 (Inconsistent xattr values when creating bricks)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2870
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 161 |
| -rw-r--r-- | xlators/cluster/afr/src/afr.c | 7 |
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 1 |
3 files changed, 115 insertions, 54 deletions
```diff
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index f000aaf9217..c8b1ea96011 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -2470,92 +2470,145 @@ int32_t
 afr_notify (xlator_t *this, int32_t event,
             void *data, ...)
 {
-        afr_private_t *     priv     = NULL;
-        unsigned char *     child_up = NULL;
-        int i           = -1;
-        int up_children = 0;
-        int down_children = 0;
+        afr_private_t   *priv               = NULL;
+        int             i                   = -1;
+        int             up_children         = 0;
+        int             down_children       = 0;
+        int             propagate           = 0;
+
+        int             had_heard_from_all  = 0;
+        int             have_heard_from_all = 0;
+        int             idx                 = -1;
+        int             ret                 = -1;
 
         priv = this->private;
 
         if (!priv)
                 return 0;
 
-        child_up = priv->child_up;
+        had_heard_from_all = 1;
+        for (i = 0; i < priv->child_count; i++) {
+                if (!priv->last_event[i]) {
+                        had_heard_from_all = 0;
+                }
+        }
+
+        /* parent xlators dont need to know about every child_up, child_down
+         * because of afr ha. If all subvolumes go down, child_down has
+         * to be triggered. In that state when 1 subvolume comes up child_up
+         * needs to be triggered. dht optimises revalidate lookup by sending
+         * it only to one of its subvolumes. When child up/down happens
+         * for afr's subvolumes dht should be notified by child_modified. The
+         * subsequent revalidate lookup happens on all the dht's subvolumes
+         * which triggers afr self-heals if any.
+         */
+        idx = find_child_index (this, data);
+        if (idx < 0) {
+                gf_log (this->name, GF_LOG_ERROR, "Received child_up "
+                        "from invalid subvolume");
+                goto out;
+        }
 
         switch (event) {
         case GF_EVENT_CHILD_UP:
-                i = find_child_index (this, data);
-
-                /* temporarily
-                   afr_attempt_lock_recovery (this, i);
-                */
-
-                child_up[i] = 1;
-
                 LOCK (&priv->lock);
                 {
+                        priv->child_up[idx] = 1;
                         priv->up_count++;
+
+                        for (i = 0; i < priv->child_count; i++)
+                                if (priv->child_up[i] == 1)
+                                        up_children++;
+                        if (up_children == 1) {
+                                gf_log (this->name, GF_LOG_INFO,
+                                        "Subvolume '%s' came back up; "
+                                        "going online.", ((xlator_t *)data)->name);
+                        } else {
+                                event = GF_EVENT_CHILD_MODIFIED;
+                        }
+
+                        priv->last_event[idx] = event;
                 }
                 UNLOCK (&priv->lock);
 
-                /*
-                  if all the children were down, and one child came up,
-                  send notify to parent
-                */
+                break;
 
-                for (i = 0; i < priv->child_count; i++)
-                        if (child_up[i] == 1)
-                                up_children++;
+        case GF_EVENT_CHILD_DOWN:
+                LOCK (&priv->lock);
+                {
+                        priv->child_up[idx] = 0;
+                        priv->down_count++;
 
-                if (up_children == 1) {
-                        gf_log (this->name, GF_LOG_INFO,
-                                "Subvolume '%s' came back up; "
-                                "going online.", ((xlator_t *)data)->name);
+                        for (i = 0; i < priv->child_count; i++)
+                                if (priv->child_up[i] == 0)
+                                        down_children++;
+                        if (down_children == priv->child_count) {
+                                gf_log (this->name, GF_LOG_ERROR,
+                                        "All subvolumes are down. Going offline "
+                                        "until atleast one of them comes back up.");
+                        } else {
+                                event = GF_EVENT_CHILD_MODIFIED;
+                        }
 
-                        default_notify (this, event, data);
-                } else {
-                        default_notify (this, GF_EVENT_CHILD_MODIFIED, data);
+                        priv->last_event[idx] = event;
                 }
+                UNLOCK (&priv->lock);
 
                 break;
 
-        case GF_EVENT_CHILD_DOWN:
-                i = find_child_index (this, data);
-
-                child_up[i] = 0;
-
+        case GF_EVENT_CHILD_CONNECTING:
                 LOCK (&priv->lock);
                 {
-                        priv->down_count++;
+                        priv->last_event[idx] = event;
                 }
                 UNLOCK (&priv->lock);
+                break;
 
+        default:
+                propagate = 1;
+                break;
+        }
 
-                /*
-                  if all children are down, and this was the last to go down,
-                  send notify to parent
-                */
-
-                for (i = 0; i < priv->child_count; i++)
-                        if (child_up[i] == 0)
-                                down_children++;
+        /* have all subvolumes reported status once by now? */
+        have_heard_from_all = 1;
+        for (i = 0; i < priv->child_count; i++) {
+                if (!priv->last_event[i])
+                        have_heard_from_all = 0;
+        }
 
-                if (down_children == priv->child_count) {
-                        gf_log (this->name, GF_LOG_ERROR,
-                                "All subvolumes are down. Going offline "
-                                "until atleast one of them comes back up.");
+        /* if all subvols have reported status, no need to hide anything
+           or wait for anything else. Just propagate blindly */
+        if (have_heard_from_all)
+                propagate = 1;
 
-                        default_notify (this, event, data);
-                } else {
-                        default_notify (this, GF_EVENT_CHILD_MODIFIED, data);
-                }
+        if (!had_heard_from_all && have_heard_from_all) {
+                /* This is the first event which completes aggregation
+                   of events from all subvolumes. If at least one subvol
+                   had come up, propagate CHILD_UP, but only this time
+                */
+                event = GF_EVENT_CHILD_DOWN;
 
-                break;
+                LOCK (&priv->lock);
+                {
+                        for (i = 0; i < priv->child_count; i++) {
+                                if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
+                                        event = GF_EVENT_CHILD_UP;
+                                        break;
+                                }
 
-        default:
-                default_notify (this, event, data);
+                                if (priv->last_event[i] ==
+                                                GF_EVENT_CHILD_CONNECTING) {
+                                        event = GF_EVENT_CHILD_CONNECTING;
+                                        /* continue to check other events for CHILD_UP */
+                                }
+                        }
+                }
+                UNLOCK (&priv->lock);
         }
 
-        return 0;
+        ret = 0;
+        if (propagate)
+                ret = default_notify (this, event, data);
+out:
+        return ret;
 }
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 79753c91b09..35dad50072d 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -706,6 +706,13 @@ init (xlator_t *this)
                 i++;
         }
 
+        priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
+                                      gf_afr_mt_int32_t);
+        if (!priv->last_event) {
+                ret = -ENOMEM;
+                goto out;
+        }
+
         LOCK_INIT (&priv->root_inode_lk);
         priv->first_lookup = 1;
         priv->root_inode = NULL;
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index f1b0efbd22f..b806a524320 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -93,6 +93,7 @@ typedef struct _afr_private {
         gf_boolean_t     optimistic_change_log;
         char                   vol_uuid[UUID_SIZE + 1];
 
+        int32_t                *last_event;
 } afr_private_t;
 
 typedef struct {
```
