diff options
| author | Pranith Kumar K <pkarampu@redhat.com> | 2020-02-04 18:42:33 +0530 | 
|---|---|---|
| committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2020-03-13 13:20:37 +0000 | 
| commit | b164a74884becef281b57ef93428bb740e3e342e (patch) | |
| tree | 996aee140f4bd86adf2d18a311ff23dd08d0e176 /xlators/cluster | |
| parent | eb916c057036db8289b41265797e5dce066d1512 (diff) | |
cluster/afr: Fixes for halo
The current implementation assumes that the ping event always arrives after the
connect (child-up) event. That is not guaranteed: when fds need to be re-opened
after the socket connects, re-opening them takes extra time, so the ping event
can arrive first. Handle the ping and child-up events in either order.
fixes: bz#1800583
Change-Id: I6bcdc0caa503bdc039ef2b4739fbf4afae121f05
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Diffstat (limited to 'xlators/cluster')
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 19 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.c | 4 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 1 | 
3 files changed, 19 insertions, 5 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 5462f0c7f53..4ee83659c6e 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -5665,6 +5665,8 @@ afr_priv_dump(xlator_t *this)                             GF_ATOMIC_GET(priv->pending_reads[i]));          sprintf(key, "child_latency[%d]", i);          gf_proc_dump_write(key, "%" PRId64, priv->child_latency[i]); +        sprintf(key, "halo_child_up[%d]", i); +        gf_proc_dump_write(key, "%d", priv->halo_child_up[i]);      }      gf_proc_dump_write("data_self_heal", "%d", priv->data_self_heal);      gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal); @@ -5841,7 +5843,7 @@ find_best_down_child(xlator_t *this)      priv = this->private;      for (i = 0; i < priv->child_count; i++) { -        if (priv->child_up[i] && priv->child_latency[i] >= 0 && +        if (!priv->child_up[i] && priv->child_latency[i] >= 0 &&              priv->child_latency[i] < best_latency) {              best_child = i;              best_latency = priv->child_latency[i]; @@ -5913,7 +5915,9 @@ __afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx,                     "), "                     "marking child down.",                     child_latency_msec, halo_max_latency_msec); -            *event = GF_EVENT_CHILD_DOWN; +            if (priv->halo_child_up[idx]) { +                *event = GF_EVENT_CHILD_DOWN; +            }          }      } else if (child_latency_msec < halo_max_latency_msec &&                 priv->child_up[idx] == 0) { @@ -5925,7 +5929,9 @@ __afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx,                     "), "                     "marking child up.",                     child_latency_msec, halo_max_latency_msec); -            *event = GF_EVENT_CHILD_UP; +            if (priv->halo_child_up[idx]) { +                *event = GF_EVENT_CHILD_UP; +            }     
     } else {              gf_log(child_xlator->name, GF_LOG_INFO,                     "Not marking child %d up, " @@ -5992,7 +5998,10 @@ __afr_handle_child_up_event(xlator_t *this, xlator_t *child_xlator,      if (child_latency_msec < 0) {          /*set to INT64_MAX-1 so that it is found for best_down_child*/ -        priv->child_latency[idx] = AFR_HALO_MAX_LATENCY; +        priv->halo_child_up[idx] = 1; +        if (priv->child_latency[idx] < 0) { +            priv->child_latency[idx] = AFR_HALO_MAX_LATENCY; +        }      }      /* @@ -6081,6 +6090,7 @@ __afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx,       */      if (child_latency_msec < 0) {          priv->child_latency[idx] = child_latency_msec; +        priv->halo_child_up[idx] = 0;      }      priv->child_up[idx] = 0; @@ -6661,6 +6671,7 @@ afr_priv_destroy(afr_private_t *priv)      GF_FREE(priv->pending_key);      GF_FREE(priv->children);      GF_FREE(priv->child_up); +    GF_FREE(priv->halo_child_up);      GF_FREE(priv->child_latency);      LOCK_DESTROY(&priv->lock); diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index ec7aa226821..a38489d9932 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -556,8 +556,10 @@ init(xlator_t *this)      priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count,                                      gf_afr_mt_child_latency_t); +    priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count, +                                    gf_afr_mt_char); -    if (!priv->child_up || !priv->child_latency) { +    if (!priv->child_up || !priv->child_latency || !priv->halo_child_up) {          ret = -ENOMEM;          goto out;      } diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 88456562610..7f50a27e6c9 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -191,6 +191,7 @@ typedef struct _afr_private {      struct 
list_head ta_onwireq;      unsigned char *child_up; +    unsigned char *halo_child_up;      int64_t *child_latency;      unsigned char *local;  | 
