diff options
author | Pranith Kumar K <pkarampu@redhat.com> | 2020-02-04 18:42:33 +0530 |
---|---|---|
committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2020-03-13 13:20:37 +0000 |
commit | b164a74884becef281b57ef93428bb740e3e342e (patch) | |
tree | 996aee140f4bd86adf2d18a311ff23dd08d0e176 | |
parent | eb916c057036db8289b41265797e5dce066d1512 (diff) |
cluster/afr: Fixes for halo
The current implementation assumes that the ping event always arrives after the
connect event, but that may not hold when fds need to be re-opened after the
socket connects, which takes additional time. So handle the ping and child-up
events in any order.
fixes: bz#1800583
Change-Id: I6bcdc0caa503bdc039ef2b4739fbf4afae121f05
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
-rw-r--r-- | tests/afr.rc | 10 | ||||
-rw-r--r-- | tests/basic/afr/halo.t | 61 | ||||
-rw-r--r-- | tests/volume.rc | 2 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 19 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.c | 4 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 1 |
6 files changed, 91 insertions, 6 deletions
diff --git a/tests/afr.rc b/tests/afr.rc index 35f352df78f..5fc7fa1898d 100644 --- a/tests/afr.rc +++ b/tests/afr.rc @@ -105,3 +105,13 @@ function get_quorum_type() local repl_id="$3" cat $m/.meta/graphs/active/$v-replicate-$repl_id/private|grep quorum-type|awk '{print $3}' } + +function afr_private_key_value() +{ + local v=$1 + local m=$2 + local replica_id=$3 + local key=$4 +#xargs at the end will strip leading spaces + grep -E "^${key} = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'=' | xargs +} diff --git a/tests/basic/afr/halo.t b/tests/basic/afr/halo.t new file mode 100644 index 00000000000..3f61f5a0402 --- /dev/null +++ b/tests/basic/afr/halo.t @@ -0,0 +1,61 @@ +#!/bin/bash +#Tests that halo basic functionality works as expected + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +function get_up_child() +{ + if [ "1" == $(afr_private_key_value $V0 $M0 0 "child_up\[0\]") ]; + then + echo 0 + elif [ "1" == $(afr_private_key_value $V0 $M0 0 "child_up\[1\]") ] + then + echo 1 + fi +} + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} +TEST $CLI volume set $V0 cluster.halo-enabled yes +TEST $CLI volume set $V0 cluster.halo-max-replicas 1 +TEST $CLI volume start $V0 +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; +EXPECT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[0\]" +EXPECT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[1\]" +EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[0\]" +EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[1\]" + +up_id=$(get_up_child) +TEST [[ ! 
-z "$up_id" ]] + +down_id=$((1-up_id)) + +TEST kill_brick $V0 $H0 $B0/${V0}${up_id} +#As max-replicas is configured to be 1, down_child should be up now +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${down_id}\]" +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "child_up\[${down_id}\]" +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]" +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "child_up\[${up_id}\]" +EXPECT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[${up_id}\]" +EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[${down_id}\]" + +#Bring the brick back up and the state should be restored +TEST $CLI volume start $V0 force +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]" + +up_id=$(get_up_child) +TEST [[ ! -z "$up_id" ]] +down_id=$((1-up_id)) +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${down_id}\]" +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "child_up\[${down_id}\]" +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]" +EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "child_up\[${up_id}\]" +EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[0\]" +EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[1\]" + +cleanup; diff --git a/tests/volume.rc b/tests/volume.rc index 288d491de39..bc768c9434f 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -193,7 +193,7 @@ function afr_child_up_status_meta { local mnt=$1 local repl=$2 local child=$3 - grep "child_up\[$child\]" $mnt/.meta/graphs/active/$repl/private | awk '{print $3}' + grep -E "^child_up\[$child\]" $mnt/.meta/graphs/active/$repl/private | awk '{print $3}' } function client_connected_status_meta { diff 
--git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 5462f0c7f53..4ee83659c6e 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -5665,6 +5665,8 @@ afr_priv_dump(xlator_t *this) GF_ATOMIC_GET(priv->pending_reads[i])); sprintf(key, "child_latency[%d]", i); gf_proc_dump_write(key, "%" PRId64, priv->child_latency[i]); + sprintf(key, "halo_child_up[%d]", i); + gf_proc_dump_write(key, "%d", priv->halo_child_up[i]); } gf_proc_dump_write("data_self_heal", "%d", priv->data_self_heal); gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal); @@ -5841,7 +5843,7 @@ find_best_down_child(xlator_t *this) priv = this->private; for (i = 0; i < priv->child_count; i++) { - if (priv->child_up[i] && priv->child_latency[i] >= 0 && + if (!priv->child_up[i] && priv->child_latency[i] >= 0 && priv->child_latency[i] < best_latency) { best_child = i; best_latency = priv->child_latency[i]; @@ -5913,7 +5915,9 @@ __afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx, "), " "marking child down.", child_latency_msec, halo_max_latency_msec); - *event = GF_EVENT_CHILD_DOWN; + if (priv->halo_child_up[idx]) { + *event = GF_EVENT_CHILD_DOWN; + } } } else if (child_latency_msec < halo_max_latency_msec && priv->child_up[idx] == 0) { @@ -5925,7 +5929,9 @@ __afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx, "), " "marking child up.", child_latency_msec, halo_max_latency_msec); - *event = GF_EVENT_CHILD_UP; + if (priv->halo_child_up[idx]) { + *event = GF_EVENT_CHILD_UP; + } } else { gf_log(child_xlator->name, GF_LOG_INFO, "Not marking child %d up, " @@ -5992,7 +5998,10 @@ __afr_handle_child_up_event(xlator_t *this, xlator_t *child_xlator, if (child_latency_msec < 0) { /*set to INT64_MAX-1 so that it is found for best_down_child*/ - priv->child_latency[idx] = AFR_HALO_MAX_LATENCY; + priv->halo_child_up[idx] = 1; + if (priv->child_latency[idx] < 0) { + 
priv->child_latency[idx] = AFR_HALO_MAX_LATENCY; + } } /* @@ -6081,6 +6090,7 @@ __afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx, */ if (child_latency_msec < 0) { priv->child_latency[idx] = child_latency_msec; + priv->halo_child_up[idx] = 0; } priv->child_up[idx] = 0; @@ -6661,6 +6671,7 @@ afr_priv_destroy(afr_private_t *priv) GF_FREE(priv->pending_key); GF_FREE(priv->children); GF_FREE(priv->child_up); + GF_FREE(priv->halo_child_up); GF_FREE(priv->child_latency); LOCK_DESTROY(&priv->lock); diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index ec7aa226821..a38489d9932 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -556,8 +556,10 @@ init(xlator_t *this) priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count, gf_afr_mt_child_latency_t); + priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count, + gf_afr_mt_char); - if (!priv->child_up || !priv->child_latency) { + if (!priv->child_up || !priv->child_latency || !priv->halo_child_up) { ret = -ENOMEM; goto out; } diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 88456562610..7f50a27e6c9 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -191,6 +191,7 @@ typedef struct _afr_private { struct list_head ta_onwireq; unsigned char *child_up; + unsigned char *halo_child_up; int64_t *child_latency; unsigned char *local; |