diff options
| -rw-r--r-- | tests/basic/halo-failover-enabled.t | 46 | ||||
| -rw-r--r-- | tests/include.rc | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 39 |
3 files changed, 62 insertions, 25 deletions
diff --git a/tests/basic/halo-failover-enabled.t b/tests/basic/halo-failover-enabled.t index e897d076813..aa73589366f 100644 --- a/tests/basic/halo-failover-enabled.t +++ b/tests/basic/halo-failover-enabled.t @@ -24,6 +24,7 @@ TEST $CLI volume set $V0 cluster.shd-max-threads 1 TEST $CLI volume set $V0 cluster.halo-enabled True TEST $CLI volume set $V0 cluster.halo-failover-enabled on TEST $CLI volume set $V0 cluster.halo-max-replicas 2 +TEST $CLI volume set $V0 cluster.halo-min-samples 1 TEST $CLI volume set $V0 cluster.quorum-type fixed TEST $CLI volume set $V0 cluster.quorum-count 2 TEST $CLI volume set $V0 cluster.heal-timeout 5 @@ -34,36 +35,45 @@ TEST $CLI volume set $V0 cluster.self-heal-daemon on TEST $CLI volume set $V0 cluster.eager-lock off TEST $CLI volume set $V0 network.ping-timeout 20 TEST $CLI volume set $V0 cluster.choose-local off +TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG +TEST $CLI volume set $V0 diagnostics.brick-log-level DEBUG +TEST $CLI volume set $V0 nfs.log-level DEBUG TEST $CLI volume start $V0 TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 -cd $M0 # Write some data to the mount dd if=/dev/urandom of=$M0/test bs=1k count=200 conv=fsync -# Calulate the MD5s on the two up volumes. -MD5_B0=$(md5sum $B0/${V0}0/test | cut -d' ' -f1) -MD5_B1=$(md5sum $B0/${V0}1/test | cut -d' ' -f1) +KILL_IDX=$(cat /var/log/glusterfs/$M0LOG | grep "halo state: UP" | tail -n1 | grep -Eo "Child [0-9]+" | grep -Eo "[0-9]+") +TEST [ -n "$KILL_IDX" ] +# NB: UP_CHILDREN is the set of children that should be up after we kill +# the brick indicated by KILL_IDX, *not* the set of children which are +# currently up! 
+UP_CHILDREN=($(echo "0 1 2" | sed "s/${KILL_IDX}//g")) +UP1_HAS_TEST="$(ls $B0/${V0}${UP_CHILDREN[0]}/test 2>/dev/null)" +UP2_HAS_TEST="$(ls $B0/${V0}${UP_CHILDREN[1]}/test 2>/dev/null)" -# Verify they are the same -TEST [ "$MD5_B0" == "$MD5_B1" ] +# Of the bricks which will remain standing, there is only a single +# brick which has the file called test. If they both have the first +# test file, the test is invalid as all the bricks are up and the +# halo-max-replicas is not being honored; i.e. a bug exists. +ONLY_ONE=$((([ -z "$UP2_HAS_TEST" ] || [ -z "$UP1_HAS_TEST" ]) && + ([ -n "$UP2_HAS_TEST" ] || [ -n "$UP1_HAS_TEST" ])) && echo true) +TEST [ "x$ONLY_ONE" == "xtrue" ] -sleep 0.5 -# Kill the first brick, fail-over to 3rd -TEST kill_brick $V0 $H0 $B0/${V0}0 +echo "Failing child ${KILL_IDX}..." +TEST kill_brick $V0 $H0 $B0/${V0}${KILL_IDX} # Test the mount is still RW (i.e. quorum works) -TEST dd if=/dev/urandom of=$M0/test_rw bs=1M count=1 conv=fsync +TEST dd if=/dev/urandom of=$M0/test_failover bs=1M count=1 conv=fsync # Calulate the MD5s -MD5_B0=$(md5sum $B0/${V0}0/test_rw | cut -d' ' -f1) -MD5_B1=$(md5sum $B0/${V0}1/test_rw | cut -d' ' -f1) -MD5_B2=$(md5sum $B0/${V0}2/test_rw | cut -d' ' -f1) +MD5_UP1=$(md5sum $B0/${V0}${UP_CHILDREN[0]}/test_failover | cut -d' ' -f1) +MD5_UP2=$(md5sum $B0/${V0}${UP_CHILDREN[1]}/test_failover | cut -d' ' -f1) -# Verify they are the same -TEST [ x"$MD5_B1" == x"$MD5_B2" ] - -# Verify the failed brick has a different MD5 -TEST [ x"$MD5_B0" != x"$MD5_B1" ] +# Verify the two up bricks have identical MD5s; if both are identical +# then we must have successfully failed-over to the brick which was +# previously proven to be down (via the ONLY_ONE test). 
+TEST [ "$MD5_UP1" == "$MD5_UP2" ] cleanup diff --git a/tests/include.rc b/tests/include.rc index e13bbacc392..d1acbee5995 100644 --- a/tests/include.rc +++ b/tests/include.rc @@ -18,6 +18,8 @@ META_MNT=${META_MNT:=/var/run/gluster/shared_storage}; # Mount point of shared g CC=cc OSTYPE=$(uname -s) +M0LOG=${M0LOG:="mnt-glusterfs-0.log"}; # Log file for 0th FUSE mount point + ENV_RC=$(dirname $0)/../env.rc if [ ! -f $ENV_RC ]; then ENV_RC=$(dirname $0)/../../env.rc diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 8e46117b025..c4b6fd6a9b6 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -4253,6 +4253,21 @@ find_worst_up_child (xlator_t *this) return worst_child; } +static void dump_halo_states (xlator_t *this) { + afr_private_t *priv = NULL; + int i = -1; + + priv = this->private; + + for (i = 0; i < priv->child_count; i++) { + gf_log (this->name, GF_LOG_DEBUG, + "Child %d halo state: %s (%"PRIi64"ms)", + i, + priv->child_up[i] ? 
CHILD_UP_STR : CHILD_DOWN_STR, + priv->child_latency[i]); + } +} + static void _afr_handle_ping_event (xlator_t *this, xlator_t *child_xlator, const int idx, const int64_t halo_max_latency_msec, @@ -4264,7 +4279,6 @@ _afr_handle_ping_event (xlator_t *this, xlator_t *child_xlator, int up_children = 0; int best_down_child = 0; uint64_t latency_samples = 0; - char *child_state_str = NULL; priv = this->private; @@ -4276,13 +4290,7 @@ _afr_handle_ping_event (xlator_t *this, xlator_t *child_xlator, for (i = 0; i < priv->child_count; i++) { if (priv->child_up[i] == 1) { up_children++; - child_state_str = CHILD_UP_STR; - } else { - child_state_str = CHILD_DOWN_STR; } - gf_log (child_xlator->name, GF_LOG_DEBUG, - "Child %d halo state: %s (%"PRIi64"ms)", - i, child_state_str, priv->child_latency[i]); } /* Don't do anything until you have some minimum numbner of @@ -4340,6 +4348,7 @@ _afr_handle_ping_event (xlator_t *this, xlator_t *child_xlator, * Case 3: Child latency is within halo,and currently marked up, * mark it down if it's the highest latency child and the * number of up children is greater than halo_max_replicas. + * UNLESS you are an SHD in which case do nothing. 
*/ } else if ((child_halo_enabled == _gf_true && *child_latency_msec <= halo_max_latency_msec) && @@ -4358,6 +4367,12 @@ _afr_handle_ping_event (xlator_t *this, xlator_t *child_xlator, *event = GF_EVENT_CHILD_DOWN; } } + + if (*event != GF_EVENT_CHILD_PING && + gf_log_get_loglevel () >= GF_LOG_DEBUG) { + gf_log (this->name, GF_LOG_DEBUG, "Initial halo states:"); + dump_halo_states (this); + } } void @@ -4457,6 +4472,11 @@ out: } priv->last_event[idx] = *event; + + if (gf_log_get_loglevel () >= GF_LOG_DEBUG) { + gf_log (this->name, GF_LOG_DEBUG, "New halo states:"); + dump_halo_states (this); + } } void @@ -4551,6 +4571,11 @@ _afr_handle_child_down_event (xlator_t *this, xlator_t *child_xlator, *event = GF_EVENT_CHILD_MODIFIED; } priv->last_event[idx] = *event; + + if (gf_log_get_loglevel () >= GF_LOG_DEBUG) { + gf_log (this->name, GF_LOG_DEBUG, "New halo states:"); + dump_halo_states (this); + } } int64_t |
