-rw-r--r--  tests/basic/halo-failover.t           | 65
-rw-r--r--  xlators/cluster/afr/src/afr-common.c  |  8
2 files changed, 69 insertions, 4 deletions
diff --git a/tests/basic/halo-failover.t b/tests/basic/halo-failover.t
new file mode 100644
index 00000000000..220fa1f2207
--- /dev/null
+++ b/tests/basic/halo-failover.t
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# Tests that fail-over works correctly for Halo Geo-replication
+#
+# 1. Create a volume @ 3x replication w/ halo + quorum enabled
+# 2. Write some data, background it & fail a brick
+# 3. The expected result is that the writes fail-over to the 3rd
+#    brick immediately, and md5s will show they are equal once
+#    the write completes.
+# 4. The mount should also be RW after the brick is killed as
+#    quorum will be immediately restored by swapping in the
+#    other brick.
+#
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.shd-max-threads 1
+TEST $CLI volume set $V0 cluster.halo-enabled True
+TEST $CLI volume set $V0 cluster.halo-max-replicas 2
+TEST $CLI volume set $V0 cluster.quorum-type fixed
+TEST $CLI volume set $V0 cluster.quorum-count 2
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 network.ping-timeout 20
+TEST $CLI volume set $V0 cluster.choose-local off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+cd $M0
+
+# Write some data to the mount
+dd if=/dev/urandom of=$M0/test bs=1k count=200 oflag=sync &> /dev/null &
+
+sleep 0.5
+# Kill the first brick, fail-over to 3rd
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+# Test the mount is still RW (i.e. quorum works)
+TEST dd if=/dev/urandom of=$M0/test_rw bs=1M count=1
+
+# Wait for the dd to finish
+wait
+sleep 3
+
+# Calculate the MD5s
+MD5_B0=$(md5sum $B0/${V0}0/test | cut -d' ' -f1)
+MD5_B1=$(md5sum $B0/${V0}1/test | cut -d' ' -f1)
+MD5_B2=$(md5sum $B0/${V0}2/test | cut -d' ' -f1)
+
+# Verify the two surviving bricks are the same
+TEST [ "$MD5_B1" == "$MD5_B2" ]
+
+# Verify the failed brick has a different MD5
+TEST [ x"$MD5_B0" != x"$MD5_B1" ]
+
+cleanup
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index c2e95953a7f..ed2c6414718 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -4211,8 +4211,8 @@ find_best_down_child (xlator_t *this)
         priv = this->private;
 
         for (i = 0; i < priv->child_count; i++) {
-                if (priv->child_up[i] &&
-                    priv->child_latency[i] >= 0.0 &&
+                if (!priv->child_up[i] &&
+                    priv->child_latency[i] >= 0 &&
                     priv->child_latency[i] < best_latency) {
                         best_child = i;
                         best_latency = priv->child_latency[i];
@@ -4237,7 +4237,7 @@ find_worst_up_child (xlator_t *this)
 
         for (i = 0; i < priv->child_count; i++) {
                 if (priv->child_up[i] &&
-                    priv->child_latency[i] >= 0.0 &&
+                    priv->child_latency[i] >= 0 &&
                     priv->child_latency[i] > worst_latency) {
                         worst_child = i;
                         worst_latency = priv->child_latency[i];
@@ -4510,7 +4510,7 @@ afr_notify (xlator_t *this, int32_t event,
         gf_boolean_t had_quorum = _gf_false;
         gf_boolean_t has_quorum = _gf_false;
         int64_t halo_max_latency_msec = 0;
-        int64_t child_latency_msec = 0;
+        int64_t child_latency_msec = -1;
 
         child_xlator = (xlator_t *)data;
         priv = this->private;
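
For readers skimming the C hunks: the functional change is that find_best_down_child previously scanned children that were still up (priv->child_up[i]) and could treat an unmeasured latency as a real measurement, so it never actually selected the down brick it was meant to swap in. The sketch below is a minimal, self-contained illustration of the corrected selection rule only; the struct and function names are hypothetical simplifications, not the real afr_private_t or AFR API, and just the loop condition mirrors the patch.

#include <stdbool.h>
#include <stdint.h>

/* Hypothetical, simplified stand-in for the per-child state kept in
 * afr_private_t; only the fields needed for the selection rule. */
struct child_state {
        bool    up;            /* brick currently connected              */
        int64_t latency_msec;  /* measured latency; -1 = not yet measured */
};

/* Return the index of the lowest-latency child that is currently DOWN,
 * or -1 if no down child has a known latency.  Mirrors the corrected
 * condition: the child must be down (!up) and its latency must be
 * non-negative, so the -1 "unknown" sentinel is never treated as best. */
static int
find_best_down_child_sketch (const struct child_state *children, int count)
{
        int     best_child   = -1;
        int64_t best_latency = INT64_MAX;

        for (int i = 0; i < count; i++) {
                if (!children[i].up &&
                    children[i].latency_msec >= 0 &&
                    children[i].latency_msec < best_latency) {
                        best_child   = i;
                        best_latency = children[i].latency_msec;
                }
        }
        return best_child;
}

The same reasoning appears to motivate the afr_notify change: initializing child_latency_msec to -1 instead of 0 keeps an event that carries no latency measurement from looking like a zero-latency (i.e. best possible) child.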
