From 7a6ead5c03e9f62fe8726b141c94cc7d31a79c39 Mon Sep 17 00:00:00 2001
From: Richard Wareing
Date: Mon, 8 Sep 2014 16:06:11 -0700
Subject: Add halo-min-samples option, better swap logic, edge case fixes

Summary:
- Changes the halo decision to be based on the lowest halo value observed
- Adds the halo-min-samples option to wait until N latency samples have
  been gathered before activating halos.
- Fixed 3 edge cases where halos weren't being configured correctly, or
  not configured as quickly as possible. Namely:
  1. Don't mark a child down if there's no better alternative (and you'd
     no longer satisfy min/max replicas); fixes unnecessary flapping.
  2. If a child goes down and this causes us to fall below max_replicas,
     swap in a warm child immediately if it is within our halo latency
     (don't wait around for the next "ping"); swapping in a new child
     immediately helps with resiliency.
  3. If the child latency is within the halo, and it's currently marked
     up, mark it down if it's the highest-latency child and the number
     of children is > max_replicas; this allows us to support the SHD
     use-case where we can "beam" a single copy to a geo and have it
     replicate within the geo after that.
- More commenting

Test Plan:
- Run halo prove tests
- Pointed the compiled code at gfsglobal.prn2 and tested an NFS daemon
  and FUSE mounts to ensure they worked as expected on a large-scale
  cluster.

Reviewers: dph, jackl, cjh, mmckeen

Reviewed By: mmckeen

FB-commit-id: 7e2e8ae6b8ec62a5e0b31c9fd6100c81795b3424

Change-Id: Iba2b2f1bc848b4546cb96117ff1895f83953a4f8
Signed-off-by: Kevin Vigor
Reviewed-on: http://review.gluster.org/16304
CentOS-regression: Gluster Build System
NetBSD-regression: NetBSD Build System
Smoke: Gluster Build System
Reviewed-by: Shreyas Siravara
---
 tests/basic/halo-failover-disabled.t | 6 +++---
 tests/basic/halo.t                   | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/tests/basic/halo-failover-disabled.t b/tests/basic/halo-failover-disabled.t
index 05ccd7e822a..31a1d166404 100644
--- a/tests/basic/halo-failover-disabled.t
+++ b/tests/basic/halo-failover-disabled.t
@@ -25,6 +25,7 @@ TEST $CLI volume set $V0 cluster.halo-enabled True
 TEST $CLI volume set $V0 cluster.halo-max-latency 9999
 TEST $CLI volume set $V0 cluster.halo-shd-max-latency 9999
 TEST $CLI volume set $V0 cluster.halo-max-replicas 2
+TEST $CLI volume set $V0 cluster.halo-min-samples 1
 TEST $CLI volume set $V0 cluster.halo-failover-enabled off
 TEST $CLI volume set $V0 cluster.quorum-type fixed
 TEST $CLI volume set $V0 cluster.quorum-count 2
@@ -44,9 +45,8 @@ TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0
 cd $M0
 
 # Write some data to the mount
-dd if=/dev/urandom of=$M0/test bs=1k count=200 oflag=sync &> /dev/null &
+TEST dd if=/dev/urandom of=$M0/test bs=1k count=200 conv=fsync
 
-sleep 0.5
 # Kill the first brick, fail-over to 3rd
 TEST kill_brick $V0 $H0 $B0/${V0}0
 
@@ -56,7 +56,7 @@ TEST kill_brick $V0 $H0 $B0/${V0}0
 # will not be fullfilled. If we waited 1000 second the brick would
 # indeed be activated based on ping time, but for our test we want
 # the decision to be solely "down event" driven, not ping driven.
-TEST ! dd if=/dev/urandom of=$M0/test_rw bs=1M count=1
+TEST ! dd if=/dev/urandom of=$M0/test_rw bs=1M count=1 conv=fsync
 
 TEST $CLI volume start $V0 force
 sleep 2
diff --git a/tests/basic/halo.t b/tests/basic/halo.t
index 03fc0f88a19..25aca3442ab 100644
--- a/tests/basic/halo.t
+++ b/tests/basic/halo.t
@@ -23,6 +23,7 @@ TEST $CLI volume set $V0 cluster.background-self-heal-count 0
 TEST $CLI volume set $V0 cluster.shd-max-threads 1
 TEST $CLI volume set $V0 cluster.halo-enabled True
 TEST $CLI volume set $V0 cluster.halo-max-replicas 2
+TEST $CLI volume set $V0 cluster.halo-min-samples 1
 TEST $CLI volume set $V0 cluster.heal-timeout 5
 TEST $CLI volume set $V0 cluster.self-heal-daemon off
 TEST $CLI volume set $V0 cluster.eager-lock off
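For readers skimming this change, the short C program below sketches the two
decision rules the summary describes: the halo-min-samples gate, and shedding
the highest-latency child once more than halo-max-replicas children are up.
It is a hypothetical, self-contained model for illustration only; none of
these identifiers come from the AFR translator source, which this patch does
not touch.

    /*
     * Hypothetical sketch of the halo decision rules summarized above.
     * Every identifier here is invented for illustration; the real logic
     * lives in the AFR translator. This models only:
     *   - cluster.halo-min-samples: defer all halo decisions until enough
     *     latency samples have been gathered, and
     *   - marking the highest-latency child down while more children are
     *     up than cluster.halo-max-replicas allows (edge case 3).
     */
    #include <stdbool.h>
    #include <stdio.h>

    #define NCHILD 3

    struct child {
        double latency;   /* observed ping latency, in ms */
        int    samples;   /* latency samples gathered so far */
        bool   up;        /* currently marked up? */
    };

    /* halo-min-samples gate: no halo decisions until every child has
     * reported at least min_samples latency measurements. */
    static bool halo_active(const struct child *c, int n, int min_samples)
    {
        for (int i = 0; i < n; i++)
            if (c[i].samples < min_samples)
                return false;
        return true;
    }

    static int up_count(const struct child *c, int n)
    {
        int up = 0;
        for (int i = 0; i < n; i++)
            up += c[i].up;
        return up;
    }

    /* Index of the highest-latency child currently up, or -1 if none. */
    static int worst_child(const struct child *c, int n)
    {
        int worst = -1;
        for (int i = 0; i < n; i++)
            if (c[i].up && (worst < 0 || c[i].latency > c[worst].latency))
                worst = i;
        return worst;
    }

    /* One simplified decision pass. Shedding stops at max_replicas, which
     * mirrors edge case 1: a child is never marked down when doing so
     * would leave the replica count unsatisfied. */
    static void halo_tick(struct child *c, int n, int min_samples,
                          int max_replicas)
    {
        if (!halo_active(c, n, min_samples)) {
            printf("too few samples; halo decisions deferred\n");
            return;
        }
        while (up_count(c, n) > max_replicas) {
            int w = worst_child(c, n);
            if (w < 0)
                break;
            c[w].up = false;
            printf("marked child %d down (%.1f ms)\n", w, c[w].latency);
        }
    }

    int main(void)
    {
        struct child c[NCHILD] = {
            { .latency = 5.0,   .samples = 0, .up = true },
            { .latency = 8.0,   .samples = 0, .up = true },
            { .latency = 250.0, .samples = 0, .up = true },
        };

        halo_tick(c, NCHILD, 1, 2);       /* no samples yet: deferred */

        for (int i = 0; i < NCHILD; i++)  /* first round of pings arrives */
            c[i].samples = 1;

        halo_tick(c, NCHILD, 1, 2);       /* now sheds the 250 ms child */
        return 0;
    }

This also shows why the tests set cluster.halo-min-samples to 1: with the
default (higher) sample requirement, halo decisions would stay deferred for
the first pings and the tests would race against sample collection.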