From 8210ca1a5c0e78e91c6fab7df7e002e39660b706 Mon Sep 17 00:00:00 2001 From: Ravishankar N Date: Sun, 10 Jan 2016 09:19:34 +0530 Subject: afr: Add throttled background client-side heals If a heal is needed after inode refresh (lookup, read_txn), launch it in the background instead of blocking the fop (that triggered refresh) until the heal happens. afr_replies_interpret() is modified such that the heal is launched only if at least one sink brick is up. The maximum number of heals that can happen in parallel is configurable via the 'background-self-heal-count' volume option. Any heals beyond that number are put in a wait queue whose length is configurable via the 'heal-wait-queue-length' volume option. If the wait queue is also full, further heals will be ignored. Default values: background-self-heal-count=8, heal-wait-queue-length=128 Change-Id: I1d4a52814cdfd43d90591b6d2ad7b6219937ce70 BUG: 1297172 Signed-off-by: Ravishankar N Reviewed-on: http://review.gluster.org/13207 Smoke: Gluster Build System CentOS-regression: Gluster Build System Reviewed-by: Pranith Kumar Karampuri Tested-by: Pranith Kumar Karampuri NetBSD-regression: NetBSD Build System --- tests/bugs/glusterd/859927/repl.t | 4 ++- tests/bugs/quota/bug-1035576.t | 1 - tests/bugs/replicate/bug-802417.t | 5 ++-- tests/bugs/replicate/bug-977797.t | 52 +++++++++++++++++---------------------- 4 files changed, 28 insertions(+), 34 deletions(-) (limited to 'tests/bugs') diff --git a/tests/bugs/glusterd/859927/repl.t b/tests/bugs/glusterd/859927/repl.t index a500961165c..40e86029685 100755 --- a/tests/bugs/glusterd/859927/repl.t +++ b/tests/bugs/glusterd/859927/repl.t @@ -23,7 +23,6 @@ TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}; TEST $CLI volume set $V0 cluster.self-heal-daemon off TEST $CLI volume set $V0 performance.stat-prefetch off TEST $CLI volume set $V0 client-log-level DEBUG -TEST $CLI volume set $V0 cluster.background-self-heal-count 0 TEST $CLI volume start $V0 TEST glusterfs --entry-timeout=0 
--attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0; @@ -34,6 +33,7 @@ EXPECT full volume_option $V0 cluster.data-self-heal-algorithm create_setup_for_self_heal $M0/a EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 cat $file 2>&1 > /dev/null +EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0 TEST cmp $B0/${V0}1/a $B0/${V0}2/a TEST $CLI volume set $V0 cluster.data-self-heal-algorithm diff @@ -41,12 +41,14 @@ EXPECT diff volume_option $V0 cluster.data-self-heal-algorithm create_setup_for_self_heal $M0/a EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 cat $file 2>&1 > /dev/null +EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0 TEST cmp $B0/${V0}1/a $B0/${V0}2/a TEST $CLI volume reset $V0 cluster.data-self-heal-algorithm create_setup_for_self_heal $M0/a EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 cat $file 2>&1 > /dev/null +EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0 TEST cmp $B0/${V0}1/a $B0/${V0}2/a TEST ! $CLI volume set $V0 cluster.data-self-heal-algorithm "" diff --git a/tests/bugs/quota/bug-1035576.t b/tests/bugs/quota/bug-1035576.t index e3d32d107d0..12f960c46c6 100644 --- a/tests/bugs/quota/bug-1035576.t +++ b/tests/bugs/quota/bug-1035576.t @@ -18,7 +18,6 @@ TEST $CLI volume set $V0 performance.io-cache off TEST $CLI volume set $V0 performance.write-behind off TEST $CLI volume set $V0 performance.stat-prefetch off TEST $CLI volume set $V0 performance.read-ahead off -TEST $CLI volume set $V0 background-self-heal-count 0 TEST $CLI volume set $V0 self-heal-daemon off TEST $CLI volume quota $V0 enable diff --git a/tests/bugs/replicate/bug-802417.t b/tests/bugs/replicate/bug-802417.t index df989b1470b..c5ba98b65fd 100755 --- a/tests/bugs/replicate/bug-802417.t +++ b/tests/bugs/replicate/bug-802417.t @@ -32,7 +32,6 @@ TEST $CLI volume set $V0 performance.stat-prefetch off ## Make sure automatic self-heal doesn't perturb our results. 
TEST $CLI volume set $V0 cluster.self-heal-daemon off TEST $CLI volume set $V0 cluster.data-self-heal on -TEST $CLI volume set $V0 cluster.background-self-heal-count 0 ## Start volume and verify TEST $CLI volume start $V0; @@ -70,8 +69,8 @@ tgt_xattr_2="trusted.afr.${V0}-client-2" actual=$(afr_get_changelog_xattr $obs_path_0 $tgt_xattr_0) EXPECT "0x000000000000000000000000|^\$" echo $actual -actual=$(afr_get_changelog_xattr $obs_path_0 $tgt_xattr_1) -EXPECT "0x000000000000000000000000|^\$" echo $actual +EXPECT_WITHIN $HEAL_TIMEOUT "0x000000000000000000000000" \ +afr_get_changelog_xattr $obs_path_0 $tgt_xattr_1 actual=$(afr_get_changelog_xattr $obs_path_0 $tgt_xattr_2) EXPECT "0x000000030000000000000000" echo $actual diff --git a/tests/bugs/replicate/bug-977797.t b/tests/bugs/replicate/bug-977797.t index 3ff14ecf3d5..72c616ba68e 100755 --- a/tests/bugs/replicate/bug-977797.t +++ b/tests/bugs/replicate/bug-977797.t @@ -26,7 +26,6 @@ TEST $CLI volume set $V0 quick-read off TEST $CLI volume set $V0 read-ahead off TEST $CLI volume set $V0 write-behind off TEST $CLI volume set $V0 io-cache off -TEST $CLI volume set $V0 background-self-heal-count 0 TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 @@ -56,34 +55,29 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1; TEST dd if=$M0/a/file of=/dev/null bs=1024k -b1c0dir=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a \ - trusted.afr.$V0-client-0 "entry") -b1c1dir=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a \ - trusted.afr.$V0-client-1 "entry") -b2c0dir=$(afr_get_specific_changelog_xattr \ - $B0/$V0"2"/a trusted.afr.$V0-client-0 "entry") -b2c1dir=$(afr_get_specific_changelog_xattr \ - $B0/$V0"2"/a trusted.afr.$V0-client-1 "entry") - - -b1c0f=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a/file \ - trusted.afr.$V0-client-0 "data") -b1c1f=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a/file \ - trusted.afr.$V0-client-1 "data") 
-b2c0f=$(afr_get_specific_changelog_xattr $B0/$V0"2"/a/file \ - trusted.afr.$V0-client-0 "data") -b2c1f=$(afr_get_specific_changelog_xattr $B0/$V0"2"/a/file \ - trusted.afr.$V0-client-1 "data") - -EXPECT "00000000|^$" echo $b1c0f -EXPECT "00000000|^$" echo $b1c1f -EXPECT "00000000|^$" echo $b2c0f -EXPECT "00000000|^$" echo $b2c1f - -EXPECT "00000000|^$" echo $b1c0dir -EXPECT "00000000|^$" echo $b1c1dir -EXPECT "00000000|^$" echo $b2c0dir -EXPECT "00000000|^$" echo $b2c1dir +EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \ +afr_get_specific_changelog_xattr $B0/$V0"1"/a/file trusted.afr.$V0-client-0 "data" + +EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \ +afr_get_specific_changelog_xattr $B0/$V0"1"/a/file trusted.afr.$V0-client-1 "data" + +EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \ +afr_get_specific_changelog_xattr $B0/$V0"2"/a/file trusted.afr.$V0-client-0 "data" + +EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \ +afr_get_specific_changelog_xattr $B0/$V0"2"/a/file trusted.afr.$V0-client-1 "data" + +EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \ +afr_get_specific_changelog_xattr $B0/$V0"1"/a trusted.afr.$V0-client-0 "entry" + +EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \ +afr_get_specific_changelog_xattr $B0/$V0"1"/a trusted.afr.$V0-client-1 "entry" + +EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \ +afr_get_specific_changelog_xattr $B0/$V0"2"/a trusted.afr.$V0-client-0 "entry" + +EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \ +afr_get_specific_changelog_xattr $B0/$V0"2"/a trusted.afr.$V0-client-1 "entry" ## Finish up TEST $CLI volume stop $V0; -- cgit