From 83803b4b2d70e9e6e16bb050d7ac8e49ba420893 Mon Sep 17 00:00:00 2001 From: Jeff Darcy Date: Tue, 31 Jan 2017 14:49:45 -0500 Subject: core: run many bricks within one glusterfsd process This patch adds support for multiple brick translator stacks running in a single brick server process. This reduces our per-brick memory usage by approximately 3x, and our appetite for TCP ports even more. It also creates potential to avoid process/thread thrashing, and to improve QoS by scheduling more carefully across the bricks, but realizing that potential will require further work. Multiplexing is controlled by the "cluster.brick-multiplex" global option. By default it's off, and bricks are started in separate processes as before. If multiplexing is enabled, then *compatible* bricks (mostly those with the same transport options) will be started in the same process. Backport of: > Change-Id: I45059454e51d6f4cbb29a4953359c09a408695cb > BUG: 1385758 > Reviewed-on: https://review.gluster.org/14763 Change-Id: I4bce9080f6c93d50171823298fdf920258317ee8 BUG: 1418091 Signed-off-by: Jeff Darcy Reviewed-on: https://review.gluster.org/16496 Smoke: Gluster Build System NetBSD-regression: NetBSD Build System CentOS-regression: Gluster Build System Reviewed-by: Shyamsundar Ranganathan --- tests/basic/afr/add-brick-self-heal.t | 2 +- tests/basic/afr/arbiter-add-brick.t | 2 +- tests/basic/afr/arbiter-mount.t | 4 ++-- tests/basic/afr/arbiter-remove-brick.t | 2 +- tests/basic/afr/arbiter-statfs.t | 2 +- tests/basic/afr/arbiter.t | 4 ++-- tests/basic/afr/client-side-heal.t | 10 +++++----- tests/basic/afr/data-self-heal.t | 2 +- tests/basic/afr/entry-self-heal.t | 2 +- tests/basic/afr/gfid-mismatch.t | 4 ++++ tests/basic/afr/gfid-self-heal.t | 2 +- tests/basic/afr/heal-quota.t | 2 +- tests/basic/afr/metadata-self-heal.t | 2 +- tests/basic/afr/quorum.t | 4 ++-- tests/basic/afr/replace-brick-self-heal.t | 2 +- tests/basic/afr/root-squash-self-heal.t | 2 +- tests/basic/afr/self-heald.t | 2 +- tests/basic/afr/split-brain-favorite-child-policy.t | 2 +- tests/basic/afr/split-brain-heal-info.t | 2 +- tests/basic/afr/split-brain-healing.t | 2 +- tests/basic/afr/split-brain-resolution.t | 2 +- 21 files changed, 31 insertions(+), 27 deletions(-) (limited to 'tests/basic/afr') diff --git a/tests/basic/afr/add-brick-self-heal.t b/tests/basic/afr/add-brick-self-heal.t index 748d36758e7..a904e22e2a5 100644 --- a/tests/basic/afr/add-brick-self-heal.t +++ b/tests/basic/afr/add-brick-self-heal.t @@ -12,7 +12,7 @@ TEST $CLI volume set $V0 cluster.metadata-self-heal off TEST $CLI volume set $V0 cluster.entry-self-heal off TEST $CLI volume set $V0 self-heal-daemon off -TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; # Create files for i in {1..5} diff --git a/tests/basic/afr/arbiter-add-brick.t b/tests/basic/afr/arbiter-add-brick.t index 69e13267ccd..c6fe18cec16 100644 --- a/tests/basic/afr/arbiter-add-brick.t +++ b/tests/basic/afr/arbiter-add-brick.t @@ -11,7 +11,7 @@ TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} TEST $CLI volume set $V0 performance.stat-prefetch off TEST $CLI volume start $V0 TEST $CLI volume set $V0 self-heal-daemon off -TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; TEST mkdir $M0/dir1 TEST dd if=/dev/urandom of=$M0/file1 bs=1024 count=1 diff --git a/tests/basic/afr/arbiter-mount.t b/tests/basic/afr/arbiter-mount.t index 587e808863f..da99096f81f 100644 --- a/tests/basic/afr/arbiter-mount.t +++ b/tests/basic/afr/arbiter-mount.t @@ -22,7 +22,7 @@ TEST kill_brick $V0 $H0 $B0/${V0}1 # Doing `mount -t glusterfs $H0:$V0 $M0` fails right away but doesn't work on NetBSD # So check that stat fails instead. -TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 TEST ! stat $M0 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 @@ -34,7 +34,7 @@ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; -TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 TEST stat $M0 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 diff --git a/tests/basic/afr/arbiter-remove-brick.t b/tests/basic/afr/arbiter-remove-brick.t index 5a6daa95cfd..ec93c8758e4 100644 --- a/tests/basic/afr/arbiter-remove-brick.t +++ b/tests/basic/afr/arbiter-remove-brick.t @@ -11,7 +11,7 @@ TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2} EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks" TEST $CLI volume set $V0 performance.stat-prefetch off TEST $CLI volume start $V0 -TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; #syntax check for remove-brick. TEST ! $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}0 force diff --git a/tests/basic/afr/arbiter-statfs.t b/tests/basic/afr/arbiter-statfs.t index 7d136378f11..61cb9e1d04f 100644 --- a/tests/basic/afr/arbiter-statfs.t +++ b/tests/basic/afr/arbiter-statfs.t @@ -29,7 +29,7 @@ TEST MOUNT_LOOP $LO3 $B0/${V0}3 TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{1,2,3}; TEST $CLI volume start $V0 -TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0 +TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 free_space=$(df -P $M0 | tail -1 | awk '{ print $4}') TEST [ $free_space -gt 100000 ] TEST force_umount $M0 diff --git a/tests/basic/afr/arbiter.t b/tests/basic/afr/arbiter.t index 1abc940b095..7c92a9fe6c9 100644 --- a/tests/basic/afr/arbiter.t +++ b/tests/basic/afr/arbiter.t @@ -16,7 +16,7 @@ EXPECT 'Started' volinfo_field $V0 'Status' EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 -TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; TEST ! stat $M0/.meta/graphs/active/$V0-replicate-0/options/arbiter-count EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 TEST $CLI volume stop $V0 @@ -42,7 +42,7 @@ EXPECT 'Started' volinfo_field $V0 'Status' EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 -TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0; +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; TEST stat $M0/.meta/graphs/active/$V0-replicate-0/options/arbiter-count EXPECT "1" cat $M0/.meta/graphs/active/$V0-replicate-0/options/arbiter-count diff --git a/tests/basic/afr/client-side-heal.t b/tests/basic/afr/client-side-heal.t index d87f4b14063..eba7dc2b3c4 100755 --- a/tests/basic/afr/client-side-heal.t +++ b/tests/basic/afr/client-side-heal.t @@ -13,7 +13,7 @@ TEST $CLI volume set $V0 cluster.data-self-heal off TEST $CLI volume set $V0 cluster.metadata-self-heal off TEST $CLI volume start $V0 -TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; echo "some data" > $M0/datafile EXPECT 0 echo $? TEST touch $M0/mdatafile @@ -46,11 +46,11 @@ TEST ls $M0/mdatafile #To trigger inode refresh for sure, the volume is unmounted and mounted each time. #Check that data heal does not happen. EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 -TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; TEST cat $M0/datafile #Check that entry heal does not happen. EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 -TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; TEST ls $M0/dir #No heal must have happened @@ -68,12 +68,12 @@ EXPECT 7 get_pending_heal_count $V0 #Inode refresh must trigger data and entry heals. #To trigger inode refresh for sure, the volume is unmounted and mounted each time. EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 -TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; TEST cat $M0/datafile EXPECT_WITHIN $HEAL_TIMEOUT 6 get_pending_heal_count $V0 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 -TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; TEST ls $M0/dir EXPECT 5 get_pending_heal_count $V0 diff --git a/tests/basic/afr/data-self-heal.t b/tests/basic/afr/data-self-heal.t index 5db5d770b6f..0f417b4a0ba 100644 --- a/tests/basic/afr/data-self-heal.t +++ b/tests/basic/afr/data-self-heal.t @@ -77,7 +77,7 @@ TEST $CLI volume start $V0 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1 -TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --entry-timeout=0 --attribute-timeout=0; +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; cd $M0 TEST touch pending-changelog biggest-file-source.txt biggest-file-more-prio-than-changelog.txt same-size-more-prio-to-changelog.txt size-and-witness-same.txt self-accusing-vs-source.txt self-accusing-both.txt self-accusing-vs-innocent.txt self-accusing-bigger-exists.txt size-more-prio-than-self-accused.txt v1-dirty.txt split-brain.txt split-brain-all-dirty.txt split-brain-with-dirty.txt diff --git a/tests/basic/afr/entry-self-heal.t b/tests/basic/afr/entry-self-heal.t index 337b9c59f84..3c900fdcf9a 100644 --- a/tests/basic/afr/entry-self-heal.t +++ b/tests/basic/afr/entry-self-heal.t @@ -81,7 +81,7 @@ TEST $CLI volume set $V0 performance.io-cache off TEST $CLI volume set $V0 performance.quick-read off TEST $CLI volume start $V0 -TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --use-readdirp=no +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0 cd $M0 #_me_ is dir on which missing entry self-heal happens, _heal is where dir self-heal happens #spb is split-brain, fool is all fool diff --git a/tests/basic/afr/gfid-mismatch.t b/tests/basic/afr/gfid-mismatch.t index c3399215569..fc15793cf5a 100644 --- a/tests/basic/afr/gfid-mismatch.t +++ b/tests/basic/afr/gfid-mismatch.t @@ -13,6 +13,10 @@ TEST $CLI volume set $V0 self-heal-daemon off TEST $CLI volume set $V0 stat-prefetch off TEST $CLI volume start $V0 TEST $CLI volume set $V0 cluster.background-self-heal-count 0 +# We can't count on brick0 getting a copy of the file immediately without this, +# because (especially with multiplexing) it might not have *come up* +# immediately. +TEST $CLI volume set $V0 cluster.quorum-type auto TEST $GFS --volfile-id=$V0 -s $H0 $M0; #Test diff --git a/tests/basic/afr/gfid-self-heal.t b/tests/basic/afr/gfid-self-heal.t index 0bc53de8a6f..b54edbcae85 100644 --- a/tests/basic/afr/gfid-self-heal.t +++ b/tests/basic/afr/gfid-self-heal.t @@ -15,7 +15,7 @@ TEST $CLI volume set $V0 nfs.disable on TEST touch $B0/${V0}{0,1}/{1,2,3,4} TEST $CLI volume start $V0 -TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 #Test that readdir returns entries even when no gfids are present EXPECT 4 echo $(ls $M0 | grep -v '^\.' | wc -l) sleep 2; diff --git a/tests/basic/afr/heal-quota.t b/tests/basic/afr/heal-quota.t index 2663906f9d5..96e23363da8 100644 --- a/tests/basic/afr/heal-quota.t +++ b/tests/basic/afr/heal-quota.t @@ -13,7 +13,7 @@ TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} TEST $CLI volume set $V0 cluster.self-heal-daemon off TEST $CLI volume start $V0 -TEST glusterfs --attribute-timeout=0 --entry-timeout=0 --volfile-id=/$V0 --volfile-server=$H0 $M0; +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; TEST $CLI volume quota $V0 enable TEST $CLI volume quota $V0 limit-usage / 10MB TEST $CLI volume quota $V0 soft-timeout 0 diff --git a/tests/basic/afr/metadata-self-heal.t b/tests/basic/afr/metadata-self-heal.t index b88c16a93e1..275aecd2175 100644 --- a/tests/basic/afr/metadata-self-heal.t +++ b/tests/basic/afr/metadata-self-heal.t @@ -51,7 +51,7 @@ TEST glusterd TEST pidof glusterd TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1} TEST $CLI volume start $V0 -TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 cd $M0 TEST touch a diff --git a/tests/basic/afr/quorum.t b/tests/basic/afr/quorum.t index c105290445a..252e25468d7 100644 --- a/tests/basic/afr/quorum.t +++ b/tests/basic/afr/quorum.t @@ -19,7 +19,7 @@ TEST $CLI volume set $V0 performance.write-behind off TEST $CLI volume set $V0 performance.stat-prefetch off TEST $CLI volume set $V0 performance.read-ahead off TEST $CLI volume start $V0 -TEST $GFS -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable; +TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0; touch $M0/a echo abc > $M0/b @@ -75,7 +75,7 @@ TEST $CLI volume set $V0 performance.write-behind off TEST $CLI volume set $V0 performance.stat-prefetch off TEST $CLI volume set $V0 performance.read-ahead off TEST $CLI volume start $V0 -TEST $GFS -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable; +TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0; touch $M0/a echo abc > $M0/b diff --git a/tests/basic/afr/replace-brick-self-heal.t b/tests/basic/afr/replace-brick-self-heal.t index fef671a3875..a8c01a0f377 100644 --- a/tests/basic/afr/replace-brick-self-heal.t +++ b/tests/basic/afr/replace-brick-self-heal.t @@ -12,7 +12,7 @@ TEST $CLI volume set $V0 cluster.metadata-self-heal off TEST $CLI volume set $V0 cluster.entry-self-heal off TEST $CLI volume set $V0 self-heal-daemon off -TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0; +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; # Create files for i in {1..5} diff --git a/tests/basic/afr/root-squash-self-heal.t b/tests/basic/afr/root-squash-self-heal.t index ff0aa5cecb7..c4fab0a35b2 100644 --- a/tests/basic/afr/root-squash-self-heal.t +++ b/tests/basic/afr/root-squash-self-heal.t @@ -12,7 +12,7 @@ TEST $CLI volume set $V0 performance.stat-prefetch off TEST $CLI volume set $V0 self-heal-daemon off TEST $CLI volume set $V0 server.root-squash on TEST $CLI volume start $V0 -TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --no-root-squash=yes --use-readdirp=no +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --no-root-squash=yes --use-readdirp=no $M0 TEST kill_brick $V0 $H0 $B0/${V0}0 echo abc > $M0/a diff --git a/tests/basic/afr/self-heald.t b/tests/basic/afr/self-heald.t index a0906f97cee..24c82777921 100644 --- a/tests/basic/afr/self-heald.t +++ b/tests/basic/afr/self-heald.t @@ -50,7 +50,7 @@ TEST $CLI volume set $V0 cluster.background-self-heal-count 0 TEST $CLI volume set $V0 cluster.eager-lock off TEST $CLI volume set $V0 performance.flush-behind off TEST $CLI volume start $V0 -TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 decide_kill=$((`date +"%j"|sed 's/^0*//'` % 2 )) diff --git a/tests/basic/afr/split-brain-favorite-child-policy.t b/tests/basic/afr/split-brain-favorite-child-policy.t index 3df8e718bf0..0e321c6f095 100644 --- a/tests/basic/afr/split-brain-favorite-child-policy.t +++ b/tests/basic/afr/split-brain-favorite-child-policy.t @@ -17,7 +17,7 @@ TEST $CLI volume set $V0 cluster.entry-self-heal off TEST $CLI volume set $V0 cluster.data-self-heal off TEST $CLI volume set $V0 cluster.metadata-self-heal off TEST $CLI volume start $V0 -TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 TEST touch $M0/file ############ Healing using favorite-child-policy = ctime ################# diff --git a/tests/basic/afr/split-brain-heal-info.t b/tests/basic/afr/split-brain-heal-info.t index eabfbd0880a..66275c57207 100644 --- a/tests/basic/afr/split-brain-heal-info.t +++ b/tests/basic/afr/split-brain-heal-info.t @@ -20,7 +20,7 @@ TEST pidof glusterd TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1} TEST $CLI volume start $V0 TEST $CLI volume set $V0 cluster.self-heal-daemon off -TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 TEST mkdir $M0/dspb TEST mkdir $M0/mspb diff --git a/tests/basic/afr/split-brain-healing.t b/tests/basic/afr/split-brain-healing.t index c66bb5d44df..403d08faab3 100644 --- a/tests/basic/afr/split-brain-healing.t +++ b/tests/basic/afr/split-brain-healing.t @@ -35,7 +35,7 @@ TEST $CLI volume set $V0 cluster.data-self-heal off TEST $CLI volume set $V0 cluster.metadata-self-heal off TEST $CLI volume set $V0 cluster.entry-self-heal off TEST $CLI volume start $V0 -TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 cd $M0 for i in {1..10} diff --git a/tests/basic/afr/split-brain-resolution.t b/tests/basic/afr/split-brain-resolution.t index 84b2cc8db51..e75e15aaa97 100644 --- a/tests/basic/afr/split-brain-resolution.t +++ b/tests/basic/afr/split-brain-resolution.t @@ -16,7 +16,7 @@ TEST $CLI volume start $V0 #Disable self-heal-daemon TEST $CLI volume set $V0 cluster.self-heal-daemon off -TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0; TEST `echo "some-data" > $M0/data-split-brain.txt` TEST `echo "some-data" > $M0/metadata-split-brain.txt` -- cgit