From bae348ef599463a974781083a5999991dbb8ecbb Mon Sep 17 00:00:00 2001
From: Raghavendra G
Date: Mon, 23 Dec 2013 14:54:05 +0530
Subject: mgmt/glusterd: make sure quota enforcer has established connection
 with quotad before marking quota as enabled.

Without this patch there is a window of time when quota is marked as
enabled in the quota-enforcer, but the connection to quotad has not yet
been established. Any checklimit done during this period can result in
a failed fop because quotad is unavailable.

Change-Id: I0d509fabc434dd55ce9ec59157123524197fcc80
BUG: 969461
Signed-off-by: Raghavendra G
Reviewed-on: http://review.gluster.org/6572
Tested-by: Gluster Build System
Reviewed-by: Vijay Bellur
Reviewed-on: http://review.gluster.org/6820
---
 rpc/rpc-transport/socket/src/socket.c              | 28 ++++
 tests/basic/quota-anon-fd-nfs.t                    | 84 ++++++++++++++++++++++
 tests/basic/quota-nfs-anon.t                       | 46 ------------
 tests/bugs/bug-1035576.t                           | 51 +++++++++++++
 tests/bugs/bug-848251.t                            |  1 -
 tests/bugs/bug-990028.t                            |  1 -
 xlators/features/quota/src/quota-enforcer-client.c | 43 ++++++++++-
 xlators/features/quota/src/quota.c                 | 11 +--
 xlators/mgmt/glusterd/src/glusterd-quota.c         | 19 ++---
 xlators/mgmt/glusterd/src/glusterd-utils.c         | 19 +++--
 10 files changed, 231 insertions(+), 72 deletions(-)
 create mode 100755 tests/basic/quota-anon-fd-nfs.t
 delete mode 100644 tests/basic/quota-nfs-anon.t
 create mode 100644 tests/bugs/bug-1035576.t

diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
index f9df4ac1d..efbc53162 100644
--- a/rpc/rpc-transport/socket/src/socket.c
+++ b/rpc/rpc-transport/socket/src/socket.c
@@ -3341,6 +3341,34 @@ reconfigure (rpc_transport_t *this, dict_t *options)
 
         priv->windowsize = (int)windowsize;
 
+        if (dict_get (this->options, "non-blocking-io")) {
+                optstr = data_to_str (dict_get (this->options,
+                                                "non-blocking-io"));
+
+                if (gf_string2boolean (optstr, &tmp_bool) == -1) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "'non-blocking-io' takes only boolean "
+                                "options, not taking any action");
+                        tmp_bool = 1;
+                }
+
+                if (!tmp_bool) {
+                        priv->bio = 1;
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "disabling non-blocking IO");
+                }
+        }
+
+        if (!priv->bio) {
+                ret = __socket_nonblock (priv->sock);
+                if (ret == -1) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "NBIO on %d failed (%s)",
+                                priv->sock, strerror (errno));
+                        goto out;
+                }
+        }
+
         ret = 0;
 out:
         return ret;
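Note on the hunk above: reconfigure () now honours "non-blocking-io" from
the transport options at runtime and re-applies the result to the live
socket. __socket_nonblock () itself is not part of this hunk; assuming it
is a plain O_NONBLOCK toggle on the socket fd, a minimal standalone sketch
of that behaviour (illustrative only, not the actual gluster helper) is:

    #include <fcntl.h>

    /* Toggle O_NONBLOCK on a socket fd; returns 0 on success, -1 on
     * failure with errno set by fcntl(2). Hypothetical stand-in for
     * __socket_nonblock (). */
    static int
    sock_set_nonblock (int fd, int enable)
    {
            int flags = fcntl (fd, F_GETFL, 0);

            if (flags == -1)
                    return -1;

            if (enable)
                    flags |= O_NONBLOCK;
            else
                    flags &= ~O_NONBLOCK;

            return fcntl (fd, F_SETFL, flags);
    }
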
diff --git a/tests/basic/quota-anon-fd-nfs.t b/tests/basic/quota-anon-fd-nfs.t
new file mode 100755
index 000000000..be7bc35db
--- /dev/null
+++ b/tests/basic/quota-anon-fd-nfs.t
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../fileio.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=16
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+
+# The test makes use of inode-lru-limit to hit a scenario where we
+# find an inode whose ancestry is not present. The following is the
+# hypothesis (confirmed by logs showing that the codepath has been
+# executed, though not backed by a thorough understanding of NFS
+# internals).
+
+# At the end of an fop, the reference count of an inode will be
+# zero. The inode (and its ancestry) persists in memory only
+# because of its non-zero lookup count. These looked-up inodes are
+# put in an lru queue of size 1 (here). So, there can be at most one
+# such inode in memory.
+
+# The NFS server makes use of anonymous fds. So, if it cannot find a
+# valid fd, it does a nameless lookup. This gives us an inode whose
+# ancestry is NULL. When a write happens on this inode,
+# quota-enforcer/marker finds a NULL ancestry and asks storage/posix
+# to build it.
+
+TEST $CLI volume set $V0 network.inode-lru-limit 1
+TEST $CLI volume set $V0 performance.nfs.write-behind off
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 1
+
+TEST mount -t nfs -o noac,soft,nolock,vers=3 $H0:/$V0 $N0
+deep=/0/1/2/3/4/5/6/7/8/9
+TEST mkdir -p $N0/$deep
+
+TEST touch $N0/$deep/file1 $N0/$deep/file2 $N0/$deep/file3 $N0/$deep/file4
+
+TEST fd_open 3 'w' "$N0/$deep/file1"
+TEST fd_open 4 'w' "$N0/$deep/file2"
+TEST fd_open 5 'w' "$N0/$deep/file3"
+TEST fd_open 6 'w' "$N0/$deep/file4"
+
+# consume all quota
+TEST ! dd if=/dev/zero of="$N0/$deep/file" bs=1MB count=1
+
+# At the end of each fop on the server, the reference count of the
+# inode associated with each file above drops to zero and the inode
+# is put into the lru queue. Since lru-limit is set to 1, an fop on
+# the next file displaces the current inode from the itable. This
+# ensures that when a write happens on the same fd, fd resolution
+# results in a nameless lookup from the server and quota_writev
+# encounters an fd associated with an inode whose parent is not
+# present in the itable.
+
+for j in $(seq 1 2); do
+        for i in $(seq 3 6); do
+                # failing writes indicate that we are enforcing the
+                # quota set on / even with anonymous fds
+                TEST_IN_LOOP ! fd_write $i "content"
+                TEST_IN_LOOP sync
+        done
+done
+
+exec 3>&-
+exec 4>&-
+exec 5>&-
+exec 6>&-
+
+$CLI volume statedump $V0 all
+
+TEST umount -l $N0
+
+cleanup;
diff --git a/tests/basic/quota-nfs-anon.t b/tests/basic/quota-nfs-anon.t
deleted file mode 100644
index 7b5ea5f28..000000000
--- a/tests/basic/quota-nfs-anon.t
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/bin/bash
-
-. $(dirname $0)/../include.rc
-. $(dirname $0)/../volume.rc
-
-cleanup;
-
-TEST glusterd
-TEST $CLI volume create $V0 $H0:$B0/${V0}{1}
-
-function volinfo_field()
-{
-        local vol=$1;
-        local field=$2;
-
-        $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
-}
-
-
-## Verify volume is is created
-EXPECT "$V0" volinfo_field $V0 'Volume Name';
-EXPECT 'Created' volinfo_field $V0 'Status';
-
-
-## Start volume and verify
-TEST $CLI volume start $V0;
-EXPECT 'Started' volinfo_field $V0 'Status';
-
-TEST $CLI volume quota $V0 enable;
-
-## Mount NFS
-TEST mount -t nfs -o nolock,soft,intr $H0:/$V0 $N0;
-mkdir -p $N0/0/1
-TEST $CLI volume quota $V0 limit-usage /0/1 1GB 75%;
-
-deep=/0/1/2/3/4/5/6/7/8/9
-mkdir -p $N0/$deep
-dd if=/dev/zero of=$N0/$deep/file bs=1M count=502 &
-
-kill_brick $V0 $H0 $B0/${V0}{1}
-kill -TERM $(get_nfs_pid)
-
-$CLI volume start $V0 force;
-
-
-cleanup;
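The comments in quota-anon-fd-nfs.t lean on the inode table's lru
behaviour. A schematic C model of a size-limited lru list (names and
layout are illustrative, not gluster's actual inode code) shows why
network.inode-lru-limit set to 1 leaves at most one forgotten inode in
memory, forcing the nameless-lookup path the test wants to exercise:

    #include <stdlib.h>

    /* Schematic model of the itable lru behaviour the test relies on. */
    struct inode_node {
            struct inode_node *next;
    };

    struct itable {
            struct inode_node *lru_head;   /* forgotten-but-cached inodes */
            int                lru_size;
            int                lru_limit;  /* network.inode-lru-limit; 1 here */
    };

    /* Called when an inode's ref count drops to zero: it is parked on
     * the lru list and anything beyond lru_limit is purged. With a
     * limit of 1, caching inode B evicts inode A, so the next fop on
     * A's fd must resolve it through a fresh (nameless) lookup. */
    static void
    itable_lru_insert (struct itable *t, struct inode_node *in)
    {
            in->next    = t->lru_head;
            t->lru_head = in;
            t->lru_size++;

            while (t->lru_size > t->lru_limit) {
                    struct inode_node **pp = &t->lru_head;

                    while ((*pp)->next != NULL)    /* walk to the tail */
                            pp = &(*pp)->next;

                    free (*pp);                    /* evict oldest entry */
                    *pp = NULL;
                    t->lru_size--;
            }
    }
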
diff --git a/tests/bugs/bug-1035576.t b/tests/bugs/bug-1035576.t
new file mode 100644
index 000000000..52d93dd87
--- /dev/null
+++ b/tests/bugs/bug-1035576.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+# This script tests that self-heal of the limit-set xattr happens on a
+# directory, but self-heal of the quota.size xattr does not.
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+# Let's disable the perf xlators so that lookup reaches afr
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 background-self-heal-count 0
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume quota $V0 enable
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+cd $M0
+TEST mkdir $M0/a
+TEST $CLI volume quota $V0 limit-usage /a 1GB
+echo abc > $M0/a/f
+$CLI volume start $V0 force
+EXPECT_WITHIN 20 "1" afr_child_up_status $V0 0
+quota_limit_val1=$(get_hex_xattr trusted.glusterfs.quota.limit-set $B0/${V0}1/a)
+quota_size_val1=$(get_hex_xattr trusted.glusterfs.quota.size $B0/${V0}1/a)
+
+# Trigger entry and metadata self-heal
+TEST stat $M0/a
+quota_limit_val0=$(get_hex_xattr trusted.glusterfs.quota.limit-set $B0/${V0}0/a)
+quota_size_val0=$(get_hex_xattr trusted.glusterfs.quota.size $B0/${V0}0/a)
+
+# Test that the limit-set xattr is healed
+TEST [ $quota_limit_val0 == $quota_limit_val1 ]
+
+# Since only entry and metadata self-heal has run, the quota size
+# values should not yet match
+TEST [ $quota_size_val0 != $quota_size_val1 ]
+TEST stat $M0/a/f
+
+# Now that data self-heal is done, the quota size values should match
+quota_size_val0=$(get_hex_xattr trusted.glusterfs.quota.size $B0/${V0}0/a)
+TEST [ $quota_size_val0 == $quota_size_val1 ]
+cleanup
diff --git a/tests/bugs/bug-848251.t b/tests/bugs/bug-848251.t
index 844162283..a5c80b1b4 100644
--- a/tests/bugs/bug-848251.t
+++ b/tests/bugs/bug-848251.t
@@ -19,7 +19,6 @@ TEST $CLI volume quota $V0 enable;
 TEST MOUNTDIR="/tmp/$RANDOM"
 TEST mkdir $MOUNTDIR
 TEST glusterfs -s $H0 --volfile-id=$V0 $MOUNTDIR
-sleep 10
 
 function set_quota(){
         mkdir "$MOUNTDIR/$name"
diff --git a/tests/bugs/bug-990028.t b/tests/bugs/bug-990028.t
index ece7235cd..fbf4175be 100755
--- a/tests/bugs/bug-990028.t
+++ b/tests/bugs/bug-990028.t
@@ -22,7 +22,6 @@ function __init()
         TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
 
         TEST $CLI volume quota $V0 enable
-        sleep 15
 }
 
 #CASE-1
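bug-1035576.t compares quota xattrs read directly off the brick backends
via the get_hex_xattr helper. A rough C equivalent of that helper, using
the Linux getxattr(2) syscall (a sketch under the assumption that the
helper simply hex-dumps the raw xattr value; the path in the usage
comment is illustrative):

    #include <stdio.h>
    #include <sys/types.h>
    #include <sys/xattr.h>

    /* Read an xattr from a backend path and print it as hex; error
     * handling is minimal. */
    static int
    print_quota_xattr (const char *brick_path, const char *xattr_name)
    {
            char    buf[256];
            ssize_t len = getxattr (brick_path, xattr_name, buf,
                                    sizeof (buf));

            if (len < 0) {
                    perror ("getxattr");
                    return -1;
            }

            for (ssize_t i = 0; i < len; i++)
                    printf ("%02x", (unsigned char)buf[i]);
            printf ("\n");

            return 0;
    }

    /* Usage against a brick backend (path is illustrative):
     * print_quota_xattr ("/d/backends/patchy1/a",
     *                    "trusted.glusterfs.quota.limit-set");
     */

The sleep 10 and sleep 15 removals above are a direct payoff of this
patch: since quota enable now returns only after the enforcer has
connected to quotad, the tests no longer need to guess at a settling time.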
diff --git a/xlators/features/quota/src/quota-enforcer-client.c b/xlators/features/quota/src/quota-enforcer-client.c
index bfea5e420..7d8ab937d 100644
--- a/xlators/features/quota/src/quota-enforcer-client.c
+++ b/xlators/features/quota/src/quota-enforcer-client.c
@@ -295,6 +295,37 @@ quota_enforcer_notify (struct rpc_clnt *rpc, void *mydata,
         return ret;
 }
 
+int
+quota_enforcer_blocking_connect (rpc_clnt_t *rpc)
+{
+        dict_t *options = NULL;
+        int     ret     = -1;
+
+        options = dict_new ();
+        if (options == NULL)
+                goto out;
+
+        ret = dict_set_str (options, "non-blocking-io", "no");
+        if (ret)
+                goto out;
+
+        rpc->conn.trans->reconfigure (rpc->conn.trans, options);
+
+        rpc_clnt_start (rpc);
+
+        ret = dict_set_str (options, "non-blocking-io", "yes");
+        if (ret)
+                goto out;
+
+        rpc->conn.trans->reconfigure (rpc->conn.trans, options);
+
+        ret = 0;
+out:
+        dict_unref (options);
+
+        return ret;
+}
+
 //Returns a started rpc_clnt. Creates a new rpc_clnt if quota_priv doesn't have
 //one already
 struct rpc_clnt *
@@ -309,9 +340,13 @@ quota_enforcer_init (xlator_t *this, dict_t *options)
                 gf_log (this->name, GF_LOG_TRACE,
                         "quota enforcer clnt already inited");
                 //Turns out to be a NOP if the clnt is already connected.
-                rpc_clnt_start (priv->rpc_clnt);
+                ret = quota_enforcer_blocking_connect (priv->rpc_clnt);
+                if (ret)
+                        goto out;
+
                 return priv->rpc_clnt;
         }
 
+        priv->quota_enforcer = &quota_enforcer_clnt;
+
         ret = dict_set_str (options, "transport.address-family", "unix");
@@ -339,7 +374,11 @@ quota_enforcer_init (xlator_t *this, dict_t *options)
                 goto out;
         }
 
-        rpc_clnt_start (rpc);
+        ret = quota_enforcer_blocking_connect (rpc);
+        if (ret)
+                goto out;
+
+        ret = 0;
 out:
         if (ret) {
                 if (rpc)
diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c
index f563873d0..2cac87082 100644
--- a/xlators/features/quota/src/quota.c
+++ b/xlators/features/quota/src/quota.c
@@ -4126,14 +4126,15 @@ err:
 int
 reconfigure (xlator_t *this, dict_t *options)
 {
-        int32_t       ret  = -1;
-        quota_priv_t *priv = NULL;
+        int32_t       ret      = -1;
+        quota_priv_t *priv     = NULL;
+        gf_boolean_t  quota_on = _gf_false;
 
         priv = this->private;
 
         GF_OPTION_RECONF ("deem-statfs", priv->consider_statfs, options, bool,
                           out);
-        GF_OPTION_RECONF ("server-quota", priv->is_quota_on, options, bool,
+        GF_OPTION_RECONF ("server-quota", quota_on, options, bool,
                           out);
         GF_OPTION_RECONF ("default-soft-limit", priv->default_soft_lim,
                           options, percent, out);
@@ -4144,7 +4145,7 @@ reconfigure (xlator_t *this, dict_t *options)
         GF_OPTION_RECONF ("hard-timeout", priv->hard_timeout, options, time,
                           out);
 
-        if (priv->is_quota_on) {
+        if (quota_on) {
                 priv->rpc_clnt = quota_enforcer_init (this,
                                                       this->options);
                 if (priv->rpc_clnt == NULL) {
@@ -4165,6 +4166,8 @@ reconfigure (xlator_t *this, dict_t *options)
                 }
         }
 
+        priv->is_quota_on = quota_on;
+
         ret = 0;
 out:
         return ret;
diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.c b/xlators/mgmt/glusterd/src/glusterd-quota.c
index 979dac69b..67ff65a3a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-quota.c
+++ b/xlators/mgmt/glusterd/src/glusterd-quota.c
@@ -996,14 +996,6 @@ glusterd_quotad_op (int opcode)
                         ret = glusterd_check_generate_start_quotad ();
                         break;
 
-                case GF_QUOTA_OPTION_TYPE_DEFAULT_SOFT_LIMIT:
-                case GF_QUOTA_OPTION_TYPE_HARD_TIMEOUT:
-                case GF_QUOTA_OPTION_TYPE_SOFT_TIMEOUT:
-                case GF_QUOTA_OPTION_TYPE_ALERT_TIME:
-
-                        ret = glusterd_reconfigure_quotad ();
-                        break;
-
                 default:
                         ret = 0;
                         break;
@@ -1131,6 +1123,12 @@ glusterd_op_quota (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
                 goto out;
         }
 
+        if (priv->op_version > GD_OP_VERSION_MIN) {
+                ret = glusterd_quotad_op (type);
+                if (ret)
+                        goto out;
+        }
+
         ret = glusterd_create_volfiles_and_notify_services (volinfo);
         if (ret) {
                 gf_log (this->name, GF_LOG_ERROR, "Unable to re-create "
@@ -1151,11 +1149,6 @@ glusterd_op_quota (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
         if (rsp_dict && start_crawl == _gf_true)
                 glusterd_quota_initiate_fs_crawl (priv, volname, type);
 
-        if (priv->op_version > GD_OP_VERSION_MIN) {
-                ret = glusterd_quotad_op (type);
-                if (ret)
-                        goto out;
-        }
         ret = 0;
 out:
         return ret;
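quota_enforcer_blocking_connect () works because rpc_clnt_start () on a
blocking transport does not return until connect(2) completes, while on a
non-blocking transport the connection may still be in flight when the call
returns; that in-flight period is exactly the window described in the
commit message. A hypothetical standalone demo of the underlying POSIX
behaviour (TCP semantics, not gluster code):

    #include <arpa/inet.h>
    #include <errno.h>
    #include <fcntl.h>
    #include <netinet/in.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    /* With O_NONBLOCK set, connect(2) on a TCP socket typically
     * returns -1 with errno EINPROGRESS: the caller gets control back
     * before the connection exists. On a blocking socket, connect
     * returns only once the connection is established (or has
     * definitively failed). */
    static int
    demo_connect (const char *ip, int port, int blocking)
    {
            struct sockaddr_in addr;
            int                sock = socket (AF_INET, SOCK_STREAM, 0);

            if (sock < 0)
                    return -1;

            if (!blocking)
                    fcntl (sock, F_SETFL,
                           fcntl (sock, F_GETFL, 0) | O_NONBLOCK);

            memset (&addr, 0, sizeof (addr));
            addr.sin_family = AF_INET;
            addr.sin_port   = htons (port);
            inet_pton (AF_INET, ip, &addr.sin_addr);

            if (connect (sock, (struct sockaddr *)&addr,
                         sizeof (addr)) < 0) {
                    if (errno == EINPROGRESS) {
                            /* attempt is in flight; using the socket now
                             * would hit the same race this patch closes */
                            fprintf (stderr, "connect in progress\n");
                            return sock;
                    }
                    close (sock);
                    return -1;
            }

            return sock;   /* blocking case: established here */
    }

The reconfigure () change in quota.c follows the same ordering rule: the
new quota_on temporary ensures priv->is_quota_on flips to true only after
quota_enforcer_init () has returned a connected client, and the
glusterd-quota.c change runs the quotad op before volfiles are
regenerated, so quotad is up before enforcement is switched on.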
diff --git a/xlators/mgmt/glusterd/src/glusterd-utils.c b/xlators/mgmt/glusterd/src/glusterd-utils.c
index e2e363bc3..593bddfda 100644
--- a/xlators/mgmt/glusterd/src/glusterd-utils.c
+++ b/xlators/mgmt/glusterd/src/glusterd-utils.c
@@ -3967,7 +3967,7 @@ glusterd_nodesvc_disconnect (char *server)
 }
 
 int32_t
-glusterd_nodesvc_start (char *server)
+glusterd_nodesvc_start (char *server, gf_boolean_t wait)
 {
         int32_t   ret  = -1;
         xlator_t *this = NULL;
@@ -4053,7 +4053,16 @@ glusterd_nodesvc_start (char *server)
         runner_log (&runner, "", GF_LOG_DEBUG,
                     "Starting the nfs/glustershd services");
 
-        ret = runner_run_nowait (&runner);
+        if (!wait) {
+                ret = runner_run_nowait (&runner);
+        } else {
+                synclock_unlock (&priv->big_lock);
+                {
+                        ret = runner_run (&runner);
+                }
+                synclock_lock (&priv->big_lock);
+        }
+
         if (ret == 0) {
                 glusterd_nodesvc_connect (server, sockfpath);
         }
@@ -4064,19 +4073,19 @@ out:
 int
 glusterd_nfs_server_start ()
 {
-        return glusterd_nodesvc_start ("nfs");
+        return glusterd_nodesvc_start ("nfs", _gf_false);
 }
 
 int
 glusterd_shd_start ()
 {
-        return glusterd_nodesvc_start ("glustershd");
+        return glusterd_nodesvc_start ("glustershd", _gf_false);
 }
 
 int
 glusterd_quotad_start ()
 {
-        return glusterd_nodesvc_start ("quotad");
+        return glusterd_nodesvc_start ("quotad", _gf_true);
 }
 
 gf_boolean_t
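In the wait branch above, runner_run () blocks until the spawned command
(the quotad launcher here) finishes, so glusterd's big_lock is released
around the call to let other synctasks make progress in the meantime. The
shape of that pattern in plain pthreads (illustrative; glusterd uses
synclock_t from its synctask framework, not a raw mutex):

    #include <pthread.h>

    /* Caller is assumed to hold big_lock on entry; the lock is dropped
     * only for the duration of the blocking step and re-acquired before
     * returning, so the caller's invariants still hold afterwards. */
    static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;

    static int
    run_blocking_step (int (*blocking_fn) (void))
    {
            int ret;

            pthread_mutex_unlock (&big_lock);  /* let other workers run */
            {
                    ret = blocking_fn ();      /* e.g. wait for the spawn */
            }
            pthread_mutex_lock (&big_lock);    /* re-acquire before return */

            return ret;
    }

Only glusterd_quotad_start () passes _gf_true, so the nfs and glustershd
start paths keep their original fire-and-forget behaviour.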