author    | Krutika Dhananjay <kdhananj@redhat.com> | 2014-05-21 17:47:03 +0530
committer | Vijay Bellur <vbellur@redhat.com>       | 2014-06-02 03:56:46 -0700
commit    | db022ef7ecca77cbecbcc4c046b6d3aafd2cb86f (patch)
tree      | 0abb9a8bfb99ef814d1f994a60826d2a1c63b64c
parent    | 0d26de1b0a4a0fbf7e33dab8bc46c38c179e8645 (diff)
cluster/dht: Fix min-free-disk calculations when quota-deem-statfs is on
PROBLEM:
As part of file creation, DHT sends a statfs call to each of its
sub-volumes and expects in return the local space consumption and
availability on each one of them. DHT uses this information to
ensure that at least min-free-disk amount of space is left on every
sub-volume whenever there ARE other sub-volumes with more space
available.
But when quota-deem-statfs is enabled, the quota xlator on every brick
unwinds the statfs call with the volume-wide consumption of disk space.
This throws off the min-free-disk calculation and eventually misleads
DHT into thinking all sub-volumes have equal available space, at which
point DHT keeps sending new file creates to subvol-0, filling it to
100% even though there ARE other subvols with ample space available.
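
To make the failure mode concrete, here is a toy program. It is NOT
GlusterFS code; the struct, function and figures are made up for
illustration (the 100M bricks, 150M quota limit and 50% min-free-disk
mirror the regression test added further down). It shows how identical,
volume-wide statfs replies make both sub-volumes look equally full to a
per-subvolume threshold check.

    #include <stdio.h>

    struct statfs_reply {
            unsigned long long total;   /* bytes */
            unsigned long long avail;   /* bytes */
    };

    /* DHT-style check: has this subvolume dropped below min-free-disk? */
    static int subvol_is_filled (struct statfs_reply r, double min_free_pct)
    {
            double avail_pct = 100.0 * (double) r.avail / (double) r.total;
            return avail_pct < min_free_pct;
    }

    int main (void)
    {
            double min_free = 50.0;   /* cluster.min-free-disk 50% */
            int    i        = 0;

            /* Per-brick view: each 100M brick has 60M free. */
            struct statfs_reply local[2]  = { { 100ULL << 20, 60ULL << 20 },
                                              { 100ULL << 20, 60ULL << 20 } };

            /* quota-deem-statfs view: every brick reports the 150M volume
             * with 80M consumed, i.e. ~47% available -- identically. */
            struct statfs_reply deemed[2] = { { 150ULL << 20, 70ULL << 20 },
                                              { 150ULL << 20, 70ULL << 20 } };

            for (i = 0; i < 2; i++)
                    printf ("subvol-%d: filled(local)=%d filled(deemed)=%d\n",
                            i, subvol_is_filled (local[i], min_free),
                            subvol_is_filled (deemed[i], min_free));

            /* With the deemed numbers both subvols cross min-free-disk at
             * the same time, so DHT can no longer prefer the emptier one
             * and keeps routing new creates to subvol-0 until it is full. */
            return 0;
    }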
FIX:
Make DHT mark its internal statfs requests with the
GF_INTERNAL_IGNORE_DEEM_STATFS key, and make quota_statfs() skip the
volume-wide accounting for such requests, behaving as if the quota
xlator weren't enabled. Every brick then returns only its local
consumption and disk space availability to DHT, while client-visible
statfs continues to reflect the quota limit.
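
In terms of the two hunks in the patch below (heavily condensed here;
error handling and surrounding code are elided), the handshake between
DHT and quota looks like this:

    /* DHT side (dht_get_du_info): tag the internal statfs request so that
     * quota knows not to apply the deemed, volume-wide numbers to it. */
    statfs_local->params = dict_new ();
    ret = dict_set_int8 (statfs_local->params,
                         GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
    ...
    STACK_WIND (statfs_frame, dht_du_info_cbk,
                conf->subvolumes[i],
                conf->subvolumes[i]->fops->statfs,
                &tmp_loc, statfs_local->params);

    /* quota side (quota_statfs): if the tag is present, skip the deemed
     * accounting and let the brick-local statfs pass through. */
    ret = dict_get_int8 (xdata, GF_INTERNAL_IGNORE_DEEM_STATFS,
                         &ignore_deem_statfs);
    if (ignore_deem_statfs)
            goto off;

A regular client mount never sets this key, so the statfs seen through
df on the mount point still reflects the quota-deemed size; only DHT's
internal min-free-disk bookkeeping switches back to per-brick numbers.
The regression test below checks both behaviours.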
Change-Id: I211371a1eddb220037bd36a128973938ea8124c2
BUG: 1099890
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Reviewed-on: http://review.gluster.org/7845
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r-- | libglusterfs/src/glusterfs.h            |   1
-rw-r--r-- | tests/bugs/bug-1099890.t                | 120
-rw-r--r-- | tests/dht.rc                            |  19
-rw-r--r-- | xlators/cluster/dht/src/dht-diskusage.c |  16
-rw-r--r-- | xlators/features/quota/src/quota.c      |  16
5 files changed, 167 insertions, 5 deletions
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
index 93d7260cbe9..aad23158a1d 100644
--- a/libglusterfs/src/glusterfs.h
+++ b/libglusterfs/src/glusterfs.h
@@ -89,6 +89,7 @@
 #define GF_XATTR_USER_PATHINFO_KEY "glusterfs.pathinfo"
 #define QUOTA_LIMIT_KEY "trusted.glusterfs.quota.limit-set"
 #define VIRTUAL_QUOTA_XATTR_CLEANUP_KEY "glusterfs.quota-xattr-cleanup"
+#define GF_INTERNAL_IGNORE_DEEM_STATFS "ignore-deem-statfs"
 #define GF_READDIR_SKIP_DIRS "readdir-filter-directories"
diff --git a/tests/bugs/bug-1099890.t b/tests/bugs/bug-1099890.t
new file mode 100644
index 00000000000..1c07b62c02f
--- /dev/null
+++ b/tests/bugs/bug-1099890.t
@@ -0,0 +1,120 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+
+## TO-DO: Fix the following once the dht du refresh interval issue is fixed:
+## 1. Do away with sleep(1).
+## 2. Do away with creation of empty files.
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+# Create 2 loop devices, one per brick.
+TEST truncate -s 100M $B0/brick1
+TEST truncate -s 100M $B0/brick2
+
+TEST L1=`losetup --find --show $B0/brick1`
+TEST mkfs.xfs $L1
+
+TEST L2=`losetup --find --show $B0/brick2`
+TEST mkfs.xfs $L2
+
+TEST mkdir -p $B0/${V0}{1,2}
+
+TEST mount -t xfs $L1 $B0/${V0}1
+TEST mount -t xfs $L2 $B0/${V0}2
+
+# Create a plain distribute volume with 2 subvols.
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+TEST $CLI volume start $V0;
+EXPECT "Started" volinfo_field $V0 'Status';
+
+TEST $CLI volume quota $V0 enable;
+
+TEST $CLI volume set $V0 features.quota-deem-statfs on
+
+TEST $CLI volume quota $V0 limit-usage / 150MB;
+
+TEST $CLI volume set $V0 cluster.min-free-disk 50%
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+# Make sure quota-deem-statfs is working as expected
+EXPECT "150M" echo `df -h $M0 -P | tail -1 | awk {'print $2'}`
+
+# Create a new file 'foo' under the root of the volume, which hashes to subvol-0
+# of DHT, that consumes 40M
+TEST dd if=/dev/zero of=$M0/foo bs=5M count=8
+
+TEST stat $B0/${V0}1/foo
+TEST ! stat $B0/${V0}2/foo
+
+# Create a new file 'bar' under the root of the volume, which hashes to subvol-1
+# of DHT, that consumes 40M
+TEST dd if=/dev/zero of=$M0/bar bs=5M count=8
+
+TEST ! stat $B0/${V0}1/bar
+TEST stat $B0/${V0}2/bar
+
+# Touch a zero-byte file on the root of the volume to make sure the statfs data
+# on DHT is refreshed
+sleep 1;
+TEST touch $M0/empty1;
+
+# At this point, the available space on each subvol {60M,60M} is greater than
+# their min-free-disk {50M,50M}, but if this bug still exists, then
+# the total available space on the volume as perceived by DHT should be less
+# than min-free-disk, i.e.,
+#
+# consumed space returned per subvol by quota = (40M + 40M) = 80M
+#
+# Therefore, consumed space per subvol computed by DHT WITHOUT the fix would be:
+# (80M/150M)*100 = 53%
+#
+# Available space per subvol as perceived by DHT with the bug = 47%
+# which is less than min-free-disk
+
+# Now I create a file that hashes to subvol-1 (counting from 0) of DHT.
+# If this bug still exists,then DHT should be routing this creation to subvol-0.
+# If this bug is fixed, then DHT should be routing the creation to subvol-1 only
+# as it has more than min-free-disk space available.
+
+TEST dd if=/dev/zero of=$M0/file bs=1K count=1
+sleep 1;
+TEST ! stat $B0/${V0}1/file
+TEST stat $B0/${V0}2/file
+
+# Touch another zero-byte file on the root of the volume to refresh statfs
+# values stored by DHT.
+
+TEST touch $M0/empty2;
+
+# Now I create a new file that hashes to subvol-0, at the end of which, there
+# will be less than min-free-disk space available on it.
+TEST dd if=/dev/zero of=$M0/fil bs=5M count=4
+sleep 1;
+TEST stat $B0/${V0}1/fil
+TEST ! stat $B0/${V0}2/fil
+
+# Touch to refresh statfs info cached by DHT
+
+TEST touch $M0/empty3;
+
+# Now I create a file that hashes to subvol-0 but since it has less than
+# min-free-disk space available, its data will be cached on subvol-1.
+
+TEST dd if=/dev/zero of=$M0/zz bs=5M count=1
+
+TEST stat $B0/${V0}1/zz
+TEST stat $B0/${V0}2/zz
+
+EXPECT "$V0-client-1" dht_get_linkto_target "$B0/${V0}1/zz"
+
+EXPECT "1" is_dht_linkfile "$B0/${V0}1/zz"
+
+cleanup;
diff --git a/tests/dht.rc b/tests/dht.rc
index a11bbfd8a97..50c4532617e 100644
--- a/tests/dht.rc
+++ b/tests/dht.rc
@@ -91,3 +91,22 @@ function remove_brick_completed()
         echo $val
         return $val
 }
+
+function dht_get_linkto_target()
+{
+        local path=$1;
+        echo `getfattr -d -m . -e text --only-values --absolute-names --name=trusted.glusterfs.dht.linkto $path`
+}
+
+function is_dht_linkfile()
+{
+        local path=$1
+        retval=0
+        local output=`stat --format=%a $path`
+        if [ $output -eq 1000 ]; then
+                retval=1
+        fi
+
+        echo $retval
+        return $retval
+}
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index fe3955ecbb7..d035aca2665 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -135,6 +135,7 @@ int
 dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
 {
         int            i = 0;
+        int            ret = -1;
         dht_conf_t    *conf = NULL;
         call_frame_t  *statfs_frame = NULL;
         dht_local_t   *statfs_local = NULL;
@@ -164,12 +165,25 @@ dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
                 goto err;
         }
 
+        statfs_local->params = dict_new ();
+        if (!statfs_local->params)
+                goto err;
+
+        ret = dict_set_int8 (statfs_local->params,
+                             GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
+        if (ret) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Failed to set "
+                        GF_INTERNAL_IGNORE_DEEM_STATFS" in dict");
+                goto err;
+        }
+
         statfs_local->call_cnt = conf->subvolume_cnt;
         for (i = 0; i < conf->subvolume_cnt; i++) {
                 STACK_WIND (statfs_frame, dht_du_info_cbk,
                             conf->subvolumes[i],
                             conf->subvolumes[i]->fops->statfs,
-                            &tmp_loc, NULL);
+                            &tmp_loc, statfs_local->params);
         }
 
         conf->last_stat_fetch.tv_sec = tv.tv_sec;
diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c
index c3e730bd1fd..06ef9615a19 100644
--- a/xlators/features/quota/src/quota.c
+++ b/xlators/features/quota/src/quota.c
@@ -3808,16 +3808,24 @@ out:
 int32_t
 quota_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
 {
-        quota_local_t *local = NULL;
-        int op_errno = 0;
-        int ret = -1;
-        quota_priv_t *priv = NULL;
+        int            op_errno           = 0;
+        int            ret                = -1;
+        int8_t         ignore_deem_statfs = 0;
+        quota_priv_t  *priv               = NULL;
+        quota_local_t *local              = NULL;
 
         priv = this->private;
         GF_ASSERT (loc);
 
         WIND_IF_QUOTAOFF (priv->is_quota_on, off);
 
+        ret = dict_get_int8 (xdata, GF_INTERNAL_IGNORE_DEEM_STATFS,
+                             &ignore_deem_statfs);
+        ret = 0;
+
+        if (ignore_deem_statfs)
+                goto off;
+
         if (priv->consider_statfs && loc->inode) {
                 local = quota_local_new ();
                 if (!local) {