From 2a46c8769bc2b6ad491a305ea1d38023d0e22617 Mon Sep 17 00:00:00 2001
From: Raghavendra Talur
Date: Thu, 24 Jan 2013 11:26:37 +0530
Subject: cluster/dht: Correct min_free_disk behaviour

Problem:
Files were being created in a subvol which had less than min-free-disk
space available, even when other subvols with more space were available.

Solution:
Changed the logic to look for the subvol with the most space available.
In cases where all the subvols have less than min-free-disk available,
the one with maximum space and at least one free inode is chosen.

Known Issue:
Cannot ensure that the first file created right after the min-free-disk
value is crossed on a brick will get created on another brick, because
the disk usage stats take some time to update in the gluster process.
Will fix that as part of another bug.

Change-Id: If3ae0bf5a44f8739ce35b3ee3f191009ddd44455
BUG: 858488
Signed-off-by: Raghavendra Talur
Reviewed-on: http://review.gluster.org/4420
Tested-by: Gluster Build System
Reviewed-by: Anand Avati
---
 tests/bugs/bug-858488-min-free-disk.t   | 114 ++++++++++++++++++++++++++++++++
 tests/include.rc                        |  15 +++++
 xlators/cluster/dht/src/dht-common.h    |   4 ++
 xlators/cluster/dht/src/dht-diskusage.c | 112 +++++++++++++++++++++++--------
 4 files changed, 218 insertions(+), 27 deletions(-)
 create mode 100644 tests/bugs/bug-858488-min-free-disk.t

diff --git a/tests/bugs/bug-858488-min-free-disk.t b/tests/bugs/bug-858488-min-free-disk.t
new file mode 100644
index 000000000..43ef1496b
--- /dev/null
+++ b/tests/bugs/bug-858488-min-free-disk.t
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+function pidgrep()
+{
+    ps ax | grep "$1" | awk '{print $1}' | head -1
+}
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+## Let's create partitions for the bricks
+TEST truncate -s 100M $B0/brick1
+TEST truncate -s 200M $B0/brick2
+TEST LO1=`losetup --find --show $B0/brick1`
+TEST mkfs.xfs $LO1
+TEST LO2=`losetup --find --show $B0/brick2`
+TEST mkfs.xfs $LO2
+TEST mkdir -p $B0/${V0}1 $B0/${V0}2
+TEST mount -t xfs $LO1 $B0/${V0}1
+TEST mount -t xfs $LO2 $B0/${V0}2
+
+## Let's create the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs -s $H0 --volfile-id=$V0 --acl $M0
+MOUNT_PID=`ps ax | grep "glusterfs -s $H0 --volfile-id=$V0 --acl $M0" | awk '{print $1}' | head -1`
+
+## Real test starts here
+## ----------------------------------------------------------------------------
+
+MINFREEDISKVALUE=90
+
+## Set min free disk to MINFREEDISKVALUE percent
+TEST $CLI volume set $V0 cluster.min-free-disk $MINFREEDISKVALUE
+
+## We need a file-name-to-brick map based on hash.
+## We will use this info in test case 0.
+i=1
+CONTINUE=2
+BRICK1FILE=0
+BRICK2FILE=0
+while [[ $CONTINUE -ne 0 ]]
+do
+    dd if=/dev/zero of=$M0/file$i.data bs=1024 count=1024 1>/dev/null 2>&1
+
+    if [[ -e $B0/${V0}1/file$i.data && $BRICK1FILE = "0" ]]
+    then
+        BRICK1FILE=file$i.data
+        CONTINUE=$((CONTINUE - 1))
+    fi
+
+    if [[ -e $B0/${V0}2/file$i.data && $BRICK2FILE = "0" ]]
+    then
+        BRICK2FILE=file$i.data
+        CONTINUE=$((CONTINUE - 1))
+    fi
+
+    rm $M0/file$i.data
+    let i++
+done
+
+## Bring free space on one of the bricks to less than the min-free-disk
+## value by creating one big file.
+dd if=/dev/zero of=$M0/fillonebrick.data bs=1024 count=25600 1>/dev/null 2>&1
+
+## Let's find out where it was created
+if [ -f $B0/${V0}1/fillonebrick.data ]
+then
+    FILETOCREATE=$BRICK1FILE
+    OTHERBRICK=$B0/${V0}2
+else
+    FILETOCREATE=$BRICK2FILE
+    OTHERBRICK=$B0/${V0}1
+fi
+
+##--------------------------------TEST CASE 0-----------------------------------
+## If we try to create a file which, per its hash, should go onto the full
+## brick, it should go onto the other brick instead.
+
+## Before that, create a few files just to make gluster refresh the stats.
+## Using touch, so the disk usage stats themselves should not change.
+for k in {1..20};
+do
+    touch $M0/dummyfile$k
+done
+
+dd if=/dev/zero of=$M0/$FILETOCREATE bs=1024 count=2048 1>/dev/null 2>&1
+TEST [ -e $OTHERBRICK/$FILETOCREATE ]
+
+## Done testing, let's clean up
+EXPECT "$MOUNT_PID" pidgrep $MOUNT_PID
+TEST rm -rf $M0/*
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+$CLI volume delete $V0;
+
+cleanup;

diff --git a/tests/include.rc b/tests/include.rc
index 1d1386f98..5d9c96cea 100644
--- a/tests/include.rc
+++ b/tests/include.rc
@@ -168,6 +168,21 @@ function cleanup()
 {
     killall -15 glusterfs glusterfsd glusterd 2>/dev/null || true;
     killall -9 glusterfs glusterfsd glusterd 2>/dev/null || true;
+
+    MOUNTPOINTS=`mount | grep "$B0/" | awk '{print $3}'`
+    for m in $MOUNTPOINTS;
+    do
+        umount $m
+    done
+
+
+    LOOPDEVICES=`losetup -a | grep "$B0/" | awk '{print $1}' | tr -d :`
+    for l in $LOOPDEVICES;
+    do
+        losetup -d $l
+    done
+
+
     rm -rf /var/lib/glusterd/* $B0/* /etc/glusterd/*;
 
     umount -l $M0 2>/dev/null || true;

diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 764b37ac4..0dd654650 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -724,4 +724,8 @@ int dht_dir_has_layout (dict_t *xattr);
 gf_boolean_t
 dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator);
 
+xlator_t *
+dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol);
+xlator_t *
+dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol);
 #endif/* _DHT_H */

diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 52ea3a32a..0c87f4a64 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -248,12 +248,11 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
 	return is_subvol_filled;
 }
 
+
+/* Get the best subvolume to create the file in */
 xlator_t *
 dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)
 {
-	int i = 0;
-	double max = 0;
-	double max_inodes = 0;
 	xlator_t *avail_subvol = NULL;
 	dht_conf_t *conf = NULL;
 
@@ -261,37 +260,96 @@ dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)
 	LOCK (&conf->subvolume_lock);
 	{
-		for (i = 0; i < conf->subvolume_cnt; i++) {
-			if (conf->disk_unit == 'p') {
-				if ((conf->du_stats[i].avail_percent > max)
-				    && (conf->du_stats[i].avail_inodes > max_inodes)) {
-					max = conf->du_stats[i].avail_percent;
-					max_inodes = conf->du_stats[i].avail_inodes;
-					avail_subvol = conf->subvolumes[i];
-				}
-			} else {
-				if ((conf->du_stats[i].avail_space > max)
-				    && (conf->du_stats[i].avail_inodes > max_inodes)) {
-					max = conf->du_stats[i].avail_space;
-					max_inodes = conf->du_stats[i].avail_inodes;
-					avail_subvol = conf->subvolumes[i];
-				}
-			}
-		}
+		avail_subvol = dht_subvol_with_free_space_inodes (this,
+								  subvol);
+		if (!avail_subvol) {
+			avail_subvol = dht_subvol_maxspace_nonzeroinode (this,
+									 subvol);
+		}
 	}
 	UNLOCK (&conf->subvolume_lock);
 
 	if (!avail_subvol) {
-		gf_log (this->name, GF_LOG_DEBUG,
-			"no subvolume has enough free space and inodes to create");
+		gf_log (this->name,
+			GF_LOG_DEBUG,
+			"no subvolume has enough free space and/or inodes\
+			 to create");
+		avail_subvol = subvol;
 	}
 
-	if ((max < conf->min_free_disk) && (max_inodes < conf->min_free_inodes))
-		avail_subvol = subvol;
-
-	if (!avail_subvol)
-		avail_subvol = subvol;
 
 	return avail_subvol;
 }
+
+
+/* Get a subvolume which has both space and inodes above the min criteria */
+xlator_t *
+dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol)
+{
+	int i = 0;
+	double max = 0;
+	double max_inodes = 0;
+
+	xlator_t *avail_subvol = NULL;
+	dht_conf_t *conf = NULL;
+
+	conf = this->private;
+
+	for (i = 0; i < conf->subvolume_cnt; i++) {
+		if ((conf->disk_unit == 'p') &&
+		    (conf->du_stats[i].avail_percent > conf->min_free_disk) &&
+		    (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+			if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+			    (conf->du_stats[i].avail_percent > max)) {
+				max = conf->du_stats[i].avail_percent;
+				max_inodes = conf->du_stats[i].avail_inodes;
+				avail_subvol = conf->subvolumes[i];
+			}
+		}
+
+		if ((conf->disk_unit != 'p') &&
+		    (conf->du_stats[i].avail_space > conf->min_free_disk) &&
+		    (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+			if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+			    (conf->du_stats[i].avail_space > max)) {
+				max = conf->du_stats[i].avail_space;
+				max_inodes = conf->du_stats[i].avail_inodes;
+				avail_subvol = conf->subvolumes[i];
+			}
+		}
+	}
+
+	return avail_subvol;
+}
+
+
+/* Get the subvol which has at least one inode and maximum space */
+xlator_t *
+dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol)
+{
+	int i = 0;
+	double max = 0;
+
+	xlator_t *avail_subvol = NULL;
+	dht_conf_t *conf = NULL;
+
+	conf = this->private;
+
+	for (i = 0; i < conf->subvolume_cnt; i++) {
+		if (conf->disk_unit == 'p') {
+			if ((conf->du_stats[i].avail_percent > max)
+			    && (conf->du_stats[i].avail_inodes > 0)) {
+				max = conf->du_stats[i].avail_percent;
+				avail_subvol = conf->subvolumes[i];
+			}
+		} else {
+			if ((conf->du_stats[i].avail_space > max)
+			    && (conf->du_stats[i].avail_inodes > 0)) {
+				max = conf->du_stats[i].avail_space;
+				avail_subvol = conf->subvolumes[i];
+			}
+		}
+	}
+
+	return avail_subvol;
+}
-- cgit
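
The selection policy this patch introduces is a two-pass scan over the cached
per-subvol disk-usage stats: first prefer a subvol that clears both the
min-free-disk and min-free-inodes thresholds, then fall back to the subvol
with the most space that still has a free inode, and finally to the hashed
subvol itself. The standalone sketch below illustrates that shape outside the
gluster tree. The struct and function names are simplified stand-ins (not the
gluster API), and pass one is reduced to "most available space among
qualifying subvols", whereas the committed code also weighs free-inode counts
when updating its running maximum.

#include <stdio.h>

/* Simplified stand-ins for dht_du_info_t / dht_conf_t (assumed shapes). */
struct du_info {
        double avail_percent;   /* free space as a percentage  */
        double avail_space;     /* free space in bytes         */
        double avail_inodes;    /* free inodes as a percentage */
};

struct conf {
        char            disk_unit;       /* 'p' = percent, else bytes */
        double          min_free_disk;
        double          min_free_inodes;
        int             subvol_cnt;
        struct du_info *du_stats;
};

/* Pass 1: among subvols that clear BOTH minimum-free thresholds,
 * pick the one with the most available space (simplified criterion). */
static int
subvol_with_free_space_inodes (struct conf *conf)
{
        int    i, best = -1;
        double max = 0;

        for (i = 0; i < conf->subvol_cnt; i++) {
                double avail = (conf->disk_unit == 'p')
                               ? conf->du_stats[i].avail_percent
                               : conf->du_stats[i].avail_space;

                if (avail > conf->min_free_disk &&
                    conf->du_stats[i].avail_inodes > conf->min_free_inodes &&
                    avail > max) {
                        max  = avail;
                        best = i;
                }
        }
        return best;
}

/* Pass 2 (fallback): every subvol is past its threshold, so pick the
 * one with maximum space that still has at least one free inode. */
static int
subvol_maxspace_nonzeroinode (struct conf *conf)
{
        int    i, best = -1;
        double max = 0;

        for (i = 0; i < conf->subvol_cnt; i++) {
                double avail = (conf->disk_unit == 'p')
                               ? conf->du_stats[i].avail_percent
                               : conf->du_stats[i].avail_space;

                if (avail > max && conf->du_stats[i].avail_inodes > 0) {
                        max  = avail;
                        best = i;
                }
        }
        return best;
}

/* Composes the two passes the way dht_free_disk_available_subvol() does;
 * -1 means "no candidate", where DHT would keep the hashed subvol. */
static int
pick_subvol (struct conf *conf)
{
        int best = subvol_with_free_space_inodes (conf);

        if (best < 0)
                best = subvol_maxspace_nonzeroinode (conf);

        return best;
}

int
main (void)
{
        /* Brick 0 is below the 10% reserve; brick 1 has plenty of room. */
        struct du_info stats[] = {
                { .avail_percent = 5,  .avail_space = 5e6,  .avail_inodes = 90 },
                { .avail_percent = 40, .avail_space = 40e6, .avail_inodes = 90 },
        };
        struct conf conf = { 'p', 10, 5, 2, stats };

        printf ("picked subvol index: %d\n", pick_subvol (&conf)); /* prints 1 */
        return 0;
}

With min-free-disk at 10 percent, the example prints index 1: the brick that
has dipped under its reserve is skipped even when a new file hashes to it,
which is the behaviour test case 0 in the .t file above asserts.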