diff options
| author | Raghavendra Talur <rtalur@redhat.com> | 2013-01-24 11:26:37 +0530 | 
|---|---|---|
| committer | Anand Avati <avati@redhat.com> | 2013-02-04 08:43:50 -0800 | 
| commit | 2a46c8769bc2b6ad491a305ea1d38023d0e22617 (patch) | |
| tree | 1c4f5ef50b148c13b5f00bbcca2195578ef8c08a | |
| parent | 50f0882051dff81882115bf72abb67577099944f (diff) | |
cluster/dht: Correct min_free_disk behaviour
Problem:
Files were being created in subvol which had less than
min_free_disk available even in the cases where other
subvols with more space were available.
Solution:
Changed the logic to look for subvol which has more
space available.
In cases where all the subvols have less than
min-free-disk available, the one with maximum space
and at least one free inode is chosen.
Known Issue: Cannot ensure that the first file
created right after the min-free-disk value is crossed
on a brick will get created in the other brick, because
the disk usage stat takes some time to update in the
gluster process. Will fix that as part of another bug.
Change-Id: If3ae0bf5a44f8739ce35b3ee3f191009ddd44455
BUG: 858488
Signed-off-by: Raghavendra Talur <rtalur@redhat.com>
Reviewed-on: http://review.gluster.org/4420
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
| -rw-r--r-- | tests/bugs/bug-858488-min-free-disk.t | 114 | ||||
| -rw-r--r-- | tests/include.rc | 15 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 4 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-diskusage.c | 112 | 
4 files changed, 218 insertions, 27 deletions
diff --git a/tests/bugs/bug-858488-min-free-disk.t b/tests/bugs/bug-858488-min-free-disk.t new file mode 100644 index 00000000000..43ef1496ba1 --- /dev/null +++ b/tests/bugs/bug-858488-min-free-disk.t @@ -0,0 +1,114 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +cleanup; + +function pidgrep() +{ +    ps ax | grep "$1" | awk '{print $1}' | head -1 +} + +## Start glusterd +TEST glusterd; +TEST pidof glusterd; +TEST $CLI volume info; + +## Lets create partitions for bricks +TEST truncate -s 100M $B0/brick1 +TEST truncate -s 200M $B0/brick2 +TEST LO1=`losetup --find --show $B0/brick1` +TEST mkfs.xfs $LO1 +TEST LO2=`losetup --find --show $B0/brick2` +TEST mkfs.xfs $LO2 +TEST mkdir -p $B0/${V0}1 $B0/${V0}2 +TEST mount -t xfs $LO1 $B0/${V0}1 +TEST mount -t xfs $LO2 $B0/${V0}2 + + +## Lets create volume +TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}; + +## Verify volume is created +EXPECT "$V0" volinfo_field $V0 'Volume Name'; +EXPECT 'Created' volinfo_field $V0 'Status'; + +## Start volume and verify +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; +TEST glusterfs -s $H0 --volfile-id=$V0 --acl $M0 +MOUNT_PID=`ps ax |grep "glusterfs -s $H0 --volfile-id=$V0 --acl $M0" | awk '{print $1}' | head -1` +## Real test starts here +## ---------------------------------------------------------------------------- + +MINFREEDISKVALUE=90 + +## Set min free disk to MINFREEDISKVALUE percent +TEST $CLI volume set $V0 cluster.min-free-disk $MINFREEDISKVALUE + +## We need to have file name to brick map based on hash. +## We will use this info in test case 0. 
+i=1 +CONTINUE=2 +BRICK1FILE=0 +BRICK2FILE=0 +while [[ $CONTINUE -ne 0 ]] +do +        dd if=/dev/zero of=$M0/file$i.data bs=1024 count=1024 1>/dev/null 2>&1 + +        if  [[ -e  $B0/${V0}1/file$i.data &&  $BRICK1FILE = "0" ]] +        then +                BRICK1FILE=file$i.data +                CONTINUE=$CONTINUE-1 +        fi + +        if [[ -e  $B0/${V0}2/file$i.data &&  $BRICK2FILE = "0" ]] +        then +                BRICK2FILE=file$i.data +                CONTINUE=$CONTINUE-1 +        fi + +        rm $M0/file$i.data +        let i++ +done + + +## Bring free space on one of the bricks to less than minfree value by +## creating one big file. +dd if=/dev/zero of=$M0/fillonebrick.data bs=1024 count=25600 1>/dev/null 2>&1 + +#Lets find out where it was created +if [ -f $B0/${V0}1/fillonebrick.data ] +then +        FILETOCREATE=$BRICK1FILE +        OTHERBRICK=$B0/${V0}2 +else +        FILETOCREATE=$BRICK2FILE +        OTHERBRICK=$B0/${V0}1 +fi + +##--------------------------------TEST CASE 0----------------------------------- +## If we try to create a file which should go into full brick as per hash, it +## should go into the other brick instead. 
+ +## Before that let us create files just to make gluster refresh the stat +## Using touch so it should not change the disk usage stats +for k in {1..20}; +do +        touch $M0/dummyfile$k +done + +dd if=/dev/zero of=$M0/$FILETOCREATE bs=1024 count=2048 1>/dev/null 2>&1 +TEST [ -e $OTHERBRICK/$FILETOCREATE ] + +## Done testing, lets clean up +EXPECT "$MOUNT_PID" pidgrep $MOUNT_PID +TEST rm -rf $M0/* + +## Finish up +TEST $CLI volume stop $V0; +EXPECT 'Stopped' volinfo_field $V0 'Status'; +$CLI volume delete $V0; + +cleanup; diff --git a/tests/include.rc b/tests/include.rc index 1d1386f9826..5d9c96cea0e 100644 --- a/tests/include.rc +++ b/tests/include.rc @@ -168,6 +168,21 @@ function cleanup()  {  	killall -15 glusterfs glusterfsd glusterd 2>/dev/null || true;  	killall -9 glusterfs glusterfsd glusterd 2>/dev/null || true; + +        MOUNTPOINTS=`mount | grep "$B0/" | awk '{print $3}'` +        for m in $MOUNTPOINTS; +        do +                umount $m +        done + + +        LOOPDEVICES=`losetup -a | grep "$B0/" | awk '{print $1}' | tr -d :` +        for l in $LOOPDEVICES; +        do +                losetup -d $l +        done + +  	rm -rf /var/lib/glusterd/* $B0/* /etc/glusterd/*;  	umount -l $M0 2>/dev/null || true; diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 764b37ac4a0..0dd654650c0 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -724,4 +724,8 @@ int  dht_dir_has_layout (dict_t *xattr);  gf_boolean_t  dht_is_subvol_in_layout (dht_layout_t *layout, xlator_t *xlator); +xlator_t * +dht_subvol_with_free_space_inodes (xlator_t *this, xlator_t *subvol); +xlator_t * +dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol);  #endif/* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c index 52ea3a32aca..0c87f4a647c 100644 --- a/xlators/cluster/dht/src/dht-diskusage.c +++ 
b/xlators/cluster/dht/src/dht-diskusage.c @@ -248,12 +248,11 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)  	return is_subvol_filled;  } + +/*Get the best subvolume to create the file in*/  xlator_t *  dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)  { -	int         i = 0; -	double      max = 0; -	double      max_inodes = 0;  	xlator_t   *avail_subvol = NULL;  	dht_conf_t *conf = NULL; @@ -261,37 +260,96 @@ dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)  	LOCK (&conf->subvolume_lock);  	{ -		for (i = 0; i < conf->subvolume_cnt; i++) { -			if (conf->disk_unit == 'p') { -				if ((conf->du_stats[i].avail_percent > max) -				    && (conf->du_stats[i].avail_inodes > max_inodes)) { -					max = conf->du_stats[i].avail_percent; -					max_inodes = conf->du_stats[i].avail_inodes; -					avail_subvol = conf->subvolumes[i]; -				} -			} else { -				if ((conf->du_stats[i].avail_space > max) -				    && (conf->du_stats[i].avail_inodes > max_inodes)) { -					max = conf->du_stats[i].avail_space; -					max_inodes = conf->du_stats[i].avail_inodes; -					avail_subvol = conf->subvolumes[i]; -				} +                avail_subvol = dht_subvol_with_free_space_inodes(this, subvol); +                if(!avail_subvol) +                { +                        avail_subvol = dht_subvol_maxspace_nonzeroinode(this, +                                                                        subvol); +                } -			} -		}  	}  	UNLOCK (&conf->subvolume_lock);  	if (!avail_subvol) { -		gf_log (this->name, GF_LOG_DEBUG, -			"no subvolume has enough free space and inodes to create"); +		gf_log (this->name, +                        GF_LOG_DEBUG, +			"no subvolume has enough free space and/or inodes\ +                         to create"); +                avail_subvol = subvol;  	} -	if ((max < conf->min_free_disk) && (max_inodes < conf->min_free_inodes)) -		avail_subvol = subvol; - -	if (!avail_subvol) -		avail_subvol = subvol;  	return 
avail_subvol;  } + +/*Get subvolume which has both space and inodes more than the min criteria*/ +xlator_t * +dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol) +{ +        int i = 0; +        double max = 0; +        double max_inodes = 0; + +        xlator_t *avail_subvol = NULL; +        dht_conf_t *conf = NULL; + +        conf = this->private; + +        for(i=0; i < conf->subvolume_cnt; i++) { +                if ((conf->disk_unit == 'p') && +                    (conf->du_stats[i].avail_percent > conf->min_free_disk) && +                    (conf->du_stats[i].avail_inodes  > conf->min_free_inodes)) { +                        if ((conf->du_stats[i].avail_inodes > max_inodes) || +                            (conf->du_stats[i].avail_percent > max)) { +                                max = conf->du_stats[i].avail_percent; +                                max_inodes = conf->du_stats[i].avail_inodes; +                                avail_subvol = conf->subvolumes[i]; +                        } +                } + +                if ((conf->disk_unit != 'p') && +                    (conf->du_stats[i].avail_space > conf->min_free_disk) && +                    (conf->du_stats[i].avail_inodes  > conf->min_free_inodes)) { +                        if ((conf->du_stats[i].avail_inodes > max_inodes) || +                            (conf->du_stats[i].avail_space > max)) { +                                max = conf->du_stats[i].avail_space; +                                max_inodes = conf->du_stats[i].avail_inodes; +                                avail_subvol = conf->subvolumes[i]; +                        } +                } +        } + +        return avail_subvol; +} + + +/* Get subvol which has atleast one inode and maximum space */ +xlator_t * +dht_subvol_maxspace_nonzeroinode (xlator_t *this, xlator_t *subvol) +{ +        int         i = 0; +        double      max = 0; + +        xlator_t   *avail_subvol = NULL; +        dht_conf_t *conf = 
NULL; + +        conf = this->private; + +        for (i = 0; i < conf->subvolume_cnt; i++) { +                if (conf->disk_unit == 'p') { +                        if ((conf->du_stats[i].avail_percent > max) +                            && (conf->du_stats[i].avail_inodes > 0 )) { +                                max = conf->du_stats[i].avail_percent; +                                avail_subvol = conf->subvolumes[i]; +                        } +               } else { +                         if ((conf->du_stats[i].avail_space > max) +                            && (conf->du_stats[i].avail_inodes > 0)) { +                                 max = conf->du_stats[i].avail_space; +                                 avail_subvol = conf->subvolumes[i]; +                         } +               } +        } + +        return avail_subvol; +}  | 
