diff options
| -rw-r--r-- | xlators/cluster/dht/src/Makefile.am | 2 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 182 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 17 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-diskusage.c | 187 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht.c | 22 | 
5 files changed, 384 insertions, 26 deletions
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am index d4e0752a585..f87212699c9 100644 --- a/xlators/cluster/dht/src/Makefile.am +++ b/xlators/cluster/dht/src/Makefile.am @@ -4,7 +4,7 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster  dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c \ -		dht-selfheal.c dht-rename.c dht-hashfn.c +		dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c  dht_la_SOURCES = $(dht_common_source) dht.c  diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 41daac677d2..b163f48d71b 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -30,6 +30,7 @@  #include "dht-common.h"  #include "defaults.h" +#include <sys/time.h>  /* TODO:     - use volumename in xattr instead of "dht" @@ -38,6 +39,7 @@     - complete linkfile selfheal  */ +  int  dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,  			 xlator_t *this, @@ -52,7 +54,8 @@ dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,  	if (ret == 0) {  		layout = local->selfheal.layout; -		ret = inode_ctx_put (local->inode, this, (uint64_t)(long)layout); +		ret = inode_ctx_put (local->inode, this,  +                                     (uint64_t)(long)layout);  		if (ret == 0)  			local->selfheal.layout = NULL; @@ -2342,19 +2345,49 @@ out:  	return 0;  } +int +dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie, +                               xlator_t *this, +                               int32_t op_ret, int32_t op_errno, +                               inode_t *inode, struct stat *stbuf) +{ +	dht_local_t  *local = NULL; +	xlator_t     *cached_subvol = NULL; + +        if (op_ret == -1) +                goto err; + +	local = frame->local; +	cached_subvol = local->cached_subvol; + +        STACK_WIND (frame, dht_newfile_cbk, +                    cached_subvol, cached_subvol->fops->mknod, +                    &local->loc, local->mode, local->rdev); + +        return 0; + err: + 	DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);	 + 	return 0; +}  int  dht_mknod (call_frame_t *frame, xlator_t *this,  	   loc_t *loc, mode_t mode, dev_t rdev)  { -	xlator_t  *subvol = NULL; -	int        op_errno = -1; - +	xlator_t    *subvol = NULL; +	int          op_errno = -1; +        xlator_t    *avail_subvol = NULL; +	dht_conf_t  *conf = NULL; +	dht_local_t *local = NULL;  	VALIDATE_OR_GOTO (frame, err);  	VALIDATE_OR_GOTO (this, err);  	VALIDATE_OR_GOTO (loc, err); +	conf = this->private; + +        dht_get_du_info (frame, this, loc); +  	subvol = dht_subvol_get_hashed (this, loc);  	if (!subvol) {  		gf_log (this->name, GF_LOG_ERROR, @@ -2364,12 +2397,31 @@ dht_mknod (call_frame_t *frame, xlator_t *this,  		goto err;  	} -	gf_log (this->name, GF_LOG_DEBUG, -		"creating %s on %s", loc->path, subvol->name); - -	STACK_WIND (frame, dht_newfile_cbk, -		    subvol, subvol->fops->mknod, -		    loc, mode, rdev); +        if (!dht_is_subvol_filled (this, subvol)) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "creating %s on %s", loc->path, subvol->name); +                 +                STACK_WIND (frame, dht_newfile_cbk, +                            subvol, subvol->fops->mknod, +                            loc, mode, rdev); +        } else { +                /* Choose the minimum filled volume, and create the  +                   files there */ +                local = dht_local_init (frame); +                if (!local) { +                        op_errno = ENOMEM; +                        gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +                        goto err; +                } +                avail_subvol = dht_free_disk_available_subvol (this, subvol); +                local->cached_subvol = avail_subvol; +                local->mode = mode;  +                local->rdev = rdev; +                 +		dht_linkfile_create (frame, dht_mknod_linkfile_create_cbk, +				     avail_subvol, subvol, loc); +        }  	return 0; @@ -2625,7 +2677,6 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,  	dht_layout_t *layout = NULL;  	int           ret = -1; -  	if (op_ret == -1)  		goto out; @@ -2659,17 +2710,57 @@ out:  int +dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie, +				xlator_t *this, +				int32_t op_ret, int32_t op_errno, +				inode_t *inode, struct stat *stbuf) +{ +	dht_local_t  *local = NULL; +	xlator_t     *cached_subvol = NULL; + +        if (op_ret == -1) +                goto err; + +	local = frame->local; +	cached_subvol = local->cached_subvol; + +        STACK_WIND (frame, dht_create_cbk, +                    cached_subvol, cached_subvol->fops->create, +                    &local->loc, local->flags, local->mode, local->fd); + +        return 0; + err: + 	DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);	 + 	return 0; +} + +int  dht_create (call_frame_t *frame, xlator_t *this,  	    loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)  { -	xlator_t  *subvol = NULL; -	int        op_errno = -1; - +	int          op_errno = -1; +        int          ret = -1; +	xlator_t    *subvol = NULL; +	dht_conf_t  *conf = NULL; +        dht_local_t *local = NULL; +        xlator_t    *avail_subvol = NULL;  	VALIDATE_OR_GOTO (frame, err);  	VALIDATE_OR_GOTO (this, err);  	VALIDATE_OR_GOTO (loc, err); +	conf = this->private; + +        dht_get_du_info (frame, this, loc); + +	local = dht_local_init (frame); +	if (!local) { +		gf_log (this->name, GF_LOG_ERROR, +			"memory allocation failed :("); +		op_errno = ENOMEM; +		goto err; +	} +  	subvol = dht_subvol_get_hashed (this, loc);  	if (!subvol) {  		gf_log (this->name, GF_LOG_ERROR, @@ -2679,12 +2770,37 @@ dht_create (call_frame_t *frame, xlator_t *this,  		goto err;  	} -	gf_log (this->name, GF_LOG_DEBUG, -		"creating %s on %s", loc->path, subvol->name); - -	STACK_WIND (frame, dht_create_cbk, -		    subvol, subvol->fops->create, -		    loc, flags, mode, fd); +        if (!dht_is_subvol_filled (this, subvol)) { +                gf_log (this->name, GF_LOG_DEBUG, +                        "creating %s on %s", loc->path, subvol->name); +                STACK_WIND (frame, dht_create_cbk, +                            subvol, subvol->fops->create, +                            loc, flags, mode, fd); +        } else { +                /* Choose the minimum filled volume, and create the  +                   files there */ +                /* TODO */ +            	ret = loc_dup (loc, &local->loc); +                if (ret == -1) { +                        op_errno = ENOMEM; +                        gf_log (this->name, GF_LOG_ERROR, +                                "memory allocation failed :("); +                        goto err; +                } +                local->fd = fd_ref (fd); +                local->flags = flags; +                local->mode = mode; +                avail_subvol = dht_free_disk_available_subvol (this, subvol); + +                local->cached_subvol = avail_subvol; +                local->hashed_subvol = subvol; +                gf_log (this->name, GF_LOG_DEBUG, +                        "creating %s on %s (link at %s)", loc->path,  +                        avail_subvol->name, subvol->name); +		dht_linkfile_create (frame, dht_create_linkfile_create_cbk, +                                     avail_subvol, subvol, loc); +                 +        }  	return 0; @@ -2727,17 +2843,27 @@ dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,  	dht_local_t  *local = NULL;  	int           this_call_cnt = 0;  	int           ret = -1; +        int           subvol_filled = 0;  	call_frame_t *prev = NULL;  	dht_layout_t *layout = NULL; +	dht_conf_t   *conf = NULL; +	conf = this->private;  	local = frame->local;  	prev  = cookie;  	layout = local->layout; +        subvol_filled = dht_is_subvol_filled (this, prev->this); +  	LOCK (&frame->lock);  	{ -		ret = dht_layout_merge (this, layout, prev->this, -					op_ret, op_errno, NULL); +                if (subvol_filled && (op_ret != -1)) { +                        ret = dht_layout_merge (this, layout, prev->this, +                                                -1, ENOTCONN, NULL); +                } else { +                        ret = dht_layout_merge (this, layout, prev->this, +                                                op_ret, op_errno, NULL); +                }  		if (op_ret == -1) {  			local->op_errno = op_errno; @@ -2777,9 +2903,13 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,  	conf = this->private;  	hashed_subvol = local->hashed_subvol; -	ret = dht_layout_merge (this, layout, prev->this, -				op_ret, op_errno, NULL); - +        if (dht_is_subvol_filled (this, hashed_subvol)) +                ret = dht_layout_merge (this, layout, prev->this, +                                        -1, ENOTCONN, NULL); +        else +                ret = dht_layout_merge (this, layout, prev->this, +                                        op_ret, op_errno, NULL); +          	if (op_ret == -1) {  		local->op_errno = op_errno;  		goto err; @@ -2830,6 +2960,8 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,  	conf = this->private; +        dht_get_du_info (frame, this, loc); +  	local = dht_local_init (frame);  	if (!local) {  		gf_log (this->name, GF_LOG_ERROR, diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 0eb57a196c3..b6959d86d14 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -97,6 +97,13 @@ struct dht_local {  };  typedef struct dht_local dht_local_t; +/* du - disk-usage */ +struct dht_du { +        double   avail_percent; +        uint64_t avail_space; +        uint32_t log; +}; +typedef struct dht_du dht_du_t;  struct dht_conf {  	gf_lock_t      subvolume_lock; @@ -109,6 +116,10 @@ struct dht_conf {  	dht_layout_t  *default_dir_layout;  	gf_boolean_t   search_unhashed;  	int            gen; +        dht_du_t      *du_stats; +        uint32_t       min_free_disk; +        int32_t        refresh_interval; +	struct timeval last_stat_fetch;  };  typedef struct dht_conf dht_conf_t; @@ -218,4 +229,10 @@ dht_layout_sort_volname (dht_layout_t *layout);  int dht_rename (call_frame_t *frame, xlator_t *this,  		loc_t *oldloc, loc_t *newloc); + +int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc); + +int dht_is_subvol_filled (xlator_t *this, xlator_t *subvol); +xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol); +  #endif /* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c new file mode 100644 index 00000000000..330e93699d7 --- /dev/null +++ b/xlators/cluster/dht/src/dht-diskusage.c @@ -0,0 +1,187 @@ +/* +   Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com> +   This file is part of GlusterFS. + +   GlusterFS is free software; you can redistribute it and/or modify +   it under the terms of the GNU General Public License as published +   by the Free Software Foundation; either version 3 of the License, +   or (at your option) any later version. + +   GlusterFS is distributed in the hope that it will be useful, but +   WITHOUT ANY WARRANTY; without even the implied warranty of +   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU +   General Public License for more details. + +   You should have received a copy of the GNU General Public License +   along with this program.  If not, see +   <http://www.gnu.org/licenses/>. +*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +/* TODO: add NS locking */ + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" +#include "defaults.h" + +#include <sys/time.h> + + +int  +dht_du_info_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                 int op_ret, int op_errno, struct statvfs *statvfs) +{ +	dht_conf_t    *conf         = NULL; +	dht_local_t   *local = NULL; +        call_frame_t  *prev          = NULL; +	int            this_call_cnt = 0; +        int            i = 0; +        double         percent = 0; + +	local = frame->local; +        conf = this->private; +        prev = cookie; + +        if (op_ret == -1)  +                goto out; +         +        percent = (statvfs->f_bfree * 100) / statvfs->f_blocks; +         +        LOCK (&conf->subvolume_lock); +        { +                for (i = 0; i < conf->subvolume_cnt; i++) +                        if (prev->this == conf->subvolumes[i]) +                                conf->du_stats[i].avail_percent = percent; +        } +        UNLOCK (&conf->subvolume_lock); + + out: +	this_call_cnt = dht_frame_return (frame); +	if (is_last_call (this_call_cnt)) +		DHT_STACK_DESTROY (frame); + +        return 0; +} + +int +dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ +        int            i = 0; +	dht_conf_t    *conf         = NULL; +	call_frame_t  *statfs_frame = NULL; +	dht_local_t   *statfs_local = NULL; +        struct timeval tv = {0,}; + +	conf  = this->private; + +	gettimeofday (&tv, NULL); +	if (tv.tv_sec > (conf->refresh_interval  +			 + conf->last_stat_fetch.tv_sec)) { + +                statfs_frame = copy_frame (frame); +                if (!statfs_frame) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "memory allocation failed :("); +                        goto err; +                } + +                statfs_local = dht_local_init (statfs_frame); +                if (!statfs_local) { +                        gf_log (this->name, GF_LOG_ERROR, +                                "memory allocation failed :("); +                        goto err; +                } + +                loc_copy (&statfs_local->loc, loc); +                loc_t tmp_loc = { .inode = NULL, +                              .path = "/", +                }; +                 +                for (i = 0; i < conf->subvolume_cnt; i++) { +                        STACK_WIND (statfs_frame, dht_du_info_cbk, +                                    conf->subvolumes[i], +                                    conf->subvolumes[i]->fops->statfs, +                                    &tmp_loc); +                } + +                conf->last_stat_fetch.tv_sec = tv.tv_sec; +        } +        return 0; + err: +	if (statfs_frame) +		DHT_STACK_DESTROY (statfs_frame); + +        return -1; +} + + +int +dht_is_subvol_filled (xlator_t *this, xlator_t *subvol) +{ +        int         i = 0; +        int         subvol_filled = 0; +	dht_conf_t *conf = NULL; + +        conf = this->private; + +        /* Check for values above 90% free disk */ +        LOCK (&conf->subvolume_lock); +        { +                for (i = 0; i < conf->subvolume_cnt; i++) { +                        if ((subvol == conf->subvolumes[i]) && +                            (conf->du_stats[i].avail_percent <  +                             conf->min_free_disk)) { +                                subvol_filled = 1; +                                break; +                        } +                } +        } +        UNLOCK (&conf->subvolume_lock); + +        if (subvol_filled) { +                if (!(conf->du_stats[i].log++ % GF_UNIVERSAL_ANSWER)) { +                        gf_log (this->name, GF_LOG_CRITICAL, +                                "disk space on subvolume '%s' is getting " +                                "full(%f), consider adding more nodes",  +                                subvol->name, conf->du_stats[i].avail_percent); +                } +        } + +        return subvol_filled; +} + +xlator_t * +dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)  +{ +        int         i = 0; +        double      max_avail = 0; +        xlator_t   *avail_subvol = NULL; +	dht_conf_t *conf = NULL; + +        conf = this->private; +        avail_subvol = subvol; + +        LOCK (&conf->subvolume_lock); +        { +                for (i = 0; i < conf->subvolume_cnt; i++) { +                        if (conf->du_stats[i].avail_percent > max_avail) { +                                max_avail  = conf->du_stats[i].avail_percent; +                                avail_subvol = conf->subvolumes[i]; +                        } +                } +        } +        UNLOCK (&conf->subvolume_lock); + +        if (avail_subvol == subvol) { +                gf_log (this->name, GF_LOG_CRITICAL,  +                        "no node has enough free space :O"); +        } +                 +        return avail_subvol; +} diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index 3be6312937c..ddf95832f66 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -110,6 +110,15 @@ init (xlator_t *this)  		gf_string2boolean (lookup_unhashed_str,  				   &conf->search_unhashed);  	} +         +        conf->min_free_disk = 10; + +	if (dict_get_str (this->options, "min-free-disk", +			  &lookup_unhashed_str) == 0) { +		gf_string2percent (lookup_unhashed_str, +				   &conf->min_free_disk); +	} +          ret = dht_init_subvolumes (this, conf);          if (ret == -1) { @@ -121,6 +130,13 @@ init (xlator_t *this)                  goto err;          } +        conf->du_stats = CALLOC (conf->subvolume_cnt, sizeof (dht_du_t)); +        if (!conf->du_stats) { +                gf_log (this->name, GF_LOG_ERROR, +                        "memory allocation failed :("); +                goto err; +        } +  	LOCK_INIT (&conf->subvolume_lock);  	conf->gen = 1; @@ -147,6 +163,9 @@ err:  		if (conf->subvolume_status)  			FREE (conf->subvolume_status); +                if (conf->du_stats) +                        FREE (conf->du_stats); +                  FREE (conf);          } @@ -218,5 +237,8 @@ struct volume_options options[] = {          { .key  = {"lookup-unhashed"},   	  .type = GF_OPTION_TYPE_BOOL   	}, +        { .key  = {"min-free-disk"}, +          .type = GF_OPTION_TYPE_PERCENT +        },  	{ .key  = {NULL} },  };  | 
