diff options
| author | Harshavardhana <fharshav@redhat.com> | 2011-11-15 13:44:43 -0800 | 
|---|---|---|
| committer | Vijay Bellur <vijay@gluster.com> | 2011-11-23 04:16:16 -0800 | 
| commit | 896fc241850aaa021f6f8958da4e37e37679c0cd (patch) | |
| tree | 6bce29400dfcfce50f53abbcb72d56df796d3f57 /xlators/cluster | |
| parent | af7d85074fc05afdee3ff48f62b0ec5c057a3e6b (diff) | |
cluster/distribute: Add support for 'min-free-inodes' on each distribute subvolume.
This change is required because an increasingly large number of small files
can cause a subvolume to run out of inodes before it runs out of available disk space.
It is therefore necessary to check free inodes algorithmically as well,
just as we already do for disk space.
Change-Id: I9b87405328d443825e239ee80ab664aceb50ee68
BUG: 3799
Signed-off-by: Harshavardhana <fharshav@redhat.com>
Reviewed-on: http://review.gluster.com/730
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-by: Amar Tumballi <amar@gluster.com>
Diffstat (limited to 'xlators/cluster')
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 4 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-diskusage.c | 432 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht.c | 19 | 
4 files changed, 257 insertions, 200 deletions
| diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 7508684aaf2..fb149e7635e 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -3203,7 +3203,7 @@ dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          dht_local_t  *local = NULL;          int           this_call_cnt = 0;          int           ret = -1; -        int           subvol_filled = 0; +        gf_boolean_t subvol_filled = _gf_false;          call_frame_t *prev = NULL;          dht_layout_t *layout = NULL; diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 8af6dbdcdfd..54cef6cd9d4 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -169,6 +169,7 @@ typedef struct dht_local dht_local_t;  /* du - disk-usage */  struct dht_du {          double   avail_percent; +	double   avail_inodes;          uint64_t avail_space;          uint32_t log;  }; @@ -186,6 +187,7 @@ struct dht_conf {          int            gen;          dht_du_t      *du_stats;          uint64_t       min_free_disk; +	uint32_t       min_free_inodes;          char           disk_unit;          int32_t        refresh_interval;          gf_boolean_t   unhashed_sticky_bit; @@ -355,7 +357,7 @@ int dht_rename (call_frame_t *frame, xlator_t *this,  int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc); -int       dht_is_subvol_filled (xlator_t *this, xlator_t *subvol); +gf_boolean_t dht_is_subvol_filled (xlator_t *this, xlator_t *subvol);  xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol);  int       dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx); diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c index 0b8c116ca40..5453e3b107b 100644 --- a/xlators/cluster/dht/src/dht-diskusage.c +++ b/xlators/cluster/dht/src/dht-diskusage.c @@ -35,227 +35,269 @@  int  
dht_du_info_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -                 int op_ret, int op_errno, struct statvfs *statvfs) +		 int op_ret, int op_errno, struct statvfs *statvfs)  { -        dht_conf_t    *conf         = NULL; -        call_frame_t  *prev          = NULL; -        int            this_call_cnt = 0; -        int            i = 0; -        double         percent = 0; -        uint64_t       bytes = 0; - -        conf = this->private; -        prev = cookie; - -        if (op_ret == -1) { -                gf_log (this->name, GF_LOG_WARNING, -                        "failed to get disk info from %s", prev->this->name); -                goto out; -        } - -        if (statvfs && statvfs->f_blocks) { -                percent = (statvfs->f_bavail * 100) / statvfs->f_blocks; -                bytes = (statvfs->f_bavail * statvfs->f_frsize); -        } - -        LOCK (&conf->subvolume_lock); -        { -                for (i = 0; i < conf->subvolume_cnt; i++) -                        if (prev->this == conf->subvolumes[i]) { -                                conf->du_stats[i].avail_percent = percent; -                                conf->du_stats[i].avail_space   = bytes; -                                gf_log (this->name, GF_LOG_TRACE, -                                        "on subvolume '%s': avail_percent is: " -                                        "%.2f and avail_space is: %"PRIu64"", -                                        prev->this->name, -                                        conf->du_stats[i].avail_percent, -                                        conf->du_stats[i].avail_space); -                        } -        } -        UNLOCK (&conf->subvolume_lock); +	dht_conf_t    *conf         = NULL; +	call_frame_t  *prev          = NULL; +	int            this_call_cnt = 0; +	int            i = 0; +	double         percent = 0; +	double         percent_inodes = 0; +	uint64_t       bytes = 0; + +	conf = this->private; +	prev = 
cookie; + +	if (op_ret == -1) { +		gf_log (this->name, GF_LOG_WARNING, +			"failed to get disk info from %s", prev->this->name); +		goto out; +	} + +	if (statvfs && statvfs->f_blocks) { +		percent = (statvfs->f_bavail * 100) / statvfs->f_blocks; +		bytes = (statvfs->f_bavail * statvfs->f_frsize); +	} + +	if (statvfs && statvfs->f_files) { +		percent_inodes = (statvfs->f_ffree * 100) / statvfs->f_files; +	} else { +		/* set percent inodes to 100 for dynamically allocated inode filesystems +		   this logic holds good so that, distribute has nothing to worry about +		   total inodes rather let the 'create()' to be scheduled on the hashed +		   subvol regardless of the total inodes. since we have no awareness on +		   loosing inodes this logic fits well +		*/ +		percent_inodes = 100; +	} + +	LOCK (&conf->subvolume_lock); +	{ +		for (i = 0; i < conf->subvolume_cnt; i++) +			if (prev->this == conf->subvolumes[i]) { +				conf->du_stats[i].avail_percent = percent; +				conf->du_stats[i].avail_space   = bytes; +				conf->du_stats[i].avail_inodes  = percent_inodes; +				gf_log (this->name, GF_LOG_DEBUG, +					"on subvolume '%s': avail_percent is: " +					"%.2f and avail_space is: %"PRIu64" " +					"and avail_inodes is: %.2f", +					prev->this->name, +					conf->du_stats[i].avail_percent, +					conf->du_stats[i].avail_space, +					conf->du_stats[i].avail_inodes); +			} +	} +	UNLOCK (&conf->subvolume_lock);  out: -        this_call_cnt = dht_frame_return (frame); -        if (is_last_call (this_call_cnt)) -                DHT_STACK_DESTROY (frame); +	this_call_cnt = dht_frame_return (frame); +	if (is_last_call (this_call_cnt)) +		DHT_STACK_DESTROY (frame); -        return 0; +	return 0;  }  int  dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx)  { -        dht_conf_t    *conf         = NULL; -        call_frame_t  *statfs_frame = NULL; -        dht_local_t   *statfs_local = NULL; -        call_pool_t   *pool         = NULL; - -        conf = this->private; -        
pool = this->ctx->pool; - -        statfs_frame = create_frame (this, pool); -        if (!statfs_frame) { -                goto err; -        } - -        /* local->fop value is not used in this case */ -        statfs_local = dht_local_init (statfs_frame, NULL, NULL, -                                       GF_FOP_MAXVALUE); -        if (!statfs_local) { -                goto err; -        } - -        loc_t tmp_loc = { .inode = NULL, -                          .path = "/", -        }; - -        statfs_local->call_cnt = 1; -        STACK_WIND (statfs_frame, dht_du_info_cbk, -                    conf->subvolumes[subvol_idx], -                    conf->subvolumes[subvol_idx]->fops->statfs, -                    &tmp_loc); - -        return 0; +	dht_conf_t    *conf         = NULL; +	call_frame_t  *statfs_frame = NULL; +	dht_local_t   *statfs_local = NULL; +	call_pool_t   *pool         = NULL; + +	conf = this->private; +	pool = this->ctx->pool; + +	statfs_frame = create_frame (this, pool); +	if (!statfs_frame) { +		goto err; +	} + +	/* local->fop value is not used in this case */ +	statfs_local = dht_local_init (statfs_frame, NULL, NULL, +				       GF_FOP_MAXVALUE); +	if (!statfs_local) { +		goto err; +	} + +	loc_t tmp_loc = { .inode = NULL, +			  .path = "/", +	}; + +	statfs_local->call_cnt = 1; +	STACK_WIND (statfs_frame, dht_du_info_cbk, +		    conf->subvolumes[subvol_idx], +		    conf->subvolumes[subvol_idx]->fops->statfs, +		    &tmp_loc); + +	return 0;  err: -        if (statfs_frame) -                DHT_STACK_DESTROY (statfs_frame); +	if (statfs_frame) +		DHT_STACK_DESTROY (statfs_frame); -        return -1; +	return -1;  }  int  dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)  { -        int            i = 0; -        dht_conf_t    *conf         = NULL; -        call_frame_t  *statfs_frame = NULL; -        dht_local_t   *statfs_local = NULL; -        struct timeval tv = {0,}; - -        conf  = this->private; - -        gettimeofday (&tv, 
NULL); -        if (tv.tv_sec > (conf->refresh_interval -                         + conf->last_stat_fetch.tv_sec)) { - -                statfs_frame = copy_frame (frame); -                if (!statfs_frame) { -                        goto err; -                } - -                /* In this case, 'local->fop' is not used */ -                statfs_local = dht_local_init (statfs_frame, loc, NULL, -                                               GF_FOP_MAXVALUE); -                if (!statfs_local) { -                        goto err; -                } - -                loc_t tmp_loc = { .inode = NULL, -                                  .path = "/", -                }; - -                statfs_local->call_cnt = conf->subvolume_cnt; -                for (i = 0; i < conf->subvolume_cnt; i++) { -                        STACK_WIND (statfs_frame, dht_du_info_cbk, -                                    conf->subvolumes[i], -                                    conf->subvolumes[i]->fops->statfs, -                                    &tmp_loc); -                } - -                conf->last_stat_fetch.tv_sec = tv.tv_sec; -        } -        return 0; +	int            i = 0; +	dht_conf_t    *conf         = NULL; +	call_frame_t  *statfs_frame = NULL; +	dht_local_t   *statfs_local = NULL; +	struct timeval tv = {0,}; + +	conf  = this->private; + +	gettimeofday (&tv, NULL); +	if (tv.tv_sec > (conf->refresh_interval +			 + conf->last_stat_fetch.tv_sec)) { + +		statfs_frame = copy_frame (frame); +		if (!statfs_frame) { +			goto err; +		} + +		/* In this case, 'local->fop' is not used */ +		statfs_local = dht_local_init (statfs_frame, loc, NULL, +					       GF_FOP_MAXVALUE); +		if (!statfs_local) { +			goto err; +		} + +		loc_t tmp_loc = { .inode = NULL, +				  .path = "/", +		}; + +		statfs_local->call_cnt = conf->subvolume_cnt; +		for (i = 0; i < conf->subvolume_cnt; i++) { +			STACK_WIND (statfs_frame, dht_du_info_cbk, +				    conf->subvolumes[i], +				    
conf->subvolumes[i]->fops->statfs, +				    &tmp_loc); +		} + +		conf->last_stat_fetch.tv_sec = tv.tv_sec; +	} +	return 0;  err: -        if (statfs_frame) -                DHT_STACK_DESTROY (statfs_frame); +	if (statfs_frame) +		DHT_STACK_DESTROY (statfs_frame); -        return -1; +	return -1;  } -int +gf_boolean_t  dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)  { -        int         i = 0; -        int         subvol_filled = 0; -        dht_conf_t *conf = NULL; - -        conf = this->private; - -        /* Check for values above specified percent or free disk */ -        LOCK (&conf->subvolume_lock); -        { -                for (i = 0; i < conf->subvolume_cnt; i++) { -                        if (subvol == conf->subvolumes[i]) { -                                if (conf->disk_unit == 'p') { -                                        if (conf->du_stats[i].avail_percent < -                                            conf->min_free_disk) { -                                                subvol_filled = 1; -                                                break; -                                        } -                                } else { -                                        if (conf->du_stats[i].avail_space < -                                            conf->min_free_disk) { -                                                subvol_filled = 1; -                                                break; -                                        } -                                } -                        } -                } -        } -        UNLOCK (&conf->subvolume_lock); - -        if (subvol_filled && conf->subvolume_status[i]) { -                if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) { -                        gf_log (this->name, GF_LOG_WARNING, -                                "disk space on subvolume '%s' is getting " -                                "full (%.2f %%), consider adding more nodes", -                    
            subvol->name, -                                (100 - conf->du_stats[i].avail_percent)); -                } -        } - -        return subvol_filled; +	int         i = 0; +	dht_conf_t *conf = NULL; +	gf_boolean_t subvol_filled_inodes = _gf_false; +	gf_boolean_t subvol_filled_space = _gf_false; +	gf_boolean_t is_subvol_filled = _gf_false; + +	conf = this->private; + +	/* Check for values above specified percent or free disk */ +	LOCK (&conf->subvolume_lock); +	{ +		for (i = 0; i < conf->subvolume_cnt; i++) { +			if (subvol == conf->subvolumes[i]) { +				if (conf->disk_unit == 'p') { +					if (conf->du_stats[i].avail_percent < +					    conf->min_free_disk) { +						subvol_filled_space = _gf_true; +						break; +					} + +				} else { +					if (conf->du_stats[i].avail_space < +					    conf->min_free_disk) { +						subvol_filled_space = _gf_true; +						break; +					} +				} +				if (conf->du_stats[i].avail_inodes < +				    conf->min_free_inodes) { +					subvol_filled_inodes = _gf_true; +					break; +				} +			} +		} +	} +	UNLOCK (&conf->subvolume_lock); + +	if (subvol_filled_space && conf->subvolume_status[i]) { +		if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) { +			gf_log (this->name, GF_LOG_WARNING, +				"disk space on subvolume '%s' is getting " +				"full (%.2f %%), consider adding more nodes", +				subvol->name, +				(100 - conf->du_stats[i].avail_percent)); +		} +	} + +	if (subvol_filled_inodes && conf->subvolume_status[i]) { +		if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) { +			gf_log (this->name, GF_LOG_CRITICAL, +				"inodes on subvolume '%s' are at " +				"(%.2f %%), consider adding more nodes", +				subvol->name, +				(100 - conf->du_stats[i].avail_inodes)); +		} +	} + +	is_subvol_filled = (subvol_filled_space || subvol_filled_inodes); + +	return is_subvol_filled;  }  xlator_t *  dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)  { -        int         i = 0; -        double      max= 0; -      
  xlator_t   *avail_subvol = NULL; -        dht_conf_t *conf = NULL; - -        conf = this->private; - -        LOCK (&conf->subvolume_lock); -        { -                for (i = 0; i < conf->subvolume_cnt; i++) { -                        if (conf->disk_unit == 'p') { -                                if (conf->du_stats[i].avail_percent > max) { -                                        max = conf->du_stats[i].avail_percent; -                                        avail_subvol = conf->subvolumes[i]; -                                } -                        } else { -                                if (conf->du_stats[i].avail_space > max) { -                                        max = conf->du_stats[i].avail_space; -                                        avail_subvol = conf->subvolumes[i]; -                                } -                        } -                } -        } -        UNLOCK (&conf->subvolume_lock); - -        if (!avail_subvol) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no subvolume has enough free space to create"); -        } - -        if (max < conf->min_free_disk) -                avail_subvol = subvol; - -        if (!avail_subvol) -                avail_subvol = subvol; - -        return avail_subvol; +	int         i = 0; +	double      max = 0; +	double      max_inodes = 0; +	xlator_t   *avail_subvol = NULL; +	dht_conf_t *conf = NULL; + +	conf = this->private; + +	LOCK (&conf->subvolume_lock); +	{ +		for (i = 0; i < conf->subvolume_cnt; i++) { +			if (conf->disk_unit == 'p') { +				if ((conf->du_stats[i].avail_percent > max) +				    && (conf->du_stats[i].avail_inodes > max_inodes)) { +					max = conf->du_stats[i].avail_percent; +					max_inodes = conf->du_stats[i].avail_inodes; +					avail_subvol = conf->subvolumes[i]; +				} +			} else { +				if ((conf->du_stats[i].avail_space > max) +				    && (conf->du_stats[i].avail_inodes > max_inodes)) { +					max = conf->du_stats[i].avail_space; +					
max_inodes = conf->du_stats[i].avail_inodes; +					avail_subvol = conf->subvolumes[i]; +				} + +			} +		} +	} +	UNLOCK (&conf->subvolume_lock); + +	if (!avail_subvol) { +		gf_log (this->name, GF_LOG_DEBUG, +			"no subvolume has enough free space and inodes to create"); +	} + +	if ((max < conf->min_free_disk) && (max_inodes < conf->min_free_inodes)) +		avail_subvol = subvol; + +	if (!avail_subvol) +		avail_subvol = subvol; + +	return avail_subvol;  } diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index c5bb36be05c..8be573f5165 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -131,6 +131,7 @@ dht_priv_dump (xlator_t *this)          gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed);          gf_proc_dump_write("gen", "%d", conf->gen);          gf_proc_dump_write("min_free_disk", "%lu", conf->min_free_disk); +	gf_proc_dump_write("min_free_inodes", "%lu", conf->min_free_inodes);          gf_proc_dump_write("disk_unit", "%c", conf->disk_unit);          gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval);          gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit); @@ -139,6 +140,8 @@ dht_priv_dump (xlator_t *this)                                     conf->du_stats->avail_percent);                  gf_proc_dump_write("du_stats.avail_space", "%lu",                                     conf->du_stats->avail_space); +		gf_proc_dump_write("du_stats.avail_inodes", "%lf", +                                   conf->du_stats->avail_inodes);                  gf_proc_dump_write("du_stats.log", "%lu", conf->du_stats->log);          }          gf_proc_dump_write("last_stat_fetch", "%s", ctime(&conf->last_stat_fetch.tv_sec)); @@ -318,9 +321,10 @@ reconfigure (xlator_t *this, dict_t *options)                  }          } -        GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options, +	GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options,           
                 percent_or_size, out); - +	GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options, +                          percent, out);          GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt,                            options, uint32, out); @@ -376,7 +380,10 @@ init (xlator_t *this)          GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err); -        GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size, +	GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size, +			err); + +        GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent,                          err);          conf->dir_spread_cnt = conf->subvolume_cnt; @@ -517,6 +524,12 @@ struct volume_options options[] = {            .description = "Percentage/Size of disk space that must be "                           "kept free."          }, +	{ .key  = {"min-free-inodes"}, +          .type = GF_OPTION_TYPE_PERCENT, +          .default_value = "5%", +          .description = "Percentage inodes that must be " +                         "kept free." +        },          { .key = {"unhashed-sticky-bit"},            .type = GF_OPTION_TYPE_BOOL,            .default_value = "off", | 
