diff options
Diffstat (limited to 'xlators/cluster')
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 4 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-diskusage.c | 432 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht.c | 19 | 
4 files changed, 257 insertions, 200 deletions
| diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 7508684aaf2..fb149e7635e 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -3203,7 +3203,7 @@ dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          dht_local_t  *local = NULL;          int           this_call_cnt = 0;          int           ret = -1; -        int           subvol_filled = 0; +        gf_boolean_t subvol_filled = _gf_false;          call_frame_t *prev = NULL;          dht_layout_t *layout = NULL; diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 8af6dbdcdfd..54cef6cd9d4 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -169,6 +169,7 @@ typedef struct dht_local dht_local_t;  /* du - disk-usage */  struct dht_du {          double   avail_percent; +	double   avail_inodes;          uint64_t avail_space;          uint32_t log;  }; @@ -186,6 +187,7 @@ struct dht_conf {          int            gen;          dht_du_t      *du_stats;          uint64_t       min_free_disk; +	uint32_t       min_free_inodes;          char           disk_unit;          int32_t        refresh_interval;          gf_boolean_t   unhashed_sticky_bit; @@ -355,7 +357,7 @@ int dht_rename (call_frame_t *frame, xlator_t *this,  int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc); -int       dht_is_subvol_filled (xlator_t *this, xlator_t *subvol); +gf_boolean_t dht_is_subvol_filled (xlator_t *this, xlator_t *subvol);  xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol);  int       dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx); diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c index 0b8c116ca40..5453e3b107b 100644 --- a/xlators/cluster/dht/src/dht-diskusage.c +++ b/xlators/cluster/dht/src/dht-diskusage.c @@ -35,227 +35,269 @@  int  dht_du_info_cbk (call_frame_t *frame, void *cookie, xlator_t *this, -                 int op_ret, int op_errno, struct statvfs *statvfs) +		 int op_ret, int op_errno, struct statvfs *statvfs)  { -        dht_conf_t    *conf         = NULL; -        call_frame_t  *prev          = NULL; -        int            this_call_cnt = 0; -        int            i = 0; -        double         percent = 0; -        uint64_t       bytes = 0; - -        conf = this->private; -        prev = cookie; - -        if (op_ret == -1) { -                gf_log (this->name, GF_LOG_WARNING, -                        "failed to get disk info from %s", prev->this->name); -                goto out; -        } - -        if (statvfs && statvfs->f_blocks) { -                percent = (statvfs->f_bavail * 100) / statvfs->f_blocks; -                bytes = (statvfs->f_bavail * statvfs->f_frsize); -        } - -        LOCK (&conf->subvolume_lock); -        { -                for (i = 0; i < conf->subvolume_cnt; i++) -                        if (prev->this == conf->subvolumes[i]) { -                                conf->du_stats[i].avail_percent = percent; -                                conf->du_stats[i].avail_space   = bytes; -                                gf_log (this->name, GF_LOG_TRACE, -                                        "on subvolume '%s': avail_percent is: " -                                        "%.2f and avail_space is: %"PRIu64"", -                                        prev->this->name, -                                        conf->du_stats[i].avail_percent, -                                        conf->du_stats[i].avail_space); -                        } -        } -        UNLOCK (&conf->subvolume_lock); +	dht_conf_t    *conf         = NULL; +	call_frame_t  *prev          = NULL; +	int            this_call_cnt = 0; +	int            i = 0; +	double         percent = 0; +	double         percent_inodes = 0; +	uint64_t       bytes = 0; + +	conf = this->private; +	prev = cookie; + +	if (op_ret == -1) { +		gf_log (this->name, GF_LOG_WARNING, +			"failed to get disk info from %s", prev->this->name); +		goto out; +	} + +	if (statvfs && statvfs->f_blocks) { +		percent = (statvfs->f_bavail * 100) / statvfs->f_blocks; +		bytes = (statvfs->f_bavail * statvfs->f_frsize); +	} + +	if (statvfs && statvfs->f_files) { +		percent_inodes = (statvfs->f_ffree * 100) / statvfs->f_files; +	} else { +		/* set percent inodes to 100 for dynamically allocated inode filesystems +		   this logic holds good so that, distribute has nothing to worry about +		   total inodes rather let the 'create()' to be scheduled on the hashed +		   subvol regardless of the total inodes. since we have no awareness on +		   loosing inodes this logic fits well +		*/ +		percent_inodes = 100; +	} + +	LOCK (&conf->subvolume_lock); +	{ +		for (i = 0; i < conf->subvolume_cnt; i++) +			if (prev->this == conf->subvolumes[i]) { +				conf->du_stats[i].avail_percent = percent; +				conf->du_stats[i].avail_space   = bytes; +				conf->du_stats[i].avail_inodes  = percent_inodes; +				gf_log (this->name, GF_LOG_DEBUG, +					"on subvolume '%s': avail_percent is: " +					"%.2f and avail_space is: %"PRIu64" " +					"and avail_inodes is: %.2f", +					prev->this->name, +					conf->du_stats[i].avail_percent, +					conf->du_stats[i].avail_space, +					conf->du_stats[i].avail_inodes); +			} +	} +	UNLOCK (&conf->subvolume_lock);  out: -        this_call_cnt = dht_frame_return (frame); -        if (is_last_call (this_call_cnt)) -                DHT_STACK_DESTROY (frame); +	this_call_cnt = dht_frame_return (frame); +	if (is_last_call (this_call_cnt)) +		DHT_STACK_DESTROY (frame); -        return 0; +	return 0;  }  int  dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx)  { -        dht_conf_t    *conf         = NULL; -        call_frame_t  *statfs_frame = NULL; -        dht_local_t   *statfs_local = NULL; -        call_pool_t   *pool         = NULL; - -        conf = this->private; -        pool = this->ctx->pool; - -        statfs_frame = create_frame (this, pool); -        if (!statfs_frame) { -                goto err; -        } - -        /* local->fop value is not used in this case */ -        statfs_local = dht_local_init (statfs_frame, NULL, NULL, -                                       GF_FOP_MAXVALUE); -        if (!statfs_local) { -                goto err; -        } - -        loc_t tmp_loc = { .inode = NULL, -                          .path = "/", -        }; - -        statfs_local->call_cnt = 1; -        STACK_WIND (statfs_frame, dht_du_info_cbk, -                    conf->subvolumes[subvol_idx], -                    conf->subvolumes[subvol_idx]->fops->statfs, -                    &tmp_loc); - -        return 0; +	dht_conf_t    *conf         = NULL; +	call_frame_t  *statfs_frame = NULL; +	dht_local_t   *statfs_local = NULL; +	call_pool_t   *pool         = NULL; + +	conf = this->private; +	pool = this->ctx->pool; + +	statfs_frame = create_frame (this, pool); +	if (!statfs_frame) { +		goto err; +	} + +	/* local->fop value is not used in this case */ +	statfs_local = dht_local_init (statfs_frame, NULL, NULL, +				       GF_FOP_MAXVALUE); +	if (!statfs_local) { +		goto err; +	} + +	loc_t tmp_loc = { .inode = NULL, +			  .path = "/", +	}; + +	statfs_local->call_cnt = 1; +	STACK_WIND (statfs_frame, dht_du_info_cbk, +		    conf->subvolumes[subvol_idx], +		    conf->subvolumes[subvol_idx]->fops->statfs, +		    &tmp_loc); + +	return 0;  err: -        if (statfs_frame) -                DHT_STACK_DESTROY (statfs_frame); +	if (statfs_frame) +		DHT_STACK_DESTROY (statfs_frame); -        return -1; +	return -1;  }  int  dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)  { -        int            i = 0; -        dht_conf_t    *conf         = NULL; -        call_frame_t  *statfs_frame = NULL; -        dht_local_t   *statfs_local = NULL; -        struct timeval tv = {0,}; - -        conf  = this->private; - -        gettimeofday (&tv, NULL); -        if (tv.tv_sec > (conf->refresh_interval -                         + conf->last_stat_fetch.tv_sec)) { - -                statfs_frame = copy_frame (frame); -                if (!statfs_frame) { -                        goto err; -                } - -                /* In this case, 'local->fop' is not used */ -                statfs_local = dht_local_init (statfs_frame, loc, NULL, -                                               GF_FOP_MAXVALUE); -                if (!statfs_local) { -                        goto err; -                } - -                loc_t tmp_loc = { .inode = NULL, -                                  .path = "/", -                }; - -                statfs_local->call_cnt = conf->subvolume_cnt; -                for (i = 0; i < conf->subvolume_cnt; i++) { -                        STACK_WIND (statfs_frame, dht_du_info_cbk, -                                    conf->subvolumes[i], -                                    conf->subvolumes[i]->fops->statfs, -                                    &tmp_loc); -                } - -                conf->last_stat_fetch.tv_sec = tv.tv_sec; -        } -        return 0; +	int            i = 0; +	dht_conf_t    *conf         = NULL; +	call_frame_t  *statfs_frame = NULL; +	dht_local_t   *statfs_local = NULL; +	struct timeval tv = {0,}; + +	conf  = this->private; + +	gettimeofday (&tv, NULL); +	if (tv.tv_sec > (conf->refresh_interval +			 + conf->last_stat_fetch.tv_sec)) { + +		statfs_frame = copy_frame (frame); +		if (!statfs_frame) { +			goto err; +		} + +		/* In this case, 'local->fop' is not used */ +		statfs_local = dht_local_init (statfs_frame, loc, NULL, +					       GF_FOP_MAXVALUE); +		if (!statfs_local) { +			goto err; +		} + +		loc_t tmp_loc = { .inode = NULL, +				  .path = "/", +		}; + +		statfs_local->call_cnt = conf->subvolume_cnt; +		for (i = 0; i < conf->subvolume_cnt; i++) { +			STACK_WIND (statfs_frame, dht_du_info_cbk, +				    conf->subvolumes[i], +				    conf->subvolumes[i]->fops->statfs, +				    &tmp_loc); +		} + +		conf->last_stat_fetch.tv_sec = tv.tv_sec; +	} +	return 0;  err: -        if (statfs_frame) -                DHT_STACK_DESTROY (statfs_frame); +	if (statfs_frame) +		DHT_STACK_DESTROY (statfs_frame); -        return -1; +	return -1;  } -int +gf_boolean_t  dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)  { -        int         i = 0; -        int         subvol_filled = 0; -        dht_conf_t *conf = NULL; - -        conf = this->private; - -        /* Check for values above specified percent or free disk */ -        LOCK (&conf->subvolume_lock); -        { -                for (i = 0; i < conf->subvolume_cnt; i++) { -                        if (subvol == conf->subvolumes[i]) { -                                if (conf->disk_unit == 'p') { -                                        if (conf->du_stats[i].avail_percent < -                                            conf->min_free_disk) { -                                                subvol_filled = 1; -                                                break; -                                        } -                                } else { -                                        if (conf->du_stats[i].avail_space < -                                            conf->min_free_disk) { -                                                subvol_filled = 1; -                                                break; -                                        } -                                } -                        } -                } -        } -        UNLOCK (&conf->subvolume_lock); - -        if (subvol_filled && conf->subvolume_status[i]) { -                if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) { -                        gf_log (this->name, GF_LOG_WARNING, -                                "disk space on subvolume '%s' is getting " -                                "full (%.2f %%), consider adding more nodes", -                                subvol->name, -                                (100 - conf->du_stats[i].avail_percent)); -                } -        } - -        return subvol_filled; +	int         i = 0; +	dht_conf_t *conf = NULL; +	gf_boolean_t subvol_filled_inodes = _gf_false; +	gf_boolean_t subvol_filled_space = _gf_false; +	gf_boolean_t is_subvol_filled = _gf_false; + +	conf = this->private; + +	/* Check for values above specified percent or free disk */ +	LOCK (&conf->subvolume_lock); +	{ +		for (i = 0; i < conf->subvolume_cnt; i++) { +			if (subvol == conf->subvolumes[i]) { +				if (conf->disk_unit == 'p') { +					if (conf->du_stats[i].avail_percent < +					    conf->min_free_disk) { +						subvol_filled_space = _gf_true; +						break; +					} + +				} else { +					if (conf->du_stats[i].avail_space < +					    conf->min_free_disk) { +						subvol_filled_space = _gf_true; +						break; +					} +				} +				if (conf->du_stats[i].avail_inodes < +				    conf->min_free_inodes) { +					subvol_filled_inodes = _gf_true; +					break; +				} +			} +		} +	} +	UNLOCK (&conf->subvolume_lock); + +	if (subvol_filled_space && conf->subvolume_status[i]) { +		if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) { +			gf_log (this->name, GF_LOG_WARNING, +				"disk space on subvolume '%s' is getting " +				"full (%.2f %%), consider adding more nodes", +				subvol->name, +				(100 - conf->du_stats[i].avail_percent)); +		} +	} + +	if (subvol_filled_inodes && conf->subvolume_status[i]) { +		if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) { +			gf_log (this->name, GF_LOG_CRITICAL, +				"inodes on subvolume '%s' are at " +				"(%.2f %%), consider adding more nodes", +				subvol->name, +				(100 - conf->du_stats[i].avail_inodes)); +		} +	} + +	is_subvol_filled = (subvol_filled_space || subvol_filled_inodes); + +	return is_subvol_filled;  }  xlator_t *  dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)  { -        int         i = 0; -        double      max= 0; -        xlator_t   *avail_subvol = NULL; -        dht_conf_t *conf = NULL; - -        conf = this->private; - -        LOCK (&conf->subvolume_lock); -        { -                for (i = 0; i < conf->subvolume_cnt; i++) { -                        if (conf->disk_unit == 'p') { -                                if (conf->du_stats[i].avail_percent > max) { -                                        max = conf->du_stats[i].avail_percent; -                                        avail_subvol = conf->subvolumes[i]; -                                } -                        } else { -                                if (conf->du_stats[i].avail_space > max) { -                                        max = conf->du_stats[i].avail_space; -                                        avail_subvol = conf->subvolumes[i]; -                                } -                        } -                } -        } -        UNLOCK (&conf->subvolume_lock); - -        if (!avail_subvol) { -                gf_log (this->name, GF_LOG_DEBUG, -                        "no subvolume has enough free space to create"); -        } - -        if (max < conf->min_free_disk) -                avail_subvol = subvol; - -        if (!avail_subvol) -                avail_subvol = subvol; - -        return avail_subvol; +	int         i = 0; +	double      max = 0; +	double      max_inodes = 0; +	xlator_t   *avail_subvol = NULL; +	dht_conf_t *conf = NULL; + +	conf = this->private; + +	LOCK (&conf->subvolume_lock); +	{ +		for (i = 0; i < conf->subvolume_cnt; i++) { +			if (conf->disk_unit == 'p') { +				if ((conf->du_stats[i].avail_percent > max) +				    && (conf->du_stats[i].avail_inodes > max_inodes)) { +					max = conf->du_stats[i].avail_percent; +					max_inodes = conf->du_stats[i].avail_inodes; +					avail_subvol = conf->subvolumes[i]; +				} +			} else { +				if ((conf->du_stats[i].avail_space > max) +				    && (conf->du_stats[i].avail_inodes > max_inodes)) { +					max = conf->du_stats[i].avail_space; +					max_inodes = conf->du_stats[i].avail_inodes; +					avail_subvol = conf->subvolumes[i]; +				} + +			} +		} +	} +	UNLOCK (&conf->subvolume_lock); + +	if (!avail_subvol) { +		gf_log (this->name, GF_LOG_DEBUG, +			"no subvolume has enough free space and inodes to create"); +	} + +	if ((max < conf->min_free_disk) && (max_inodes < conf->min_free_inodes)) +		avail_subvol = subvol; + +	if (!avail_subvol) +		avail_subvol = subvol; + +	return avail_subvol;  } diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index c5bb36be05c..8be573f5165 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -131,6 +131,7 @@ dht_priv_dump (xlator_t *this)          gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed);          gf_proc_dump_write("gen", "%d", conf->gen);          gf_proc_dump_write("min_free_disk", "%lu", conf->min_free_disk); +	gf_proc_dump_write("min_free_inodes", "%lu", conf->min_free_inodes);          gf_proc_dump_write("disk_unit", "%c", conf->disk_unit);          gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval);          gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit); @@ -139,6 +140,8 @@ dht_priv_dump (xlator_t *this)                                     conf->du_stats->avail_percent);                  gf_proc_dump_write("du_stats.avail_space", "%lu",                                     conf->du_stats->avail_space); +		gf_proc_dump_write("du_stats.avail_inodes", "%lf", +                                   conf->du_stats->avail_inodes);                  gf_proc_dump_write("du_stats.log", "%lu", conf->du_stats->log);          }          gf_proc_dump_write("last_stat_fetch", "%s", ctime(&conf->last_stat_fetch.tv_sec)); @@ -318,9 +321,10 @@ reconfigure (xlator_t *this, dict_t *options)                  }          } -        GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options, +	GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options,                            percent_or_size, out); - +	GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options, +                          percent, out);          GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt,                            options, uint32, out); @@ -376,7 +380,10 @@ init (xlator_t *this)          GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err); -        GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size, +	GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size, +			err); + +        GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent,                          err);          conf->dir_spread_cnt = conf->subvolume_cnt; @@ -517,6 +524,12 @@ struct volume_options options[] = {            .description = "Percentage/Size of disk space that must be "                           "kept free."          }, +	{ .key  = {"min-free-inodes"}, +          .type = GF_OPTION_TYPE_PERCENT, +          .default_value = "5%", +          .description = "Percentage inodes that must be " +                         "kept free." +        },          { .key = {"unhashed-sticky-bit"},            .type = GF_OPTION_TYPE_BOOL,            .default_value = "off", | 
