diff options
Diffstat (limited to 'xlators/cluster/dht/src')
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 2 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 4 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-diskusage.c | 432 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht.c | 19 |
4 files changed, 257 insertions, 200 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 7508684aaf2..fb149e7635e 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -3203,7 +3203,7 @@ dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, dht_local_t *local = NULL; int this_call_cnt = 0; int ret = -1; - int subvol_filled = 0; + gf_boolean_t subvol_filled = _gf_false; call_frame_t *prev = NULL; dht_layout_t *layout = NULL; diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 8af6dbdcdfd..54cef6cd9d4 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -169,6 +169,7 @@ typedef struct dht_local dht_local_t; /* du - disk-usage */ struct dht_du { double avail_percent; + double avail_inodes; uint64_t avail_space; uint32_t log; }; @@ -186,6 +187,7 @@ struct dht_conf { int gen; dht_du_t *du_stats; uint64_t min_free_disk; + uint32_t min_free_inodes; char disk_unit; int32_t refresh_interval; gf_boolean_t unhashed_sticky_bit; @@ -355,7 +357,7 @@ int dht_rename (call_frame_t *frame, xlator_t *this, int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc); -int dht_is_subvol_filled (xlator_t *this, xlator_t *subvol); +gf_boolean_t dht_is_subvol_filled (xlator_t *this, xlator_t *subvol); xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol); int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx); diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c index 0b8c116ca40..5453e3b107b 100644 --- a/xlators/cluster/dht/src/dht-diskusage.c +++ b/xlators/cluster/dht/src/dht-diskusage.c @@ -35,227 +35,269 @@ int dht_du_info_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int op_ret, int op_errno, struct statvfs *statvfs) + int op_ret, int op_errno, struct statvfs *statvfs) { - dht_conf_t *conf = NULL; - call_frame_t *prev = NULL; - int this_call_cnt = 0; - int i = 0; - double percent = 0; - uint64_t bytes = 0; - - conf = this->private; - prev = cookie; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "failed to get disk info from %s", prev->this->name); - goto out; - } - - if (statvfs && statvfs->f_blocks) { - percent = (statvfs->f_bavail * 100) / statvfs->f_blocks; - bytes = (statvfs->f_bavail * statvfs->f_frsize); - } - - LOCK (&conf->subvolume_lock); - { - for (i = 0; i < conf->subvolume_cnt; i++) - if (prev->this == conf->subvolumes[i]) { - conf->du_stats[i].avail_percent = percent; - conf->du_stats[i].avail_space = bytes; - gf_log (this->name, GF_LOG_TRACE, - "on subvolume '%s': avail_percent is: " - "%.2f and avail_space is: %"PRIu64"", - prev->this->name, - conf->du_stats[i].avail_percent, - conf->du_stats[i].avail_space); - } - } - UNLOCK (&conf->subvolume_lock); + dht_conf_t *conf = NULL; + call_frame_t *prev = NULL; + int this_call_cnt = 0; + int i = 0; + double percent = 0; + double percent_inodes = 0; + uint64_t bytes = 0; + + conf = this->private; + prev = cookie; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "failed to get disk info from %s", prev->this->name); + goto out; + } + + if (statvfs && statvfs->f_blocks) { + percent = (statvfs->f_bavail * 100) / statvfs->f_blocks; + bytes = (statvfs->f_bavail * statvfs->f_frsize); + } + + if (statvfs && statvfs->f_files) { + percent_inodes = (statvfs->f_ffree * 100) / statvfs->f_files; + } else { + /* set percent inodes to 100 for dynamically allocated inode filesystems + this logic holds good so that, distribute has nothing to worry about + total inodes rather let the 'create()' to be scheduled on the hashed + subvol regardless of the total inodes. since we have no awareness on + loosing inodes this logic fits well + */ + percent_inodes = 100; + } + + LOCK (&conf->subvolume_lock); + { + for (i = 0; i < conf->subvolume_cnt; i++) + if (prev->this == conf->subvolumes[i]) { + conf->du_stats[i].avail_percent = percent; + conf->du_stats[i].avail_space = bytes; + conf->du_stats[i].avail_inodes = percent_inodes; + gf_log (this->name, GF_LOG_DEBUG, + "on subvolume '%s': avail_percent is: " + "%.2f and avail_space is: %"PRIu64" " + "and avail_inodes is: %.2f", + prev->this->name, + conf->du_stats[i].avail_percent, + conf->du_stats[i].avail_space, + conf->du_stats[i].avail_inodes); + } + } + UNLOCK (&conf->subvolume_lock); out: - this_call_cnt = dht_frame_return (frame); - if (is_last_call (this_call_cnt)) - DHT_STACK_DESTROY (frame); + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) + DHT_STACK_DESTROY (frame); - return 0; + return 0; } int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx) { - dht_conf_t *conf = NULL; - call_frame_t *statfs_frame = NULL; - dht_local_t *statfs_local = NULL; - call_pool_t *pool = NULL; - - conf = this->private; - pool = this->ctx->pool; - - statfs_frame = create_frame (this, pool); - if (!statfs_frame) { - goto err; - } - - /* local->fop value is not used in this case */ - statfs_local = dht_local_init (statfs_frame, NULL, NULL, - GF_FOP_MAXVALUE); - if (!statfs_local) { - goto err; - } - - loc_t tmp_loc = { .inode = NULL, - .path = "/", - }; - - statfs_local->call_cnt = 1; - STACK_WIND (statfs_frame, dht_du_info_cbk, - conf->subvolumes[subvol_idx], - conf->subvolumes[subvol_idx]->fops->statfs, - &tmp_loc); - - return 0; + dht_conf_t *conf = NULL; + call_frame_t *statfs_frame = NULL; + dht_local_t *statfs_local = NULL; + call_pool_t *pool = NULL; + + conf = this->private; + pool = this->ctx->pool; + + statfs_frame = create_frame (this, pool); + if (!statfs_frame) { + goto err; + } + + /* local->fop value is not used in this case */ + statfs_local = dht_local_init (statfs_frame, NULL, NULL, + GF_FOP_MAXVALUE); + if (!statfs_local) { + goto err; + } + + loc_t tmp_loc = { .inode = NULL, + .path = "/", + }; + + statfs_local->call_cnt = 1; + STACK_WIND (statfs_frame, dht_du_info_cbk, + conf->subvolumes[subvol_idx], + conf->subvolumes[subvol_idx]->fops->statfs, + &tmp_loc); + + return 0; err: - if (statfs_frame) - DHT_STACK_DESTROY (statfs_frame); + if (statfs_frame) + DHT_STACK_DESTROY (statfs_frame); - return -1; + return -1; } int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc) { - int i = 0; - dht_conf_t *conf = NULL; - call_frame_t *statfs_frame = NULL; - dht_local_t *statfs_local = NULL; - struct timeval tv = {0,}; - - conf = this->private; - - gettimeofday (&tv, NULL); - if (tv.tv_sec > (conf->refresh_interval - + conf->last_stat_fetch.tv_sec)) { - - statfs_frame = copy_frame (frame); - if (!statfs_frame) { - goto err; - } - - /* In this case, 'local->fop' is not used */ - statfs_local = dht_local_init (statfs_frame, loc, NULL, - GF_FOP_MAXVALUE); - if (!statfs_local) { - goto err; - } - - loc_t tmp_loc = { .inode = NULL, - .path = "/", - }; - - statfs_local->call_cnt = conf->subvolume_cnt; - for (i = 0; i < conf->subvolume_cnt; i++) { - STACK_WIND (statfs_frame, dht_du_info_cbk, - conf->subvolumes[i], - conf->subvolumes[i]->fops->statfs, - &tmp_loc); - } - - conf->last_stat_fetch.tv_sec = tv.tv_sec; - } - return 0; + int i = 0; + dht_conf_t *conf = NULL; + call_frame_t *statfs_frame = NULL; + dht_local_t *statfs_local = NULL; + struct timeval tv = {0,}; + + conf = this->private; + + gettimeofday (&tv, NULL); + if (tv.tv_sec > (conf->refresh_interval + + conf->last_stat_fetch.tv_sec)) { + + statfs_frame = copy_frame (frame); + if (!statfs_frame) { + goto err; + } + + /* In this case, 'local->fop' is not used */ + statfs_local = dht_local_init (statfs_frame, loc, NULL, + GF_FOP_MAXVALUE); + if (!statfs_local) { + goto err; + } + + loc_t tmp_loc = { .inode = NULL, + .path = "/", + }; + + statfs_local->call_cnt = conf->subvolume_cnt; + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (statfs_frame, dht_du_info_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->statfs, + &tmp_loc); + } + + conf->last_stat_fetch.tv_sec = tv.tv_sec; + } + return 0; err: - if (statfs_frame) - DHT_STACK_DESTROY (statfs_frame); + if (statfs_frame) + DHT_STACK_DESTROY (statfs_frame); - return -1; + return -1; } -int +gf_boolean_t dht_is_subvol_filled (xlator_t *this, xlator_t *subvol) { - int i = 0; - int subvol_filled = 0; - dht_conf_t *conf = NULL; - - conf = this->private; - - /* Check for values above specified percent or free disk */ - LOCK (&conf->subvolume_lock); - { - for (i = 0; i < conf->subvolume_cnt; i++) { - if (subvol == conf->subvolumes[i]) { - if (conf->disk_unit == 'p') { - if (conf->du_stats[i].avail_percent < - conf->min_free_disk) { - subvol_filled = 1; - break; - } - } else { - if (conf->du_stats[i].avail_space < - conf->min_free_disk) { - subvol_filled = 1; - break; - } - } - } - } - } - UNLOCK (&conf->subvolume_lock); - - if (subvol_filled && conf->subvolume_status[i]) { - if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) { - gf_log (this->name, GF_LOG_WARNING, - "disk space on subvolume '%s' is getting " - "full (%.2f %%), consider adding more nodes", - subvol->name, - (100 - conf->du_stats[i].avail_percent)); - } - } - - return subvol_filled; + int i = 0; + dht_conf_t *conf = NULL; + gf_boolean_t subvol_filled_inodes = _gf_false; + gf_boolean_t subvol_filled_space = _gf_false; + gf_boolean_t is_subvol_filled = _gf_false; + + conf = this->private; + + /* Check for values above specified percent or free disk */ + LOCK (&conf->subvolume_lock); + { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (subvol == conf->subvolumes[i]) { + if (conf->disk_unit == 'p') { + if (conf->du_stats[i].avail_percent < + conf->min_free_disk) { + subvol_filled_space = _gf_true; + break; + } + + } else { + if (conf->du_stats[i].avail_space < + conf->min_free_disk) { + subvol_filled_space = _gf_true; + break; + } + } + if (conf->du_stats[i].avail_inodes < + conf->min_free_inodes) { + subvol_filled_inodes = _gf_true; + break; + } + } + } + } + UNLOCK (&conf->subvolume_lock); + + if (subvol_filled_space && conf->subvolume_status[i]) { + if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) { + gf_log (this->name, GF_LOG_WARNING, + "disk space on subvolume '%s' is getting " + "full (%.2f %%), consider adding more nodes", + subvol->name, + (100 - conf->du_stats[i].avail_percent)); + } + } + + if (subvol_filled_inodes && conf->subvolume_status[i]) { + if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) { + gf_log (this->name, GF_LOG_CRITICAL, + "inodes on subvolume '%s' are at " + "(%.2f %%), consider adding more nodes", + subvol->name, + (100 - conf->du_stats[i].avail_inodes)); + } + } + + is_subvol_filled = (subvol_filled_space || subvol_filled_inodes); + + return is_subvol_filled; } xlator_t * dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol) { - int i = 0; - double max= 0; - xlator_t *avail_subvol = NULL; - dht_conf_t *conf = NULL; - - conf = this->private; - - LOCK (&conf->subvolume_lock); - { - for (i = 0; i < conf->subvolume_cnt; i++) { - if (conf->disk_unit == 'p') { - if (conf->du_stats[i].avail_percent > max) { - max = conf->du_stats[i].avail_percent; - avail_subvol = conf->subvolumes[i]; - } - } else { - if (conf->du_stats[i].avail_space > max) { - max = conf->du_stats[i].avail_space; - avail_subvol = conf->subvolumes[i]; - } - } - } - } - UNLOCK (&conf->subvolume_lock); - - if (!avail_subvol) { - gf_log (this->name, GF_LOG_DEBUG, - "no subvolume has enough free space to create"); - } - - if (max < conf->min_free_disk) - avail_subvol = subvol; - - if (!avail_subvol) - avail_subvol = subvol; - - return avail_subvol; + int i = 0; + double max = 0; + double max_inodes = 0; + xlator_t *avail_subvol = NULL; + dht_conf_t *conf = NULL; + + conf = this->private; + + LOCK (&conf->subvolume_lock); + { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->disk_unit == 'p') { + if ((conf->du_stats[i].avail_percent > max) + && (conf->du_stats[i].avail_inodes > max_inodes)) { + max = conf->du_stats[i].avail_percent; + max_inodes = conf->du_stats[i].avail_inodes; + avail_subvol = conf->subvolumes[i]; + } + } else { + if ((conf->du_stats[i].avail_space > max) + && (conf->du_stats[i].avail_inodes > max_inodes)) { + max = conf->du_stats[i].avail_space; + max_inodes = conf->du_stats[i].avail_inodes; + avail_subvol = conf->subvolumes[i]; + } + + } + } + } + UNLOCK (&conf->subvolume_lock); + + if (!avail_subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no subvolume has enough free space and inodes to create"); + } + + if ((max < conf->min_free_disk) && (max_inodes < conf->min_free_inodes)) + avail_subvol = subvol; + + if (!avail_subvol) + avail_subvol = subvol; + + return avail_subvol; } diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index c5bb36be05c..8be573f5165 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -131,6 +131,7 @@ dht_priv_dump (xlator_t *this) gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed); gf_proc_dump_write("gen", "%d", conf->gen); gf_proc_dump_write("min_free_disk", "%lu", conf->min_free_disk); + gf_proc_dump_write("min_free_inodes", "%lu", conf->min_free_inodes); gf_proc_dump_write("disk_unit", "%c", conf->disk_unit); gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval); gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit); @@ -139,6 +140,8 @@ dht_priv_dump (xlator_t *this) conf->du_stats->avail_percent); gf_proc_dump_write("du_stats.avail_space", "%lu", conf->du_stats->avail_space); + gf_proc_dump_write("du_stats.avail_inodes", "%lf", + conf->du_stats->avail_inodes); gf_proc_dump_write("du_stats.log", "%lu", conf->du_stats->log); } gf_proc_dump_write("last_stat_fetch", "%s", ctime(&conf->last_stat_fetch.tv_sec)); @@ -318,9 +321,10 @@ reconfigure (xlator_t *this, dict_t *options) } } - GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options, + GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options, percent_or_size, out); - + GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options, + percent, out); GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt, options, uint32, out); @@ -376,7 +380,10 @@ init (xlator_t *this) GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err); - GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size, + GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size, + err); + + GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent, err); conf->dir_spread_cnt = conf->subvolume_cnt; @@ -517,6 +524,12 @@ struct volume_options options[] = { .description = "Percentage/Size of disk space that must be " "kept free." }, + { .key = {"min-free-inodes"}, + .type = GF_OPTION_TYPE_PERCENT, + .default_value = "5%", + .description = "Percentage inodes that must be " + "kept free." + }, { .key = {"unhashed-sticky-bit"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "off", |