diff options
-rw-r--r-- | xlators/cluster/dht/src/Makefile.am | 2 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 182 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 17 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-diskusage.c | 187 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht.c | 22 |
5 files changed, 384 insertions, 26 deletions
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am index d4e0752a5..f87212699 100644 --- a/xlators/cluster/dht/src/Makefile.am +++ b/xlators/cluster/dht/src/Makefile.am @@ -4,7 +4,7 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c \ - dht-selfheal.c dht-rename.c dht-hashfn.c + dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c dht_la_SOURCES = $(dht_common_source) dht.c diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index 41daac677..b163f48d7 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -30,6 +30,7 @@ #include "dht-common.h" #include "defaults.h" +#include <sys/time.h> /* TODO: - use volumename in xattr instead of "dht" @@ -38,6 +39,7 @@ - complete linkfile selfheal */ + int dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -52,7 +54,8 @@ dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie, if (ret == 0) { layout = local->selfheal.layout; - ret = inode_ctx_put (local->inode, this, (uint64_t)(long)layout); + ret = inode_ctx_put (local->inode, this, + (uint64_t)(long)layout); if (ret == 0) local->selfheal.layout = NULL; @@ -2342,19 +2345,49 @@ out: return 0; } +int +dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + xlator_t *cached_subvol = NULL; + + if (op_ret == -1) + goto err; + + local = frame->local; + cached_subvol = local->cached_subvol; + + STACK_WIND (frame, dht_newfile_cbk, + cached_subvol, cached_subvol->fops->mknod, + &local->loc, local->mode, local->rdev); + + return 0; + err: + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + return 0; +} int dht_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, dev_t rdev) { - xlator_t *subvol = NULL; - int op_errno = -1; - + xlator_t *subvol = NULL; + int op_errno = -1; + xlator_t *avail_subvol = NULL; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); + conf = this->private; + + dht_get_du_info (frame, this, loc); + subvol = dht_subvol_get_hashed (this, loc); if (!subvol) { gf_log (this->name, GF_LOG_ERROR, @@ -2364,12 +2397,31 @@ dht_mknod (call_frame_t *frame, xlator_t *this, goto err; } - gf_log (this->name, GF_LOG_DEBUG, - "creating %s on %s", loc->path, subvol->name); - - STACK_WIND (frame, dht_newfile_cbk, - subvol, subvol->fops->mknod, - loc, mode, rdev); + if (!dht_is_subvol_filled (this, subvol)) { + gf_log (this->name, GF_LOG_DEBUG, + "creating %s on %s", loc->path, subvol->name); + + STACK_WIND (frame, dht_newfile_cbk, + subvol, subvol->fops->mknod, + loc, mode, rdev); + } else { + /* Choose the minimum filled volume, and create the + files there */ + local = dht_local_init (frame); + if (!local) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + avail_subvol = dht_free_disk_available_subvol (this, subvol); + local->cached_subvol = avail_subvol; + local->mode = mode; + local->rdev = rdev; + + dht_linkfile_create (frame, dht_mknod_linkfile_create_cbk, + avail_subvol, subvol, loc); + } return 0; @@ -2625,7 +2677,6 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, dht_layout_t *layout = NULL; int ret = -1; - if (op_ret == -1) goto out; @@ -2659,17 +2710,57 @@ out: int +dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, + int32_t op_ret, int32_t op_errno, + inode_t *inode, struct stat *stbuf) +{ + dht_local_t *local = NULL; + xlator_t *cached_subvol = NULL; + + if (op_ret == -1) + goto err; + + local = frame->local; + cached_subvol = local->cached_subvol; + + STACK_WIND (frame, dht_create_cbk, + cached_subvol, cached_subvol->fops->create, + &local->loc, local->flags, local->mode, local->fd); + + return 0; + err: + DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + +int dht_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, mode_t mode, fd_t *fd) { - xlator_t *subvol = NULL; - int op_errno = -1; - + int op_errno = -1; + int ret = -1; + xlator_t *subvol = NULL; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + xlator_t *avail_subvol = NULL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (loc, err); + conf = this->private; + + dht_get_du_info (frame, this, loc); + + local = dht_local_init (frame); + if (!local) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + op_errno = ENOMEM; + goto err; + } + subvol = dht_subvol_get_hashed (this, loc); if (!subvol) { gf_log (this->name, GF_LOG_ERROR, @@ -2679,12 +2770,37 @@ dht_create (call_frame_t *frame, xlator_t *this, goto err; } - gf_log (this->name, GF_LOG_DEBUG, - "creating %s on %s", loc->path, subvol->name); - - STACK_WIND (frame, dht_create_cbk, - subvol, subvol->fops->create, - loc, flags, mode, fd); + if (!dht_is_subvol_filled (this, subvol)) { + gf_log (this->name, GF_LOG_DEBUG, + "creating %s on %s", loc->path, subvol->name); + STACK_WIND (frame, dht_create_cbk, + subvol, subvol->fops->create, + loc, flags, mode, fd); + } else { + /* Choose the minimum filled volume, and create the + files there */ + /* TODO */ + ret = loc_dup (loc, &local->loc); + if (ret == -1) { + op_errno = ENOMEM; + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + local->fd = fd_ref (fd); + local->flags = flags; + local->mode = mode; + avail_subvol = dht_free_disk_available_subvol (this, subvol); + + local->cached_subvol = avail_subvol; + local->hashed_subvol = subvol; + gf_log (this->name, GF_LOG_DEBUG, + "creating %s on %s (link at %s)", loc->path, + avail_subvol->name, subvol->name); + dht_linkfile_create (frame, dht_create_linkfile_create_cbk, + avail_subvol, subvol, loc); + + } return 0; @@ -2727,17 +2843,27 @@ dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, dht_local_t *local = NULL; int this_call_cnt = 0; int ret = -1; + int subvol_filled = 0; call_frame_t *prev = NULL; dht_layout_t *layout = NULL; + dht_conf_t *conf = NULL; + conf = this->private; local = frame->local; prev = cookie; layout = local->layout; + subvol_filled = dht_is_subvol_filled (this, prev->this); + LOCK (&frame->lock); { - ret = dht_layout_merge (this, layout, prev->this, - op_ret, op_errno, NULL); + if (subvol_filled && (op_ret != -1)) { + ret = dht_layout_merge (this, layout, prev->this, + -1, ENOTCONN, NULL); + } else { + ret = dht_layout_merge (this, layout, prev->this, + op_ret, op_errno, NULL); + } if (op_ret == -1) { local->op_errno = op_errno; @@ -2777,9 +2903,13 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie, conf = this->private; hashed_subvol = local->hashed_subvol; - ret = dht_layout_merge (this, layout, prev->this, - op_ret, op_errno, NULL); - + if (dht_is_subvol_filled (this, hashed_subvol)) + ret = dht_layout_merge (this, layout, prev->this, + -1, ENOTCONN, NULL); + else + ret = dht_layout_merge (this, layout, prev->this, + op_ret, op_errno, NULL); + if (op_ret == -1) { local->op_errno = op_errno; goto err; @@ -2830,6 +2960,8 @@ dht_mkdir (call_frame_t *frame, xlator_t *this, conf = this->private; + dht_get_du_info (frame, this, loc); + local = dht_local_init (frame); if (!local) { gf_log (this->name, GF_LOG_ERROR, diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 0eb57a196..b6959d86d 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -97,6 +97,13 @@ struct dht_local { }; typedef struct dht_local dht_local_t; +/* du - disk-usage */ +struct dht_du { + double avail_percent; + uint64_t avail_space; + uint32_t log; +}; +typedef struct dht_du dht_du_t; struct dht_conf { gf_lock_t subvolume_lock; @@ -109,6 +116,10 @@ struct dht_conf { dht_layout_t *default_dir_layout; gf_boolean_t search_unhashed; int gen; + dht_du_t *du_stats; + uint32_t min_free_disk; + int32_t refresh_interval; + struct timeval last_stat_fetch; }; typedef struct dht_conf dht_conf_t; @@ -218,4 +229,10 @@ dht_layout_sort_volname (dht_layout_t *layout); int dht_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc); + +int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc); + +int dht_is_subvol_filled (xlator_t *this, xlator_t *subvol); +xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol); + #endif /* _DHT_H */ diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c new file mode 100644 index 000000000..330e93699 --- /dev/null +++ b/xlators/cluster/dht/src/dht-diskusage.c @@ -0,0 +1,187 @@ +/* + Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com> + This file is part of GlusterFS. + + GlusterFS is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published + by the Free Software Foundation; either version 3 of the License, + or (at your option) any later version. + + GlusterFS is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + <http://www.gnu.org/licenses/>. +*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +/* TODO: add NS locking */ + +#include "glusterfs.h" +#include "xlator.h" +#include "dht-common.h" +#include "defaults.h" + +#include <sys/time.h> + + +int +dht_du_info_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct statvfs *statvfs) +{ + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + int this_call_cnt = 0; + int i = 0; + double percent = 0; + + local = frame->local; + conf = this->private; + prev = cookie; + + if (op_ret == -1) + goto out; + + percent = (statvfs->f_bfree * 100) / statvfs->f_blocks; + + LOCK (&conf->subvolume_lock); + { + for (i = 0; i < conf->subvolume_cnt; i++) + if (prev->this == conf->subvolumes[i]) + conf->du_stats[i].avail_percent = percent; + } + UNLOCK (&conf->subvolume_lock); + + out: + this_call_cnt = dht_frame_return (frame); + if (is_last_call (this_call_cnt)) + DHT_STACK_DESTROY (frame); + + return 0; +} + +int +dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc) +{ + int i = 0; + dht_conf_t *conf = NULL; + call_frame_t *statfs_frame = NULL; + dht_local_t *statfs_local = NULL; + struct timeval tv = {0,}; + + conf = this->private; + + gettimeofday (&tv, NULL); + if (tv.tv_sec > (conf->refresh_interval + + conf->last_stat_fetch.tv_sec)) { + + statfs_frame = copy_frame (frame); + if (!statfs_frame) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + statfs_local = dht_local_init (statfs_frame); + if (!statfs_local) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + + loc_copy (&statfs_local->loc, loc); + loc_t tmp_loc = { .inode = NULL, + .path = "/", + }; + + for (i = 0; i < conf->subvolume_cnt; i++) { + STACK_WIND (statfs_frame, dht_du_info_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->statfs, + &tmp_loc); + } + + conf->last_stat_fetch.tv_sec = tv.tv_sec; + } + return 0; + err: + if (statfs_frame) + DHT_STACK_DESTROY (statfs_frame); + + return -1; +} + + +int +dht_is_subvol_filled (xlator_t *this, xlator_t *subvol) +{ + int i = 0; + int subvol_filled = 0; + dht_conf_t *conf = NULL; + + conf = this->private; + + /* Check for values above 90% free disk */ + LOCK (&conf->subvolume_lock); + { + for (i = 0; i < conf->subvolume_cnt; i++) { + if ((subvol == conf->subvolumes[i]) && + (conf->du_stats[i].avail_percent < + conf->min_free_disk)) { + subvol_filled = 1; + break; + } + } + } + UNLOCK (&conf->subvolume_lock); + + if (subvol_filled) { + if (!(conf->du_stats[i].log++ % GF_UNIVERSAL_ANSWER)) { + gf_log (this->name, GF_LOG_CRITICAL, + "disk space on subvolume '%s' is getting " + "full(%f), consider adding more nodes", + subvol->name, conf->du_stats[i].avail_percent); + } + } + + return subvol_filled; +} + +xlator_t * +dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol) +{ + int i = 0; + double max_avail = 0; + xlator_t *avail_subvol = NULL; + dht_conf_t *conf = NULL; + + conf = this->private; + avail_subvol = subvol; + + LOCK (&conf->subvolume_lock); + { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (conf->du_stats[i].avail_percent > max_avail) { + max_avail = conf->du_stats[i].avail_percent; + avail_subvol = conf->subvolumes[i]; + } + } + } + UNLOCK (&conf->subvolume_lock); + + if (avail_subvol == subvol) { + gf_log (this->name, GF_LOG_CRITICAL, + "no node has enough free space :O"); + } + + return avail_subvol; +} diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index 3be631293..ddf95832f 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -110,6 +110,15 @@ init (xlator_t *this) gf_string2boolean (lookup_unhashed_str, &conf->search_unhashed); } + + conf->min_free_disk = 10; + + if (dict_get_str (this->options, "min-free-disk", + &lookup_unhashed_str) == 0) { + gf_string2percent (lookup_unhashed_str, + &conf->min_free_disk); + } + ret = dht_init_subvolumes (this, conf); if (ret == -1) { @@ -121,6 +130,13 @@ init (xlator_t *this) goto err; } + conf->du_stats = CALLOC (conf->subvolume_cnt, sizeof (dht_du_t)); + if (!conf->du_stats) { + gf_log (this->name, GF_LOG_ERROR, + "memory allocation failed :("); + goto err; + } + LOCK_INIT (&conf->subvolume_lock); conf->gen = 1; @@ -147,6 +163,9 @@ err: if (conf->subvolume_status) FREE (conf->subvolume_status); + if (conf->du_stats) + FREE (conf->du_stats); + FREE (conf); } @@ -218,5 +237,8 @@ struct volume_options options[] = { { .key = {"lookup-unhashed"}, .type = GF_OPTION_TYPE_BOOL }, + { .key = {"min-free-disk"}, + .type = GF_OPTION_TYPE_PERCENT + }, { .key = {NULL} }, }; |