summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/dht
diff options
context:
space:
mode:
authorAmar Tumballi <amar@gluster.com>2009-04-03 08:27:44 -0700
committerAnand V. Avati <avati@amp.gluster.com>2009-04-03 22:02:25 +0530
commit8a5005ecf06f23f1607c0ff4111a21c00bd74205 (patch)
treed918103b0efafab231c2f8cff65e38e29c9e9c40 /xlators/cluster/dht
parentf235826215205ac626abb0ad475e7a89f7003da5 (diff)
distribute: take available disk space into account when creating new directories and files.
distribute becomes aware of disk space when creating files. Signed-off-by: Anand V. Avati <avati@amp.gluster.com>
Diffstat (limited to 'xlators/cluster/dht')
-rw-r--r--xlators/cluster/dht/src/Makefile.am2
-rw-r--r--xlators/cluster/dht/src/dht-common.c182
-rw-r--r--xlators/cluster/dht/src/dht-common.h17
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c187
-rw-r--r--xlators/cluster/dht/src/dht.c22
5 files changed, 384 insertions, 26 deletions
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am
index d4e0752a5..f87212699 100644
--- a/xlators/cluster/dht/src/Makefile.am
+++ b/xlators/cluster/dht/src/Makefile.am
@@ -4,7 +4,7 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c \
- dht-selfheal.c dht-rename.c dht-hashfn.c
+ dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c
dht_la_SOURCES = $(dht_common_source) dht.c
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index 41daac677..b163f48d7 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -30,6 +30,7 @@
#include "dht-common.h"
#include "defaults.h"
+#include <sys/time.h>
/* TODO:
- use volumename in xattr instead of "dht"
@@ -38,6 +39,7 @@
- complete linkfile selfheal
*/
+
int
dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
xlator_t *this,
@@ -52,7 +54,8 @@ dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
if (ret == 0) {
layout = local->selfheal.layout;
- ret = inode_ctx_put (local->inode, this, (uint64_t)(long)layout);
+ ret = inode_ctx_put (local->inode, this,
+ (uint64_t)(long)layout);
if (ret == 0)
local->selfheal.layout = NULL;
@@ -2342,19 +2345,49 @@ out:
return 0;
}
+/*
+ * Callback invoked once the linkfile for a redirected mknod has been
+ * created.  If the linkfile creation failed the error is unwound to the
+ * caller; otherwise the real mknod is wound to the subvolume stored in
+ * local->cached_subvol, using the loc/mode/rdev saved in the frame's local.
+ * Always returns 0.
+ */
+int
+dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie,
+                               xlator_t *this,
+                               int32_t op_ret, int32_t op_errno,
+                               inode_t *inode, struct stat *stbuf)
+{
+        dht_local_t *local         = NULL;
+        xlator_t    *cached_subvol = NULL;
+
+        /* Linkfile creation failed: report the error and stop here. */
+        if (op_ret == -1) {
+                DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
+                return 0;
+        }
+
+        local         = frame->local;
+        cached_subvol = local->cached_subvol;
+
+        /* The linkfile is in place; now create the node itself. */
+        STACK_WIND (frame, dht_newfile_cbk,
+                    cached_subvol, cached_subvol->fops->mknod,
+                    &local->loc, local->mode, local->rdev);
+
+        return 0;
+}
int
dht_mknod (call_frame_t *frame, xlator_t *this,
loc_t *loc, mode_t mode, dev_t rdev)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
-
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
+ conf = this->private;
+
+ dht_get_du_info (frame, this, loc);
+
subvol = dht_subvol_get_hashed (this, loc);
if (!subvol) {
gf_log (this->name, GF_LOG_ERROR,
@@ -2364,12 +2397,31 @@ dht_mknod (call_frame_t *frame, xlator_t *this,
goto err;
}
- gf_log (this->name, GF_LOG_DEBUG,
- "creating %s on %s", loc->path, subvol->name);
-
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev);
+ if (!dht_is_subvol_filled (this, subvol)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "creating %s on %s", loc->path, subvol->name);
+
+ STACK_WIND (frame, dht_newfile_cbk,
+ subvol, subvol->fops->mknod,
+ loc, mode, rdev);
+ } else {
+ /* Choose the minimum filled volume, and create the
+ files there */
+ local = dht_local_init (frame);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+ avail_subvol = dht_free_disk_available_subvol (this, subvol);
+ local->cached_subvol = avail_subvol;
+ local->mode = mode;
+ local->rdev = rdev;
+
+ dht_linkfile_create (frame, dht_mknod_linkfile_create_cbk,
+ avail_subvol, subvol, loc);
+ }
return 0;
@@ -2625,7 +2677,6 @@ dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dht_layout_t *layout = NULL;
int ret = -1;
-
if (op_ret == -1)
goto out;
@@ -2659,17 +2710,57 @@ out:
int
+dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
+                                xlator_t *this,
+                                int32_t op_ret, int32_t op_errno,
+                                inode_t *inode, struct stat *stbuf)
+{
+        /*
+         * Callback invoked once the linkfile for a redirected create has
+         * been made.  On failure the error is unwound to the caller; on
+         * success the real create is wound to local->cached_subvol with
+         * the loc/flags/mode/fd saved in the frame's local.
+         * Always returns 0.
+         */
+        dht_local_t *local         = NULL;
+        xlator_t    *cached_subvol = NULL;
+
+        /* Linkfile creation failed: report the error and stop here. */
+        if (op_ret == -1) {
+                DHT_STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL);
+                return 0;
+        }
+
+        local         = frame->local;
+        cached_subvol = local->cached_subvol;
+
+        /* The linkfile is in place; now create the file itself. */
+        STACK_WIND (frame, dht_create_cbk,
+                    cached_subvol, cached_subvol->fops->create,
+                    &local->loc, local->flags, local->mode, local->fd);
+
+        return 0;
+}
+
+int
dht_create (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, mode_t mode, fd_t *fd)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
-
+ int op_errno = -1;
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *avail_subvol = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (loc, err);
+ conf = this->private;
+
+ dht_get_du_info (frame, this, loc);
+
+ local = dht_local_init (frame);
+ if (!local) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
subvol = dht_subvol_get_hashed (this, loc);
if (!subvol) {
gf_log (this->name, GF_LOG_ERROR,
@@ -2679,12 +2770,37 @@ dht_create (call_frame_t *frame, xlator_t *this,
goto err;
}
- gf_log (this->name, GF_LOG_DEBUG,
- "creating %s on %s", loc->path, subvol->name);
-
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- loc, flags, mode, fd);
+ if (!dht_is_subvol_filled (this, subvol)) {
+ gf_log (this->name, GF_LOG_DEBUG,
+ "creating %s on %s", loc->path, subvol->name);
+ STACK_WIND (frame, dht_create_cbk,
+ subvol, subvol->fops->create,
+ loc, flags, mode, fd);
+ } else {
+ /* Choose the minimum filled volume, and create the
+ files there */
+ /* TODO */
+ ret = loc_dup (loc, &local->loc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+ local->fd = fd_ref (fd);
+ local->flags = flags;
+ local->mode = mode;
+ avail_subvol = dht_free_disk_available_subvol (this, subvol);
+
+ local->cached_subvol = avail_subvol;
+ local->hashed_subvol = subvol;
+ gf_log (this->name, GF_LOG_DEBUG,
+ "creating %s on %s (link at %s)", loc->path,
+ avail_subvol->name, subvol->name);
+ dht_linkfile_create (frame, dht_create_linkfile_create_cbk,
+ avail_subvol, subvol, loc);
+
+ }
return 0;
@@ -2727,17 +2843,27 @@ dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dht_local_t *local = NULL;
int this_call_cnt = 0;
int ret = -1;
+ int subvol_filled = 0;
call_frame_t *prev = NULL;
dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ conf = this->private;
local = frame->local;
prev = cookie;
layout = local->layout;
+ subvol_filled = dht_is_subvol_filled (this, prev->this);
+
LOCK (&frame->lock);
{
- ret = dht_layout_merge (this, layout, prev->this,
- op_ret, op_errno, NULL);
+ if (subvol_filled && (op_ret != -1)) {
+ ret = dht_layout_merge (this, layout, prev->this,
+ -1, ENOTCONN, NULL);
+ } else {
+ ret = dht_layout_merge (this, layout, prev->this,
+ op_ret, op_errno, NULL);
+ }
if (op_ret == -1) {
local->op_errno = op_errno;
@@ -2777,9 +2903,13 @@ dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
conf = this->private;
hashed_subvol = local->hashed_subvol;
- ret = dht_layout_merge (this, layout, prev->this,
- op_ret, op_errno, NULL);
-
+ if (dht_is_subvol_filled (this, hashed_subvol))
+ ret = dht_layout_merge (this, layout, prev->this,
+ -1, ENOTCONN, NULL);
+ else
+ ret = dht_layout_merge (this, layout, prev->this,
+ op_ret, op_errno, NULL);
+
if (op_ret == -1) {
local->op_errno = op_errno;
goto err;
@@ -2830,6 +2960,8 @@ dht_mkdir (call_frame_t *frame, xlator_t *this,
conf = this->private;
+ dht_get_du_info (frame, this, loc);
+
local = dht_local_init (frame);
if (!local) {
gf_log (this->name, GF_LOG_ERROR,
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 0eb57a196..b6959d86d 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -97,6 +97,13 @@ struct dht_local {
};
typedef struct dht_local dht_local_t;
+/* du - disk-usage: per-subvolume statistics, stored as the dht_conf.du_stats array */
+struct dht_du {
+ double avail_percent; /* free blocks as a percentage of total blocks, taken from the last statfs reply */
+ uint64_t avail_space; /* NOTE(review): never assigned in this change; presumably available bytes -- confirm */
+ uint32_t log; /* bumped each time the subvolume is found filled; used to throttle the warning log */
+};
+typedef struct dht_du dht_du_t;
struct dht_conf {
gf_lock_t subvolume_lock;
@@ -109,6 +116,10 @@ struct dht_conf {
dht_layout_t *default_dir_layout;
gf_boolean_t search_unhashed;
int gen;
+ dht_du_t *du_stats;
+ uint32_t min_free_disk;
+ int32_t refresh_interval;
+ struct timeval last_stat_fetch;
};
typedef struct dht_conf dht_conf_t;
@@ -218,4 +229,10 @@ dht_layout_sort_volname (dht_layout_t *layout);
int dht_rename (call_frame_t *frame, xlator_t *this,
loc_t *oldloc, loc_t *newloc);
+
+int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc);
+
+int dht_is_subvol_filled (xlator_t *this, xlator_t *subvol);
+xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol);
+
#endif /* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
new file mode 100644
index 000000000..330e93699
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -0,0 +1,187 @@
+/*
+ Copyright (c) 2009 Z RESEARCH, Inc. <http://www.zresearch.com>
+ This file is part of GlusterFS.
+
+ GlusterFS is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3 of the License,
+ or (at your option) any later version.
+
+ GlusterFS is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see
+ <http://www.gnu.org/licenses/>.
+*/
+
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+/* TODO: add NS locking */
+
+#include "glusterfs.h"
+#include "xlator.h"
+#include "dht-common.h"
+#include "defaults.h"
+
+#include <sys/time.h>
+
+
+/*
+ * statfs callback for the background disk-usage refresh.
+ *
+ * On a successful reply, converts the subvolume's free/total block counts
+ * into a percentage and stores it in conf->du_stats[] at the index of the
+ * subvolume that answered (prev->this).  Failed replies, and replies that
+ * report zero total blocks, leave the previously stored value untouched.
+ *
+ * The frame is destroyed once dht_frame_return()/is_last_call() report
+ * that the last outstanding reply has arrived.  Always returns 0.
+ */
+int
+dht_du_info_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+                 int op_ret, int op_errno, struct statvfs *statvfs)
+{
+        dht_conf_t   *conf          = NULL;
+        call_frame_t *prev          = NULL;
+        int           this_call_cnt = 0;
+        int           i             = 0;
+        double        percent       = 0;
+
+        conf = this->private;
+        prev = cookie;
+
+        if (op_ret == -1)
+                goto out;
+
+        /* Without a usable total block count no percentage can be
+           computed; dividing by f_blocks == 0 would fault. */
+        if (!statvfs || statvfs->f_blocks == 0)
+                goto out;
+
+        /* Work in floating point: the integer form truncated the result
+           and f_bfree * 100 could overflow a narrow fsblkcnt_t. */
+        percent = ((double) statvfs->f_bfree * 100.0) /
+                  (double) statvfs->f_blocks;
+
+        LOCK (&conf->subvolume_lock);
+        {
+                for (i = 0; i < conf->subvolume_cnt; i++) {
+                        if (prev->this == conf->subvolumes[i])
+                                conf->du_stats[i].avail_percent = percent;
+                }
+        }
+        UNLOCK (&conf->subvolume_lock);
+
+ out:
+        this_call_cnt = dht_frame_return (frame);
+        if (is_last_call (this_call_cnt))
+                DHT_STACK_DESTROY (frame);
+
+        return 0;
+}
+
+/*
+ * Refresh the cached per-subvolume disk-usage figures if they are stale.
+ *
+ * When more than conf->refresh_interval seconds have passed since
+ * conf->last_stat_fetch, a copy of the caller's frame is used to wind a
+ * statfs on "/" to every subvolume; the replies are handled by
+ * dht_du_info_cbk().  The caller's own frame is not consumed.
+ *
+ * Returns 0 when no refresh was needed or the statfs calls were wound,
+ * -1 when the background frame or its local could not be allocated.
+ */
+int
+dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+        int             i            = 0;
+        dht_conf_t     *conf         = NULL;
+        call_frame_t   *statfs_frame = NULL;
+        dht_local_t    *statfs_local = NULL;
+        struct timeval  tv           = {0,};
+        loc_t           tmp_loc      = { .inode = NULL,
+                                         .path = "/",
+                                       };
+
+        conf = this->private;
+
+        gettimeofday (&tv, NULL);
+        if (!(tv.tv_sec > (conf->refresh_interval
+                           + conf->last_stat_fetch.tv_sec)))
+                return 0;
+
+        statfs_frame = copy_frame (frame);
+        if (!statfs_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        statfs_local = dht_local_init (statfs_frame);
+        if (!statfs_local) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "memory allocation failed :(");
+                goto err;
+        }
+
+        loc_copy (&statfs_local->loc, loc);
+
+        /* One reply is expected per subvolume.  Without this the callback
+           never sees the "last call" and the copied frame is never
+           destroyed.
+           NOTE(review): assumes dht_frame_return() decrements
+           local->call_cnt, as the other fan-out fops rely on -- confirm. */
+        statfs_local->call_cnt = conf->subvolume_cnt;
+
+        for (i = 0; i < conf->subvolume_cnt; i++) {
+                STACK_WIND (statfs_frame, dht_du_info_cbk,
+                            conf->subvolumes[i],
+                            conf->subvolumes[i]->fops->statfs,
+                            &tmp_loc);
+        }
+
+        conf->last_stat_fetch.tv_sec = tv.tv_sec;
+
+        return 0;
+ err:
+        if (statfs_frame)
+                DHT_STACK_DESTROY (statfs_frame);
+
+        return -1;
+}
+
+
+/*
+ * Report whether `subvol` is considered full.
+ *
+ * A subvolume counts as filled when its cached available percentage
+ * (conf->du_stats[i].avail_percent) is below conf->min_free_disk.
+ * A critical message is logged on the first such hit and then once every
+ * GF_UNIVERSAL_ANSWER hits, tracked by the per-subvolume `log` counter.
+ *
+ * Returns 1 if the subvolume is filled, 0 otherwise (including when
+ * `subvol` is not one of this translator's subvolumes).
+ */
+int
+dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
+{
+        int         i             = 0;
+        int         subvol_filled = 0;
+        int         log_now       = 0;
+        double      avail_percent = 0;
+        dht_conf_t *conf          = NULL;
+
+        conf = this->private;
+
+        /* Filled means: less than min_free_disk percent still available. */
+        LOCK (&conf->subvolume_lock);
+        {
+                for (i = 0; i < conf->subvolume_cnt; i++) {
+                        if ((subvol == conf->subvolumes[i]) &&
+                            (conf->du_stats[i].avail_percent <
+                             conf->min_free_disk)) {
+                                subvol_filled = 1;
+                                /* Snapshot the value and bump the throttle
+                                   counter while still holding the lock, so
+                                   neither races with the statfs callback
+                                   or with other callers. */
+                                avail_percent =
+                                        conf->du_stats[i].avail_percent;
+                                log_now = !(conf->du_stats[i].log++
+                                            % GF_UNIVERSAL_ANSWER);
+                                break;
+                        }
+                }
+        }
+        UNLOCK (&conf->subvolume_lock);
+
+        if (log_now) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "disk space on subvolume '%s' is getting "
+                        "full(%f), consider adding more nodes",
+                        subvol->name, avail_percent);
+        }
+
+        return subvol_filled;
+}
+
+/*
+ * Pick the subvolume with the highest cached available percentage.
+ *
+ * Scans conf->du_stats[] under subvolume_lock and returns the subvolume
+ * whose avail_percent is the largest value greater than zero.  If no
+ * entry beats that, or the winner is `subvol` itself, `subvol` is
+ * returned and a critical message is logged.
+ */
+xlator_t *
+dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)
+{
+        dht_conf_t *conf      = this->private;
+        xlator_t   *best      = subvol;
+        double      best_free = 0;
+        int         idx       = 0;
+
+        LOCK (&conf->subvolume_lock);
+        {
+                while (idx < conf->subvolume_cnt) {
+                        double candidate = conf->du_stats[idx].avail_percent;
+
+                        if (candidate > best_free) {
+                                best_free = candidate;
+                                best      = conf->subvolumes[idx];
+                        }
+                        idx++;
+                }
+        }
+        UNLOCK (&conf->subvolume_lock);
+
+        /* Nothing better than the caller's own subvolume was found. */
+        if (best == subvol) {
+                gf_log (this->name, GF_LOG_CRITICAL,
+                        "no node has enough free space :O");
+        }
+
+        return best;
+}
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index 3be631293..ddf95832f 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -110,6 +110,15 @@ init (xlator_t *this)
gf_string2boolean (lookup_unhashed_str,
&conf->search_unhashed);
}
+
+ conf->min_free_disk = 10;
+
+ if (dict_get_str (this->options, "min-free-disk",
+ &lookup_unhashed_str) == 0) {
+ gf_string2percent (lookup_unhashed_str,
+ &conf->min_free_disk);
+ }
+
ret = dht_init_subvolumes (this, conf);
if (ret == -1) {
@@ -121,6 +130,13 @@ init (xlator_t *this)
goto err;
}
+ conf->du_stats = CALLOC (conf->subvolume_cnt, sizeof (dht_du_t));
+ if (!conf->du_stats) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "memory allocation failed :(");
+ goto err;
+ }
+
LOCK_INIT (&conf->subvolume_lock);
conf->gen = 1;
@@ -147,6 +163,9 @@ err:
if (conf->subvolume_status)
FREE (conf->subvolume_status);
+ if (conf->du_stats)
+ FREE (conf->du_stats);
+
FREE (conf);
}
@@ -218,5 +237,8 @@ struct volume_options options[] = {
{ .key = {"lookup-unhashed"},
.type = GF_OPTION_TYPE_BOOL
},
+ { .key = {"min-free-disk"},
+ .type = GF_OPTION_TYPE_PERCENT
+ },
{ .key = {NULL} },
};