summary | refs | log | tree | commit | diff | stats
path: root/xlators/cluster/dht/src
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/cluster/dht/src')
-rw-r--r-- xlators/cluster/dht/src/dht-common.c 41
-rw-r--r-- xlators/cluster/dht/src/dht-common.h 6
-rw-r--r-- xlators/cluster/dht/src/dht-diskusage.c 53
-rw-r--r-- xlators/cluster/dht/src/dht-inode-read.c 10
-rw-r--r-- xlators/cluster/dht/src/dht-inode-write.c 13
-rw-r--r-- xlators/cluster/dht/src/dht-rebalance.c 2
-rw-r--r-- xlators/cluster/dht/src/dht-shared.c 32
-rw-r--r-- xlators/cluster/dht/src/nufa.c 10
-rw-r--r-- xlators/cluster/dht/src/switch.c 10
9 files changed, 140 insertions, 37 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index a9714b02b79..a97d03bb055 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -5559,6 +5559,7 @@ dht_mknod_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
{
dht_local_t *local = NULL;
xlator_t *avail_subvol = NULL;
+ int op_errno = 0;
local = frame->local;
@@ -5571,9 +5572,15 @@ dht_mknod_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
subvol, subvol->fops->mknod, loc, mode,
rdev, umask, params);
} else {
- avail_subvol = dht_free_disk_available_subvol (this, subvol, local);
-
- if (avail_subvol != subvol) {
+ /* This will return NULL if all subvolumes are full
+ * and/or no subvolume meets the min_free_disk limit
+ */
+ avail_subvol = dht_free_disk_available_subvol (this, subvol,
+ local);
+ if (!avail_subvol) {
+ op_errno = ENOSPC;
+ goto err;
+ } else if (avail_subvol != subvol) {
local->params = dict_ref (params);
local->rdev = rdev;
local->mode = mode;
@@ -5603,6 +5610,8 @@ dht_mknod_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
}
out:
return 0;
+err:
+ return op_errno;
}
int32_t
@@ -6242,8 +6251,12 @@ dht_mknod (call_frame_t *frame, xlator_t *this,
}
}
- dht_mknod_wind_to_avail_subvol (frame, this, subvol, loc, rdev, mode,
- umask, params);
+ op_errno = dht_mknod_wind_to_avail_subvol (frame, this, subvol, loc,
+ rdev, mode, umask,
+ params);
+ if (op_errno != 0) {
+ goto err;
+ }
done:
return 0;
@@ -6738,6 +6751,7 @@ dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
{
dht_local_t *local = NULL;
xlator_t *avail_subvol = NULL;
+ int op_errno = 0;
local = frame->local;
@@ -6752,8 +6766,10 @@ dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
} else {
avail_subvol = dht_free_disk_available_subvol (this, subvol, local);
-
- if (avail_subvol != subvol) {
+ if (!avail_subvol) {
+ op_errno = ENOSPC;
+ goto err;
+ } else if (avail_subvol != subvol) {
local->params = dict_ref (params);
local->flags = flags;
local->mode = mode;
@@ -6780,6 +6796,10 @@ dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this,
}
out:
return 0;
+err:
+ DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return op_errno;
}
int
@@ -6882,9 +6902,10 @@ dht_create_do (call_frame_t *frame)
goto err;
}
- dht_create_wind_to_avail_subvol (frame, this, subvol, &local->loc,
- local->flags, local->mode,
- local->umask, local->fd, local->params);
+ dht_create_wind_to_avail_subvol (frame, this, subvol,
+ &local->loc, local->flags,
+ local->mode, local->umask,
+ local->fd, local->params);
return 0;
err:
local->refresh_layout_unlock (frame, this, -1, 1);
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 9e9ca712417..613a9d39816 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -300,6 +300,7 @@ struct dht_du {
uint64_t avail_space;
uint32_t log;
uint32_t chunks;
+ gf_boolean_t is_full;
};
typedef struct dht_du dht_du_t;
@@ -484,6 +485,7 @@ struct dht_conf {
dht_du_t *du_stats;
double min_free_disk;
double min_free_inodes;
+ gf_boolean_t min_free_strict_mode;
char disk_unit;
int32_t refresh_interval;
gf_boolean_t unhashed_sticky_bit;
@@ -549,6 +551,10 @@ struct dht_conf {
gf_boolean_t lock_migration_enabled;
gf_lock_t lock;
+
+ /* du stats */
+ uint32_t du_refresh_interval_sec;
+ gf_lock_t du_refresh_lock;
};
typedef struct dht_conf dht_conf_t;
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 1eb9e63c531..1b20dabc61f 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -153,19 +153,25 @@ dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
call_frame_t *statfs_frame = NULL;
dht_local_t *statfs_local = NULL;
struct timeval tv = {0,};
+ struct timeval cmp_tv = {0,};
loc_t tmp_loc = {0,};
conf = this->private;
+ /* Somebody else is already refreshing the statfs info */
+ if (TRY_LOCK (&conf->du_refresh_lock) != 0)
+ return 0;
+
gettimeofday (&tv, NULL);
+ cmp_tv = conf->last_stat_fetch;
+ cmp_tv.tv_sec += conf->du_refresh_interval_sec;
+
/* make it root gfid, should be enough to get the proper
info back */
tmp_loc.gfid[15] = 1;
- if (tv.tv_sec > (conf->refresh_interval
- + conf->last_stat_fetch.tv_sec)) {
-
+ if (timercmp (&tv, &cmp_tv, >)) {
statfs_frame = copy_frame (frame);
if (!statfs_frame) {
goto err;
@@ -200,14 +206,18 @@ dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
&tmp_loc, statfs_local->params);
}
- conf->last_stat_fetch.tv_sec = tv.tv_sec;
+ conf->last_stat_fetch = tv;
}
- return 0;
+ ret = 0;
+ goto out;
err:
if (statfs_frame)
DHT_STACK_DESTROY (statfs_frame);
- return -1;
+ ret = -1;
+out:
+ UNLOCK (&conf->du_refresh_lock);
+ return ret;
}
@@ -223,8 +233,13 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
conf = this->private;
/* Check for values above specified percent or free disk */
- LOCK (&conf->subvolume_lock);
- {
+ if (TRY_LOCK (&conf->subvolume_lock) != 0) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ return conf->du_stats[i].is_full;
+ }
+ }
+ } else {
for (i = 0; i < conf->subvolume_cnt; i++) {
if (subvol == conf->subvolumes[i]) {
if (conf->disk_unit == 'p') {
@@ -248,7 +263,15 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
}
}
}
- }
+
+ /* i will be less than subvolume_cnt if either of
+ * these booleans is true */
+ is_subvol_filled = (
+ subvol_filled_space || subvol_filled_inodes);
+ if (is_subvol_filled) {
+ conf->du_stats[i].is_full = is_subvol_filled;
+ }
+ }
UNLOCK (&conf->subvolume_lock);
if (subvol_filled_space && conf->subvolume_status[i]) {
@@ -273,8 +296,6 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
}
}
- is_subvol_filled = (subvol_filled_space || subvol_filled_inodes);
-
return is_subvol_filled;
}
@@ -309,15 +330,8 @@ dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol,
LOCK (&conf->subvolume_lock);
{
- avail_subvol = dht_subvol_with_free_space_inodes(this, subvol,
+ avail_subvol = dht_subvol_maxspace_nonzeroinode(this, subvol,
layout);
- if(!avail_subvol)
- {
- avail_subvol = dht_subvol_maxspace_nonzeroinode(this,
- subvol,
- layout);
- }
-
}
UNLOCK (&conf->subvolume_lock);
out:
@@ -325,7 +339,6 @@ out:
gf_msg_debug (this->name, 0,
"No subvolume has enough free space \
and/or inodes to create");
- avail_subvol = subvol;
}
if (layout)
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
index 8abf0d59b88..ac0f0e186fa 100644
--- a/xlators/cluster/dht/src/dht-inode-read.c
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -104,10 +104,15 @@ dht_open (call_frame_t *frame, xlator_t *this,
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ conf = this->private;
+
+ if (conf->min_free_strict_mode == _gf_true)
+ dht_get_du_info (frame, this, loc);
local = dht_local_init (frame, loc, fd, GF_FOP_OPEN);
if (!local) {
@@ -121,6 +126,11 @@ dht_open (call_frame_t *frame, xlator_t *this,
"no cached subvolume for fd=%p", fd);
op_errno = EINVAL;
goto err;
+ } else if (conf->min_free_strict_mode == _gf_true &&
+ dht_is_subvol_filled (this, subvol) == _gf_true &&
+ flags & O_APPEND) {
+ op_errno = ENOSPC;
+ goto err;
}
local->rebalance.flags = flags;
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
index 112685b659e..7420461da76 100644
--- a/xlators/cluster/dht/src/dht-inode-write.c
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -161,11 +161,16 @@ dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
xlator_t *subvol = NULL;
int op_errno = -1;
dht_local_t *local = NULL;
+ loc_t *nil_loc = {0,};
+ dht_conf_t *conf = NULL;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
VALIDATE_OR_GOTO (fd, err);
+ conf = this->private;
+
+
local = dht_local_init (frame, NULL, fd, GF_FOP_WRITE);
if (!local) {
@@ -173,15 +178,21 @@ dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
goto err;
}
+ if (conf->min_free_strict_mode == _gf_true)
+ dht_get_du_info (frame, this, nil_loc);
+
subvol = local->cached_subvol;
if (!subvol) {
gf_msg_debug (this->name, 0,
"no cached subvolume for fd=%p", fd);
op_errno = EINVAL;
goto err;
+ } else if (conf->min_free_strict_mode == _gf_true &&
+ dht_is_subvol_filled (this, subvol) == _gf_true) {
+ op_errno = ENOSPC;
+ goto err;
}
-
local->rebalance.vector = iov_dup (vector, count);
local->rebalance.offset = off;
local->rebalance.count = count;
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 127996ecf61..ebc8a9c2492 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -20,7 +20,7 @@
#define GF_DISK_SECTOR_SIZE 512
#define DHT_REBALANCE_PID 4242 /* Change it if required */
-#define DHT_REBALANCE_BLKSIZE (128 * 1024)
+#define DHT_REBALANCE_BLKSIZE (1024 * 1024) /* 1 MB */
#define MAX_MIGRATE_QUEUE_COUNT 500
#define MIN_MIGRATE_QUEUE_COUNT 200
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index 5c810f0dc77..ccbf66b626d 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -443,6 +443,8 @@ dht_reconfigure (xlator_t *this, dict_t *options)
conf->disk_unit = 0;
if (conf->min_free_disk < 100.0)
conf->disk_unit = 'p';
+ GF_OPTION_RECONF ("min-free-strict-mode", conf->min_free_strict_mode,
+ options, bool, out);
GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options,
percent, out);
@@ -499,6 +501,9 @@ dht_reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("use-readdirp", conf->use_readdirp, options,
bool, out);
+
+ GF_OPTION_RECONF ("du-refresh-interval-sec",
+ conf->du_refresh_interval_sec, options, uint32, out);
ret = 0;
out:
return ret;
@@ -720,7 +725,10 @@ dht_init (xlator_t *this)
GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err);
GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size,
- err);
+ err);
+
+ GF_OPTION_INIT ("min-free-strict-mode", conf->min_free_strict_mode,
+ bool, err);
GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent,
err);
@@ -738,6 +746,11 @@ dht_init (xlator_t *this)
GF_OPTION_INIT ("lock-migration", conf->lock_migration_enabled,
bool, err);
+ GF_OPTION_INIT ("du-refresh-interval-sec",
+ conf->du_refresh_interval_sec, uint32, err);
+
+ LOCK_INIT (&conf->du_refresh_lock);
+
if (defrag) {
defrag->lock_migration_enabled = conf->lock_migration_enabled;
@@ -907,6 +920,14 @@ struct volume_options options[] = {
"process starts balancing out the cluster, and logs will appear "
"in log files",
},
+ { .key = {"min-free-strict-mode"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "When enabled, will reject in-flight writes or "
+ "append operations to files when the target subvolume falls "
+ "below min-free-(disk|inodes). When disabled, these are allowed "
+ "through and only new files will be affected.",
+ },
{ .key = {"min-free-inodes"},
.type = GF_OPTION_TYPE_PERCENT,
.default_value = "5%",
@@ -1089,5 +1110,14 @@ struct volume_options options[] = {
" associated with a file during rebalance"
},
+ { .key = {"du-refresh-interval-sec"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .default_value = "60",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Specifies how many seconds before subvolume statfs "
+ "info is re-validated."
+ },
+
{ .key = {NULL} },
};
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
index 56e17d6e884..996faffa37f 100644
--- a/xlators/cluster/dht/src/nufa.c
+++ b/xlators/cluster/dht/src/nufa.c
@@ -325,7 +325,10 @@ nufa_create (call_frame_t *frame, xlator_t *this,
local);
}
- if (subvol != avail_subvol) {
+ if (!avail_subvol) {
+ op_errno = ENOSPC;
+ goto err;
+ } else if (subvol != avail_subvol) {
/* create a link file instead of actual file */
local->params = dict_ref (params);
local->mode = mode;
@@ -430,7 +433,10 @@ nufa_mknod (call_frame_t *frame, xlator_t *this,
local);
}
- if (avail_subvol != subvol) {
+ if (!avail_subvol) {
+ op_errno = ENOSPC;
+ goto err;
+ } else if (avail_subvol != subvol) {
/* Create linkfile first */
local->params = dict_ref (params);
diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c
index f1e9a399442..8b14ac99b8f 100644
--- a/xlators/cluster/dht/src/switch.c
+++ b/xlators/cluster/dht/src/switch.c
@@ -440,7 +440,10 @@ switch_create (call_frame_t *frame, xlator_t *this,
local);
}
- if (subvol != avail_subvol) {
+ if (!avail_subvol) {
+ op_errno = ENOSPC;
+ goto err;
+ } else if (subvol != avail_subvol) {
/* create a link file instead of actual file */
local->mode = mode;
local->flags = flags;
@@ -540,7 +543,10 @@ switch_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
local);
}
- if (avail_subvol != subvol) {
+ if (!avail_subvol) {
+ op_errno = ENOSPC;
+ goto err;
+ } else if (avail_subvol != subvol) {
/* Create linkfile first */
local->params = dict_ref (params);