diff options
Diffstat (limited to 'xlators/cluster/dht/src')
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 41 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 6 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-diskusage.c | 53 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-inode-read.c | 10 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-inode-write.c | 13 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-rebalance.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-shared.c | 32 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/nufa.c | 10 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/switch.c | 10 |
9 files changed, 140 insertions, 37 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index a9714b02b79..a97d03bb055 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -5559,6 +5559,7 @@ dht_mknod_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this, { dht_local_t *local = NULL; xlator_t *avail_subvol = NULL; + int op_errno = 0; local = frame->local; @@ -5571,9 +5572,15 @@ dht_mknod_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this, subvol, subvol->fops->mknod, loc, mode, rdev, umask, params); } else { - avail_subvol = dht_free_disk_available_subvol (this, subvol, local); - - if (avail_subvol != subvol) { + /* This will return NULL if all subvolumes are full + * and/or no subvolume needs the min_free_disk limit + */ + avail_subvol = dht_free_disk_available_subvol (this, subvol, + local); + if (!avail_subvol) { + op_errno = ENOSPC; + goto err; + } else if (avail_subvol != subvol) { local->params = dict_ref (params); local->rdev = rdev; local->mode = mode; @@ -5603,6 +5610,8 @@ dht_mknod_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this, } out: return 0; +err: + return op_errno; } int32_t @@ -6242,8 +6251,12 @@ dht_mknod (call_frame_t *frame, xlator_t *this, } } - dht_mknod_wind_to_avail_subvol (frame, this, subvol, loc, rdev, mode, - umask, params); + op_errno = dht_mknod_wind_to_avail_subvol (frame, this, subvol, loc, + rdev, mode, umask, + params); + if (op_errno != 0) { + goto err; + } done: return 0; @@ -6738,6 +6751,7 @@ dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this, { dht_local_t *local = NULL; xlator_t *avail_subvol = NULL; + int op_errno = 0; local = frame->local; @@ -6752,8 +6766,10 @@ dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this, } else { avail_subvol = dht_free_disk_available_subvol (this, subvol, local); - - if (avail_subvol != subvol) { + if (!avail_subvol) { + op_errno = ENOSPC; + goto err; + } else if (avail_subvol != subvol) { local->params = dict_ref (params); local->flags = flags; local->mode = mode; @@ -6780,6 +6796,10 @@ dht_create_wind_to_avail_subvol (call_frame_t *frame, xlator_t *this, } out: return 0; +err: + DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, + NULL, NULL, NULL); + return op_errno; } int @@ -6882,9 +6902,10 @@ dht_create_do (call_frame_t *frame) goto err; } - dht_create_wind_to_avail_subvol (frame, this, subvol, &local->loc, - local->flags, local->mode, - local->umask, local->fd, local->params); + dht_create_wind_to_avail_subvol (frame, this, subvol, + &local->loc, local->flags, + local->mode, local->umask, + local->fd, local->params); return 0; err: local->refresh_layout_unlock (frame, this, -1, 1); diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 9e9ca712417..613a9d39816 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -300,6 +300,7 @@ struct dht_du { uint64_t avail_space; uint32_t log; uint32_t chunks; + gf_boolean_t is_full; }; typedef struct dht_du dht_du_t; @@ -484,6 +485,7 @@ struct dht_conf { dht_du_t *du_stats; double min_free_disk; double min_free_inodes; + gf_boolean_t min_free_strict_mode; char disk_unit; int32_t refresh_interval; gf_boolean_t unhashed_sticky_bit; @@ -549,6 +551,10 @@ struct dht_conf { gf_boolean_t lock_migration_enabled; gf_lock_t lock; + + /* du stats */ + uint32_t du_refresh_interval_sec; + gf_lock_t du_refresh_lock; }; typedef struct dht_conf dht_conf_t; diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c index 1eb9e63c531..1b20dabc61f 100644 --- a/xlators/cluster/dht/src/dht-diskusage.c +++ b/xlators/cluster/dht/src/dht-diskusage.c @@ -153,19 +153,25 @@ dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc) call_frame_t *statfs_frame = NULL; dht_local_t *statfs_local = NULL; struct timeval tv = {0,}; + struct timeval cmp_tv = {0,}; loc_t tmp_loc = {0,}; conf = this->private; + /* Somebody else is already refreshing the statfs info */ + if (TRY_LOCK (&conf->du_refresh_lock) != 0) + return 0; + gettimeofday (&tv, NULL); + cmp_tv = conf->last_stat_fetch; + cmp_tv.tv_sec += conf->du_refresh_interval_sec; + /* make it root gfid, should be enough to get the proper info back */ tmp_loc.gfid[15] = 1; - if (tv.tv_sec > (conf->refresh_interval - + conf->last_stat_fetch.tv_sec)) { - + if (timercmp (&tv, &cmp_tv, >)) { statfs_frame = copy_frame (frame); if (!statfs_frame) { goto err; @@ -200,14 +206,18 @@ dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc) &tmp_loc, statfs_local->params); } - conf->last_stat_fetch.tv_sec = tv.tv_sec; + conf->last_stat_fetch = tv; } - return 0; + ret = 0; + goto out; err: if (statfs_frame) DHT_STACK_DESTROY (statfs_frame); - return -1; + ret = -1; +out: + UNLOCK (&conf->du_refresh_lock); + return ret; } @@ -223,8 +233,13 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol) conf = this->private; /* Check for values above specified percent or free disk */ - LOCK (&conf->subvolume_lock); - { + if (TRY_LOCK (&conf->subvolume_lock) != 0) { + for (i = 0; i < conf->subvolume_cnt; i++) { + if (subvol == conf->subvolumes[i]) { + return conf->du_stats[i].is_full; + } + } + } else { for (i = 0; i < conf->subvolume_cnt; i++) { if (subvol == conf->subvolumes[i]) { if (conf->disk_unit == 'p') { @@ -248,7 +263,15 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol) } } } - } + + /* i will be less than subvolume_cnt if either of + * these booleans are true */ + is_subvol_filled = ( + subvol_filled_space || subvol_filled_inodes); + if (is_subvol_filled) { + conf->du_stats[i].is_full = is_subvol_filled; + } + } UNLOCK (&conf->subvolume_lock); if (subvol_filled_space && conf->subvolume_status[i]) { @@ -273,8 +296,6 @@ dht_is_subvol_filled (xlator_t *this, xlator_t *subvol) } } - is_subvol_filled = (subvol_filled_space || subvol_filled_inodes); - return is_subvol_filled; } @@ -309,15 +330,8 @@ dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol, LOCK (&conf->subvolume_lock); { - avail_subvol = dht_subvol_with_free_space_inodes(this, subvol, + avail_subvol = dht_subvol_maxspace_nonzeroinode(this, subvol, layout); - if(!avail_subvol) - { - avail_subvol = dht_subvol_maxspace_nonzeroinode(this, - subvol, - layout); - } - } UNLOCK (&conf->subvolume_lock); out: @@ -325,7 +339,6 @@ out: gf_msg_debug (this->name, 0, "No subvolume has enough free space \ and/or inodes to create"); - avail_subvol = subvol; } if (layout) diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c index 8abf0d59b88..ac0f0e186fa 100644 --- a/xlators/cluster/dht/src/dht-inode-read.c +++ b/xlators/cluster/dht/src/dht-inode-read.c @@ -104,10 +104,15 @@ dht_open (call_frame_t *frame, xlator_t *this, xlator_t *subvol = NULL; int op_errno = -1; dht_local_t *local = NULL; + dht_conf_t *conf = NULL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (fd, err); + conf = this->private; + + if (conf->min_free_strict_mode == _gf_true) + dht_get_du_info (frame, this, loc); local = dht_local_init (frame, loc, fd, GF_FOP_OPEN); if (!local) { @@ -121,6 +126,11 @@ dht_open (call_frame_t *frame, xlator_t *this, "no cached subvolume for fd=%p", fd); op_errno = EINVAL; goto err; + } else if (conf->min_free_strict_mode == _gf_true && + dht_is_subvol_filled (this, subvol) == _gf_true && + flags & O_APPEND) { + op_errno = ENOSPC; + goto err; } local->rebalance.flags = flags; diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c index 112685b659e..7420461da76 100644 --- a/xlators/cluster/dht/src/dht-inode-write.c +++ b/xlators/cluster/dht/src/dht-inode-write.c @@ -161,11 +161,16 @@ dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, xlator_t *subvol = NULL; int op_errno = -1; dht_local_t *local = NULL; + loc_t *nil_loc = {0,}; + dht_conf_t *conf = NULL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); VALIDATE_OR_GOTO (fd, err); + conf = this->private; + + local = dht_local_init (frame, NULL, fd, GF_FOP_WRITE); if (!local) { @@ -173,15 +178,21 @@ dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, goto err; } + if (conf->min_free_strict_mode == _gf_true) + dht_get_du_info (frame, this, nil_loc); + subvol = local->cached_subvol; if (!subvol) { gf_msg_debug (this->name, 0, "no cached subvolume for fd=%p", fd); op_errno = EINVAL; goto err; + } else if (conf->min_free_strict_mode == _gf_true && + dht_is_subvol_filled (this, subvol) == _gf_true) { + op_errno = ENOSPC; + goto err; } - local->rebalance.vector = iov_dup (vector, count); local->rebalance.offset = off; local->rebalance.count = count; diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c index 127996ecf61..ebc8a9c2492 100644 --- a/xlators/cluster/dht/src/dht-rebalance.c +++ b/xlators/cluster/dht/src/dht-rebalance.c @@ -20,7 +20,7 @@ #define GF_DISK_SECTOR_SIZE 512 #define DHT_REBALANCE_PID 4242 /* Change it if required */ -#define DHT_REBALANCE_BLKSIZE (128 * 1024) +#define DHT_REBALANCE_BLKSIZE (1024 * 1024) /* 1 MB */ #define MAX_MIGRATE_QUEUE_COUNT 500 #define MIN_MIGRATE_QUEUE_COUNT 200 diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c index 5c810f0dc77..ccbf66b626d 100644 --- a/xlators/cluster/dht/src/dht-shared.c +++ b/xlators/cluster/dht/src/dht-shared.c @@ -443,6 +443,8 @@ dht_reconfigure (xlator_t *this, dict_t *options) conf->disk_unit = 0; if (conf->min_free_disk < 100.0) conf->disk_unit = 'p'; + GF_OPTION_RECONF ("min-free-strict-mode", conf->min_free_strict_mode, + options, bool, out); GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options, percent, out); @@ -499,6 +501,9 @@ dht_reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("use-readdirp", conf->use_readdirp, options, bool, out); + + GF_OPTION_RECONF ("du-refresh-interval-sec", + conf->du_refresh_interval_sec, options, uint32, out); ret = 0; out: return ret; @@ -720,7 +725,10 @@ dht_init (xlator_t *this) GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err); GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size, - err); + err); + + GF_OPTION_INIT ("min-free-strict-mode", conf->min_free_strict_mode, + bool, err); GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent, err); @@ -738,6 +746,11 @@ dht_init (xlator_t *this) GF_OPTION_INIT ("lock-migration", conf->lock_migration_enabled, bool, err); + GF_OPTION_INIT ("du-refresh-interval-sec", + conf->du_refresh_interval_sec, uint32, err); + + LOCK_INIT (&conf->du_refresh_lock); + if (defrag) { defrag->lock_migration_enabled = conf->lock_migration_enabled; @@ -907,6 +920,14 @@ struct volume_options options[] = { "process starts balancing out the cluster, and logs will appear " "in log files", }, + { .key = {"min-free-strict-mode"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "When enabled, will reject in-flight writes or " + "append operations to files when the target subvolume falls " + "below min-free-(disk|inodes). When disabled, these are allowed " + "through and only new files will be affected.", + }, { .key = {"min-free-inodes"}, .type = GF_OPTION_TYPE_PERCENT, .default_value = "5%", @@ -1089,5 +1110,14 @@ struct volume_options options[] = { " associated with a file during rebalance" }, + { .key = {"du-refresh-interval-sec"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .default_value = "60", + .validate = GF_OPT_VALIDATE_MIN, + .description = "Specifies how many seconds before subvolume statfs " + "info is re-validated." + }, + { .key = {NULL} }, }; diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c index 56e17d6e884..996faffa37f 100644 --- a/xlators/cluster/dht/src/nufa.c +++ b/xlators/cluster/dht/src/nufa.c @@ -325,7 +325,10 @@ nufa_create (call_frame_t *frame, xlator_t *this, local); } - if (subvol != avail_subvol) { + if (!avail_subvol) { + op_errno = ENOSPC; + goto err; + } else if (subvol != avail_subvol) { /* create a link file instead of actual file */ local->params = dict_ref (params); local->mode = mode; @@ -430,7 +433,10 @@ nufa_mknod (call_frame_t *frame, xlator_t *this, local); } - if (avail_subvol != subvol) { + if (!avail_subvol) { + op_errno = ENOSPC; + goto err; + } else if (avail_subvol != subvol) { /* Create linkfile first */ local->params = dict_ref (params); diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c index f1e9a399442..8b14ac99b8f 100644 --- a/xlators/cluster/dht/src/switch.c +++ b/xlators/cluster/dht/src/switch.c @@ -440,7 +440,10 @@ switch_create (call_frame_t *frame, xlator_t *this, local); } - if (subvol != avail_subvol) { + if (!avail_subvol) { + op_errno = ENOSPC; + goto err; + } else if (subvol != avail_subvol) { /* create a link file instead of actual file */ local->mode = mode; local->flags = flags; @@ -540,7 +543,10 @@ switch_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, local); } - if (avail_subvol != subvol) { + if (!avail_subvol) { + op_errno = ENOSPC; + goto err; + } else if (avail_subvol != subvol) { /* Create linkfile first */ local->params = dict_ref (params); |
