summary | refs | log | tree | commit | diff | stats
path: root/xlators/cluster/dht
diff options
context:
space:
mode:
author: Susant Palai <spalai@redhat.com> 2017-10-24 18:35:20 +0530
committer: Susant Palai <spalai@redhat.com> 2017-11-23 15:16:16 +0530
commit: 7ac43caefd1a723042362f49b18f6ba0d1b86d0f (patch)
tree: 5ef052849d12bba3ccc125216a4ab6b4cfb35bcc /xlators/cluster/dht
parent: f1071f17e02502c24375c0b480d369d37f4e4054 (diff)
cluster/dht: make rebalance use truncate in case
.. the brick file system does not support fallocate.

> Change-Id: Id76cda2d8bb3b223b779e5e7a34f17c8bfa6283c
> BUG: 1488103
> Signed-off-by: Susant Palai <spalai@redhat.com>

Change-Id: Id76cda2d8bb3b223b779e5e7a34f17c8bfa6283c
BUG: 1516691
Signed-off-by: Susant Palai <spalai@redhat.com>
Diffstat (limited to 'xlators/cluster/dht')
-rw-r--r-- xlators/cluster/dht/src/dht-common.h 3
-rw-r--r-- xlators/cluster/dht/src/dht-rebalance.c 164
-rw-r--r-- xlators/cluster/dht/src/dht-shared.c 3
3 files changed, 99 insertions, 71 deletions
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index 60560600a02..06de2ef06f5 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -572,8 +572,6 @@ struct gf_defrag_info_ {
pthread_cond_t fc_wakeup_cond;
pthread_mutex_t fc_mutex;
-
-
};
typedef struct gf_defrag_info_ gf_defrag_info_t;
@@ -675,6 +673,7 @@ struct dht_conf {
/* Hard link handle requirement for migration triggered from client*/
synclock_t link_lock;
+ gf_boolean_t use_fallocate;
};
typedef struct dht_conf dht_conf_t;
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index ae367d75e27..72826e5ff32 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -820,18 +820,6 @@ __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from,
}
- /* TODO: Need to add a detailed comment about why we moved away from
- ftruncate.
-
- ret = syncop_ftruncate (to, fd, stbuf->ia_size, NULL, NULL);
- if (ret < 0) {
- *fop_errno = -ret;
- gf_msg (this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "ftruncate failed for %s on %s",
- loc->path, to->name);
- */
-
ret = syncop_fsetattr (to, fd, stbuf,
(GF_SET_ATTR_UID | GF_SET_ATTR_GID),
NULL, NULL, NULL, NULL);
@@ -843,18 +831,35 @@ __dht_rebalance_create_dst_file (xlator_t *this, xlator_t *to, xlator_t *from,
loc->path, to->name);
}
- /* Fallocate does not work for size 0, hence the check. Anyway we don't
- * need to care about min-free-disk for 0 byte size file */
+ /* No need to bother about 0 byte size files */
if (stbuf->ia_size > 0) {
- ret = syncop_fallocate (to, fd, 0, 0, stbuf->ia_size, NULL,
- NULL);
- if (ret < 0) {
- gf_msg (this->name, GF_LOG_ERROR, -ret,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "fallocate failed for %s on %s",
- loc->path, to->name);
- ret = -1;
- goto out;
+ if (conf->use_fallocate) {
+ ret = syncop_fallocate (to, fd, 0, 0, stbuf->ia_size,
+ NULL, NULL);
+ if (ret < 0) {
+ if (ret == -EOPNOTSUPP || ret == -EINVAL ||
+ ret == -ENOSYS) {
+ conf->use_fallocate = _gf_false;
+ } else {
+ gf_msg (this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "fallocate failed for %s on %s",
+ loc->path, to->name);
+ *fop_errno = -ret;
+ goto out;
+ }
+ }
+ }
+
+ if (!conf->use_fallocate) {
+ ret = syncop_ftruncate (to, fd, stbuf->ia_size, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ gf_msg (this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "ftruncate failed for %s on %s",
+ loc->path, to->name);
+ }
}
}
@@ -877,10 +882,10 @@ out:
}
static int
-__dht_check_free_space (xlator_t *this, xlator_t *to, xlator_t *from, loc_t *loc,
- struct iatt *stbuf, int flag, dht_conf_t *conf,
- gf_boolean_t *target_changed, xlator_t **new_subvol,
- int *fop_errno)
+__dht_check_free_space (xlator_t *this, xlator_t *to, xlator_t *from,
+ loc_t *loc, struct iatt *stbuf, int flag,
+ dht_conf_t *conf, gf_boolean_t *target_changed,
+ xlator_t **new_subvol, int *fop_errno)
{
struct statvfs src_statfs = {0,};
struct statvfs dst_statfs = {0,};
@@ -890,6 +895,8 @@ __dht_check_free_space (xlator_t *this, xlator_t *to, xlator_t *from, loc_t *loc
uint64_t src_statfs_blocks = 1;
uint64_t dst_statfs_blocks = 1;
double post_availspacepercent = 0;
+ uint64_t file_blocks = 0;
+ uint64_t dst_total_blocks = 0;
xdata = dict_new ();
if (!xdata) {
@@ -933,9 +940,21 @@ __dht_check_free_space (xlator_t *this, xlator_t *to, xlator_t *from, loc_t *loc
goto out;
}
- gf_msg_debug (this->name, 0, "min_free_disk - %f , block available - %lu ,"
- " block size - %lu ", conf->min_free_disk, dst_statfs.f_bavail,
- dst_statfs.f_bsize);
+ gf_msg_debug (this->name, 0, "min_free_disk - %f , block available - "
+ "%lu , block size - %lu ", conf->min_free_disk,
+ dst_statfs.f_bavail, dst_statfs.f_bsize);
+
+ dst_statfs_blocks = dst_statfs.f_bavail *
+ (dst_statfs.f_frsize /
+ GF_DISK_SECTOR_SIZE);
+
+ src_statfs_blocks = src_statfs.f_bavail *
+ (src_statfs.f_frsize /
+ GF_DISK_SECTOR_SIZE);
+
+ dst_total_blocks = dst_statfs.f_blocks *
+ (dst_statfs.f_frsize /
+ GF_DISK_SECTOR_SIZE);
/* if force option is given, do not check for space @ dst.
* Check only if space is avail for the file */
@@ -950,24 +969,27 @@ __dht_check_free_space (xlator_t *this, xlator_t *to, xlator_t *from, loc_t *loc
the space could be scantily available.
*/
if (stbuf) {
- dst_statfs_blocks = ((dst_statfs.f_bavail *
- dst_statfs.f_bsize) /
- GF_DISK_SECTOR_SIZE);
- src_statfs_blocks = ((src_statfs.f_bavail *
- src_statfs.f_bsize) /
- GF_DISK_SECTOR_SIZE);
- if ((dst_statfs_blocks) <
- (src_statfs_blocks + stbuf->ia_blocks)) {
+ if (!conf->use_fallocate) {
+ file_blocks = stbuf->ia_size + GF_DISK_SECTOR_SIZE - 1;
+ file_blocks /= GF_DISK_SECTOR_SIZE;
+ if (file_blocks >= dst_statfs_blocks) {
+ dst_statfs_blocks = 0;
+ } else {
+ dst_statfs_blocks -= file_blocks;
+ }
+ }
+
+ if (dst_statfs_blocks <= src_statfs_blocks) {
gf_msg (this->name, GF_LOG_WARNING, 0,
- DHT_MSG_MIGRATE_FILE_FAILED,
- "data movement of file "
- "{blocks:%"PRIu64" name:(%s) } would result in "
- "dst node (%s:%"PRIu64") having lower disk "
- "space then the source node (%s:%"PRIu64")"
- ".Skipping file.", stbuf->ia_blocks, loc->path,
- to->name, dst_statfs_blocks, from->name,
- src_statfs_blocks);
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "data movement of file "
+ "{blocks:%"PRIu64" name:(%s) } would result in "
+ "dst node (%s:%"PRIu64") having lower disk "
+ "space then the source node (%s:%"PRIu64")"
+ ".Skipping file.", stbuf->ia_blocks, loc->path,
+ to->name, dst_statfs_blocks, from->name,
+ src_statfs_blocks);
/* this is not a 'failure', but we don't want to
consider this as 'success' too :-/ */
@@ -977,14 +999,15 @@ __dht_check_free_space (xlator_t *this, xlator_t *to, xlator_t *from, loc_t *loc
}
}
-
check_avail_space:
-
if (conf->disk_unit == 'p' && dst_statfs.f_blocks) {
- post_availspacepercent = (dst_statfs.f_bavail * 100) / dst_statfs.f_blocks;
- gf_msg_debug (this->name, 0, "file : %s, post_availspacepercent : %lf "
- "f_bavail : %lu min-free-disk: %lf", loc->path,
- post_availspacepercent, dst_statfs.f_bavail, conf->min_free_disk);
+ post_availspacepercent =
+ (dst_statfs_blocks * 100) / dst_total_blocks;
+
+ gf_msg_debug (this->name, 0, "file : %s, post_availspacepercent"
+ " : %lf f_bavail : %lu min-free-disk: %lf",
+ loc->path, post_availspacepercent,
+ dst_statfs.f_bavail, conf->min_free_disk);
if (post_availspacepercent < conf->min_free_disk) {
gf_msg (this->name, GF_LOG_WARNING, 0, 0,
@@ -999,22 +1022,27 @@ check_avail_space:
}
}
- if (conf->disk_unit != 'p' &&
- ((dst_statfs.f_bavail * dst_statfs.f_frsize) < conf->min_free_disk)) {
- gf_msg_debug (this->name, 0, "file : %s, destination frsize: %lu "
- "f_bavail : %lu min-free-disk: %lf", loc->path,
- dst_statfs.f_frsize, dst_statfs.f_bavail, conf->min_free_disk);
+ if (conf->disk_unit != 'p') {
+ if ((dst_statfs_blocks * GF_DISK_SECTOR_SIZE) <
+ conf->min_free_disk) {
+ gf_msg_debug (this->name, 0, "file : %s, destination "
+ "frsize: %lu f_bavail : %lu "
+ "min-free-disk: %lf", loc->path,
+ dst_statfs.f_frsize, dst_statfs.f_bavail,
+ conf->min_free_disk);
- gf_msg (this->name, GF_LOG_WARNING, 0, 0, "Write will cross "
- "min-free-disk for file - %s on subvol - %s. Looking "
- "for new subvol", loc->path, to->name);
+ gf_msg (this->name, GF_LOG_WARNING, 0, 0, "write will"
+ " cross min-free-disk for file - %s on subvol -"
+ " %s. looking for new subvol", loc->path,
+ to->name);
- goto find_new_subvol;
- } else {
- ret = 0;
- goto out;
- }
+ goto find_new_subvol;
+ } else {
+ ret = 0;
+ goto out;
+ }
+ }
find_new_subvol:
layout = dht_layout_get (this, loc->parent);
@@ -1030,10 +1058,10 @@ find_new_subvol:
if ((!(*new_subvol)) || (*new_subvol == from)) {
gf_msg (this->name, GF_LOG_WARNING, 0,
DHT_MSG_SUBVOL_INSUFF_SPACE, "Could not find any subvol"
- " with space accomodating the file - %s. Consider adding "
- "bricks", loc->path);
+ " with space accomodating the file - %s. Consider "
+ "adding bricks", loc->path);
- *target_changed = _gf_false;
+ *target_changed = _gf_false;
*fop_errno = ENOSPC;
ret = -1;
goto out;
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
index 0373ebffe5a..2c4523655f0 100644
--- a/xlators/cluster/dht/src/dht-shared.c
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -746,9 +746,10 @@ dht_init (xlator_t *this)
pthread_cond_init (&defrag->fc_wakeup_cond, 0);
defrag->global_error = 0;
-
}
+ conf->use_fallocate = 1;
+
conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
/* If option is not "auto", other options _should_ be boolean */