diff options
author | Shreyas Siravara <sshreyas@fb.com> | 2017-09-07 15:45:55 -0700 |
---|---|---|
committer | Shreyas Siravara <sshreyas@fb.com> | 2017-09-07 23:04:37 +0000 |
commit | 9aca3f636b0fbf3122e6893c9771c08d7eb5c9f6 (patch) | |
tree | 0322388ba22ac12b84e1edaa8604eef55aa66d72 | |
parent | dbd30776f26e9c3c0ce1cf8ad66ee95fc1adf484 (diff) |
posix: fadvise file descriptors with POSIX_FADV_RANDOM to bypass kernel read-size bug
Summary:
- There is a known kernel bug that causes disk reads to be limited by the read-ahead (RA) setting in /sys/block/sd[a-z]/queue/read_ahead_kb.
- The workaround is to call posix_fadvise() with POSIX_FADV_RANDOM on each file descriptor before reading from it.
- This is a port of D4585521 to 3.8
Test Plan: Still need to figure out a good test for this, other than simple inspection.
Reviewers: rwareing, kvigor
Reviewed By: kvigor
Change-Id: I4a307573da620d9a1955fb5f4e8cd67154e11ace
Reviewed-on: https://review.gluster.org/18229
Reviewed-by: Shreyas Siravara <sshreyas@fb.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 4 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix-messages.h | 8 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix.c | 24 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix.h | 1 |
4 files changed, 36 insertions, 1 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 317fa1f4eb9..a4a0096d17d 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -2575,6 +2575,10 @@ struct volopt_map_entry glusterd_volopt_map[] = { .voltype = "storage/posix", .op_version = 2, }, + { .key = "storage.fadvise-random", + .voltype = "storage/posix", + .op_version = 2, + }, { .key = "storage.bd-aio", .voltype = "storage/bd", .op_version = 3 diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h index ba6bf2c43ac..c2f8aa0c26a 100644 --- a/xlators/storage/posix/src/posix-messages.h +++ b/xlators/storage/posix/src/posix-messages.h @@ -944,6 +944,14 @@ * @recommendedaction * */ +#define P_MSG_FADV_FAILED (POSIX_COMP_BASE + 111) + +/*! + * @messageid + * @diagnosis + * @recommendedaction + * + */ /*------------*/ #define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages" diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index e56e71e8c27..4105bff323f 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -3130,6 +3130,7 @@ int32_t posix_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata) { + int32_t local_ret = 0; int32_t op_ret = -1; int32_t op_errno = 0; char *real_path = NULL; @@ -3172,9 +3173,21 @@ posix_open (call_frame_t *frame, xlator_t *this, if (_fd == -1) { op_ret = -1; op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FILE_OP_FAILED, + gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FILE_OP_FAILED , "open on %s, flags: %d", real_path, flags); goto out; + } else { + if (priv->fadvise_random) { + local_ret = posix_fadvise (_fd, 0, 0, + POSIX_FADV_RANDOM); + if (local_ret != 0) { + gf_msg (this->name, GF_LOG_WARNING, 0, + P_MSG_FADV_FAILED, + "Failed to fadvise " + "POSIX_FADV_RANDOM! 
" + "Error: %s", strerror (errno)); + } + } } pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd); @@ -7491,6 +7504,8 @@ init (xlator_t *this) GF_OPTION_INIT ("min-free-disk", _private->min_free_disk, percent_or_size, out); + GF_OPTION_INIT ("fadvise-random", _private->fadvise_random, bool, out); + pthread_mutex_init (&_private->freespace_check_lock, NULL); sys_statvfs (_private->base_path, &_private->freespace_stats); clock_gettime (CLOCK_MONOTONIC, &_private->freespace_check_last); @@ -7690,6 +7705,13 @@ struct volume_options options[] = { "used for the min-free-disk determination. " "Set to 0 to disable." }, + { + .key = {"fadvise-random"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "fadvise fd's with POSIX_FADV_RANDOM to bypass " + "read-ahead limits", + }, { .key = {NULL} } }; diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index a2e1201dd72..6dde604cc0c 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -182,6 +182,7 @@ struct posix_private { /* mutex protection ends. */ uint32_t freespace_check_interval; gf_boolean_t freespace_check_passed; + gf_boolean_t fadvise_random; }; typedef struct { |