summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorShreyas Siravara <sshreyas@fb.com>2017-09-07 15:45:55 -0700
committerShreyas Siravara <sshreyas@fb.com>2017-09-07 23:04:37 +0000
commit9aca3f636b0fbf3122e6893c9771c08d7eb5c9f6 (patch)
tree0322388ba22ac12b84e1edaa8604eef55aa66d72
parentdbd30776f26e9c3c0ce1cf8ad66ee95fc1adf484 (diff)
posix: fadvise file descriptors with POSIX_FADV_RANDOM to bypass kernel read-size bug
Summary: - There is a known kernel bug that causes reads to disk to be limited by the RA setting in /sys/block/sd[a-z]/queue/read_ahead_kb. - The workaround is to fadvise POSIX_FADV_RANDOM on file descriptors before reading. - This is a port of D4585521 to 3.8 Test Plan: Still need to figure out a good test for this, other than simple inspection. Reviewers: rwareing, kvigor Reviewed By: kvigor Change-Id: I4a307573da620d9a1955fb5f4e8cd67154e11ace Reviewed-on: https://review.gluster.org/18229 Reviewed-by: Shreyas Siravara <sshreyas@fb.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Smoke: Gluster Build System <jenkins@build.gluster.org>
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c4
-rw-r--r--xlators/storage/posix/src/posix-messages.h8
-rw-r--r--xlators/storage/posix/src/posix.c24
-rw-r--r--xlators/storage/posix/src/posix.h1
4 files changed, 36 insertions, 1 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 317fa1f4eb9..a4a0096d17d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -2575,6 +2575,10 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.voltype = "storage/posix",
.op_version = 2,
},
+ { .key = "storage.fadvise-random",
+ .voltype = "storage/posix",
+ .op_version = 2,
+ },
{ .key = "storage.bd-aio",
.voltype = "storage/bd",
.op_version = 3
diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h
index ba6bf2c43ac..c2f8aa0c26a 100644
--- a/xlators/storage/posix/src/posix-messages.h
+++ b/xlators/storage/posix/src/posix-messages.h
@@ -944,6 +944,14 @@
* @recommendedaction
*
*/
+#define P_MSG_FADV_FAILED (POSIX_COMP_BASE + 111)
+
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
/*------------*/
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index e56e71e8c27..4105bff323f 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -3130,6 +3130,7 @@ int32_t
posix_open (call_frame_t *frame, xlator_t *this,
loc_t *loc, int32_t flags, fd_t *fd, dict_t *xdata)
{
+ int32_t local_ret = 0;
int32_t op_ret = -1;
int32_t op_errno = 0;
char *real_path = NULL;
@@ -3172,9 +3173,21 @@ posix_open (call_frame_t *frame, xlator_t *this,
if (_fd == -1) {
op_ret = -1;
op_errno = errno;
- gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FILE_OP_FAILED,
+ gf_msg (this->name, GF_LOG_ERROR, errno, P_MSG_FILE_OP_FAILED ,
"open on %s, flags: %d", real_path, flags);
goto out;
+ } else {
+ if (priv->fadvise_random) {
+ local_ret = posix_fadvise (_fd, 0, 0,
+ POSIX_FADV_RANDOM);
+ if (local_ret != 0) {
+ /* returns the error number; errno not set */
+ gf_msg (this->name, GF_LOG_WARNING, 0,
+ P_MSG_FADV_FAILED,
+ "Failed to fadvise POSIX_FADV_RANDOM! "
+ "Error: %s", strerror (local_ret));
+ }
+ }
}
pfd = GF_CALLOC (1, sizeof (*pfd), gf_posix_mt_posix_fd);
@@ -7491,6 +7504,8 @@ init (xlator_t *this)
GF_OPTION_INIT ("min-free-disk", _private->min_free_disk,
percent_or_size, out);
+ GF_OPTION_INIT ("fadvise-random", _private->fadvise_random, bool, out);
+
pthread_mutex_init (&_private->freespace_check_lock, NULL);
sys_statvfs (_private->base_path, &_private->freespace_stats);
clock_gettime (CLOCK_MONOTONIC, &_private->freespace_check_last);
@@ -7690,6 +7705,13 @@ struct volume_options options[] = {
"used for the min-free-disk determination. "
"Set to 0 to disable."
},
+ {
+ .key = {"fadvise-random"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "fadvise fd's with POSIX_FADV_RANDOM to bypass "
+ "read-ahead limits",
+ },
{ .key = {NULL} }
};
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index a2e1201dd72..6dde604cc0c 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -182,6 +182,7 @@ struct posix_private {
/* mutex protection ends. */
uint32_t freespace_check_interval;
gf_boolean_t freespace_check_passed;
+ gf_boolean_t fadvise_random;
};
typedef struct {