diff options
author | Anand Avati <avati@redhat.com> | 2013-07-19 08:31:41 -0700 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2013-07-23 06:11:12 -0700 |
commit | 37ac6bdca826046cbcb0d50727af29baf9407950 (patch) | |
tree | b899eb81a70c7719c5d7e4328697cc314da24b97 /xlators/storage/posix/src/posix.c | |
parent | cee1f9b5c7917bba220f1156b342bf07cac4ad38 (diff) |
storage/posix: implement batched fsync in a single thread
Because of the extra fsync()s issued by AFR transaction, they
could potentially "clog" all the io-threads denying unrelated
operations from making progress.
This patch assigns a dedicated thread to issues fsyncs, as
an experimental feature to understand performance characteristics
with the approach.
As a basis, incoming individual fsync requests are grouped into
batches, falling in the same @batch-fsync-delay-usec window of
time. These windows can extend in practice, as processing of
the previous batch can take longer than @batch-fsync-delay-usec
while new requests are getting batched.
The feature support three modes (similar to the -S modes of fs_mark)
- syncfs: In this mode one syncfs() is issued per batch, instead
of N fsync()s (one per file.)
- syncfs-single-fsync: In this mode one syncfs() is issued per
batch (which, on Linux, guarantees the completion of write-out
of dirty pages in the filesystem up to that point) and one single
fsync() to synchronize or flush the controller/drive cache. This
corresponds to -S 2 of fsmark.
- syncfs-reverse-fsync: In this mode, one syncfs() is issued per
batch, and all the open files in that batch are fsync()'ed in
the reverse order of the queue. This corresponds to -S 4 of
fsmark.
- reverse-fsync: In this mode, no syncfs() is issued and all the
files in the batch are fsync()'ed in the reverse order. This
corresponds to -S 3 of fsmark.
Change-Id: Ia1e170a810c780c8d80e02cf910accc4170c4cd4
BUG: 927146
Signed-off-by: Anand Avati <avati@redhat.com>
Reviewed-on: http://review.gluster.org/4746
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/storage/posix/src/posix.c')
-rw-r--r-- | xlators/storage/posix/src/posix.c | 111 |
1 files changed, 111 insertions, 0 deletions
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index fee2ce9d77c..e917766b5fe 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -2417,6 +2417,33 @@ out: } +int +posix_batch_fsync (call_frame_t *frame, xlator_t *this, + fd_t *fd, int datasync, dict_t *xdata) +{ + call_stub_t *stub = NULL; + struct posix_private *priv = NULL; + + priv = this->private; + + stub = fop_fsync_stub (frame, default_fsync, fd, datasync, xdata); + if (!stub) { + STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0); + return 0; + } + + pthread_mutex_lock (&priv->fsync_mutex); + { + list_add_tail (&stub->list, &priv->fsyncs); + priv->fsync_queue_count++; + pthread_cond_signal (&priv->fsync_cond); + } + pthread_mutex_unlock (&priv->fsync_mutex); + + return 0; +} + + int32_t posix_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, dict_t *xdata) @@ -2428,6 +2455,7 @@ posix_fsync (call_frame_t *frame, xlator_t *this, int ret = -1; struct iatt preop = {0,}; struct iatt postop = {0,}; + struct posix_private *priv = NULL; DECLARE_OLD_FS_ID_VAR; @@ -2443,6 +2471,12 @@ posix_fsync (call_frame_t *frame, xlator_t *this, goto out; #endif + priv = this->private; + if (priv->batch_fsync_mode && xdata && dict_get (xdata, "batch-fsync")) { + posix_batch_fsync (frame, this, fd, datasync, xdata); + return 0; + } + ret = posix_fd_ctx_get (fd, this, &pfd); if (ret < 0) { op_errno = -ret; @@ -4303,6 +4337,27 @@ posix_set_owner (xlator_t *this, uid_t uid, gid_t gid) return ret; } + +static int +set_batch_fsync_mode (struct posix_private *priv, const char *str) +{ + if (strcmp (str, "none") == 0) + priv->batch_fsync_mode = BATCH_NONE; + else if (strcmp (str, "syncfs") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS; + else if (strcmp (str, "syncfs-single-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_SINGLE_FSYNC; + else if (strcmp (str, "syncfs-reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_SYNCFS_REVERSE_FSYNC; + else if (strcmp (str, "reverse-fsync") == 0) + priv->batch_fsync_mode = BATCH_REVERSE_FSYNC; + else + return -1; + + return 0; +} + + int reconfigure (xlator_t *this, dict_t *options) { @@ -4310,6 +4365,7 @@ reconfigure (xlator_t *this, dict_t *options) struct posix_private *priv = NULL; uid_t uid = -1; gid_t gid = -1; + char *batch_fsync_mode_str = NULL; priv = this->private; @@ -4317,6 +4373,18 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("brick-gid", gid, options, uint32, out); posix_set_owner (this, uid, gid); + GF_OPTION_RECONF ("batch-fsync-delay-usec", priv->batch_fsync_delay_usec, + options, uint32, out); + + GF_OPTION_RECONF ("batch-fsync-mode", batch_fsync_mode_str, + options, str, out); + + if (set_batch_fsync_mode (priv, batch_fsync_mode_str) != 0) { + gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s", + batch_fsync_mode_str); + goto out; + } + GF_OPTION_RECONF ("linux-aio", priv->aio_configured, options, bool, out); @@ -4368,6 +4436,7 @@ init (xlator_t *this) char *guuid = NULL; uid_t uid = -1; gid_t gid = -1; + char *batch_fsync_mode_str; dir_data = dict_get (this->options, "directory"); @@ -4720,6 +4789,28 @@ init (xlator_t *this) INIT_LIST_HEAD (&_private->janitor_fds); posix_spawn_janitor_thread (this); + + pthread_mutex_init (&_private->fsync_mutex, NULL); + pthread_cond_init (&_private->fsync_cond, NULL); + INIT_LIST_HEAD (&_private->fsyncs); + + ret = pthread_create (&_private->fsyncer, NULL, posix_fsyncer, this); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, "fsyncer thread" + " creation failed (%s)", strerror (errno)); + goto out; + } + + GF_OPTION_INIT ("batch-fsync-mode", batch_fsync_mode_str, str, out); + + if (set_batch_fsync_mode (_private, batch_fsync_mode_str) != 0) { + gf_log (this->name, GF_LOG_ERROR, "Unknown mode string: %s", + batch_fsync_mode_str); + goto out; + } + + GF_OPTION_INIT ("batch-fsync-delay-usec", _private->batch_fsync_delay_usec, + uint32, out); out: return ret; } @@ -4849,5 +4940,25 @@ struct volume_options options[] = { .description = "Interval in seconds for a filesystem health check, " "set to 0 to disable" }, + { .key = {"batch-fsync-mode"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "reverse-fsync", + .description = "Possible values:\n" + "\t- syncfs: Perform one syncfs() on behalf oa batch" + "of fsyncs.\n" + "\t- syncfs-single-fsync: Perform one syncfs() on behalf of a batch" + " of fsyncs and one fsync() per batch.\n" + "\t- syncfs-reverse-fsync: Preform one syncfs() on behalf of a batch" + " of fsyncs and fsync() each file in the batch in reverse order.\n" + " in reverse order.\n" + "\t- reverse-fsync: Perform fsync() of each file in the batch in" + " reverse order." + }, + { .key = {"batch-fsync-delay-usec"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "1000000", + .description = "Num of usecs to wait for aggregating fsync" + " requests", + }, { .key = {NULL} } }; |