diff options
author | Anand Avati <avati@redhat.com> | 2013-03-27 19:55:58 -0700 |
---|---|---|
committer | Anand Avati <avati@redhat.com> | 2013-03-27 22:16:12 -0700 |
commit | 8909c28c1173e10fd2f10706bd8a0f2ca5b5d685 (patch) | |
tree | ec51c2507b2c50ca8bb01453c9645d49b38ee88e /xlators | |
parent | ca10fdc81a72a71ac67ac9fc8c5ad5b92febd875 (diff) |
cluster/afr: fsync() guarantees POST-OP completion
AFR now provides a stronger guarantee that fsync() returns only
after completely finishing all the deferred/delayed POST-OP on that
open file.
To achieve this, we make a stub out of the returning fsync and
register it with the "delayed" frame in afr_delayed_changelog_wake_resume().
The delayed frame, after getting woken up and finishing the POST-OP,
will call_resume() the registered stub (which UNWINDs the fsync) at
the time of frame destruction.
This provides a guarantee that an application's (or FUSE) fsync()
returns only after finishing up all the previous transactions,
including delayed POST-OPs and UNLOCK.
Change-Id: Iaa955457e2f25088a144fde37ad0444277b5cf49
BUG: 927146
Signed-off-by: Anand Avati <avati@redhat.com>
Reviewed-on: http://review.gluster.org/4737
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 29 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 30 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 7 |
3 files changed, 56 insertions, 10 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 5b96a7789f9..c55adc1ee9c 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -852,6 +852,11 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this) loc_wipe (&local->transaction.new_parent_loc); GF_FREE (local->transaction.postop_piggybacked); + + if (local->transaction.resume_stub) { + call_resume (local->transaction.resume_stub); + local->transaction.resume_stub = NULL; + } } @@ -2643,6 +2648,7 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int call_count = -1; int child_index = (long) cookie; int read_child = 0; + call_stub_t *stub = NULL; local = frame->local; @@ -2677,18 +2683,29 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, call_count = afr_frame_return (frame); if (call_count == 0) { + /* Make a stub out of the frame, and register it + with the waking up post-op. When the call-stub resumes, + we are guaranteed that there was no post-op pending + (i.e changelogs were unset in the server). This is an + essential "guarantee", that fsync() returns only after + completely finishing EVERYTHING, including the delayed + post-op. This guarantee is expected by FUSE graph switching + for example. 
+ */ + stub = fop_fsync_cbk_stub (frame, default_fsync_cbk, op_ret, + op_errno, prebuf, postbuf, xdata); + if (!stub) { + AFR_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0); + return 0; + } + /* If no new unstable writes happened between the time we cleared the unstable write witness flag in afr_fsync and now, calling afr_delayed_changelog_wake_up() should wake up and skip over the fsync phase and go straight to afr_changelog_post_op_now() */ - afr_delayed_changelog_wake_up (this, local->fd); - - AFR_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno, - &local->cont.fsync.prebuf, - &local->cont.fsync.postbuf, - NULL); + afr_delayed_changelog_wake_resume (this, local->fd, stub); } return 0; diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 76697f06b5c..f632a2b701a 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -1339,7 +1339,8 @@ out: void -afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd); +afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd, + call_stub_t *stub); void afr_delayed_changelog_wake_up_cbk (void *data) @@ -1579,12 +1580,14 @@ afr_changelog_post_op_safe (call_frame_t *frame, xlator_t *this) void -afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd) +afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd, + call_stub_t *stub) { afr_fd_ctx_t *fd_ctx = NULL; call_frame_t *prev_frame = NULL; struct timeval delta = {0, }; afr_private_t *priv = NULL; + afr_local_t *local = NULL; priv = this->private; @@ -1613,7 +1616,11 @@ unlock: pthread_mutex_unlock (&fd_ctx->delay_lock); if (prev_frame) { + local = prev_frame->local; + local->transaction.resume_stub = stub; afr_changelog_post_op_safe (prev_frame, this); + } else { + call_resume (stub); } } @@ -1626,16 +1633,31 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this) local = 
frame->local; if (is_afr_delayed_changelog_post_op_needed (frame, this)) - afr_delayed_changelog_post_op (this, frame, local->fd); + afr_delayed_changelog_post_op (this, frame, local->fd, NULL); else afr_changelog_post_op_safe (frame, this); } + +/* Wake up the sleeping/delayed post-op, and also register + a stub to have it resumed after this transaction + completely finishes. + + The @stub gets saved in @local and gets resumed in + afr_local_cleanup() +*/ +void +afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub) +{ + afr_delayed_changelog_post_op (this, NULL, fd, stub); +} + + void afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd) { - afr_delayed_changelog_post_op (this, NULL, fd); + afr_delayed_changelog_post_op (this, NULL, fd, NULL); } diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 5d9f752b95a..878dbd7ba9d 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -673,6 +673,10 @@ typedef struct _afr_local { */ int *postop_piggybacked; + /* stub to resume on destruction + of the transaction frame */ + call_stub_t *resume_stub; + int32_t **txn_changelog;//changelog after pre+post ops unsigned char *pre_op; @@ -1102,4 +1106,7 @@ afr_fd_report_unstable_write (xlator_t *this, fd_t *fd); gf_boolean_t afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd); +void +afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub); + #endif /* __AFR_H__ */ |