summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
authorAnand Avati <avati@redhat.com>2013-03-27 19:55:58 -0700
committerAnand Avati <avati@redhat.com>2013-03-27 22:16:12 -0700
commit8909c28c1173e10fd2f10706bd8a0f2ca5b5d685 (patch)
treeec51c2507b2c50ca8bb01453c9645d49b38ee88e /xlators
parentca10fdc81a72a71ac67ac9fc8c5ad5b92febd875 (diff)
cluster/afr: fsync() guarantees POST-OP completion
AFR now provides a stronger guarantee that fsync() returns only after completely finishing all the deferred/delayed POST-OPs on that open file. To achieve this, we make a stub out of the returning fsync and register it with the "delayed" frame in afr_delayed_changelog_wake_resume(). The delayed frame, after getting woken up and finishing the POST-OP, will call_resume() the registered stub (which UNWINDs the fsync) at the time of frame destruction. This provides a guarantee that an application's (or FUSE's) fsync() returns only after finishing up all the previous transactions, including delayed POST-OPs and UNLOCK. Change-Id: Iaa955457e2f25088a144fde37ad0444277b5cf49 BUG: 927146 Signed-off-by: Anand Avati <avati@redhat.com> Reviewed-on: http://review.gluster.org/4737 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r--xlators/cluster/afr/src/afr-common.c29
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c30
-rw-r--r--xlators/cluster/afr/src/afr.h7
3 files changed, 56 insertions, 10 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 5b96a7789f9..c55adc1ee9c 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -852,6 +852,11 @@ afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
loc_wipe (&local->transaction.new_parent_loc);
GF_FREE (local->transaction.postop_piggybacked);
+
+ if (local->transaction.resume_stub) {
+ call_resume (local->transaction.resume_stub);
+ local->transaction.resume_stub = NULL;
+ }
}
@@ -2643,6 +2648,7 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int call_count = -1;
int child_index = (long) cookie;
int read_child = 0;
+ call_stub_t *stub = NULL;
local = frame->local;
@@ -2677,18 +2683,29 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
+ /* Make a stub out of the frame, and register it
+ with the waking up post-op. When the call-stub resumes,
+ we are guaranteed that there was no post-op pending
+ (i.e changelogs were unset in the server). This is an
+ essential "guarantee", that fsync() returns only after
+ completely finishing EVERYTHING, including the delayed
+ post-op. This guarantee is expected by FUSE graph switching
+ for example.
+ */
+ stub = fop_fsync_cbk_stub (frame, default_fsync_cbk, op_ret,
+ op_errno, prebuf, postbuf, xdata);
+ if (!stub) {
+ AFR_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+ }
+
/* If no new unstable writes happened between the
time we cleared the unstable write witness flag in afr_fsync
and now, calling afr_delayed_changelog_wake_up() should
wake up and skip over the fsync phase and go straight to
afr_changelog_post_op_now()
*/
- afr_delayed_changelog_wake_up (this, local->fd);
-
- AFR_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno,
- &local->cont.fsync.prebuf,
- &local->cont.fsync.postbuf,
- NULL);
+ afr_delayed_changelog_wake_resume (this, local->fd, stub);
}
return 0;
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 76697f06b5c..f632a2b701a 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -1339,7 +1339,8 @@ out:
void
-afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd);
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
+ call_stub_t *stub);
void
afr_delayed_changelog_wake_up_cbk (void *data)
@@ -1579,12 +1580,14 @@ afr_changelog_post_op_safe (call_frame_t *frame, xlator_t *this)
void
-afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd)
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd,
+ call_stub_t *stub)
{
afr_fd_ctx_t *fd_ctx = NULL;
call_frame_t *prev_frame = NULL;
struct timeval delta = {0, };
afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
priv = this->private;
@@ -1613,7 +1616,11 @@ unlock:
pthread_mutex_unlock (&fd_ctx->delay_lock);
if (prev_frame) {
+ local = prev_frame->local;
+ local->transaction.resume_stub = stub;
afr_changelog_post_op_safe (prev_frame, this);
+ } else {
+ call_resume (stub);
}
}
@@ -1626,16 +1633,31 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
local = frame->local;
if (is_afr_delayed_changelog_post_op_needed (frame, this))
- afr_delayed_changelog_post_op (this, frame, local->fd);
+ afr_delayed_changelog_post_op (this, frame, local->fd, NULL);
else
afr_changelog_post_op_safe (frame, this);
}
+
+/* Wake up the sleeping/delayed post-op, and also register
+ a stub to have it resumed after this transaction
+ completely finishes.
+
+ The @stub gets saved in @local and gets resumed in
+ afr_local_transaction_cleanup()
+*/
+void
+afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub)
+{
+ afr_delayed_changelog_post_op (this, NULL, fd, stub);
+}
+
+
void
afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd)
{
- afr_delayed_changelog_post_op (this, NULL, fd);
+ afr_delayed_changelog_post_op (this, NULL, fd, NULL);
}
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 5d9f752b95a..878dbd7ba9d 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -673,6 +673,10 @@ typedef struct _afr_local {
*/
int *postop_piggybacked;
+ /* stub to resume on destruction
+ of the transaction frame */
+ call_stub_t *resume_stub;
+
int32_t **txn_changelog;//changelog after pre+post ops
unsigned char *pre_op;
@@ -1102,4 +1106,7 @@ afr_fd_report_unstable_write (xlator_t *this, fd_t *fd);
gf_boolean_t
afr_fd_has_witnessed_unstable_write (xlator_t *this, fd_t *fd);
+void
+afr_delayed_changelog_wake_resume (xlator_t *this, fd_t *fd, call_stub_t *stub);
+
#endif /* __AFR_H__ */