From 5a3d849b8563067d35c1422e43e605bd9533f3c2 Mon Sep 17 00:00:00 2001 From: Anand Avati Date: Tue, 26 Jun 2012 21:06:39 -0700 Subject: cluster/afr: post-op-delay support post-op-delay introduces an artificial delay between the OP and POST-OP-CHANGELOG phases of a write transaction to increase the probability of changelog-piggyback and eager-locking to work more efficiently. Also enable eager-locking by default. Change-Id: I865ca4b68512c44818719c7e388952f15d53e6c2 BUG: 836033 Signed-off-by: Anand Avati Reviewed-on: http://review.gluster.com/3621 Tested-by: Gluster Build System Reviewed-by: Pranith Kumar Karampuri --- xlators/cluster/afr/src/afr-common.c | 3 + xlators/cluster/afr/src/afr-lk-common.c | 2 + xlators/cluster/afr/src/afr-transaction.c | 147 +++++++++++++++++++++++++++++- xlators/cluster/afr/src/afr-transaction.h | 2 + xlators/cluster/afr/src/afr.c | 16 +++- xlators/cluster/afr/src/afr.h | 7 ++ 6 files changed, 175 insertions(+), 2 deletions(-) diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 407e76c0e74..6787dfc27d5 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -2400,6 +2400,7 @@ __afr_fd_ctx_set (xlator_t *this, fd_t *fd) goto out; } + pthread_mutex_init (&fd_ctx->delay_lock, NULL); INIT_LIST_HEAD (&fd_ctx->paused_calls); INIT_LIST_HEAD (&fd_ctx->entries); @@ -2654,6 +2655,8 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd) if (fd_ctx->lock_acquired) GF_FREE (fd_ctx->lock_acquired); + pthread_mutex_destroy (&fd_ctx->delay_lock); + GF_FREE (fd_ctx); } diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index 555a9b9faf6..5e61be4d4df 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -1477,6 +1477,8 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this) piggyback = 0; local->transaction.eager_lock[i] = 1; + afr_set_delayed_post_op (frame, this); + LOCK (&local->fd->lock); { if (fd_ctx->lock_acquired[i]) { diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 0e9956d7bed..c4378d4451f 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -11,6 +11,7 @@ #include "dict.h" #include "byte-order.h" #include "common-utils.h" +#include "timer.h" #include "afr.h" #include "afr-transaction.h" @@ -523,7 +524,7 @@ afr_set_postop_dict (afr_local_t *local, xlator_t *this, dict_t *xattr, } int -afr_changelog_post_op (call_frame_t *frame, xlator_t *this) +afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this) { afr_private_t * priv = this->private; afr_internal_lock_t *int_lock = NULL; @@ -877,6 +878,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this) } UNLOCK (&local->fd->lock); + afr_set_delayed_post_op (frame, this); + if (piggyback) afr_changelog_pre_op_cbk (frame, (void *)(long)i, this, 1, 0, xattr[i], @@ -1243,16 +1246,158 @@ afr_internal_lock_finish (call_frame_t *frame, xlator_t *this) } +void +afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + /* call this function from any of the related optimizations + which benefit from delaying post op are enabled, namely: + + - changelog piggybacking + - eager locking + */ + + priv = this->private; + if (!priv) + return; + + if (!priv->post_op_delay_secs) + return; + + local = frame->local; + if (!local) + return; + + if (!local->fd) + return; + + if (local->op == GF_FOP_WRITE) + local->delayed_post_op = _gf_true; +} + + +gf_boolean_t +is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + gf_boolean_t res = _gf_false; + + local = frame->local; + if (!local) + goto out; + + if (!local->delayed_post_op) + goto out; + + res = _gf_true; +out: + return res; +} + + +void +afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd); + +void +afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd); + +void +afr_delayed_changelog_wake_up_cbk (void *data) +{ + fd_t *fd = NULL; + + fd = data; + + afr_delayed_changelog_wake_up (THIS, fd); +} + + +void +afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd) +{ + afr_fd_ctx_t *fd_ctx = NULL; + call_frame_t *prev_frame = NULL; + struct timeval delta = {0, }; + afr_private_t *priv = NULL; + + priv = this->private; + + fd_ctx = afr_fd_ctx_get (fd, this); + if (!fd_ctx) + return; + + delta.tv_sec = priv->post_op_delay_secs; + delta.tv_usec = 0; + + pthread_mutex_lock (&fd_ctx->delay_lock); + { + prev_frame = fd_ctx->delay_frame; + fd_ctx->delay_frame = NULL; + if (fd_ctx->delay_timer) + gf_timer_call_cancel (this->ctx, fd_ctx->delay_timer); + fd_ctx->delay_timer = NULL; + if (!frame) + goto unlock; + fd_ctx->delay_timer = gf_timer_call_after (this->ctx, delta, + afr_delayed_changelog_wake_up_cbk, + fd); + fd_ctx->delay_frame = frame; + } +unlock: + pthread_mutex_unlock (&fd_ctx->delay_lock); + + if (prev_frame) { + afr_changelog_post_op_now (prev_frame, this); + } +} + + +void +afr_changelog_post_op (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + if (is_afr_delayed_changelog_post_op_needed (frame, this)) + afr_delayed_changelog_post_op (this, frame, local->fd); + else + afr_changelog_post_op_now (frame, this); +} + + +void +afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd) +{ + afr_delayed_changelog_post_op (this, NULL, fd); +} + + int afr_transaction_resume (call_frame_t *frame, xlator_t *this) { afr_internal_lock_t *int_lock = NULL; afr_local_t *local = NULL; afr_private_t *priv = NULL; + fd_t *fd = NULL; local = frame->local; int_lock = &local->internal_lock; priv = this->private; + fd = local->fd; + + if (fd) + /* The wake up needs to happen independent of + what type of fop arrives here. If it was + a write, then it has already inherited the + lock and changelog. If it was not a write, + then the presumption of the optimization (of + optimizing for successive write operations) + fails. + */ + afr_delayed_changelog_wake_up (this, fd); afr_restore_lk_owner (frame); diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h index d19d16fb39e..102599633e3 100644 --- a/xlators/cluster/afr/src/afr-transaction.h +++ b/xlators/cluster/afr/src/afr-transaction.h @@ -31,4 +31,6 @@ afr_fd_ctx_get (fd_t *fd, xlator_t *this); int afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending, int child, afr_xattrop_type_t op); +void +afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this); #endif /* __TRANSACTION_H__ */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index cd84c357da8..108c82778db 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -183,6 +183,9 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options, int32, out); + GF_OPTION_RECONF ("post-op-delay-secs", priv->post_op_delay_secs, options, + uint32, out); + /* Reset this so we re-discover in case the topology changed. */ priv->did_discovery = _gf_false; @@ -324,6 +327,8 @@ init (xlator_t *this) GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out); fix_quorum_options(this,priv,qtype); + GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out); + priv->wait_count = 1; priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count, @@ -617,7 +622,7 @@ struct volume_options options[] = { }, { .key = {"eager-lock"}, .type = GF_OPTION_TYPE_BOOL, - .default_value = "off", + .default_value = "on", }, { .key = {"self-heal-daemon"}, .type = GF_OPTION_TYPE_BOOL, @@ -657,5 +662,14 @@ struct volume_options options[] = { .default_value = "600", .description = "Poll timeout for checking the need to self-heal" }, + { .key = {"post-op-delay-secs"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = INT_MAX, + .default_value = "1", + .description = "Time interval induced artificially before " + "post-operation phase of the transaction to " + "enhance overlap of adjacent write operations.", + }, { .key = {NULL} }, }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index e9867cb826d..abb95533449 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -151,6 +151,7 @@ typedef struct _afr_private { struct list_head saved_fds; /* list of fds on which locks have succeeded */ gf_boolean_t optimistic_change_log; gf_boolean_t eager_lock; + uint32_t post_op_delay_secs; unsigned int quorum_count; char vol_uuid[UUID_SIZE + 1]; @@ -413,6 +414,7 @@ typedef struct _afr_local { dict_t *dict; int optimistic_change_log; + gf_boolean_t delayed_post_op; gf_boolean_t fop_paused; int (*fop_call_continue) (call_frame_t *frame, xlator_t *this); @@ -733,6 +735,11 @@ typedef struct { unsigned char *locked_on; /* which subvolumes locks have been successful */ struct list_head paused_calls; /* queued calls while fix_open happens */ + + /* used for delayed-post-op optimization */ + pthread_mutex_t delay_lock; + gf_timer_t *delay_timer; + call_frame_t *delay_frame; } afr_fd_ctx_t; -- cgit