summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAnand Avati <avati@redhat.com>2012-06-26 21:06:39 -0700
committerVijay Bellur <vbellur@redhat.com>2012-12-12 00:13:47 -0500
commitf276480174b7c62fbcf914e330563de063463634 (patch)
tree4eab20c5a0d8de3300998a236ada4c6721ea0f9d
parentc067bd3e1bcf645731d8c3dfffce310fb7a3e813 (diff)
cluster/afr: post-op-delay support
post-op-delay introduces an artificial delay between the OP and POST-OP-CHANGELOG phases of a write transaction to increase the probability of changelog-piggyback and eager-locking to work more efficiently. Change-Id: I865ca4b68512c44818719c7e388952f15d53e6c2 BUG: 836033 Signed-off-by: Anand Avati <avati@redhat.com> Signed-off-by: Pranith Kumar K <pranithk@gluster.com> Reviewed-on: https://code.engineering.redhat.com/gerrit/1858 Reviewed-by: Vijay Bellur <vbellur@redhat.com> Tested-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--xlators/cluster/afr/src/afr-common.c3
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c2
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c147
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h2
-rw-r--r--xlators/cluster/afr/src/afr.c13
-rw-r--r--xlators/cluster/afr/src/afr.h7
6 files changed, 173 insertions, 1 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index d3e51822fe3..98aba9fd492 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -2278,6 +2278,7 @@ __afr_fd_ctx_set (xlator_t *this, fd_t *fd)
goto out;
}
+ pthread_mutex_init (&fd_ctx->delay_lock, NULL);
INIT_LIST_HEAD (&fd_ctx->paused_calls);
INIT_LIST_HEAD (&fd_ctx->entries);
@@ -2532,6 +2533,8 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
if (fd_ctx->lock_acquired)
GF_FREE (fd_ctx->lock_acquired);
+ pthread_mutex_destroy (&fd_ctx->delay_lock);
+
GF_FREE (fd_ctx);
}
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 555a9b9faf6..5e61be4d4df 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -1477,6 +1477,8 @@ afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
piggyback = 0;
local->transaction.eager_lock[i] = 1;
+ afr_set_delayed_post_op (frame, this);
+
LOCK (&local->fd->lock);
{
if (fd_ctx->lock_acquired[i]) {
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 92f3dd65de8..65dea16df42 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -11,6 +11,7 @@
#include "dict.h"
#include "byte-order.h"
#include "common-utils.h"
+#include "timer.h"
#include "afr.h"
#include "afr-transaction.h"
@@ -492,7 +493,7 @@ afr_changelog_post_op_call_count (afr_transaction_type type,
}
int
-afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
+afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)
{
afr_private_t * priv = this->private;
afr_internal_lock_t *int_lock = NULL;
@@ -862,6 +863,8 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
}
UNLOCK (&local->fd->lock);
+ afr_set_delayed_post_op (frame, this);
+
if (piggyback)
afr_changelog_pre_op_cbk (frame, (void *)(long)i,
this, 1, 0, xattr[i],
@@ -1227,16 +1230,158 @@ afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)
}
+void
+afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ /* call this function from any of the related optimizations
+ which benefit from delaying post op are enabled, namely:
+
+ - changelog piggybacking
+ - eager locking
+ */
+
+ priv = this->private;
+ if (!priv)
+ return;
+
+ if (!priv->post_op_delay_secs)
+ return;
+
+ local = frame->local;
+ if (!local)
+ return;
+
+ if (!local->fd)
+ return;
+
+ if (local->op == GF_FOP_WRITE)
+ local->delayed_post_op = _gf_true;
+}
+
+
+gf_boolean_t
+is_afr_delayed_changelog_post_op_needed (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ gf_boolean_t res = _gf_false;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ if (!local->delayed_post_op)
+ goto out;
+
+ res = _gf_true;
+out:
+ return res;
+}
+
+
+void
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd);
+
+void
+afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd);
+
+void
+afr_delayed_changelog_wake_up_cbk (void *data)
+{
+ fd_t *fd = NULL;
+
+ fd = data;
+
+ afr_delayed_changelog_wake_up (THIS, fd);
+}
+
+
+void
+afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+ call_frame_t *prev_frame = NULL;
+ struct timeval delta = {0, };
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ fd_ctx = afr_fd_ctx_get (fd, this);
+ if (!fd_ctx)
+ return;
+
+ delta.tv_sec = priv->post_op_delay_secs;
+ delta.tv_usec = 0;
+
+ pthread_mutex_lock (&fd_ctx->delay_lock);
+ {
+ prev_frame = fd_ctx->delay_frame;
+ fd_ctx->delay_frame = NULL;
+ if (fd_ctx->delay_timer)
+ gf_timer_call_cancel (this->ctx, fd_ctx->delay_timer);
+ fd_ctx->delay_timer = NULL;
+ if (!frame)
+ goto unlock;
+ fd_ctx->delay_timer = gf_timer_call_after (this->ctx, delta,
+ afr_delayed_changelog_wake_up_cbk,
+ fd);
+ fd_ctx->delay_frame = frame;
+ }
+unlock:
+ pthread_mutex_unlock (&fd_ctx->delay_lock);
+
+ if (prev_frame) {
+ afr_changelog_post_op_now (prev_frame, this);
+ }
+}
+
+
+void
+afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (is_afr_delayed_changelog_post_op_needed (frame, this))
+ afr_delayed_changelog_post_op (this, frame, local->fd);
+ else
+ afr_changelog_post_op_now (frame, this);
+}
+
+
+void
+afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd)
+{
+ afr_delayed_changelog_post_op (this, NULL, fd);
+}
+
+
int
afr_transaction_resume (call_frame_t *frame, xlator_t *this)
{
afr_internal_lock_t *int_lock = NULL;
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ fd_t *fd = NULL;
local = frame->local;
int_lock = &local->internal_lock;
priv = this->private;
+ fd = local->fd;
+
+ if (fd)
+ /* The wake up needs to happen independent of
+ what type of fop arrives here. If it was
+ a write, then it has already inherited the
+ lock and changelog. If it was not a write,
+ then the presumption of the optimization (of
+ optimizing for successive write operations)
+ fails.
+ */
+ afr_delayed_changelog_wake_up (this, fd);
afr_restore_lk_owner (frame);
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index 965ee67374c..69a8c1b1659 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -25,4 +25,6 @@ afr_fd_ctx_t *
afr_fd_ctx_get (fd_t *fd, xlator_t *this);
int
afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending);
+void
+afr_set_delayed_post_op (call_frame_t *frame, xlator_t *this);
#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 3f102f80885..51c51e56a72 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -165,6 +165,8 @@ reconfigure (xlator_t *this, dict_t *options)
uint32, out);
fix_quorum_options(this,priv,qtype);
+ GF_OPTION_RECONF ("post-op-delay-secs", priv->post_op_delay_secs, options,
+ uint32, out);
ret = 0;
out:
return ret;
@@ -291,6 +293,8 @@ init (xlator_t *this)
GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out);
fix_quorum_options(this,priv,qtype);
+ GF_OPTION_INIT ("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);
+
priv->wait_count = 1;
priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
@@ -596,5 +600,14 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_STR,
.description = "Local glusterd uuid string",
},
+ { .key = {"post-op-delay-secs"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "1",
+ .description = "Time interval induced artificially before "
+ "post-operation phase of the transaction to "
+ "enhance overlap of adjacent write operations.",
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 256e93ad213..78d108db6d8 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -149,6 +149,7 @@ typedef struct _afr_private {
struct list_head saved_fds; /* list of fds on which locks have succeeded */
gf_boolean_t optimistic_change_log;
gf_boolean_t eager_lock;
+ uint32_t post_op_delay_secs;
unsigned int quorum_count;
char vol_uuid[UUID_SIZE + 1];
@@ -408,6 +409,7 @@ typedef struct _afr_local {
dict_t *dict;
int optimistic_change_log;
+ gf_boolean_t delayed_post_op;
gf_boolean_t fop_paused;
int (*fop_call_continue) (call_frame_t *frame, xlator_t *this);
@@ -733,6 +735,11 @@ typedef struct {
unsigned char *locked_on; /* which subvolumes locks have been successful */
struct list_head paused_calls; /* queued calls while fix_open happens */
+
+ /* used for delayed-post-op optimization */
+ pthread_mutex_t delay_lock;
+ gf_timer_t *delay_timer;
+ call_frame_t *delay_frame;
} afr_fd_ctx_t;