diff options
author | Anand Avati <avati@gluster.com> | 2010-11-09 05:27:02 +0000 |
---|---|---|
committer | Anand V. Avati <avati@dev.gluster.com> | 2010-11-09 03:07:07 -0800 |
commit | 6fb49f18a9bbfd1266b4773e757e459519c6719c (patch) | |
tree | fff8ff41717114ead7a7e2b848e83058d6d8b15a /xlators/cluster/afr/src | |
parent | 667c5e22467cbecd371bfc052e7f65b6b6b41e2d (diff) |
replicate: optimistic changelog
The standard way of maintaining changelog in replicate has been to
write out pending flags and to unset the pending flag post the
actual operation.
This new optimization kicks in only when all subvolumes are up.
The optimization is that, during pre-op, no changelog is written for
METADATA and ENTRY/RENAME operations. If during the operation nothing
failed, no changelog is updated in post-op either. If, however,
something does fail during an operation, then pending flags get
written during post-op pointing only towards the failed nodes.
DATA transactions continue to work the way they are.
If any subvolume is down, pending flags are written in the pre-op
changelog itself, as before.
The impact of this optimization is only in the case when both servers
die or the client dies while the 'FOP' stage of the transaction is
in progress. By nature of METADATA and ENTRY operations, detecting a
mismatch later is not dependent on the presence of changelog. Changelog
only determines the direction in which self-heal happens for these types
of transactions. For the direction too this optimization does not have
a major impact because in the cases of failure (both servers dying or
client dying) the final state (direction of self-heal) would be
arbitrary anyway as the syscall wouldn't have completed.
Signed-off-by: Anand V. Avati <avati@blackhole.gluster.com>
Signed-off-by: Anand V. Avati <avati@dev.gluster.com>
BUG: 2068 (performance enhancements)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2068
Diffstat (limited to 'xlators/cluster/afr/src')
-rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 68 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.c | 38 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 8 |
3 files changed, 101 insertions, 13 deletions
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index ff9c88bad..d48d6eb72 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -513,6 +513,14 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this) } } + index = afr_index_for_transaction_type (local->transaction.type); + if (local->optimistic_change_log && + local->transaction.type != AFR_DATA_TRANSACTION) { + /* if nothing_failed, then local->pending[..] == {0 .. 0} */ + for (i = 0; i < priv->child_count; i++) + local->pending[i][index]++; + } + for (i = 0; i < priv->child_count; i++) { if (!local->child_up[i]) continue; @@ -568,6 +576,12 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this) break; case AFR_METADATA_TRANSACTION: { + if (nothing_failed) { + afr_changelog_post_op_cbk (frame, (void *)(long)i, + this, 1, 0, xattr[i]); + break; + } + if (local->fd) STACK_WIND (frame, afr_changelog_post_op_cbk, priv->children[i], @@ -585,12 +599,17 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this) case AFR_ENTRY_RENAME_TRANSACTION: { - STACK_WIND_COOKIE (frame, afr_changelog_post_op_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->xattrop, - &local->transaction.new_parent_loc, - GF_XATTROP_ADD_ARRAY, xattr[i]); + if (nothing_failed) { + afr_changelog_post_op_cbk (frame, (void *)(long)i, + this, 1, 0, xattr[i]); + } else { + STACK_WIND_COOKIE (frame, afr_changelog_post_op_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->xattrop, + &local->transaction.new_parent_loc, + GF_XATTROP_ADD_ARRAY, xattr[i]); + } call_count--; } @@ -613,6 +632,12 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this) case AFR_ENTRY_TRANSACTION: { + if (nothing_failed) { + afr_changelog_post_op_cbk (frame, (void *)(long)i, + this, 1, 0, xattr[i]); + break; + } + if (local->fd) STACK_WIND (frame, afr_changelog_post_op_cbk, priv->children[i], @@ -808,6 +833,12 
@@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this) break; case AFR_METADATA_TRANSACTION: { + if (local->optimistic_change_log) { + afr_changelog_pre_op_cbk (frame, (void *)(long)i, + this, 1, 0, xattr[i]); + break; + } + if (local->fd) STACK_WIND_COOKIE (frame, afr_changelog_pre_op_cbk, @@ -829,13 +860,18 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this) case AFR_ENTRY_RENAME_TRANSACTION: { - STACK_WIND_COOKIE (frame, - afr_changelog_pre_op_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->xattrop, - &local->transaction.new_parent_loc, - GF_XATTROP_ADD_ARRAY, xattr[i]); + if (local->optimistic_change_log) { + afr_changelog_pre_op_cbk (frame, (void *)(long)i, + this, 1, 0, xattr[i]); + } else { + STACK_WIND_COOKIE (frame, + afr_changelog_pre_op_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->xattrop, + &local->transaction.new_parent_loc, + GF_XATTROP_ADD_ARRAY, xattr[i]); + } call_count--; } @@ -860,6 +896,12 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this) case AFR_ENTRY_TRANSACTION: { + if (local->optimistic_change_log) { + afr_changelog_pre_op_cbk (frame, (void *)(long)i, + this, 1, 0, xattr[i]); + break; + } + if (local->fd) STACK_WIND_COOKIE (frame, afr_changelog_pre_op_cbk, diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 775a53a8f..cb4582505 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -71,6 +71,7 @@ validate_options (xlator_t *this, dict_t *options, char **op_errstr) gf_boolean_t metadata_change_log; gf_boolean_t entry_change_log; gf_boolean_t strict_readdir; + gf_boolean_t optimistic_change_log; xlator_list_t * trav = NULL; @@ -257,6 +258,26 @@ validate_options (xlator_t *this, dict_t *options, char **op_errstr) "change-log %s'.", change_log); } + + dict_ret = dict_get_str (options, "optimistic-change-log", + &change_log); + if (dict_ret == 0) { + temp_ret = gf_string2boolean (change_log, 
&optimistic_change_log); + if (temp_ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "Validation faled for optimistic-change-log"); + *op_errstr = gf_strdup ("Error, option should be boolean"); + ret = -1; + goto out; + } + + + gf_log (this->name, GF_LOG_DEBUG, + "Validated 'option optimistic-" + "change-log %s'.", change_log); + } + + read_ret = dict_get_str (options, "read-subvolume", &read_subvol); if (read_ret) @@ -674,6 +695,7 @@ init (xlator_t *this) priv->data_change_log = 1; priv->metadata_change_log = 1; priv->entry_change_log = 1; + priv->optimistic_change_log = 1; dict_ret = dict_get_str (this->options, "data-change-log", &change_log); @@ -715,6 +737,19 @@ init (xlator_t *this) } } + dict_ret = dict_get_str (this->options, "optimistic-change-log", + &change_log); + if (dict_ret == 0) { + ret = gf_string2boolean (change_log, &priv->optimistic_change_log); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "Invalid 'option optimistic-change-log %s'. " + "Defaulting to optimistic-change-log as 'on'.", + change_log); + priv->optimistic_change_log = 1; + } + } + /* Locking options */ priv->inodelk_trace = 0; @@ -994,6 +1029,9 @@ struct volume_options options[] = { { .key = {"entry-change-log"}, .type = GF_OPTION_TYPE_BOOL }, + { .key = {"optimistic-change-log"}, + .type = GF_OPTION_TYPE_BOOL + }, { .key = {"data-lock-server-count"}, .type = GF_OPTION_TYPE_INT, .min = 0 diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 758ac789a..a7359f269 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -88,6 +88,7 @@ typedef struct _afr_private { pthread_mutex_t mutex; struct list_head saved_fds; /* list of fds on which locks have succeeded */ + gf_boolean_t optimistic_change_log; } afr_private_t; typedef struct { @@ -312,6 +313,7 @@ typedef struct _afr_local { int32_t lock_recovery_child; dict_t *dict; + int optimistic_change_log; int (*openfd_flush_cbk) (call_frame_t *frame, xlator_t *this); @@ -805,6 
+807,8 @@ AFR_BASENAME (const char *str) static inline int AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv) { + int child_up_count = 0; + local->child_up = GF_CALLOC (sizeof (*local->child_up), priv->child_count, gf_afr_mt_char); @@ -815,6 +819,10 @@ AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv) memcpy (local->child_up, priv->child_up, sizeof (*local->child_up) * priv->child_count); + child_up_count = afr_up_children_count (priv->child_count, local->child_up); + + if (priv->optimistic_change_log && child_up_count == priv->child_count) + local->optimistic_change_log = 1; local->call_count = afr_up_children_count (priv->child_count, local->child_up); if (local->call_count == 0) |