From 6fb49f18a9bbfd1266b4773e757e459519c6719c Mon Sep 17 00:00:00 2001 From: Anand Avati Date: Tue, 9 Nov 2010 05:27:02 +0000 Subject: replicate: optimistic changelog The standard way of maintaining changelog in replicate has been to write out pending flags and to unset the pending flag after the actual operation. This new optimization kicks in only when all subvolumes are up. The optimization is that, during pre-op, no changelog is written for METADATA and ENTRY/RENAME operations. If during the operation nothing failed, no changelog is updated in post-op either. If, however, something does fail during an operation, then pending flags get written during post-op pointing only towards the failed nodes. DATA transactions continue to work the way they are. If one subvolume is down, pending flags are written in pre-op changelog itself as before. The impact of this optimization is only in the case when both servers die or the client dies while the 'FOP' stage of the transaction is in progress. By nature of METADATA and ENTRY operations, detecting a mismatch later is not dependent on the presence of changelog. Changelog only determines the direction in which self-heal happens for these types of transactions. For the direction too this optimization does not have a major impact because in the cases of failure (both servers dying or client dying) the final state (direction of self-heal) would be arbitrary anyway as the syscall wouldn't have completed. Signed-off-by: Anand V. Avati Signed-off-by: Anand V. 
Avati BUG: 2068 (performance enhancements) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2068 --- xlators/cluster/afr/src/afr.c | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'xlators/cluster/afr/src/afr.c') diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 775a53a8f..cb4582505 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -71,6 +71,7 @@ validate_options (xlator_t *this, dict_t *options, char **op_errstr) gf_boolean_t metadata_change_log; gf_boolean_t entry_change_log; gf_boolean_t strict_readdir; + gf_boolean_t optimistic_change_log; xlator_list_t * trav = NULL; @@ -257,6 +258,26 @@ validate_options (xlator_t *this, dict_t *options, char **op_errstr) "change-log %s'.", change_log); } + + dict_ret = dict_get_str (options, "optimistic-change-log", + &change_log); + if (dict_ret == 0) { + temp_ret = gf_string2boolean (change_log, &optimistic_change_log); + if (temp_ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "Validation faled for optimistic-change-log"); + *op_errstr = gf_strdup ("Error, option should be boolean"); + ret = -1; + goto out; + } + + + gf_log (this->name, GF_LOG_DEBUG, + "Validated 'option optimistic-" + "change-log %s'.", change_log); + } + + read_ret = dict_get_str (options, "read-subvolume", &read_subvol); if (read_ret) @@ -674,6 +695,7 @@ init (xlator_t *this) priv->data_change_log = 1; priv->metadata_change_log = 1; priv->entry_change_log = 1; + priv->optimistic_change_log = 1; dict_ret = dict_get_str (this->options, "data-change-log", &change_log); @@ -715,6 +737,19 @@ init (xlator_t *this) } } + dict_ret = dict_get_str (this->options, "optimistic-change-log", + &change_log); + if (dict_ret == 0) { + ret = gf_string2boolean (change_log, &priv->optimistic_change_log); + if (ret < 0) { + gf_log (this->name, GF_LOG_WARNING, + "Invalid 'option optimistic-change-log %s'. 
" + "Defaulting to optimistic-change-log as 'on'.", + change_log); + priv->optimistic_change_log = 1; + } + } + /* Locking options */ priv->inodelk_trace = 0; @@ -994,6 +1029,9 @@ struct volume_options options[] = { { .key = {"entry-change-log"}, .type = GF_OPTION_TYPE_BOOL }, + { .key = {"optimistic-change-log"}, + .type = GF_OPTION_TYPE_BOOL + }, { .key = {"data-lock-server-count"}, .type = GF_OPTION_TYPE_INT, .min = 0 -- cgit