diff options
author | Ravishankar N <ravishankar@redhat.com> | 2017-09-27 10:32:36 +0530 |
---|---|---|
committer | Shyamsundar Ranganathan <srangana@redhat.com> | 2017-10-02 12:34:23 +0000 |
commit | f5998f07dfd21d06a4119416ca79db50232b50d4 (patch) | |
tree | b72521e7fd07cb7cec55029ad7bfbae9a12c62f2 /xlators | |
parent | 5eab919dee035b9cf1b7f060bcf2d9eaa1e92eb3 (diff) |
afr: auto-resolve split-brains for zero-byte files
Backport of https://review.gluster.org/#/c/18283/
Problems:
As described in BZ 1491670, renaming hardlinks can result in data/mdata
split-brain of the DHT link-to files (T files) without any mismatch of
data and metadata.
As described in BZ 1486063, for a zero-byte file with only dirty bits
set, the arbiter brick will likely be chosen as the source brick.
Fix:
For zero-byte files in split-brain, pick the first brick as the
a) data source, if the file size is zero on all bricks;
b) metadata source, if the metadata is the same on all bricks.
In the arbiter case, if the file size is zero on all bricks and there are
no pending afr xattrs, pick the 1st brick as the data source.
Change-Id: I0270a9a2f97c3b21087e280bb890159b43975e04
BUG: 1496321
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reported-by: Rahul Hinduja <rhinduja@redhat.com>
Reported-by: Mabi <mabi@protonmail.ch>
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 65 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 6 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 8 |
3 files changed, 79 insertions, 0 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 484b7dca54e..0d08bee861e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -922,6 +922,65 @@ afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame,
         return fav_child;
 }
 
+int
+afr_mark_source_sinks_if_file_empty (xlator_t *this, unsigned char *sources,
+                                     unsigned char *sinks,
+                                     unsigned char *healed_sinks,
+                                     unsigned char *locked_on,
+                                     struct afr_reply *replies,
+                                     afr_transaction_type type)
+{
+        int source = -1;
+        int i = 0;
+        afr_private_t *priv = this->private;
+        struct iatt stbuf = {0, };
+
+        if ((AFR_COUNT (locked_on, priv->child_count) < priv->child_count) ||
+            (afr_success_count(replies, priv->child_count) < priv->child_count))
+                return -1;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (replies[i].poststat.ia_size != 0)
+                        return -1;
+        }
+
+        if (type == AFR_DATA_TRANSACTION)
+                goto mark;
+
+        /*For AFR_METADATA_TRANSACTION, metadata must be same on all bricks.*/
+        stbuf = replies[0].poststat;
+        for (i = 1; i < priv->child_count; i++) {
+                if ((!IA_EQUAL (stbuf, replies[i].poststat, type)) ||
+                    (!IA_EQUAL (stbuf, replies[i].poststat, uid)) ||
+                    (!IA_EQUAL (stbuf, replies[i].poststat, gid)) ||
+                    (!IA_EQUAL (stbuf, replies[i].poststat, prot)))
+                        return -1;
+        }
+        for (i = 1; i < priv->child_count; i++) {
+                if (!afr_xattrs_are_equal (replies[0].xdata,
+                                           replies[i].xdata))
+                        return -1;
+        }
+
+mark:
+        /* All bricks have a zero-byte file. Pick one of them as source. Rest
+         * are sinks.*/
+        for (i = 0 ; i < priv->child_count; i++) {
+                if (source == -1) {
+                        source = i;
+                        sources[i] = 1;
+                        sinks[i] = 0;
+                        healed_sinks[i] = 0;
+                        continue;
+                }
+                sources[i] = 0;
+                sinks[i] = 1;
+                healed_sinks[i] = 1;
+        }
+
+        return source;
+}
+
 /* Return a source depending on the type of heal_op, and set sources[source],
  * sinks[source] and healed_sinks[source] to 1, 0 and 0 respectively. Do so
  * only if the following condition is met:
@@ -950,6 +1009,12 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
         priv = this->private;
         xdata_req = local->xdata_req;
 
+        source = afr_mark_source_sinks_if_file_empty (this, sources, sinks,
+                                                      healed_sinks, locked_on,
+                                                      replies, type);
+        if (source >= 0)
+                return source;
+
         ret = dict_get_int32 (xdata_req, "heal-op", &heal_op);
         if (ret)
                 goto autoheal;
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 82c9ceb5c1f..e51add6ce5c 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -579,6 +579,12 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this,
         afr_mark_largest_file_as_source (this, sources, replies);
         afr_mark_biggest_witness_as_source (this, sources, witness);
         afr_mark_newest_file_as_source (this, sources, replies);
+        if (priv->arbiter_count)
+                /* Choose non-arbiter brick as source for empty files. */
+                afr_mark_source_sinks_if_file_empty (this, sources, sinks,
+                                                     healed_sinks, locked_on,
+                                                     replies,
+                                                     AFR_DATA_TRANSACTION);
 
 out:
         afr_mark_active_sinks (this, sources, locked_on, healed_sinks);
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index ded4903a4c7..92364d26735 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -317,4 +317,12 @@ afr_choose_source_by_policy (afr_private_t *priv, unsigned char *sources,
 
 int
 afr_selfheal_metadata_by_stbuf (xlator_t *this, struct iatt *stbuf);
+
+int
+afr_mark_source_sinks_if_file_empty (xlator_t *this, unsigned char *sources,
+                                     unsigned char *sinks,
+                                     unsigned char *healed_sinks,
+                                     unsigned char *locked_on,
+                                     struct afr_reply *replies,
+                                     afr_transaction_type type);
 #endif /* !_AFR_SELFHEAL_H */