diff options
author | Ravishankar N <ravishankar@redhat.com> | 2017-09-14 11:29:15 +0530 |
---|---|---|
committer | Ravishankar N <ravishankar@redhat.com> | 2017-09-26 04:04:18 +0000 |
commit | 1719cffa911c5287715abfdb991bc8862f0c994e (patch) | |
tree | 3d524fab0d31fe7b3512fab1e4e6fa08b1f29ff8 /xlators/cluster/afr | |
parent | 898f0b7ce31ddf8ec02e572c5d22eff2e4205b4c (diff) |
afr: auto-resolve split-brains for zero-byte files
Problems:
As described in BZ 1491670, renaming hardlinks can result in data/mdata
split-brain of the DHT link-to files (T files) without any mismatch of
data and metadata.
As described in BZ 1486063, for a zero-byte file with only dirty bits
set, arbiter brick will likely be chosen as the source brick.
Fix:
For zero-byte files in split-brain, pick the first brick as
a) data source if file size is zero on all bricks.
b) metadata source if metadata is the same on all bricks.
In the arbiter case, if file size is zero on all bricks and there are no
pending afr xattrs, pick the first brick as the data source.
Change-Id: I0270a9a2f97c3b21087e280bb890159b43975e04
BUG: 1491670
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reported-by: Rahul Hinduja <rhinduja@redhat.com>
Reported-by: Mabi <mabi@protonmail.ch>
Diffstat (limited to 'xlators/cluster/afr')
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 65 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 6 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 7 |
3 files changed, 78 insertions, 0 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 998289711df..6a159dc67d0 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1172,6 +1172,65 @@ afr_mark_split_brain_source_sinks_by_policy (call_frame_t *frame,
         return fav_child;
 }
 
+int
+afr_mark_source_sinks_if_file_empty (xlator_t *this, unsigned char *sources,
+                                     unsigned char *sinks,
+                                     unsigned char *healed_sinks,
+                                     unsigned char *locked_on,
+                                     struct afr_reply *replies,
+                                     afr_transaction_type type)
+{
+        int source = -1;
+        int i = 0;
+        afr_private_t *priv = this->private;
+        struct iatt stbuf = {0, };
+
+        if ((AFR_COUNT (locked_on, priv->child_count) < priv->child_count) ||
+            (afr_success_count(replies, priv->child_count) < priv->child_count))
+                return -1;
+
+        for (i = 0; i < priv->child_count; i++) {
+                if (replies[i].poststat.ia_size != 0)
+                        return -1;
+        }
+
+        if (type == AFR_DATA_TRANSACTION)
+                goto mark;
+
+        /*For AFR_METADATA_TRANSACTION, metadata must be same on all bricks.*/
+        stbuf = replies[0].poststat;
+        for (i = 1; i < priv->child_count; i++) {
+                if ((!IA_EQUAL (stbuf, replies[i].poststat, type)) ||
+                    (!IA_EQUAL (stbuf, replies[i].poststat, uid)) ||
+                    (!IA_EQUAL (stbuf, replies[i].poststat, gid)) ||
+                    (!IA_EQUAL (stbuf, replies[i].poststat, prot)))
+                        return -1;
+        }
+        for (i = 1; i < priv->child_count; i++) {
+                if (!afr_xattrs_are_equal (replies[0].xdata,
+                                           replies[i].xdata))
+                        return -1;
+        }
+
+mark:
+        /* All bricks have a zero-byte file. Pick one of them as source. Rest
+         * are sinks.*/
+        for (i = 0 ; i < priv->child_count; i++) {
+                if (source == -1) {
+                        source = i;
+                        sources[i] = 1;
+                        sinks[i] = 0;
+                        healed_sinks[i] = 0;
+                        continue;
+                }
+                sources[i] = 0;
+                sinks[i] = 1;
+                healed_sinks[i] = 1;
+        }
+
+        return source;
+}
+
 /* Return a source depending on the type of heal_op, and set sources[source],
  * sinks[source] and healed_sinks[source] to 1, 0 and 0 respectively. Do so
  * only if the following condition is met:
@@ -1200,6 +1259,12 @@ afr_mark_split_brain_source_sinks (call_frame_t *frame, xlator_t *this,
         priv = this->private;
         xdata_req = local->xdata_req;
 
+        source = afr_mark_source_sinks_if_file_empty (this, sources, sinks,
+                                                      healed_sinks, locked_on,
+                                                      replies, type);
+        if (source >= 0)
+                return source;
+
         ret = dict_get_int32 (xdata_req, "heal-op", &heal_op);
         if (ret)
                 goto autoheal;
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 13679608dfd..2c254e80aa1 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -581,6 +581,12 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this,
         afr_mark_largest_file_as_source (this, sources, replies);
         afr_mark_biggest_witness_as_source (this, sources, witness);
         afr_mark_newest_file_as_source (this, sources, replies);
+        if (priv->arbiter_count)
+                /* Choose non-arbiter brick as source for empty files. */
+                afr_mark_source_sinks_if_file_empty (this, sources, sinks,
+                                                     healed_sinks, locked_on,
+                                                     replies,
+                                                     AFR_DATA_TRANSACTION);
 
 out:
         afr_mark_active_sinks (this, sources, locked_on, healed_sinks);
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index de2970e6c5a..b54da1facfd 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -341,5 +341,12 @@ afr_gfid_split_brain_source (xlator_t *this, struct afr_reply *replies,
                              inode_t *inode, uuid_t pargfid, const char *bname,
                              int src_idx, int child_idx,
                              unsigned char *locked_on, int *src, dict_t *xdata);
+int
+afr_mark_source_sinks_if_file_empty (xlator_t *this, unsigned char *sources,
+                                     unsigned char *sinks,
+                                     unsigned char *healed_sinks,
+                                     unsigned char *locked_on,
+                                     struct afr_reply *replies,
+                                     afr_transaction_type type);
 
 #endif /* !_AFR_SELFHEAL_H */ |