diff options
author | Ravishankar N <ravishankar@redhat.com> | 2020-02-11 14:34:48 +0530 |
---|---|---|
committer | hari gowtham <hari.gowtham005@gmail.com> | 2020-02-28 06:06:10 +0000 |
commit | 559fd060c59edec69ba66be7e0a447c8e0408d51 (patch) | |
tree | 7108161c3fc0f1c8b9467ffffb7c1fe6ac77354f /xlators | |
parent | 922c41d2d001df4d447280620bec6a2c4cf63357 (diff) |
afr: prevent spurious entry heals leading to gfid split-brain
Problem:
In a hyperconverged setup with granular-entry-heal enabled, if a file is
recreated while one of the bricks is down, and an index heal is triggered
(with the brick still down), entry self-heal was doing a spurious heal
with just the 2 good bricks. It was doing a post-op leading to removal
of the filename from .glusterfs/indices/entry-changes as well as
erroneous setting of afr xattrs on the parent. When the brick came up,
the xattrs were cleared, resulting in the recreated file not getting
healed and leading to gfid split-brain and EIO on the mount.
Fix:
Proceed with entry heal only when the self-heal daemon (shd) can connect to all
bricks of the replica, just as data and metadata heal already require.
fixes: bz#1804594
Change-Id: I916ae26ad1fabf259bc6362da52d433b7223b17e
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
(cherry picked from commit 06453d77d056fbaa393a137ca277a20e38d2f67e)
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 4 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 8 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 6 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-name.c | 2 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 2 |
5 files changed, 7 insertions, 15 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index a6fca0d5538..de4c306a0f4 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -6651,7 +6651,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque) ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, 0, 0, locked_on); { - if (ret < AFR_SH_MIN_PARTICIPANTS) + if (ret < priv->child_count) goto data_unlock; ret = __afr_selfheal_data_prepare( heal_frame, this, inode, locked_on, sources, sinks, @@ -6668,7 +6668,7 @@ afr_fav_child_reset_sink_xattrs(void *opaque) ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, LLONG_MAX - 1, 0, locked_on); { - if (ret < AFR_SH_MIN_PARTICIPANTS) + if (ret < priv->child_count) goto mdata_unlock; ret = __afr_selfheal_metadata_prepare( heal_frame, this, inode, locked_on, sources, sinks, diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 8d7699b227b..8b28f5368f9 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1567,7 +1567,6 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, char *accused = NULL; /* Accused others without any self-accusal */ char *pending = NULL; /* Have pending operations on others */ char *self_accused = NULL; /* Accused itself */ - int min_participants = -1; priv = this->private; @@ -1591,12 +1590,7 @@ afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this, } } - if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION) { - min_participants = priv->child_count; - } else { - min_participants = AFR_SH_MIN_PARTICIPANTS; - } - if (afr_success_count(replies, priv->child_count) < min_participants) { + if (afr_success_count(replies, priv->child_count) < priv->child_count) { /* Treat this just like locks not being acquired */ return -ENOTCONN; } diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c 
b/xlators/cluster/afr/src/afr-self-heal-entry.c index 810eeb05f9a..5475fca6342 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -584,7 +584,7 @@ afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd, ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL, locked_on); { - if (ret < AFR_SH_MIN_PARTICIPANTS) { + if (ret < priv->child_count) { gf_msg_debug(this->name, 0, "%s: Skipping " "entry self-heal as only %d sub-volumes " @@ -973,7 +973,7 @@ __afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd, ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL, data_lock); { - if (ret < AFR_SH_MIN_PARTICIPANTS) { + if (ret < priv->child_count) { gf_msg_debug(this->name, 0, "%s: Skipping " "entry self-heal as only %d sub-volumes could " @@ -1097,7 +1097,7 @@ afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode) ret = afr_selfheal_tie_breaker_entrylk(frame, this, inode, priv->sh_domain, NULL, locked_on); { - if (ret < AFR_SH_MIN_PARTICIPANTS) { + if (ret < priv->child_count) { gf_msg_debug(this->name, 0, "%s: Skipping " "entry self-heal as only %d sub-volumes could " diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c index 36640b5456b..7d4f2080ec3 100644 --- a/xlators/cluster/afr/src/afr-self-heal-name.c +++ b/xlators/cluster/afr/src/afr-self-heal-name.c @@ -514,7 +514,7 @@ afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent, ret = afr_selfheal_entrylk(frame, this, parent, this->name, bname, locked_on); { - if (ret < AFR_SH_MIN_PARTICIPANTS) { + if (ret < priv->child_count) { ret = -ENOTCONN; goto unlock; } diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 703f80e05cb..ed078bb0271 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -11,8 +11,6 @@ #ifndef 
_AFR_SELFHEAL_H #define _AFR_SELFHEAL_H -#define AFR_SH_MIN_PARTICIPANTS 2 - /* Perform fop on all UP subvolumes and wait for all callbacks to return */ #define AFR_ONALL(frame, rfn, fop, args...) \ |