From 73f586a007dcffc9692f5f6be3a512a31c4a31ce Mon Sep 17 00:00:00 2001 From: Xavi Hernandez Date: Wed, 31 Oct 2018 12:26:43 +0100 Subject: cluster/ec: prevent infinite loop in self-heal full There was a problem in commit 7f81067 that caused an infinite loop when full heal was triggered. The previous commit was made to prevent self-heal from going idle after a replace brick operation. One of the changes consisted of setting a flag to force an immediate scan of the dirty directory if a heal on a directory succeeded (assuming it could have generated newer entries). However, that change was causing an issue with a full self-heal, since every time an already healed directory was checked and it returned successfully, it was also setting the flag, forcing self-heal to start over again. This patch fixes this issue by only setting the flag if the heal is not full. It's assumed that a full self-heal will already traverse all entries automatically, so there's no need to force a new scan later. >Change-Id: Id12dbfc04e622b18183e796cc6cc87ccc30a6d55 >fixes: bz#1636631 >Signed-off-by: Xavi Hernandez (cherry picked from commit 7150c51ad75ccba22045a35fc31e5037612d1ad4) Change-Id: Id12dbfc04e622b18183e796cc6cc87ccc30a6d55 fixes: bz#1651525 Signed-off-by: Xavi Hernandez --- xlators/cluster/ec/src/ec-heald.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c index cc1062dd353..130790c66ac 100644 --- a/xlators/cluster/ec/src/ec-heald.c +++ b/xlators/cluster/ec/src/ec-heald.c @@ -153,12 +153,13 @@ ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name) } int -ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc) +ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc, + gf_boolean_t full) { int32_t ret; ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, NULL); - if ((ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) { + if (!full && (ret >= 0) && 
(loc->inode->ia_type == IA_IFDIR)) { /* If we have just healed a directory, it's possible that * other index entries have appeared to be healed. We put a * mark so that we can check it later and restart a scan @@ -202,7 +203,7 @@ ec_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, if (ret < 0) goto out; - ec_shd_selfheal(healer, healer->subvol, &loc); + ec_shd_selfheal(healer, healer->subvol, &loc, _gf_false); out: if (ret == -ENOENT || ret == -ESTALE) { gf_msg(healer->this->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL, @@ -290,7 +291,7 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent, if (ret < 0) goto out; - ec_shd_selfheal(healer, healer->subvol, &loc); + ec_shd_selfheal(healer, healer->subvol, &loc, _gf_true); ret = 0; @@ -367,7 +368,7 @@ ec_shd_full_healer(void *data) "starting full sweep on subvol %s", ec_subvol_name(this, healer->subvol)); - ec_shd_selfheal(healer, healer->subvol, &rootloc); + ec_shd_selfheal(healer, healer->subvol, &rootloc, _gf_true); ec_shd_full_sweep(healer, this->itable->root); } -- cgit