From 73f586a007dcffc9692f5f6be3a512a31c4a31ce Mon Sep 17 00:00:00 2001
From: Xavi Hernandez <xhernandez@redhat.com>
Date: Wed, 31 Oct 2018 12:26:43 +0100
Subject: cluster/ec: prevent infinite loop in self-heal full

There was a problem in commit 7f81067 that caused an infinite loop when
a full heal was triggered.

The previous commit was made to prevent self-heal from going idle after
a replace brick operation. One of the changes consisted of setting a
flag to force an immediate scan of the dirty directory if a heal on
a directory succeeded (assuming it could have generated newer entries).

However, that change was causing an issue with a full self-heal, since
every time an already healed directory was checked and it returned
successfully, it was also setting the flag, forcing self-heal to start
over again.

This patch fixes this issue by only setting the flag if the heal is not
full. It's assumed that a full self-heal will already traverse all
entries automatically, so there's no need to force a new scan later.

>Change-Id: Id12dbfc04e622b18183e796cc6cc87ccc30a6d55
>fixes: bz#1636631
>Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
(cherry picked from commit 7150c51ad75ccba22045a35fc31e5037612d1ad4)

Change-Id: Id12dbfc04e622b18183e796cc6cc87ccc30a6d55
fixes: bz#1651525
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
---
 xlators/cluster/ec/src/ec-heald.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index cc1062dd353..130790c66ac 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -153,12 +153,13 @@ ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name)
 }
 
 int
-ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc)
+ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
+                gf_boolean_t full)
 {
     int32_t ret;
 
     ret = syncop_getxattr(healer->this, loc, NULL, EC_XATTR_HEAL, NULL, NULL);
-    if ((ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) {
+    if (!full && (ret >= 0) && (loc->inode->ia_type == IA_IFDIR)) {
         /* If we have just healed a directory, it's possible that
          * other index entries have appeared to be healed. We put a
          * mark so that we can check it later and restart a scan
@@ -202,7 +203,7 @@ ec_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
     if (ret < 0)
         goto out;
 
-    ec_shd_selfheal(healer, healer->subvol, &loc);
+    ec_shd_selfheal(healer, healer->subvol, &loc, _gf_false);
 out:
     if (ret == -ENOENT || ret == -ESTALE) {
         gf_msg(healer->this->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
@@ -290,7 +291,7 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
     if (ret < 0)
         goto out;
 
-    ec_shd_selfheal(healer, healer->subvol, &loc);
+    ec_shd_selfheal(healer, healer->subvol, &loc, _gf_true);
 
     ret = 0;
 
@@ -367,7 +368,7 @@ ec_shd_full_healer(void *data)
                    "starting full sweep on subvol %s",
                    ec_subvol_name(this, healer->subvol));
 
-            ec_shd_selfheal(healer, healer->subvol, &rootloc);
+            ec_shd_selfheal(healer, healer->subvol, &rootloc, _gf_true);
             ec_shd_full_sweep(healer, this->itable->root);
         }
 
-- 
cgit