From a3e5c0566a7d867d16d80ca28657238ff1008a22 Mon Sep 17 00:00:00 2001
From: Ashish Pandey <aspandey@redhat.com>
Date: Mon, 28 Nov 2016 13:42:33 +0530
Subject: cluster/ec: Healing should not start if only "data" bricks are UP

Problem: In a disperse volume with a "K+R" configuration, where
"K" is the number of data bricks and "R" is the number of redundancy
bricks (total number of bricks, N = K+R), if only K bricks are UP,
we should NOT start the heal process. This is because the bricks that
are supposed to be healed are not UP, so starting a heal would
unnecessarily eat up resources.

Solution: Check xl_up_count and start the heal process only
if it is greater than ec->fragments (the number of data bricks).

Change-Id: I8579f39cfb47b65ff0f76e623b048bd67b15473b
BUG: 1399072
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
Reviewed-on: http://review.gluster.org/15937
Reviewed-by: Xavier Hernandez <xhernandez@datalab.es>
Smoke: Gluster Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
---
 xlators/cluster/ec/src/ec-heald.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

(limited to 'xlators/cluster')

diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index 9860f10eadd..ffb78d5e950 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -330,18 +330,20 @@ ec_shd_index_healer (void *data)
 
         healer = data;
         THIS = this = healer->this;
+        ec_t *ec = this->private;
 
         for (;;) {
                 ec_shd_healer_wait (healer);
 
                 ASSERT_LOCAL(this, healer);
 
-                gf_msg_debug (this->name, 0,
-                        "starting index sweep on subvol %s",
-                        ec_subvol_name (this, healer->subvol));
-
-                ec_shd_index_sweep (healer);
 
+                if (ec->xl_up_count > ec->fragments) {
+                        gf_msg_debug (this->name, 0,
+                                "starting index sweep on subvol %s",
+                                ec_subvol_name (this, healer->subvol));
+                        ec_shd_index_sweep (healer);
+                }
                 gf_msg_debug (this->name, 0,
                         "finished index sweep on subvol %s",
                         ec_subvol_name (this, healer->subvol));
@@ -362,6 +364,7 @@ ec_shd_full_healer (void *data)
 
         healer = data;
         THIS = this = healer->this;
+        ec_t *ec = this->private;
 
         rootloc.inode = this->itable->root;
         for (;;) {
@@ -378,13 +381,16 @@ ec_shd_full_healer (void *data)
 
                 ASSERT_LOCAL(this, healer);
 
-                gf_msg (this->name, GF_LOG_INFO, 0,
-                        EC_MSG_FULL_SWEEP_START,
-                        "starting full sweep on subvol %s",
-                        ec_subvol_name (this, healer->subvol));
 
-                ec_shd_selfheal (healer, healer->subvol, &rootloc);
-                ec_shd_full_sweep (healer, this->itable->root);
+                if (ec->xl_up_count > ec->fragments) {
+                        gf_msg (this->name, GF_LOG_INFO, 0,
+                                EC_MSG_FULL_SWEEP_START,
+                                "starting full sweep on subvol %s",
+                                ec_subvol_name (this, healer->subvol));
+
+                        ec_shd_selfheal (healer, healer->subvol, &rootloc);
+                        ec_shd_full_sweep (healer, this->itable->root);
+                }
 
                 gf_msg (this->name, GF_LOG_INFO, 0,
                         EC_MSG_FULL_SWEEP_STOP,
-- 
cgit