summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2020-02-26 16:08:05 +0530
committerRavishankar N <ravishankar@redhat.com>2020-02-27 07:16:20 +0000
commit2b578af8aad0757f5aed6611e2a03d70f3e295e2 (patch)
tree18a99651e30b140c74d3d0eec21b07d9464cdf76
parent71368e27a23e9090719e8a529bed996275464330 (diff)
afr: wake up index healer threads
Backport of https://review.gluster.org/#/c/glusterfs/+/23288/

...whenever shd is re-enabled after disabling or there is a change in
`cluster.heal-timeout`, without needing to restart shd or waiting for the
current `cluster.heal-timeout` seconds to expire. See BZ 1743988 for more
details.

Change-Id: Ia5ebd7c8e9f5b54cba3199c141fdd1af2f9b9bfe
fixes: bz#1807431
Reported-by: Glen Kiessling <glenk1973@hotmail.com>
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
-rw-r--r--tests/bugs/replicate/bug-1744548-heal-timeout.t42
-rw-r--r--xlators/cluster/afr/src/afr-common.c6
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c14
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h3
-rw-r--r--xlators/cluster/afr/src/afr.c10
-rw-r--r--xlators/cluster/afr/src/afr.h3
6 files changed, 67 insertions, 11 deletions
diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t
new file mode 100644
index 00000000000..3cb73bcad52
--- /dev/null
+++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST ! $CLI volume heal $V0
+
+# Enable shd and verify that index crawl is triggered immediately.
+TEST $CLI volume profile $V0 start
+TEST $CLI volume profile $V0 info clear
+TEST $CLI volume heal $V0 enable
+TEST $CLI volume heal $V0
+# Each of the 3 bricks does 3 opendirs (dirty, xattrop and entry-changes); the per-brick counts are concatenated, hence "333".
+COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'`
+TEST [ "$COUNT" == "333" ]
+
+# Check that a change in heal-timeout is honoured immediately.
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+sleep 10
+COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'`
+# Two crawls must have happened: one woken up by the timeout change itself and one after the new 5-second timeout expired.
+TEST [ "$COUNT" == "666" ]
+
+# shd must not heal if it is disabled and heal-timeout is changed.
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume profile $V0 info clear
+TEST $CLI volume set $V0 cluster.heal-timeout 6
+sleep 6
+COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'`
+TEST [ -z $COUNT ]
+cleanup;
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index a1a36bb965e..fb52b110f51 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -5615,10 +5615,8 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
* b) Already heard from everyone, but we now got a child-up
* event.
*/
- if (have_heard_from_all && priv->shd.iamshd) {
- for (i = 0; i < priv->child_count; i++)
- if (priv->child_up[i])
- afr_selfheal_childup(this, i);
+ if (have_heard_from_all) {
+ afr_selfheal_childup(this, priv);
}
}
out:
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 5ad39a2e341..8dc788dfff2 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -1264,12 +1264,18 @@ out:
return ret;
}
-int
-afr_selfheal_childup(xlator_t *this, int subvol)
+void
+afr_selfheal_childup(xlator_t *this, afr_private_t *priv)
{
- afr_shd_index_healer_spawn(this, subvol);
+ int subvol = 0;
- return 0;
+ if (!priv->shd.iamshd)
+ return;
+ for (subvol = 0; subvol < priv->child_count; subvol++)
+ if (priv->child_up[subvol])
+ afr_shd_index_healer_spawn(this, subvol);
+
+ return;
}
int
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index 7de7c431460..19905394540 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -60,9 +60,6 @@ typedef struct {
} afr_self_heald_t;
int
-afr_selfheal_childup(xlator_t *this, int subvol);
-
-int
afr_selfheal_daemon_init(xlator_t *this);
int
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index 5d5e536ff60..f87b2b37021 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -126,12 +126,14 @@ reconfigure(xlator_t *this, dict_t *options)
afr_private_t *priv = NULL;
xlator_t *read_subvol = NULL;
int read_subvol_index = -1;
+ int timeout_old = 0;
int ret = -1;
int index = -1;
char *qtype = NULL;
char *fav_child_policy = NULL;
gf_boolean_t consistent_io = _gf_false;
gf_boolean_t choose_local_old = _gf_false;
+ gf_boolean_t enabled_old = _gf_false;
priv = this->private;
@@ -236,11 +238,13 @@ reconfigure(xlator_t *this, dict_t *options)
GF_OPTION_RECONF("ensure-durability", priv->ensure_durability, options,
bool, out);
+ enabled_old = priv->shd.enabled;
GF_OPTION_RECONF("self-heal-daemon", priv->shd.enabled, options, bool, out);
GF_OPTION_RECONF("iam-self-heal-daemon", priv->shd.iamshd, options, bool,
out);
+ timeout_old = priv->shd.timeout;
GF_OPTION_RECONF("heal-timeout", priv->shd.timeout, options, int32, out);
GF_OPTION_RECONF("consistent-metadata", priv->consistent_metadata, options,
@@ -264,6 +268,12 @@ reconfigure(xlator_t *this, dict_t *options)
consistent_io = _gf_false;
priv->consistent_io = consistent_io;
+ if (priv->shd.enabled) {
+ if ((priv->shd.enabled != enabled_old) ||
+ (timeout_old != priv->shd.timeout))
+ afr_selfheal_childup(this, priv);
+ }
+
ret = 0;
out:
return ret;
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 68d4ae48b60..395a6931ea1 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -1325,4 +1325,7 @@ afr_ta_lock_release_synctask(xlator_t *this);
void
afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this);
+void
+afr_selfheal_childup(xlator_t *this, afr_private_t *priv);
+
#endif /* __AFR_H__ */