diff options
author | Ravishankar N <ravishankar@redhat.com> | 2020-02-26 16:08:05 +0530 |
---|---|---|
committer | Ravishankar N <ravishankar@redhat.com> | 2020-02-27 07:16:20 +0000 |
commit | 2b578af8aad0757f5aed6611e2a03d70f3e295e2 (patch) | |
tree | 18a99651e30b140c74d3d0eec21b07d9464cdf76 | |
parent | 71368e27a23e9090719e8a529bed996275464330 (diff) |
afr: wake up index healer threads
Backport of https://review.gluster.org/#/c/glusterfs/+/23288/
...whenever the self-heal daemon (shd) is re-enabled after being disabled, or
when `cluster.heal-timeout` is changed, without needing to restart shd or wait
for the current `cluster.heal-timeout` interval to expire.
See BZ 1743988 for more details.
Change-Id: Ia5ebd7c8e9f5b54cba3199c141fdd1af2f9b9bfe
fixes: bz#1807431
Reported-by: Glen Kiessling <glenk1973@hotmail.com>
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
-rw-r--r-- | tests/bugs/replicate/bug-1744548-heal-timeout.t | 42 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 6 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 14 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.h | 3 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.c | 10 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 3 |
6 files changed, 67 insertions, 11 deletions
diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t new file mode 100644 index 00000000000..3cb73bcad52 --- /dev/null +++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t @@ -0,0 +1,42 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +TEST glusterd; +TEST pidof glusterd; +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +TEST $CLI volume heal $V0 disable +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 +TEST ! $CLI volume heal $V0 + +# Enable shd and verify that index crawl is triggered immediately. +TEST $CLI volume profile $V0 start +TEST $CLI volume profile $V0 info clear +TEST $CLI volume heal $V0 enable +TEST $CLI volume heal $V0 +# Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes +COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` +TEST [ "$COUNT" == "333" ] + +# Check that a change in heal-timeout is honoured immediately. +TEST $CLI volume set $V0 cluster.heal-timeout 5 +sleep 10 +COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` +# Two crawls must have happened. +TEST [ "$COUNT" == "666" ] + +# shd must not heal if it is disabled and heal-timeout is changed. 
+TEST $CLI volume heal $V0 disable +TEST $CLI volume profile $V0 info clear +TEST $CLI volume set $V0 cluster.heal-timeout 6 +sleep 6 +COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` +TEST [ -z $COUNT ] +cleanup; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index a1a36bb965e..fb52b110f51 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -5615,10 +5615,8 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2) * b) Already heard from everyone, but we now got a child-up * event. */ - if (have_heard_from_all && priv->shd.iamshd) { - for (i = 0; i < priv->child_count; i++) - if (priv->child_up[i]) - afr_selfheal_childup(this, i); + if (have_heard_from_all) { + afr_selfheal_childup(this, priv); } } out: diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 5ad39a2e341..8dc788dfff2 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -1264,12 +1264,18 @@ out: return ret; } -int -afr_selfheal_childup(xlator_t *this, int subvol) +void +afr_selfheal_childup(xlator_t *this, afr_private_t *priv) { - afr_shd_index_healer_spawn(this, subvol); + int subvol = 0; - return 0; + if (!priv->shd.iamshd) + return; + for (subvol = 0; subvol < priv->child_count; subvol++) + if (priv->child_up[subvol]) + afr_shd_index_healer_spawn(this, subvol); + + return; } int diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index 7de7c431460..19905394540 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -60,9 +60,6 @@ typedef struct { } afr_self_heald_t; int -afr_selfheal_childup(xlator_t *this, int subvol); - -int afr_selfheal_daemon_init(xlator_t *this); int diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 
5d5e536ff60..f87b2b37021 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -126,12 +126,14 @@ reconfigure(xlator_t *this, dict_t *options) afr_private_t *priv = NULL; xlator_t *read_subvol = NULL; int read_subvol_index = -1; + int timeout_old = 0; int ret = -1; int index = -1; char *qtype = NULL; char *fav_child_policy = NULL; gf_boolean_t consistent_io = _gf_false; gf_boolean_t choose_local_old = _gf_false; + gf_boolean_t enabled_old = _gf_false; priv = this->private; @@ -236,11 +238,13 @@ reconfigure(xlator_t *this, dict_t *options) GF_OPTION_RECONF("ensure-durability", priv->ensure_durability, options, bool, out); + enabled_old = priv->shd.enabled; GF_OPTION_RECONF("self-heal-daemon", priv->shd.enabled, options, bool, out); GF_OPTION_RECONF("iam-self-heal-daemon", priv->shd.iamshd, options, bool, out); + timeout_old = priv->shd.timeout; GF_OPTION_RECONF("heal-timeout", priv->shd.timeout, options, int32, out); GF_OPTION_RECONF("consistent-metadata", priv->consistent_metadata, options, @@ -264,6 +268,12 @@ reconfigure(xlator_t *this, dict_t *options) consistent_io = _gf_false; priv->consistent_io = consistent_io; + if (priv->shd.enabled) { + if ((priv->shd.enabled != enabled_old) || + (timeout_old != priv->shd.timeout)) + afr_selfheal_childup(this, priv); + } + ret = 0; out: return ret; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 68d4ae48b60..395a6931ea1 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -1325,4 +1325,7 @@ afr_ta_lock_release_synctask(xlator_t *this); void afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this); +void +afr_selfheal_childup(xlator_t *this, afr_private_t *priv); + #endif /* __AFR_H__ */ |