diff options
-rw-r--r-- | tests/bugs/replicate/bug-1744548-heal-timeout.t | 42 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 6 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.c | 14 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heald.h | 3 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.c | 10 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 3 |
6 files changed, 67 insertions, 11 deletions
diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t new file mode 100644 index 00000000000..3cb73bcad52 --- /dev/null +++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t @@ -0,0 +1,42 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../afr.rc + +cleanup; + +TEST glusterd; +TEST pidof glusterd; +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +TEST $CLI volume heal $V0 disable +TEST $CLI volume start $V0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1 +EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2 +TEST ! $CLI volume heal $V0 + +# Enable shd and verify that index crawl is triggered immediately. +TEST $CLI volume profile $V0 start +TEST $CLI volume profile $V0 info clear +TEST $CLI volume heal $V0 enable +TEST $CLI volume heal $V0 +# Each brick does 3 opendirs, corresponding to dirty, xattrop and entry-changes +COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` +TEST [ "$COUNT" == "333" ] + +# Check that a change in heal-timeout is honoured immediately. +TEST $CLI volume set $V0 cluster.heal-timeout 5 +sleep 10 +COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` +# Two crawls must have happened. +TEST [ "$COUNT" == "666" ] + +# shd must not heal if it is disabled and heal-timeout is changed. +TEST $CLI volume heal $V0 disable +TEST $CLI volume profile $V0 info clear +TEST $CLI volume set $V0 cluster.heal-timeout 6 +sleep 6 +COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'` +TEST [ -z $COUNT ] +cleanup; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index bce0af5791a..679ccb2eebe 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -5667,10 +5667,8 @@ afr_notify(xlator_t *this, int32_t event, void *data, void *data2) * b) Already heard from everyone, but we now got a child-up * event. */ - if (have_heard_from_all && priv->shd.iamshd) { - for (i = 0; i < priv->child_count; i++) - if (priv->child_up[i]) - afr_selfheal_childup(this, i); + if (have_heard_from_all) { + afr_selfheal_childup(this, priv); } } out: diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c index 9c94835714f..8b4d2dc9cd1 100644 --- a/xlators/cluster/afr/src/afr-self-heald.c +++ b/xlators/cluster/afr/src/afr-self-heald.c @@ -1308,12 +1308,18 @@ out: return ret; } -int -afr_selfheal_childup(xlator_t *this, int subvol) +void +afr_selfheal_childup(xlator_t *this, afr_private_t *priv) { - afr_shd_index_healer_spawn(this, subvol); + int subvol = 0; - return 0; + if (!priv->shd.iamshd) + return; + for (subvol = 0; subvol < priv->child_count; subvol++) + if (priv->child_up[subvol]) + afr_shd_index_healer_spawn(this, subvol); + + return; } int diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h index 7de7c431460..19905394540 100644 --- a/xlators/cluster/afr/src/afr-self-heald.h +++ b/xlators/cluster/afr/src/afr-self-heald.h @@ -60,9 +60,6 @@ typedef struct { } afr_self_heald_t; int -afr_selfheal_childup(xlator_t *this, int subvol); - -int afr_selfheal_daemon_init(xlator_t *this); int diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 33a25cc5c0c..b2c64ecf0e6 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -141,6 +141,7 @@ reconfigure(xlator_t *this, dict_t *options) afr_private_t *priv = NULL; xlator_t *read_subvol = NULL; int read_subvol_index = -1; + int timeout_old = 0; int ret = -1; int index = -1; char *qtype = NULL; @@ -150,6 +151,7 @@ reconfigure(xlator_t *this, dict_t *options) char *locking_scheme = NULL; gf_boolean_t consistent_io = _gf_false; gf_boolean_t choose_local_old = _gf_false; + gf_boolean_t enabled_old = _gf_false; priv = this->private; @@ -255,11 +257,13 @@ reconfigure(xlator_t *this, dict_t *options) GF_OPTION_RECONF("ensure-durability", priv->ensure_durability, options, bool, out); + enabled_old = priv->shd.enabled; GF_OPTION_RECONF("self-heal-daemon", priv->shd.enabled, options, bool, out); GF_OPTION_RECONF("iam-self-heal-daemon", priv->shd.iamshd, options, bool, out); + timeout_old = priv->shd.timeout; GF_OPTION_RECONF("heal-timeout", priv->shd.timeout, options, int32, out); GF_OPTION_RECONF("consistent-metadata", priv->consistent_metadata, options, @@ -283,6 +287,12 @@ reconfigure(xlator_t *this, dict_t *options) consistent_io = _gf_false; priv->consistent_io = consistent_io; + if (priv->shd.enabled) { + if ((priv->shd.enabled != enabled_old) || + (timeout_old != priv->shd.timeout)) + afr_selfheal_childup(this, priv); + } + ret = 0; out: return ret; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index c066099d5d3..a3f2942b317 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -1351,4 +1351,7 @@ afr_handle_replies_quorum(call_frame_t *frame, xlator_t *this); gf_boolean_t afr_ta_dict_contains_pending_xattr(dict_t *dict, afr_private_t *priv, int child); + +void +afr_selfheal_childup(xlator_t *this, afr_private_t *priv); #endif /* __AFR_H__ */ |