author     Pranith Kumar K <pkarampu@redhat.com>   2014-04-15 12:40:57 +0530
committer  Vijay Bellur <vbellur@redhat.com>       2014-04-28 09:43:24 -0700
commit     07ed48398e41df1b65202f3d2e0be6f2c3ca8ceb (patch)
tree       61223ce6be2394606058b8403e0a70e40a6df9f0
parent     115b4093a44f6e23c28e5a382f82e72ddf73d97c (diff)
cluster/afr: trigger self-heals even when they are set to off.
When attempt-self-heal is set to true, trigger data/metadata/entry
self-heals even when they are disabled. This is useful so that
"gluster volume heal info" can report pending heals even when
data-self-heal, metadata-self-heal, and entry-self-heal are set to off.
Change-Id: Idc3f0d5d049c875b4f975248fef56ea2238da47c
BUG: 1039544
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/7480
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
-rw-r--r--   tests/basic/self-heald.t                          111
-rw-r--r--   xlators/cluster/afr/src/afr-common.c               93
-rw-r--r--   xlators/cluster/afr/src/afr-self-heal-common.c      3
-rw-r--r--   xlators/cluster/afr/src/afr-self-heal-data.c       13
-rw-r--r--   xlators/cluster/afr/src/afr-self-heal-entry.c       2
-rw-r--r--   xlators/cluster/afr/src/afr-self-heal-metadata.c   15
-rw-r--r--   xlators/cluster/afr/src/afr.h                      12
7 files changed, 213 insertions(+), 36 deletions(-)
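
The crux of the patch below is that each per-type gate now also honours
local->attempt_self_heal, which the heal-info code path sets, so a needed
heal can proceed and be reported even when its volume option is off. The
following is a minimal, self-contained sketch of that rule for the data
case; the struct and function names are trimmed stand-ins for
afr_local_t/afr_private_t and afr_can_start_data_self_heal(), not the
actual GlusterFS types.

/*
 * Minimal standalone sketch (not GlusterFS code) of the gating rule this
 * patch introduces for the data case.  All names below are illustrative
 * stand-ins kept only to show the logic.
 */
#include <stdbool.h>
#include <stdio.h>

struct sh_flags {                 /* stand-in for the relevant afr_self_heal_t bits */
        bool do_data_self_heal;   /* lookup decided a data heal is needed */
};

struct local {                    /* stand-in for the relevant afr_local_t bits */
        struct sh_flags self_heal;
        bool attempt_self_heal;   /* set on the "volume heal <vol> info" path */
};

struct priv {                     /* stand-in for the relevant afr_private_t bits */
        bool data_self_heal;      /* cluster.data-self-heal volume option */
};

static bool
can_start_data_self_heal (const struct local *local, const struct priv *priv)
{
        if (!local)
                return false;
        /* A needed heal may start if the option is on OR heal-info asked
         * for it; the volume option alone no longer has the final say. */
        if (local->self_heal.do_data_self_heal)
                return local->attempt_self_heal || priv->data_self_heal;
        return false;
}

int
main (void)
{
        struct priv  priv  = { .data_self_heal = false };  /* option set to off */
        struct local local = { .self_heal = { .do_data_self_heal = true },
                               .attempt_self_heal = true };

        /* Prints "yes": with attempt_self_heal set, the heal proceeds
         * even though cluster.data-self-heal is off. */
        printf ("data self-heal can start: %s\n",
                can_start_data_self_heal (&local, &priv) ? "yes" : "no");
        return 0;
}

The real predicates in the patch additionally short-circuit on
force_confirm_spb (split-brain confirmation), which the sketch omits.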
diff --git a/tests/basic/self-heald.t b/tests/basic/self-heald.t
index bed008704c2..01ce5b5c8b9 100644
--- a/tests/basic/self-heald.t
+++ b/tests/basic/self-heald.t
@@ -5,6 +5,11 @@ cleanup;
+function disconnected_brick_count {
+        local vol=$1
+        $CLI volume heal $vol info | grep -i transport | wc -l
+}
+
 TEST glusterd
 TEST pidof glusterd
 TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3,4,5}
@@ -28,6 +33,23 @@ HEAL_FILES=$(($HEAL_FILES + 3)) #Count the brick root dir
 cd ~
 EXPECT "$HEAL_FILES" afr_get_pending_heal_count $V0
+
+#When bricks are down, it says Transport End point Not connected for them
+EXPECT "3" disconnected_brick_count $V0
+
+#Create some stale indices and verify that they are not counted in heal info
+#TO create stale index create and delete files when one brick is down in
+#replica pair.
+for i in {11..20}; do echo abc > $M0/$i; done
+HEAL_FILES=$(($HEAL_FILES + 10)) #count extra 10 files
+EXPECT "$HEAL_FILES" afr_get_pending_heal_count $V0
+#delete the files now, so that stale indices will remain.
+for i in {11..20}; do rm -f $M0/$i; done
+#After deleting files they should not appear in heal info
+HEAL_FILES=$(($HEAL_FILES - 10))
+EXPECT "$HEAL_FILES" afr_get_pending_heal_count $V0
+
+
 TEST ! $CLI volume heal $V0
 TEST $CLI volume set $V0 cluster.self-heal-daemon off
 TEST ! $CLI volume heal $V0
@@ -45,4 +67,93 @@ TEST [ $HEAL_FILES -gt $(afr_get_pending_heal_count $V0) ]
 TEST $CLI volume heal $V0 full
 EXPECT_WITHIN 30 "0" afr_get_pending_heal_count $V0
+
+#Test that ongoing IO is not considered as Pending heal
+(dd if=/dev/zero of=$M0/file1 bs=1K 2>/dev/null 1>/dev/null)&
+back_pid1=$!;
+(dd if=/dev/zero of=$M0/file2 bs=1K 2>/dev/null 1>/dev/null)&
+back_pid2=$!;
+(dd if=/dev/zero of=$M0/file3 bs=1K 2>/dev/null 1>/dev/null)&
+back_pid3=$!;
+(dd if=/dev/zero of=$M0/file4 bs=1K 2>/dev/null 1>/dev/null)&
+back_pid4=$!;
+(dd if=/dev/zero of=$M0/file5 bs=1K 2>/dev/null 1>/dev/null)&
+back_pid5=$!;
+EXPECT 0 afr_get_pending_heal_count $V0
+kill -SIGTERM $back_pid1;
+kill -SIGTERM $back_pid2;
+kill -SIGTERM $back_pid3;
+kill -SIGTERM $back_pid4;
+kill -SIGTERM $back_pid5;
+wait >/dev/null 2>&1;
+
+#Test that volume heal info reports files even when self-heal
+#options are disabled
+TEST touch $M0/f
+TEST mkdir $M0/d
+#DATA
+TEST $CLI volume set $V0 cluster.data-self-heal off
+EXPECT "off" volume_option $V0 cluster.data-self-heal
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}4
+echo abc > $M0/f
+EXPECT 1 afr_get_pending_heal_count $V0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "Y" glustershd_up_status
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 4
+TEST $CLI volume heal $V0
+EXPECT_WITHIN 30 "0" afr_get_pending_heal_count $V0
+TEST $CLI volume set $V0 cluster.data-self-heal on
+
+#METADATA
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+EXPECT "off" volume_option $V0 cluster.metadata-self-heal
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST chmod 777 $M0/f
+EXPECT 1 afr_get_pending_heal_count $V0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "Y" glustershd_up_status
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 4
+TEST $CLI volume heal $V0
+EXPECT_WITHIN 30 "0" afr_get_pending_heal_count $V0
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+
+#ENTRY
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+EXPECT "off" volume_option $V0 cluster.entry-self-heal
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST touch $M0/d/a
+EXPECT 2 afr_get_pending_heal_count $V0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN 20 "Y" glustershd_up_status
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN 20 "1" afr_child_up_status_in_shd $V0 4
+TEST $CLI volume heal $V0
+EXPECT_WITHIN 30 "0" afr_get_pending_heal_count $V0
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+#Negative test cases
+#Fail volume does not exist case
+TEST ! $CLI volume heal fail info
+
+#Fail volume stopped case
+TEST $CLI volume stop $V0
+TEST ! $CLI volume heal $V0 info
+
+#Fail non-replicate volume info
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 $H0:$B0/${V0}{6}
+TEST $CLI volume start $V0
+TEST ! $CLI volume heal $V0 info
+
 cleanup
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 4b39ff6039d..7dafa0529fa 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1411,19 +1411,90 @@ afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this)
 }
 
 gf_boolean_t
-afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv)
+afr_can_start_missing_entry_gfid_self_heal (afr_local_t *local,
+                                            afr_private_t *priv)
 {
-        GF_ASSERT (sh);
-        GF_ASSERT (priv);
+        if (!local)
+                goto out;
+        //attempt self heal is only for data/metadata/entry
+        if (local->self_heal.do_gfid_self_heal ||
+            local->self_heal.do_missing_entry_self_heal)
+                return _gf_true;
+out:
+        return _gf_false;
+}
+
+gf_boolean_t
+afr_can_start_entry_self_heal (afr_local_t *local, afr_private_t *priv)
+{
+        if (!local)
+                goto out;
+        //force_confirm_spb is not checked here because directory split-brains
+        //are not reported at the moment.
+        if (local->self_heal.do_entry_self_heal) {
+                if (local->attempt_self_heal || priv->entry_self_heal)
+                        return _gf_true;
+        }
+out:
+        return _gf_false;
+}
+
+gf_boolean_t
+afr_can_start_data_self_heal (afr_local_t *local, afr_private_t *priv)
+{
+        if (!local)
+                goto out;
+
+        if (local->self_heal.force_confirm_spb)
+                return _gf_true;
+
+        if (local->self_heal.do_data_self_heal) {
+                if (local->attempt_self_heal ||
+                    afr_data_self_heal_enabled (priv->data_self_heal))
+                        return _gf_true;
+        }
+out:
+        return _gf_false;
+}
 
-        if (sh->force_confirm_spb)
+gf_boolean_t
+afr_can_start_metadata_self_heal (afr_local_t *local, afr_private_t *priv)
+{
+        if (!local)
+                goto out;
+        if (local->self_heal.force_confirm_spb)
                 return _gf_true;
-        return (sh->do_gfid_self_heal
-                || sh->do_missing_entry_self_heal
-                || (afr_data_self_heal_enabled (priv->data_self_heal) &&
-                    sh->do_data_self_heal)
-                || (priv->metadata_self_heal && sh->do_metadata_self_heal)
-                || (priv->entry_self_heal && sh->do_entry_self_heal));
+
+        if (local->self_heal.do_metadata_self_heal) {
+                if (local->attempt_self_heal || priv->metadata_self_heal)
+                        return _gf_true;
+        }
+out:
+        return _gf_false;
+}
+
+gf_boolean_t
+afr_can_self_heal_proceed (afr_local_t *local, afr_private_t *priv)
+{
+        if (!local)
+                goto out;
+
+        if (local->self_heal.force_confirm_spb)
+                return _gf_true;
+
+        if (afr_can_start_missing_entry_gfid_self_heal (local, priv))
+                return _gf_true;
+
+        if (afr_can_start_entry_self_heal (local, priv))
+                return _gf_true;
+
+        if (afr_can_start_data_self_heal (local, priv))
+                return _gf_true;
+
+        if (afr_can_start_metadata_self_heal (local, priv))
+                return _gf_true;
+out:
+        return _gf_false;
 }
 
 afr_transaction_type
@@ -1840,7 +1911,7 @@ afr_lookup_perform_self_heal (call_frame_t *frame, xlator_t *this,
         }
 
         afr_lookup_set_self_heal_params (local, this);
-        if (afr_can_self_heal_proceed (&local->self_heal, priv)) {
+        if (afr_can_self_heal_proceed (local, priv)) {
                 if (afr_is_transaction_running (local) &&
                     (!local->attempt_self_heal))
                         goto out;
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 6f82761b33d..ab9e44b41e0 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -2307,6 +2307,7 @@ afr_self_heal_local_init (afr_local_t *l, xlator_t *this)
                            this->name, priv->child_count);
         if (ret)
                 goto out;
+        lc->attempt_self_heal = l->attempt_self_heal;
 
 out:
         if (ret) {
@@ -2474,7 +2475,7 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
         sh->sh_type_in_action = AFR_SELF_HEAL_INVALID;
 
         FRAME_SU_DO (sh_frame, afr_local_t);
-        if (sh->do_missing_entry_self_heal || sh->do_gfid_self_heal) {
+        if (afr_can_start_missing_entry_gfid_self_heal (local, priv)) {
                 afr_self_heal_missing_entries (sh_frame, this);
         } else {
                 loc = &sh_local->loc;
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index e5ca84292fa..5c53f6a6a06 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -1704,17 +1704,6 @@ afr_sh_non_reg_lock_success (call_frame_t *frame, xlator_t *this)
         return 0;
 }
 
-gf_boolean_t
-afr_can_start_data_self_heal (afr_self_heal_t *sh, afr_private_t *priv)
-{
-        if (sh->force_confirm_spb)
-                return _gf_true;
-        if (sh->do_data_self_heal &&
-            afr_data_self_heal_enabled (priv->data_self_heal))
-                return _gf_true;
-        return _gf_false;
-}
-
 int
 afr_self_heal_data (call_frame_t *frame, xlator_t *this)
 {
@@ -1728,7 +1717,7 @@ afr_self_heal_data (call_frame_t *frame, xlator_t *this)
 
         sh->sh_type_in_action = AFR_SELF_HEAL_DATA;
 
-        if (afr_can_start_data_self_heal (sh, priv)) {
+        if (afr_can_start_data_self_heal (local, priv)) {
                 afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
                 ret = afr_inodelk_init (&local->internal_lock.inodelk[1],
                                         priv->sh_domain, priv->child_count);
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 8359079cea4..1ea957ad042 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -2396,7 +2396,7 @@ afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
 
         sh->sh_type_in_action = AFR_SELF_HEAL_ENTRY;
 
-        if (local->self_heal.do_entry_self_heal && priv->entry_self_heal) {
+        if (afr_can_start_entry_self_heal (local, priv)) {
                 afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
                 afr_sh_entrylk (frame, this, &local->loc, NULL,
                                 afr_sh_post_nonblocking_entry_cbk);
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index e26d3580e51..7abd852de6c 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -688,8 +688,10 @@ afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,
 {
         afr_internal_lock_t *int_lock = NULL;
         afr_local_t         *local    = NULL;
+        afr_self_heal_t     *sh       = NULL;
 
         local    = frame->local;
+        sh       = &local->self_heal;
         int_lock = &local->internal_lock;
 
         if (int_lock->lock_op_ret < 0) {
@@ -697,6 +699,7 @@ afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,
                         "inodelks failed for %s.", local->loc.path);
                 gf_log (this->name, GF_LOG_DEBUG, "Metadata self-heal "
                         "failed for %s.", local->loc.path);
+                afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
                 afr_sh_metadata_done (frame, this);
         } else {
 
@@ -740,16 +743,6 @@ afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)
         return 0;
 }
 
-gf_boolean_t
-afr_can_start_metadata_self_heal (afr_self_heal_t *sh, afr_private_t *priv)
-{
-        if (sh->force_confirm_spb)
-                return _gf_true;
-        if (sh->do_metadata_self_heal && priv->metadata_self_heal)
-                return _gf_true;
-        return _gf_false;
-}
-
 int
 afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
 {
@@ -761,7 +754,7 @@ afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
         sh = &local->self_heal;
         sh->sh_type_in_action = AFR_SELF_HEAL_METADATA;
 
-        if (afr_can_start_metadata_self_heal (sh, priv)) {
+        if (afr_can_start_metadata_self_heal (local, priv)) {
                 afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
                 afr_sh_metadata_lock (frame, this);
         } else {
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 676bd9b9466..ad8964ccbaa 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -1216,4 +1216,16 @@ afr_handle_open_fd_count (call_frame_t *frame, xlator_t *this);
 
 afr_inode_ctx_t*
 afr_inode_ctx_get (inode_t *inode, xlator_t *this);
+gf_boolean_t
+afr_can_start_missing_entry_gfid_self_heal (afr_local_t *local,
+                                            afr_private_t *priv);
+
+gf_boolean_t
+afr_can_start_entry_self_heal (afr_local_t *local, afr_private_t *priv);
+
+gf_boolean_t
+afr_can_start_data_self_heal (afr_local_t *local, afr_private_t *priv);
+
+gf_boolean_t
+afr_can_start_metadata_self_heal (afr_local_t *local, afr_private_t *priv);
 #endif /* __AFR_H__ */