diff options
author | Ravishankar N <ravishankar@redhat.com> | 2016-01-10 09:19:34 +0530 |
---|---|---|
committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2016-03-01 03:23:20 -0800 |
commit | 8210ca1a5c0e78e91c6fab7df7e002e39660b706 (patch) | |
tree | 432a6836cc685760ee441f4b8e46221947247211 | |
parent | ea00992d3d52a51b7c8311ad9565bbbb6e395f9d (diff) |
afr: Add throttled background client-side heals
If a heal is needed after inode refresh (lookup, read_txn), launch it in
the background instead of blocking the fop (that triggered refresh) until the
heal happens.
afr_replies_interpret() is modified such that the heal is
launched only if at least one sink brick is up.
The maximum number of heals that can happen in parallel is configurable via
the 'background-self-heal-count' volume option. Any heals beyond that number
are put in a wait queue whose length is configurable via the
'heal-wait-queue-length' volume option. If the wait queue is also full,
further heals will be ignored.
Default values: background-self-heal-count=8, heal-wait-queue-length=128
Change-Id: I1d4a52814cdfd43d90591b6d2ad7b6219937ce70
BUG: 1297172
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/13207
Smoke: Gluster Build System <jenkins@build.gluster.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
-rw-r--r-- | libglusterfs/src/globals.h | 4 | ||||
-rw-r--r-- | tests/basic/afr/client-side-heal.t | 1 | ||||
-rwxr-xr-x | tests/bugs/glusterd/859927/repl.t | 4 | ||||
-rw-r--r-- | tests/bugs/quota/bug-1035576.t | 1 | ||||
-rwxr-xr-x | tests/bugs/replicate/bug-802417.t | 5 | ||||
-rwxr-xr-x | tests/bugs/replicate/bug-977797.t | 52 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 89 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-dir-write.c | 2 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 110 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal.h | 3 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.c | 26 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 28 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 8 |
13 files changed, 241 insertions, 92 deletions
diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h index ae05a3abd15..cd4582a12f8 100644 --- a/libglusterfs/src/globals.h +++ b/libglusterfs/src/globals.h @@ -62,6 +62,10 @@ #define GD_OP_VERSION_3_7_7 30707 /* Op-version for GlusterFS 3.7.7 */ +/* Op-version was not bumped up for 3.7.8 */ + +#define GD_OP_VERSION_3_7_9 30709 /* Op-version for GlusterFS 3.7.9 */ + #define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */ #define GD_OP_VER_PERSISTENT_AFR_XATTRS GD_OP_VERSION_3_6_0 diff --git a/tests/basic/afr/client-side-heal.t b/tests/basic/afr/client-side-heal.t index 18f76265b29..d87f4b14063 100644 --- a/tests/basic/afr/client-side-heal.t +++ b/tests/basic/afr/client-side-heal.t @@ -70,6 +70,7 @@ EXPECT 7 get_pending_heal_count $V0 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; TEST cat $M0/datafile +EXPECT_WITHIN $HEAL_TIMEOUT 6 get_pending_heal_count $V0 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0; diff --git a/tests/bugs/glusterd/859927/repl.t b/tests/bugs/glusterd/859927/repl.t index a500961165c..40e86029685 100755 --- a/tests/bugs/glusterd/859927/repl.t +++ b/tests/bugs/glusterd/859927/repl.t @@ -23,7 +23,6 @@ TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}; TEST $CLI volume set $V0 cluster.self-heal-daemon off TEST $CLI volume set $V0 performance.stat-prefetch off TEST $CLI volume set $V0 client-log-level DEBUG -TEST $CLI volume set $V0 cluster.background-self-heal-count 0 TEST $CLI volume start $V0 TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0; @@ -34,6 +33,7 @@ EXPECT full volume_option $V0 cluster.data-self-heal-algorithm create_setup_for_self_heal $M0/a EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 cat $file 2>&1 > /dev/null +EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0 TEST cmp $B0/${V0}1/a 
$B0/${V0}2/a TEST $CLI volume set $V0 cluster.data-self-heal-algorithm diff @@ -41,12 +41,14 @@ EXPECT diff volume_option $V0 cluster.data-self-heal-algorithm create_setup_for_self_heal $M0/a EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 cat $file 2>&1 > /dev/null +EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0 TEST cmp $B0/${V0}1/a $B0/${V0}2/a TEST $CLI volume reset $V0 cluster.data-self-heal-algorithm create_setup_for_self_heal $M0/a EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 cat $file 2>&1 > /dev/null +EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0 TEST cmp $B0/${V0}1/a $B0/${V0}2/a TEST ! $CLI volume set $V0 cluster.data-self-heal-algorithm "" diff --git a/tests/bugs/quota/bug-1035576.t b/tests/bugs/quota/bug-1035576.t index e3d32d107d0..12f960c46c6 100644 --- a/tests/bugs/quota/bug-1035576.t +++ b/tests/bugs/quota/bug-1035576.t @@ -18,7 +18,6 @@ TEST $CLI volume set $V0 performance.io-cache off TEST $CLI volume set $V0 performance.write-behind off TEST $CLI volume set $V0 performance.stat-prefetch off TEST $CLI volume set $V0 performance.read-ahead off -TEST $CLI volume set $V0 background-self-heal-count 0 TEST $CLI volume set $V0 self-heal-daemon off TEST $CLI volume quota $V0 enable diff --git a/tests/bugs/replicate/bug-802417.t b/tests/bugs/replicate/bug-802417.t index df989b1470b..c5ba98b65fd 100755 --- a/tests/bugs/replicate/bug-802417.t +++ b/tests/bugs/replicate/bug-802417.t @@ -32,7 +32,6 @@ TEST $CLI volume set $V0 performance.stat-prefetch off ## Make sure automatic self-heal doesn't perturb our results. 
TEST $CLI volume set $V0 cluster.self-heal-daemon off TEST $CLI volume set $V0 cluster.data-self-heal on -TEST $CLI volume set $V0 cluster.background-self-heal-count 0 ## Start volume and verify TEST $CLI volume start $V0; @@ -70,8 +69,8 @@ tgt_xattr_2="trusted.afr.${V0}-client-2" actual=$(afr_get_changelog_xattr $obs_path_0 $tgt_xattr_0) EXPECT "0x000000000000000000000000|^\$" echo $actual -actual=$(afr_get_changelog_xattr $obs_path_0 $tgt_xattr_1) -EXPECT "0x000000000000000000000000|^\$" echo $actual +EXPECT_WITHIN $HEAL_TIMEOUT "0x000000000000000000000000" \ +afr_get_changelog_xattr $obs_path_0 $tgt_xattr_1 actual=$(afr_get_changelog_xattr $obs_path_0 $tgt_xattr_2) EXPECT "0x000000030000000000000000" echo $actual diff --git a/tests/bugs/replicate/bug-977797.t b/tests/bugs/replicate/bug-977797.t index 3ff14ecf3d5..72c616ba68e 100755 --- a/tests/bugs/replicate/bug-977797.t +++ b/tests/bugs/replicate/bug-977797.t @@ -26,7 +26,6 @@ TEST $CLI volume set $V0 quick-read off TEST $CLI volume set $V0 read-ahead off TEST $CLI volume set $V0 write-behind off TEST $CLI volume set $V0 io-cache off -TEST $CLI volume set $V0 background-self-heal-count 0 TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 @@ -56,34 +55,29 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1; TEST dd if=$M0/a/file of=/dev/null bs=1024k -b1c0dir=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a \ - trusted.afr.$V0-client-0 "entry") -b1c1dir=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a \ - trusted.afr.$V0-client-1 "entry") -b2c0dir=$(afr_get_specific_changelog_xattr \ - $B0/$V0"2"/a trusted.afr.$V0-client-0 "entry") -b2c1dir=$(afr_get_specific_changelog_xattr \ - $B0/$V0"2"/a trusted.afr.$V0-client-1 "entry") - - -b1c0f=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a/file \ - trusted.afr.$V0-client-0 "data") -b1c1f=$(afr_get_specific_changelog_xattr $B0/$V0"1"/a/file \ - trusted.afr.$V0-client-1 "data") 
-b2c0f=$(afr_get_specific_changelog_xattr $B0/$V0"2"/a/file \ - trusted.afr.$V0-client-0 "data") -b2c1f=$(afr_get_specific_changelog_xattr $B0/$V0"2"/a/file \ - trusted.afr.$V0-client-1 "data") - -EXPECT "00000000|^$" echo $b1c0f -EXPECT "00000000|^$" echo $b1c1f -EXPECT "00000000|^$" echo $b2c0f -EXPECT "00000000|^$" echo $b2c1f - -EXPECT "00000000|^$" echo $b1c0dir -EXPECT "00000000|^$" echo $b1c1dir -EXPECT "00000000|^$" echo $b2c0dir -EXPECT "00000000|^$" echo $b2c1dir +EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \ +afr_get_specific_changelog_xattr $B0/$V0"1"/a/file trusted.afr.$V0-client-0 "data" + +EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \ +afr_get_specific_changelog_xattr $B0/$V0"1"/a/file trusted.afr.$V0-client-1 "data" + +EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \ +afr_get_specific_changelog_xattr $B0/$V0"2"/a/file trusted.afr.$V0-client-0 "data" + +EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \ +afr_get_specific_changelog_xattr $B0/$V0"2"/a/file trusted.afr.$V0-client-1 "data" + +EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \ +afr_get_specific_changelog_xattr $B0/$V0"1"/a trusted.afr.$V0-client-0 "entry" + +EXPECT_WITHIN HEAL_TIMEOUT "00000000" \ +afr_get_specific_changelog_xattr $B0/$V0"1"/a trusted.afr.$V0-client-1 "entry" + +EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \ +afr_get_specific_changelog_xattr $B0/$V0"2"/a trusted.afr.$V0-client-0 "entry" + +EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \ +afr_get_specific_changelog_xattr $B0/$V0"2"/a trusted.afr.$V0-client-1 "entry" ## Finish up TEST $CLI volume stop $V0; diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 457f7865cec..1f68b87ef94 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -663,7 +663,8 @@ afr_accuse_smallfiles (xlator_t *this, struct afr_reply *replies, int -afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode) +afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode, + gf_boolean_t 
*start_heal) { afr_local_t *local = NULL; afr_private_t *priv = NULL; @@ -734,6 +735,13 @@ afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode) } } + for (i = 0; i < priv->child_count; i++) { + if (start_heal && priv->child_up[i] && + (!data_readable[i] || !metadata_readable[i])) { + *start_heal = _gf_true; + break; + } + } afr_inode_read_subvol_set (inode, this, data_readable, metadata_readable, event_generation); return ret; @@ -772,36 +780,6 @@ ret: return -err; } - -int -afr_refresh_selfheal_wrap (void *opaque) -{ - call_frame_t *frame = opaque; - afr_local_t *local = NULL; - xlator_t *this = NULL; - int err = 0; - - local = frame->local; - this = frame->this; - - afr_selfheal (frame->this, local->refreshinode->gfid); - - afr_selfheal_unlocked_discover (frame, local->refreshinode, - local->refreshinode->gfid, - local->replies); - - afr_replies_interpret (frame, this, local->refreshinode); - - err = afr_inode_refresh_err (frame, this); - - afr_local_replies_wipe (local, this->private); - - local->refreshfn (frame, this, err); - - return 0; -} - - gf_boolean_t afr_selfheal_enabled (xlator_t *this) { @@ -817,35 +795,43 @@ afr_selfheal_enabled (xlator_t *this) return data || priv->metadata_self_heal || priv->entry_self_heal; } - int afr_inode_refresh_done (call_frame_t *frame, xlator_t *this) { - call_frame_t *heal = NULL; + call_frame_t *heal_frame = NULL; afr_local_t *local = NULL; + gf_boolean_t start_heal = _gf_false; + afr_local_t *heal_local = NULL; + int op_errno = ENOMEM; int ret = 0; int err = 0; local = frame->local; - ret = afr_replies_interpret (frame, this, local->refreshinode); + ret = afr_replies_interpret (frame, this, local->refreshinode, + &start_heal); err = afr_inode_refresh_err (frame, this); afr_local_replies_wipe (local, this->private); - if (ret && afr_selfheal_enabled (this)) { - heal = copy_frame (frame); - if (heal) - heal->root->pid = GF_CLIENT_PID_SELF_HEALD; - ret = synctask_new (this->ctx->env, 
afr_refresh_selfheal_wrap, - afr_refresh_selfheal_done, heal, frame); - if (ret) - goto refresh_done; - } else { - refresh_done: - local->refreshfn (frame, this, err); - } + if (ret && afr_selfheal_enabled (this) && start_heal) { + heal_frame = copy_frame (frame); + if (!heal_frame) + goto refresh_done; + heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD; + heal_local = AFR_FRAME_INIT (heal_frame, op_errno); + if (!heal_local) { + AFR_STACK_DESTROY (heal_frame); + goto refresh_done; + } + heal_local->refreshinode = inode_ref (local->refreshinode); + heal_local->heal_frame = heal_frame; + afr_throttled_selfheal (heal_frame, this); + } + +refresh_done: + local->refreshfn (frame, this, err); return 0; } @@ -1758,7 +1744,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) */ gf_uuid_copy (args.gfid, read_gfid); args.ia_type = ia_type; - if (afr_replies_interpret (frame, this, local->inode)) { + if (afr_replies_interpret (frame, this, local->inode, NULL)) { read_subvol = afr_read_subvol_decide (local->inode, this, &args); afr_inode_read_subvol_reset (local->inode, this); @@ -2214,7 +2200,7 @@ afr_discover_done (call_frame_t *frame, xlator_t *this) goto unwind; } - afr_replies_interpret (frame, this, local->inode); + afr_replies_interpret (frame, this, local->inode, NULL); read_subvol = afr_read_subvol_decide (local->inode, this, NULL); if (read_subvol == -1) { @@ -3863,6 +3849,12 @@ afr_priv_dump (xlator_t *this) gf_proc_dump_write("favorite_child", "%d", priv->favorite_child); gf_proc_dump_write("wait_count", "%u", priv->wait_count); gf_proc_dump_write("quorum-reads", "%d", priv->quorum_reads); + gf_proc_dump_write("heal-wait-queue-length", "%d", + priv->heal_wait_qlen); + gf_proc_dump_write("heal-waiters", "%d", priv->heal_waiters); + gf_proc_dump_write("background-self-heal-count", "%d", + priv->background_self_heal_count); + gf_proc_dump_write("healers", "%d", priv->healers); return 0; } @@ -4169,6 +4161,7 @@ afr_local_init (afr_local_t *local, afr_private_t 
*priv, int32_t *op_errno) goto out; } + INIT_LIST_HEAD (&local->healer); return 0; out: return -1; diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 3f4ad246992..741a07ad9c0 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -88,7 +88,7 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this) priv = this->private; if (local->inode) { - afr_replies_interpret (frame, this, local->inode); + afr_replies_interpret (frame, this, local->inode, NULL); inode_read_subvol = afr_data_subvol_get (local->inode, this, NULL, NULL, NULL); } diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 49c6bd0cc98..74e1a444069 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -15,6 +15,9 @@ #include "protocol-common.h" #include "afr-messages.h" +void +afr_heal_synctask (xlator_t *this, afr_local_t *local); + int afr_selfheal_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) @@ -1422,3 +1425,110 @@ afr_selfheal (xlator_t *this, uuid_t gfid) return ret; } + +afr_local_t* +__afr_dequeue_heals (afr_private_t *priv) +{ + afr_local_t *local = NULL; + + if (list_empty (&priv->heal_waiting)) + goto none; + if ((priv->background_self_heal_count > 0) && + (priv->healers >= priv->background_self_heal_count)) + goto none; + + local = list_entry (priv->heal_waiting.next, afr_local_t, healer); + priv->heal_waiters--; + GF_ASSERT (priv->heal_waiters >= 0); + list_del_init(&local->healer); + list_add(&local->healer, &priv->healing); + priv->healers++; + return local; +none: + gf_msg_debug (THIS->name, 0, "Nothing dequeued. 
" + "Num healers: %d, Num Waiters: %d", + priv->healers, priv->heal_waiters); + return NULL; +} + +int +afr_refresh_selfheal_wrap (void *opaque) +{ + call_frame_t *heal_frame = opaque; + afr_local_t *local = heal_frame->local; + int ret = 0; + + ret = afr_selfheal (heal_frame->this, local->refreshinode->gfid); + return ret; +} + +int +afr_refresh_heal_done (int ret, call_frame_t *frame, void *opaque) +{ + call_frame_t *heal_frame = opaque; + xlator_t *this = heal_frame->this; + afr_private_t *priv = this->private; + afr_local_t *local = heal_frame->local; + + LOCK (&priv->lock); + { + list_del_init(&local->healer); + priv->healers--; + GF_ASSERT (priv->healers >= 0); + local = __afr_dequeue_heals (priv); + } + UNLOCK (&priv->lock); + + if (heal_frame) + AFR_STACK_DESTROY (heal_frame); + + if (local) + afr_heal_synctask (this, local); + return 0; + +} + +void +afr_heal_synctask (xlator_t *this, afr_local_t *local) +{ + int ret = 0; + call_frame_t *heal_frame = NULL; + + heal_frame = local->heal_frame; + ret = synctask_new (this->ctx->env, afr_refresh_selfheal_wrap, + afr_refresh_heal_done, heal_frame, heal_frame); + if (ret < 0) + /* Heal not launched. Will be queued when the next inode + * refresh happens and shd hasn't healed it yet. 
*/ + afr_refresh_heal_done (ret, heal_frame, heal_frame); +} + +void +afr_throttled_selfheal (call_frame_t *frame, xlator_t *this) +{ + gf_boolean_t can_heal = _gf_true; + afr_private_t *priv = this->private; + afr_local_t *local = frame->local; + + LOCK (&priv->lock); + { + if ((priv->background_self_heal_count > 0) && + (priv->heal_wait_qlen + priv->background_self_heal_count) > + (priv->heal_waiters + priv->healers)) { + list_add_tail(&local->healer, &priv->heal_waiting); + priv->heal_waiters++; + local = __afr_dequeue_heals (priv); + } else { + can_heal = _gf_false; + } + } + UNLOCK (&priv->lock); + + if (can_heal) { + if (local) + afr_heal_synctask (this, local); + else + gf_msg_debug (this->name, 0, "Max number of heals are " + "pending, background self-heal rejected."); + } +} diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h index 74e852aa038..b298fa130c3 100644 --- a/xlators/cluster/afr/src/afr-self-heal.h +++ b/xlators/cluster/afr/src/afr-self-heal.h @@ -85,6 +85,9 @@ int afr_selfheal (xlator_t *this, uuid_t gfid); +void +afr_throttled_selfheal (call_frame_t *frame, xlator_t *this); + int afr_selfheal_name (xlator_t *this, uuid_t gfid, const char *name, void *gfid_req); diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 98e4d3e2699..535f3e5b946 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -124,6 +124,10 @@ reconfigure (xlator_t *this, dict_t *options) priv->background_self_heal_count, options, uint32, out); + GF_OPTION_RECONF ("heal-wait-queue-length", + priv->heal_wait_qlen, options, uint32, out); + + GF_OPTION_RECONF ("metadata-self-heal", priv->metadata_self_heal, options, bool, out); @@ -275,6 +279,8 @@ init (xlator_t *this) priv->read_child = -1; GF_OPTION_INIT ("arbiter-count", priv->arbiter_count, uint32, out); + INIT_LIST_HEAD (&priv->healing); + INIT_LIST_HEAD (&priv->heal_waiting); priv->spb_choice_timeout = 
AFR_DEFAULT_SPB_CHOICE_TIMEOUT; @@ -327,6 +333,9 @@ init (xlator_t *this) GF_OPTION_INIT ("background-self-heal-count", priv->background_self_heal_count, uint32, out); + GF_OPTION_INIT ("heal-wait-queue-length", + priv->heal_wait_qlen, uint32, out); + GF_OPTION_INIT ("data-self-heal", priv->data_self_heal, str, out); GF_OPTION_INIT ("data-self-heal-algorithm", @@ -599,10 +608,21 @@ struct volume_options options[] = { { .key = {"background-self-heal-count"}, .type = GF_OPTION_TYPE_INT, .min = 0, - .default_value = "16", + .max = 256, + .default_value = "8", + .validate = GF_OPT_VALIDATE_MIN, + .description = "This specifies the number of per client self-heal " + "jobs that can perform parallel heals in the " + "background." + }, + { .key = {"heal-wait-queue-length"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = 10000, /*Around 100MB with sizeof(afr_local_t)= 10496 bytes*/ + .default_value = "128", .validate = GF_OPT_VALIDATE_MIN, - .description = "This specifies the number of self-heals that can be " - " performed in background without blocking the fop" + .description = "This specifies the number of heals that can be queued" + " for the parallel background self heal jobs." }, { .key = {"data-self-heal"}, .type = GF_OPTION_TYPE_STR, diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 6f75e07f95c..7bb5aa8ded2 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -69,8 +69,17 @@ typedef struct _afr_private { unsigned int data_self_heal_window_size; /* max number of pipelined read/writes */ - unsigned int background_self_heal_count; - unsigned int background_self_heals_started; + struct list_head heal_waiting; /*queue for files that need heal*/ + uint32_t heal_wait_qlen; /*configurable queue length for heal_waiting*/ + int32_t heal_waiters; /* No. 
of elements currently in wait queue.*/ + + struct list_head healing;/* queue for files that are undergoing + background heal*/ + uint32_t background_self_heal_count;/*configurable queue length for + healing queue*/ + int32_t healers;/* No. of elements currently undergoing background + heal*/ + gf_boolean_t metadata_self_heal; /* on/off */ gf_boolean_t entry_self_heal; /* on/off */ @@ -122,12 +131,14 @@ typedef struct _afr_private { afr_self_heald_t shd; - /* pump dependencies */ - void *pump_private; - gf_boolean_t use_afr_in_pump; gf_boolean_t consistent_metadata; uint64_t spb_choice_timeout; gf_boolean_t need_heal; + + /* pump dependencies */ + void *pump_private; + gf_boolean_t use_afr_in_pump; + } afr_private_t; @@ -740,6 +751,10 @@ typedef struct _afr_local { int xflag; gf_boolean_t do_discovery; struct afr_reply *replies; + + /* For client side background heals. */ + struct list_head healer; + call_frame_t *heal_frame; } afr_local_t; @@ -891,7 +906,8 @@ int afr_locked_nodes_count (unsigned char *locked_nodes, int child_count); int -afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode); +afr_replies_interpret (call_frame_t *frame, xlator_t *this, inode_t *inode, + gf_boolean_t *start_heal); void afr_local_replies_wipe (afr_local_t *local, afr_private_t *priv); diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index dd9d159f0ca..96026a98d95 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -1119,6 +1119,14 @@ struct volopt_map_entry glusterd_volopt_map[] = { .op_version = GD_OP_VERSION_3_7_0, .flags = OPT_FLAG_CLIENT_OPT }, + { .key = "cluster.heal-wait-queue-length", + .voltype = "cluster/replicate", + .type = DOC, + .op_version = GD_OP_VERSION_3_7_9, + .flags = OPT_FLAG_CLIENT_OPT + }, + + /* stripe xlator options */ { .key = "cluster.stripe-block-size", .voltype = "cluster/stripe", .option = "block-size", |