author     Pranith Kumar K <pkarampu@redhat.com>    2013-07-03 21:28:25 +0530
committer  Vijay Bellur <vbellur@redhat.com>        2013-07-03 21:35:30 -0700
commit     0aa4248cf967b158c0cf6ce33c300ad4b3ee3249 (patch)
tree       56a8e408e81bc35ef335fcc6428b8cd7b14a5210 /xlators/cluster/afr/src/afr-self-heal-data.c
parent     c2abf3a6e39c5a5832a165757483bc0ae23cdb63 (diff)
cluster/afr: Let two data-self-heals compete in new domain
Problem:
At the moment data-self-heal acquires locks in the following
pattern. It takes a full-file lock, then gets the xattrs on the file on
both replicas and decides sources/sinks based on those xattrs. It then
acquires a lock on the 0-128k range and unlocks the full-file lock,
syncs the 0-128k range from source to sink, acquires a lock on the
128k-256k range and unlocks the 0-128k lock, syncs the 128k-256k block,
and so on. Finally it takes the full-file lock again, unlocks the last
small range lock, decrements the pending counts, and then unlocks the
full-file lock.
This overlapping pattern of locks is chosen to ensure that no more than
one self-heal is in progress at a time. BUT if another self-heal tries
to take a full-file lock while one is already in progress, it is put in
the blocked queue, and further inodelks from writes by the application
are then also put in the blocked queue because of the way the locks
xlator grants inodelks. So writes are blocked until the self-heal
completes (a sketch of this sequence follows the code excerpt below).
Here is the relevant code from xlators/features/locks/src/inodelk.c (line 225):
if (__blocked_lock_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) {
        ret = -EAGAIN;
        if (can_block == 0)
                goto out;
        /* The request conflicts with a lock already blocked in this
         * domain, so it joins the queue as well; this is how writes
         * pile up behind a waiting self-heal's full-file lock. */
        gettimeofday (&lock->blkd_time, NULL);
        list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
}
This leads to hangs in applications.
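To make the ordering concrete, here is a minimal sketch of the pre-patch
lock sequence. It is illustrative plain C, not GlusterFS code:
lock_range()/unlock_range() stand in for inodelk requests in the single
data domain (len == 0 meaning "until EOF", as in inodelk itself), and
the 128k window and 512k file size are example values only.

#include <stdio.h>

#define BLOCK (128 * 1024)      /* 128k self-heal window */
#define FSIZE (512 * 1024)      /* example file size     */

static void lock_range (long start, long len)
{
        if (len == 0)
                printf ("lock   [%ld, EOF)  (full file)\n", start);
        else
                printf ("lock   [%ld, %ld)\n", start, start + len);
}

static void unlock_range (long start, long len)
{
        if (len == 0)
                printf ("unlock [%ld, EOF)  (full file)\n", start);
        else
                printf ("unlock [%ld, %ld)\n", start, start + len);
}

int main (void)
{
        long off = 0;

        lock_range (0, 0);              /* full lock: read xattrs,   */
                                        /* decide sources/sinks      */
        lock_range (0, BLOCK);          /* first 128k window         */
        unlock_range (0, 0);            /* drop the full-file lock   */

        while (off + BLOCK < FSIZE) {
                /* sync [off, off + BLOCK) from source to sink, then
                 * slide the window: lock the next range before
                 * unlocking the current one */
                lock_range (off + BLOCK, BLOCK);
                unlock_range (off, BLOCK);
                off += BLOCK;
        }

        /* sync the final window, then close out */
        lock_range (0, 0);              /* full-file lock again      */
        unlock_range (off, BLOCK);      /* release the last window   */
        /* decrement pending counts here */
        unlock_range (0, 0);
        return 0;
}

Note that at least one lock is held at every instant. That is what
serializes self-heals, but it is also why a second self-heal's full-file
request, and every application inodelk queued behind it, stays blocked
for the entire heal.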
Fix:
Since we want to prevent two parallel self-heals, we let them compete
in a separate "domain". Let us call the domain in which the locks were
taken in the previous approach the "data-domain".
In the new approach, when a self-heal is triggered it first acquires a
full-file lock in a new domain, the "self-heal-domain". It then performs
the data-self-heal using locks in the "data-domain" exactly as before,
and finally unlocks the full-file lock in the "self-heal-domain".
With this approach, the application's writevs don't have to wait in the
blocked queue when more than one self-heal is triggered, since competing
self-heals now queue in the "self-heal-domain" instead (see the sketch
below).
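A minimal sketch of the new ordering, in the same illustrative style as
above (sh_domain, data_domain and do_data_self_heal() are hypothetical
stand-ins, not the patch's symbols):

#include <stdio.h>

static void lock_full (const char *dom)
{
        printf ("lock   full file in domain \"%s\"\n", dom);
}

static void unlock_full (const char *dom)
{
        printf ("unlock full file in domain \"%s\"\n", dom);
}

static void do_data_self_heal (const char *dom)
{
        /* rolling 128k range locks in dom, exactly as in the
         * old scheme sketched above */
        printf ("heal with range locks in domain \"%s\"\n", dom);
}

int main (void)
{
        const char *sh_domain   = "self-heal-domain";   /* new domain */
        const char *data_domain = "data-domain";        /* old domain */

        /* A competing self-heal blocks here, in the self-heal domain,
         * where no application inodelks ever queue. */
        lock_full (sh_domain);
        do_data_self_heal (data_domain);
        unlock_full (sh_domain);
        return 0;
}

Application writes only ever contend with the short-lived locks in the
"data-domain", so they are no longer queued behind a waiting self-heal's
full-file lock.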
Change-Id: Id79aef3dfa888945977fb9758374ac41c320d0d5
BUG: 967717
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/5100
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'xlators/cluster/afr/src/afr-self-heal-data.c')
-rw-r--r--   xlators/cluster/afr/src/afr-self-heal-data.c   80
1 files changed, 58 insertions, 22 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 3c2726b8df6..b71c7599564 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -156,6 +156,25 @@ afr_sh_data_close (call_frame_t *frame, xlator_t *this)
 }
 
 int
+afr_sh_dom_unlock (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local = NULL;
+        afr_self_heal_t *sh    = NULL;
+        afr_private_t   *priv  = NULL;
+
+        local = frame->local;
+        sh = &local->self_heal;
+        priv = this->private;
+
+        if (sh->sh_dom_lock_held)
+                afr_sh_data_unlock (frame, this, priv->sh_domain,
+                                    afr_sh_data_close);
+        else
+                afr_sh_data_close (frame, this);
+        return 0;
+}
+
+int
 afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                          int32_t op_ret, int32_t op_errno,
                          struct iatt *statpre, struct iatt *statpost,
                          dict_t *xdata)
@@ -289,14 +308,18 @@ afr_sh_data_unlock (call_frame_t *frame, xlator_t *this, char *dom,
         afr_local_t         *local    = NULL;
         afr_internal_lock_t *int_lock = NULL;
         afr_self_heal_t     *sh       = NULL;
+        afr_private_t       *priv     = NULL;
         int                  ret      = 0;
 
         local    = frame->local;
         int_lock = &local->internal_lock;
         sh       = &local->self_heal;
+        priv     = this->private;
 
         if (strcmp (dom, this->name) == 0) {
                 sh->data_lock_held = _gf_false;
+        } else if (strcmp (dom, priv->sh_domain) == 0) {
+                sh->sh_dom_lock_held = _gf_false;
         } else {
                 ret = -1;
                 goto out;
@@ -316,8 +339,8 @@ out:
 int
 afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
 {
-        afr_local_t   *local = NULL;
-        afr_self_heal_t *sh = NULL;
+        afr_local_t     *local = NULL;
+        afr_self_heal_t *sh    = NULL;
 
         local = frame->local;
         sh = &local->self_heal;
@@ -326,9 +349,9 @@ afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
                 "finishing data selfheal of %s", local->loc.path);
 
         if (sh->data_lock_held)
-                afr_sh_data_unlock (frame, this, this->name, afr_sh_data_close);
+                afr_sh_data_unlock (frame, this, this->name, afr_sh_dom_unlock);
         else
-                afr_sh_data_close (frame, this);
+                afr_sh_dom_unlock (frame, this);
 
         return 0;
 }
@@ -346,10 +369,7 @@ afr_sh_data_fail (call_frame_t *frame, xlator_t *this)
                 "finishing failed data selfheal of %s", local->loc.path);
 
         afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
-        if (sh->data_lock_held)
-                afr_sh_data_unlock (frame, this, this->name, afr_sh_data_close);
-        else
-                afr_sh_data_close (frame, this);
+        afr_sh_data_finish (frame, this);
         return 0;
 }
@@ -1184,6 +1204,22 @@ afr_sh_data_big_lock_success (call_frame_t *frame, xlator_t *this)
 }
 
 int
+afr_sh_dom_lock_success (call_frame_t *frame, xlator_t *this)
+{
+        afr_local_t     *local = NULL;
+        afr_self_heal_t *sh    = NULL;
+
+        local = frame->local;
+        sh = &local->self_heal;
+
+        sh->sh_dom_lock_held = _gf_true;
+        afr_sh_data_lock (frame, this, 0, 0, _gf_true, this->name,
+                          afr_sh_data_big_lock_success,
+                          afr_sh_data_fail);
+        return 0;
+}
+
+int
 afr_sh_data_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
 {
         afr_internal_lock_t *int_lock = NULL;
@@ -1337,7 +1373,6 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         afr_private_t   *priv = NULL;
         int              call_count = 0;
         int              child_index = 0;
-        gf_boolean_t     block = _gf_true;
 
         local = frame->local;
         sh = &local->self_heal;
@@ -1379,16 +1414,8 @@ afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                         "fd for %s opened, commencing sync", local->loc.path);
 
-                /*
-                 * The read and write self-heal trigger codepaths do not provide
-                 * an unwind callback. We run a trylock in these codepaths
-                 * because we are sensitive to locking latency.
-                 */
-
-                block = sh->unwind ? _gf_true : _gf_false;
-                afr_sh_data_lock (frame, this, 0, 0, block, this->name,
-                                  afr_sh_data_big_lock_success,
-                                  afr_sh_data_fail);
+                afr_sh_data_lock (frame, this, 0, 0, _gf_true, priv->sh_domain,
+                                  afr_sh_dom_lock_success, afr_sh_data_fail);
         }
 
         return 0;
@@ -1493,9 +1520,10 @@ afr_can_start_data_self_heal (afr_self_heal_t *sh, afr_private_t *priv)
 int
 afr_self_heal_data (call_frame_t *frame, xlator_t *this)
 {
-        afr_local_t   *local = NULL;
-        afr_self_heal_t *sh = NULL;
-        afr_private_t *priv = this->private;
+        afr_local_t     *local = NULL;
+        afr_self_heal_t *sh    = NULL;
+        afr_private_t   *priv  = this->private;
+        int              ret   = -1;
 
         local = frame->local;
         sh = &local->self_heal;
@@ -1504,6 +1532,14 @@ afr_self_heal_data (call_frame_t *frame, xlator_t *this)
         if (afr_can_start_data_self_heal (sh, priv)) {
                 afr_set_self_heal_status (sh, AFR_SELF_HEAL_STARTED);
 
+                ret = afr_inodelk_init (&local->internal_lock.inodelk[1],
+                                        priv->sh_domain, priv->child_count);
+                if (ret < 0) {
+                        afr_set_self_heal_status (sh, AFR_SELF_HEAL_FAILED);
+                        afr_sh_data_done (frame, this);
+                        return 0;
+                }
+
                 if (IA_ISREG (sh->type)) {
                         afr_sh_data_open (frame, this);
                 } else {