diff options
author | Pranith K <pranithk@gluster.com> | 2011-02-08 02:22:52 +0000 |
---|---|---|
committer | Anand V. Avati <avati@dev.gluster.com> | 2011-02-17 23:32:56 -0800 |
commit | 52cf9e992749a7cd5b2411581eff4c052d055ea9 (patch) | |
tree | 932da5f539a2c8d7c54ae86d991e63088fdf56d0 /xlators/cluster/afr | |
parent | e77eae0f04b90ad9722ce82d59fd552cae63c347 (diff) |
cluster/afr: stop spawning self-heal loops when the self-heal fails
With the current model of self-heal, any loop won't resume if a
self-heal failure happens. I have added the fix to stop spawning the
initial loops on self-heal failure.
Fixed the invalid read shown in valgrind in diff-self-heal.
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Signed-off-by: Anand V. Avati <avati@dev.gluster.com>
BUG: 1174 (Replicate spawns read loops even after destination fails)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=1174
Diffstat (limited to 'xlators/cluster/afr')
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-algorithm.c | 36 |
1 files changed, 24 insertions, 12 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c index a65fae0acbf..f72da774127 100644 --- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c +++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c @@ -219,7 +219,6 @@ sh_full_read_cbk (call_frame_t *rw_frame, void *cookie, if (op_ret <= 0) { sh->op_failed = 1; - sh_full_loop_return (rw_frame, this, offset); return 0; } @@ -353,8 +352,13 @@ sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_ UNLOCK (&sh_priv->lock); while (loop--) { - sh_full_read_write (frame, this, offset); - offset += block_size; + if (sh->op_failed) { + // op failed in other loop, stop spawning more loops + sh_full_loop_driver (frame, this, _gf_false); + } else { + sh_full_read_write (frame, this, offset); + offset += block_size; + } } if (is_driver_done) { @@ -492,12 +496,16 @@ sh_diff_loop_driver_done (call_frame_t *frame, xlator_t *this) afr_local_t * local = NULL; afr_self_heal_t * sh = NULL; afr_sh_algo_diff_private_t *sh_priv = NULL; + int32_t total_blocks = 0; + int32_t diff_blocks = 0; - priv = this->private; - local = frame->local; - sh = &local->self_heal; - sh_priv = sh->private; + priv = this->private; + local = frame->local; + sh = &local->self_heal; + sh_priv = sh->private; + total_blocks = sh_priv->total_blocks; + diff_blocks = sh_priv->diff_blocks; sh_diff_private_cleanup (frame, this); if (sh->op_failed) { @@ -514,9 +522,8 @@ sh_diff_loop_driver_done (call_frame_t *frame, xlator_t *this) gf_log (this->name, GF_LOG_NORMAL, "diff self-heal on %s: %d blocks of %d were different (%.2f%%)", - local->loc.path, sh_priv->diff_blocks, - sh_priv->total_blocks, - ((sh_priv->diff_blocks * 1.0)/sh_priv->total_blocks) * 100); + local->loc.path, diff_blocks, total_blocks, + ((diff_blocks * 1.0)/total_blocks) * 100); local->self_heal.algo_completion_cbk (frame, this); } @@ -1014,8 +1021,13 @@ sh_diff_loop_driver (call_frame_t *frame, xlator_t *this, 
UNLOCK (&sh_priv->lock); while (loop--) { - sh_diff_checksum (frame, this, offset); - offset += block_size; + if (sh->op_failed) { + // op failed in other loop, stop spawning more loops + sh_diff_loop_driver (frame, this, _gf_false, NULL); + } else { + sh_diff_checksum (frame, this, offset); + offset += block_size; + } } if (is_driver_done) { |