diff options
Diffstat (limited to 'xlators/cluster/afr/src/afr-self-heal-algorithm.c')
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-algorithm.c | 1114 |
1 files changed, 384 insertions, 730 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c index 04b388fe052..1c7cdf41819 100644 --- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c +++ b/xlators/cluster/afr/src/afr-self-heal-algorithm.c @@ -44,279 +44,249 @@ This file contains the various self-heal algorithms */ +static int +sh_loop_driver (call_frame_t *sh_frame, xlator_t *this, + gf_boolean_t is_first_call, call_frame_t *old_loop_frame); +static int +sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame, + int32_t op_ret, int32_t op_errno); +static int +sh_destroy_frame (call_frame_t *frame, xlator_t *this) +{ + if (!frame) + goto out; -/* - The "full" algorithm. Copies the entire file from - source to sinks. -*/ - + AFR_STACK_DESTROY (frame); +out: + return 0; +} static void -sh_full_private_cleanup (call_frame_t *frame, xlator_t *this) +sh_private_cleanup (call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - afr_self_heal_t * sh = NULL; - afr_sh_algo_full_private_t *sh_priv = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_sh_algo_private_t *sh_priv = NULL; local = frame->local; sh = &local->self_heal; sh_priv = sh->private; - if (sh_priv) GF_FREE (sh_priv); } - static int -sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_call); +sh_number_of_writes_needed (unsigned char *write_needed, int child_count) +{ + int writes = 0; + int i = 0; + + for (i = 0; i < child_count; i++) { + if (write_needed[i]) + writes++; + } + + return writes; +} + static int -sh_full_loop_driver_done (call_frame_t *frame, xlator_t *this) +sh_loop_driver_done (call_frame_t *frame, xlator_t *this, + call_frame_t *last_loop_frame) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - afr_self_heal_t *sh = NULL; - afr_sh_algo_full_private_t *sh_priv = NULL; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_sh_algo_private_t *sh_priv = NULL; + int32_t total_blocks = 0; + int32_t diff_blocks = 0; - priv = this->private; - local = frame->local; - sh = &local->self_heal; - sh_priv = sh->private; + priv = this->private; + local = frame->local; + sh = &local->self_heal; + sh_priv = sh->private; + total_blocks = sh_priv->total_blocks; + diff_blocks = sh_priv->diff_blocks; - sh_full_private_cleanup (frame, this); + sh_private_cleanup (frame, this); if (sh->op_failed) { + GF_ASSERT (!last_loop_frame); + //loop_finish should have happened and the old_loop should be NULL gf_log (this->name, GF_LOG_INFO, - "full self-heal aborting on %s", + "self-heal aborting on %s", local->loc.path); local->self_heal.algo_abort_cbk (frame, this); } else { - gf_log (this->name, GF_LOG_INFO, - "full self-heal completed on %s", - local->loc.path); + GF_ASSERT (last_loop_frame); + if (diff_blocks == total_blocks) { + gf_log (this->name, GF_LOG_INFO, "full self-heal " + "completed on %s",local->loc.path); + } else { + gf_log (this->name, GF_LOG_INFO, + "diff self-heal on %s: completed. " + "(%d blocks of %d were different (%.2f%%))", + local->loc.path, diff_blocks, total_blocks, + ((diff_blocks * 1.0)/total_blocks) * 100); + } + sh->old_loop_frame = last_loop_frame; local->self_heal.algo_completion_cbk (frame, this); } + return 0; } -static int -sh_full_loop_return (call_frame_t *rw_frame, xlator_t *this, off_t offset) +int +sh_loop_finish (call_frame_t *loop_frame, xlator_t *this) { - afr_local_t * rw_local = NULL; - afr_self_heal_t * rw_sh = NULL; - call_frame_t *sh_frame = NULL; - afr_local_t * sh_local = NULL; - afr_self_heal_t *sh = NULL; - afr_sh_algo_full_private_t *sh_priv = NULL; - - rw_local = rw_frame->local; - rw_sh = &rw_local->self_heal; + afr_local_t *loop_local = NULL; + afr_self_heal_t *loop_sh = NULL; - sh_frame = rw_sh->sh_frame; - sh_local = sh_frame->local; - sh = &sh_local->self_heal; - sh_priv = sh->private; - - AFR_STACK_DESTROY (rw_frame); + if (!loop_frame) + goto out; - sh_full_loop_driver (sh_frame, this, _gf_false); + loop_local = loop_frame->local; + if (loop_local) { + loop_sh = &loop_local->self_heal; + } + if (loop_sh && loop_sh->loop_completion_cbk) { + if (loop_sh->data_lock_held) { + afr_sh_data_unlock (loop_frame, this, + loop_sh->loop_completion_cbk); + } else { + loop_sh->loop_completion_cbk (loop_frame, this); + } + } else { + //default loop_completion_cbk destroys the loop_frame + if (loop_sh && !loop_sh->loop_completion_cbk) + GF_ASSERT (!loop_sh->data_lock_held); + sh_destroy_frame (loop_frame, this); + } +out: return 0; } - static int -sh_full_write_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) +sh_loop_lock_success (call_frame_t *loop_frame, xlator_t *this) { - afr_private_t * priv = NULL; - afr_local_t * rw_local = NULL; - afr_self_heal_t *rw_sh = NULL; - call_frame_t *sh_frame = NULL; - afr_local_t * sh_local = NULL; - afr_self_heal_t *sh = NULL; - int child_index = (long) cookie; - int call_count = 0; - - priv = this->private; - - rw_local = rw_frame->local; - rw_sh = &rw_local->self_heal; - - sh_frame = rw_sh->sh_frame; - sh_local = sh_frame->local; - sh = &sh_local->self_heal; - - gf_log (this->name, GF_LOG_TRACE, - "wrote %d bytes of data from %s to child %d, offset %"PRId64"", - op_ret, sh_local->loc.path, child_index, - rw_sh->offset - op_ret); - - LOCK (&sh_frame->lock); - { - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "write to %s failed on subvolume %s (%s)", - sh_local->loc.path, - priv->children[child_index]->name, - strerror (op_errno)); + afr_local_t *loop_local = NULL; + afr_self_heal_t *loop_sh = NULL; - sh->op_failed = 1; - } - } - UNLOCK (&sh_frame->lock); + loop_local = loop_frame->local; + loop_sh = &loop_local->self_heal; - call_count = afr_frame_return (rw_frame); - - if (call_count == 0) { - sh_full_loop_return (rw_frame, this, rw_sh->offset - op_ret); - } + sh_loop_finish (loop_sh->old_loop_frame, this); + loop_sh->old_loop_frame = NULL; + gf_log (this->name, GF_LOG_DEBUG, "Aquired lock for range %"PRIu64 + " %"PRIu64, loop_sh->offset, loop_sh->block_size); + loop_sh->data_lock_held = _gf_true; + loop_sh->sh_data_algo_start (loop_frame, this); return 0; } - static int -sh_full_read_cbk (call_frame_t *rw_frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iovec *vector, int32_t count, struct iatt *buf, - struct iobref *iobref) +sh_loop_lock_failure (call_frame_t *loop_frame, xlator_t *this) { - afr_private_t * priv = NULL; - afr_local_t * rw_local = NULL; - afr_self_heal_t *rw_sh = NULL; - call_frame_t *sh_frame = NULL; - afr_local_t * sh_local = NULL; - afr_self_heal_t *sh = NULL; - int i = 0; - int call_count = 0; - off_t offset = (long) cookie; - - priv = this->private; - rw_local = rw_frame->local; - rw_sh = &rw_local->self_heal; - - sh_frame = rw_sh->sh_frame; - sh_local = sh_frame->local; - sh = &sh_local->self_heal; - - call_count = sh->active_sinks; - - rw_local->call_count = call_count; - - gf_log (this->name, GF_LOG_TRACE, - "read %d bytes of data from %s, offset %"PRId64"", - op_ret, sh_local->loc.path, offset); - - if (op_ret <= 0) { - gf_log (this->name, GF_LOG_ERROR, - "read from %s failed on subvolume %s (%s)", - sh_local->loc.path, - priv->children[sh->source]->name, - strerror (op_errno)); - sh->op_failed = 1; - sh_full_loop_return (rw_frame, this, offset); - return 0; - } - - rw_sh->offset += op_ret; - - if (sh->file_has_holes) { - if (iov_0filled (vector, count) == 0) { - /* the iter function depends on the - sh->offset already being updated - above - */ - gf_log (this->name, GF_LOG_DEBUG, - "block has all 0 filled"); - sh_full_loop_return (rw_frame, this, offset); - goto out; - } - } - - for (i = 0; i < priv->child_count; i++) { - if (sh->sources[i] || !sh_local->child_up[i]) - continue; - - /* this is a sink, so write to it */ - - STACK_WIND_COOKIE (rw_frame, sh_full_write_cbk, - (void *) (long) i, - priv->children[i], - priv->children[i]->fops->writev, - sh->healing_fd, vector, count, offset, - iobref); - - if (!--call_count) - break; - } - -out: + call_frame_t *sh_frame = NULL; + afr_local_t *loop_local = NULL; + afr_self_heal_t *loop_sh = NULL; + + loop_local = loop_frame->local; + loop_sh = &loop_local->self_heal; + sh_frame = loop_sh->sh_frame; + + gf_log (this->name, GF_LOG_ERROR, "failed lock for range %"PRIu64 + " %"PRIu64, loop_sh->offset, loop_sh->block_size); + sh_loop_finish (loop_sh->old_loop_frame, this); + loop_sh->old_loop_frame = NULL; + sh_loop_return (sh_frame, this, loop_frame, -1, ENOTCONN); return 0; } - static int -sh_full_read_write (call_frame_t *frame, xlator_t *this, off_t offset) +sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset, + call_frame_t *old_loop_frame) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - afr_local_t * rw_local = NULL; - afr_self_heal_t *rw_sh = NULL; - afr_self_heal_t *sh = NULL; - call_frame_t *rw_frame = NULL; - int32_t op_errno = 0; + call_frame_t *new_loop_frame = NULL; + afr_local_t *local = NULL; + afr_self_heal_t *sh = NULL; + afr_local_t *new_loop_local = NULL; + afr_self_heal_t *new_loop_sh = NULL; + afr_private_t *priv = NULL; - priv = this->private; - local = frame->local; - sh = &local->self_heal; - - rw_frame = copy_frame (frame); - if (!rw_frame) - goto out; + GF_ASSERT (sh_frame); - ALLOC_OR_GOTO (rw_local, afr_local_t, out); - - rw_frame->local = rw_local; - rw_sh = &rw_local->self_heal; + local = sh_frame->local; + sh = &local->self_heal; + priv = this->private; - rw_sh->offset = offset; - rw_sh->sh_frame = frame; + new_loop_frame = copy_frame (sh_frame); + if (!new_loop_frame) + goto out; + //We want the frame to have same lk_oner as sh_frame + new_loop_local = afr_local_copy (local, this); + if (!new_loop_local) + goto out; + new_loop_frame->local = new_loop_local; - STACK_WIND_COOKIE (rw_frame, sh_full_read_cbk, - (void *) (long) offset, - priv->children[sh->source], - priv->children[sh->source]->fops->readv, - sh->healing_fd, sh->block_size, - offset); + new_loop_sh = &new_loop_local->self_heal; + new_loop_sh->sources = memdup (sh->sources, + priv->child_count * sizeof (*sh->sources)); + if (!new_loop_sh->sources) + goto out; + new_loop_sh->write_needed = GF_CALLOC (priv->child_count, + sizeof (*new_loop_sh->write_needed), + gf_afr_mt_char); + if (!new_loop_sh->write_needed) + goto out; + new_loop_sh->checksum = GF_CALLOC (priv->child_count, MD5_DIGEST_LEN, + gf_afr_mt_uint8_t); + if (!new_loop_sh->checksum) + goto out; + new_loop_sh->offset = offset; + new_loop_sh->block_size = sh->block_size; + new_loop_sh->old_loop_frame = old_loop_frame; + new_loop_sh->sh_frame = sh_frame; + new_loop_sh->inode = inode_ref (sh->inode); + new_loop_sh->sh_data_algo_start = sh->sh_data_algo_start; + new_loop_sh->source = sh->source; + new_loop_sh->active_sinks = sh->active_sinks; + new_loop_sh->healing_fd = fd_ref (sh->healing_fd); + new_loop_sh->file_has_holes = sh->file_has_holes; + new_loop_sh->loop_completion_cbk = sh_destroy_frame; + afr_sh_data_lock (new_loop_frame, this, offset, new_loop_sh->block_size, + sh_loop_lock_success, sh_loop_lock_failure); return 0; - out: sh->op_failed = 1; - - sh_full_loop_driver (frame, this, _gf_false); - + if (new_loop_frame) { + new_loop_frame->local = new_loop_local; + } + if (old_loop_frame) + sh_loop_finish (old_loop_frame, this); + sh_loop_return (sh_frame, this, new_loop_frame, -1, ENOMEM); return 0; } - static int -sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_call) +sh_loop_driver (call_frame_t *sh_frame, xlator_t *this, + gf_boolean_t is_first_call, call_frame_t *old_loop_frame) { afr_private_t * priv = NULL; afr_local_t * local = NULL; - afr_self_heal_t *sh = NULL; - afr_sh_algo_full_private_t *sh_priv = NULL; + afr_self_heal_t * sh = NULL; + afr_sh_algo_private_t *sh_priv = NULL; gf_boolean_t is_driver_done = _gf_false; blksize_t block_size = 0; - off_t offset = 0; int loop = 0; + off_t offset = 0; priv = this->private; - local = frame->local; + local = sh_frame->local; sh = &local->self_heal; sh_priv = sh->private; @@ -324,23 +294,18 @@ sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_ { if (_gf_false == is_first_call) sh_priv->loops_running--; - offset = sh_priv->offset; - block_size = sh->block_size; - while ((sh->op_failed == 0) && + offset = sh_priv->offset; + block_size = sh->block_size; + while ((!sh->eof_reached) && (0 == sh->op_failed) && (sh_priv->loops_running < priv->data_self_heal_window_size) && (sh_priv->offset < sh->file_size)) { loop++; - gf_log (this->name, GF_LOG_TRACE, - "spawning a loop for offset %"PRId64, - sh_priv->offset); - - sh_priv->offset += sh->block_size; + sh_priv->offset += block_size; sh_priv->loops_running++; if (_gf_false == is_first_call) break; - } if (0 == sh_priv->loops_running) { is_driver_done = _gf_true; @@ -348,274 +313,130 @@ sh_full_loop_driver (call_frame_t *frame, xlator_t *this, gf_boolean_t is_first_ } UNLOCK (&sh_priv->lock); + if (0 == loop) { + //loop finish does unlock, but the erasing of the pending + //xattrs needs to happen before that so do not finish the loop + if (is_driver_done && !sh->op_failed) + goto driver_done; + if (old_loop_frame) { + sh_loop_finish (old_loop_frame, this); + old_loop_frame = NULL; + } + } + + //If we have more loops to form we should finish previous loop after + //the next loop lock while (loop--) { if (sh->op_failed) { // op failed in other loop, stop spawning more loops - sh_full_loop_driver (frame, this, _gf_false); + if (old_loop_frame) { + sh_loop_finish (old_loop_frame, this); + old_loop_frame = NULL; + } + sh_loop_driver (sh_frame, this, _gf_false, NULL); } else { - sh_full_read_write (frame, this, offset); + gf_log (this->name, GF_LOG_TRACE, "spawning a loop " + "for offset %"PRId64, offset); + + sh_loop_start (sh_frame, this, offset, old_loop_frame); + old_loop_frame = NULL; offset += block_size; } } +driver_done: if (is_driver_done) { - sh_full_loop_driver_done (frame, this); + sh_loop_driver_done (sh_frame, this, old_loop_frame); } - - return 0; -} - - -int -afr_sh_algo_full (call_frame_t *frame, xlator_t *this) -{ - afr_local_t * local = NULL; - afr_self_heal_t * sh = NULL; - afr_sh_algo_full_private_t *sh_priv = NULL; - - local = frame->local; - sh = &local->self_heal; - - sh_priv = GF_CALLOC (1, sizeof (*sh_priv), - gf_afr_mt_afr_private_t); - if (!sh_priv) - goto out; - - LOCK_INIT (&sh_priv->lock); - - sh->private = sh_priv; - - local->call_count = 0; - - sh_full_loop_driver (frame, this, _gf_true); -out: return 0; } - -/* - * The "diff" algorithm. Copies only those blocks whose checksums - * don't match with those of source. - */ - - -static void -sh_diff_private_cleanup (call_frame_t *frame, xlator_t *this) -{ - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - afr_self_heal_t * sh = NULL; - afr_sh_algo_diff_private_t *sh_priv = NULL; - int i = 0; - - priv = this->private; - local = frame->local; - sh = &local->self_heal; - - sh_priv = sh->private; - - for (i = 0; i < priv->data_self_heal_window_size; i++) { - if (sh_priv->loops[i]) { - if (sh_priv->loops[i]->write_needed) - GF_FREE (sh_priv->loops[i]->write_needed); - - if (sh_priv->loops[i]->checksum) - GF_FREE (sh_priv->loops[i]->checksum); - - GF_FREE (sh_priv->loops[i]); - } - } - - if (sh_priv) { - if (sh_priv->loops) - GF_FREE (sh_priv->loops); - - GF_FREE (sh_priv); - } - - -} - - -static uint32_t -__make_cookie (int loop_index, int child_index) -{ - uint32_t ret = ((loop_index << 16) | child_index); - return ret; -} - - -static int -__loop_index (uint32_t cookie) -{ - return ((cookie & 0xFFFF0000) >> 16); -} - - -static int -__child_index (uint32_t cookie) -{ - return (cookie & 0x0000FFFF); -} - - -static void -sh_diff_loop_state_reset (struct sh_diff_loop_state *loop_state, int child_count) -{ - loop_state->active = _gf_false; -// loop_state->offset = 0; - - memset (loop_state->write_needed, - 0, sizeof (*loop_state->write_needed) * child_count); - - memset (loop_state->checksum, - 0, MD5_DIGEST_LEN * child_count); -} - - static int -sh_diff_number_of_writes_needed (unsigned char *write_needed, int child_count) -{ - int writes = 0; - int i = 0; - - for (i = 0; i < child_count; i++) { - if (write_needed[i]) - writes++; - } - - return writes; -} - - -static int -sh_diff_loop_driver_done (call_frame_t *frame, xlator_t *this) -{ - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - afr_self_heal_t * sh = NULL; - afr_sh_algo_diff_private_t *sh_priv = NULL; - int32_t total_blocks = 0; - int32_t diff_blocks = 0; - - priv = this->private; - local = frame->local; - sh = &local->self_heal; - sh_priv = sh->private; - total_blocks = sh_priv->total_blocks; - diff_blocks = sh_priv->diff_blocks; - - sh_diff_private_cleanup (frame, this); - if (sh->op_failed) { - gf_log (this->name, GF_LOG_INFO, - "diff self-heal aborting on %s", - local->loc.path); - - local->self_heal.algo_abort_cbk (frame, this); - } else { - gf_log (this->name, GF_LOG_INFO, - "diff self-heal on %s: completed. " - "(%d blocks of %d were different (%.2f%%))", - local->loc.path, diff_blocks, total_blocks, - ((diff_blocks * 1.0)/total_blocks) * 100); - - local->self_heal.algo_completion_cbk (frame, this); - } - - return 0; -} - -static int -sh_diff_loop_driver (call_frame_t *frame, xlator_t *this, - gf_boolean_t is_first_call, - struct sh_diff_loop_state *loop_state); - -static int -sh_diff_loop_return (call_frame_t *rw_frame, xlator_t *this, - struct sh_diff_loop_state *loop_state) +sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame, + int32_t op_ret, int32_t op_errno) { afr_private_t * priv = NULL; - afr_local_t * rw_local = NULL; - afr_self_heal_t * rw_sh = NULL; - call_frame_t *sh_frame = NULL; + afr_local_t * loop_local = NULL; + afr_self_heal_t * loop_sh = NULL; afr_local_t * sh_local = NULL; afr_self_heal_t *sh = NULL; - afr_sh_algo_diff_private_t *sh_priv = NULL; + afr_sh_algo_private_t *sh_priv = NULL; priv = this->private; - rw_local = rw_frame->local; - rw_sh = &rw_local->self_heal; - - sh_frame = rw_sh->sh_frame; sh_local = sh_frame->local; sh = &sh_local->self_heal; sh_priv = sh->private; - gf_log (this->name, GF_LOG_TRACE, - "loop for offset %"PRId64" returned", loop_state->offset); + if (loop_frame) { + loop_local = loop_frame->local; + if (loop_local) + loop_sh = &loop_local->self_heal; + if (loop_sh) + gf_log (this->name, GF_LOG_TRACE, "loop for offset " + "%"PRId64" returned", loop_sh->offset); + } - AFR_STACK_DESTROY (rw_frame); + if (op_ret == -1) { + sh->op_failed = 1; + afr_sh_set_error (sh, op_errno); + if (loop_frame) { + sh_loop_finish (loop_frame, this); + loop_frame = NULL; + } + } - sh_diff_loop_driver (sh_frame, this, _gf_false, loop_state); + sh_loop_driver (sh_frame, this, _gf_false, loop_frame); return 0; } - static int -sh_diff_write_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this, +sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *buf, struct iatt *postbuf) { afr_private_t * priv = NULL; - afr_local_t * rw_local = NULL; - afr_self_heal_t * rw_sh = NULL; + afr_local_t * loop_local = NULL; + afr_self_heal_t * loop_sh = NULL; call_frame_t *sh_frame = NULL; afr_local_t * sh_local = NULL; afr_self_heal_t *sh = NULL; - afr_sh_algo_diff_private_t *sh_priv = NULL; - struct sh_diff_loop_state *loop_state = NULL; + afr_sh_algo_private_t *sh_priv = NULL; int call_count = 0; int child_index = 0; - int loop_index = 0; priv = this->private; - rw_local = rw_frame->local; - rw_sh = &rw_local->self_heal; + loop_local = loop_frame->local; + loop_sh = &loop_local->self_heal; - sh_frame = rw_sh->sh_frame; + sh_frame = loop_sh->sh_frame; sh_local = sh_frame->local; sh = &sh_local->self_heal; sh_priv = sh->private; - child_index = __child_index ((uint32_t) (long) cookie); - loop_index = __loop_index ((uint32_t) (long) cookie); - loop_state = sh_priv->loops[loop_index]; + child_index = (long) cookie; gf_log (this->name, GF_LOG_TRACE, "wrote %d bytes of data from %s to child %d, offset %"PRId64"", - op_ret, sh_local->loc.path, child_index, - loop_state->offset); + op_ret, sh_local->loc.path, child_index, loop_sh->offset); - LOCK (&sh_frame->lock); - { - if (op_ret == -1) { - gf_log (this->name, GF_LOG_ERROR, - "write to %s failed on subvolume %s (%s)", - sh_local->loc.path, - priv->children[child_index]->name, - strerror (op_errno)); + if (op_ret == -1) { + gf_log (this->name, GF_LOG_ERROR, + "write to %s failed on subvolume %s (%s)", + sh_local->loc.path, + priv->children[child_index]->name, + strerror (op_errno)); - sh->op_failed = 1; - } + sh->op_failed = 1; + afr_sh_set_error (loop_sh, op_errno); } - UNLOCK (&sh_frame->lock); - call_count = afr_frame_return (rw_frame); + call_count = afr_frame_return (loop_frame); if (call_count == 0) { - sh_diff_loop_return (rw_frame, this, loop_state); + sh_loop_return (sh_frame, this, loop_frame, + loop_sh->op_ret, loop_sh->op_errno); } return 0; @@ -623,74 +444,71 @@ sh_diff_write_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this, static int -sh_diff_read_cbk (call_frame_t *rw_frame, void *cookie, +sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iovec *vector, int32_t count, struct iatt *buf, struct iobref *iobref) { afr_private_t * priv = NULL; - afr_local_t * rw_local = NULL; - afr_self_heal_t * rw_sh = NULL; - afr_sh_algo_diff_private_t * sh_priv = NULL; + afr_local_t * loop_local = NULL; + afr_self_heal_t * loop_sh = NULL; call_frame_t *sh_frame = NULL; - afr_local_t * sh_local = NULL; - afr_self_heal_t *sh = NULL; - int loop_index = 0; - struct sh_diff_loop_state *loop_state = NULL; - uint32_t wcookie = 0; int i = 0; int call_count = 0; + afr_local_t * sh_local = NULL; + afr_self_heal_t * sh = NULL; - priv = this->private; - rw_local = rw_frame->local; - rw_sh = &rw_local->self_heal; + priv = this->private; + loop_local = loop_frame->local; + loop_sh = &loop_local->self_heal; - sh_frame = rw_sh->sh_frame; + sh_frame = loop_sh->sh_frame; sh_local = sh_frame->local; sh = &sh_local->self_heal; - sh_priv = sh->private; - - loop_index = __loop_index ((uint32_t) (long) cookie); - loop_state = sh_priv->loops[loop_index]; - - call_count = sh_diff_number_of_writes_needed (loop_state->write_needed, - priv->child_count); - - rw_local->call_count = call_count; gf_log (this->name, GF_LOG_TRACE, "read %d bytes of data from %s, offset %"PRId64"", - op_ret, sh_local->loc.path, loop_state->offset); - - if ((op_ret <= 0) || - (call_count == 0)) { - sh_diff_loop_return (rw_frame, this, loop_state); + op_ret, loop_local->loc.path, loop_sh->offset); - return 0; + if (op_ret <= 0) { + if (op_ret < 0) { + sh->op_failed = 1; + gf_log (this->name, GF_LOG_ERROR, "read failed on %d " + "for %s reason :%s", sh->source, + sh_local->loc.path, strerror (errno)); + } else { + sh->eof_reached = _gf_true; + gf_log (this->name, GF_LOG_DEBUG, "Eof reached for %s", + sh_local->loc.path); + } + sh_loop_return (sh_frame, this, loop_frame, op_ret, op_errno); + goto out; } - if (sh->file_has_holes) { - if (iov_0filled (vector, count) == 0) { + if (loop_sh->file_has_holes && iov_0filled (vector, count) == 0) { gf_log (this->name, GF_LOG_DEBUG, "0 filled block"); - sh_diff_loop_return (rw_frame, this, loop_state); + sh_loop_return (sh_frame, this, loop_frame, + op_ret, op_errno); goto out; - } } - for (i = 0; i < priv->child_count; i++) { - if (loop_state->write_needed[i]) { - wcookie = __make_cookie (loop_index, i); + call_count = sh_number_of_writes_needed (loop_sh->write_needed, + priv->child_count); + GF_ASSERT (call_count > 0); + loop_local->call_count = call_count; - STACK_WIND_COOKIE (rw_frame, sh_diff_write_cbk, - (void *) (long) wcookie, - priv->children[i], - priv->children[i]->fops->writev, - sh->healing_fd, vector, count, - loop_state->offset, iobref); + for (i = 0; i < priv->child_count; i++) { + if (!loop_sh->write_needed[i]) + continue; + STACK_WIND_COOKIE (loop_frame, sh_loop_write_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->writev, + loop_sh->healing_fd, vector, count, + loop_sh->offset, iobref); - if (!--call_count) - break; - } + if (!--call_count) + break; } out: @@ -699,100 +517,77 @@ out: static int -sh_diff_read (call_frame_t *rw_frame, xlator_t *this, - int loop_index) +sh_loop_read (call_frame_t *loop_frame, xlator_t *this) { - afr_private_t * priv = NULL; - afr_local_t * rw_local = NULL; - afr_self_heal_t * rw_sh = NULL; - afr_sh_algo_diff_private_t * sh_priv = NULL; - struct sh_diff_loop_state *loop_state = NULL; - call_frame_t *sh_frame = NULL; - afr_local_t * sh_local = NULL; - afr_self_heal_t *sh = NULL; - uint32_t cookie = 0; + afr_private_t *priv = NULL; + afr_local_t *loop_local = NULL; + afr_self_heal_t *loop_sh = NULL; priv = this->private; - rw_local = rw_frame->local; - rw_sh = &rw_local->self_heal; - - sh_frame = rw_sh->sh_frame; - sh_local = sh_frame->local; - sh = &sh_local->self_heal; - sh_priv = sh->private; - - loop_state = sh_priv->loops[loop_index]; + loop_local = loop_frame->local; + loop_sh = &loop_local->self_heal; - cookie = __make_cookie (loop_index, sh->source); - - STACK_WIND_COOKIE (rw_frame, sh_diff_read_cbk, - (void *) (long) cookie, - priv->children[sh->source], - priv->children[sh->source]->fops->readv, - sh->healing_fd, sh_priv->block_size, - loop_state->offset); + STACK_WIND_COOKIE (loop_frame, sh_loop_read_cbk, + (void *) (long) loop_sh->source, + priv->children[loop_sh->source], + priv->children[loop_sh->source]->fops->readv, + loop_sh->healing_fd, loop_sh->block_size, + loop_sh->offset); return 0; } static int -sh_diff_checksum_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this, +sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, uint32_t weak_checksum, uint8_t *strong_checksum) { - afr_private_t * priv = NULL; - afr_local_t * rw_local = NULL; - afr_self_heal_t *rw_sh = NULL; - call_frame_t *sh_frame = NULL; - afr_local_t * sh_local = NULL; - afr_self_heal_t *sh = NULL; - afr_sh_algo_diff_private_t * sh_priv = NULL; - int loop_index = 0; + afr_private_t *priv = NULL; + afr_local_t *loop_local = NULL; + afr_self_heal_t *loop_sh = NULL; + call_frame_t *sh_frame = NULL; + afr_local_t *sh_local = NULL; + afr_self_heal_t *sh = NULL; + afr_sh_algo_private_t *sh_priv = NULL; int child_index = 0; - struct sh_diff_loop_state *loop_state = NULL; int call_count = 0; int i = 0; int write_needed = 0; priv = this->private; - rw_local = rw_frame->local; - rw_sh = &rw_local->self_heal; + loop_local = loop_frame->local; + loop_sh = &loop_local->self_heal; - sh_frame = rw_sh->sh_frame; + sh_frame = loop_sh->sh_frame; sh_local = sh_frame->local; sh = &sh_local->self_heal; sh_priv = sh->private; - child_index = __child_index ((uint32_t) (long) cookie); - loop_index = __loop_index ((uint32_t) (long) cookie); - - loop_state = sh_priv->loops[loop_index]; + child_index = (long) cookie; if (op_ret < 0) { gf_log (this->name, GF_LOG_ERROR, "checksum on %s failed on subvolume %s (%s)", sh_local->loc.path, priv->children[child_index]->name, strerror (op_errno)); - sh->op_failed = 1; } else { - memcpy (loop_state->checksum + child_index * MD5_DIGEST_LEN, - strong_checksum, - MD5_DIGEST_LEN); + memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LEN, + strong_checksum, MD5_DIGEST_LEN); } - call_count = afr_frame_return (rw_frame); + call_count = afr_frame_return (loop_frame); if (call_count == 0) { for (i = 0; i < priv->child_count; i++) { if (sh->sources[i] || !sh_local->child_up[i]) continue; - if (memcmp (loop_state->checksum + (i * MD5_DIGEST_LEN), - loop_state->checksum + (sh->source * MD5_DIGEST_LEN), + if (memcmp (loop_sh->checksum + (i * MD5_DIGEST_LEN), + loop_sh->checksum + (sh->source * MD5_DIGEST_LEN), MD5_DIGEST_LEN)) { /* Checksums differ, so this block @@ -802,9 +597,9 @@ sh_diff_checksum_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this, gf_log (this->name, GF_LOG_DEBUG, "checksum on subvolume %s at offset %" PRId64" differs from that on source", - priv->children[i]->name, loop_state->offset); + priv->children[i]->name, loop_sh->offset); - write_needed = loop_state->write_needed[i] = 1; + write_needed = loop_sh->write_needed[i] = 1; } } @@ -817,271 +612,130 @@ sh_diff_checksum_cbk (call_frame_t *rw_frame, void *cookie, xlator_t *this, UNLOCK (&sh_priv->lock); if (write_needed && !sh->op_failed) { - sh_diff_read (rw_frame, this, loop_index); + sh_loop_read (loop_frame, this); } else { - sh->offset += sh_priv->block_size; - - sh_diff_loop_return (rw_frame, this, loop_state); + sh_loop_return (sh_frame, this, loop_frame, + op_ret, op_errno); } } return 0; } - -static int -sh_diff_find_unused_loop (afr_sh_algo_diff_private_t *sh_priv, int max) -{ - int i = 0; - - LOCK (&sh_priv->lock); - { - for (i = 0; i < max; i++) { - if (sh_priv->loops[i]->active == _gf_false) { - sh_priv->loops[i]->active = _gf_true; - break; - } - } - } - UNLOCK (&sh_priv->lock); - - if (i == max) { - gf_log ("[sh-diff]", GF_LOG_ERROR, - "no free loops found! This shouldn't happen. Please" - " report this to gluster-devel@nongnu.org"); - } - - return i; -} - - static int -sh_diff_checksum (call_frame_t *frame, xlator_t *this, off_t offset) +sh_diff_checksum (call_frame_t *loop_frame, xlator_t *this) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - afr_local_t * rw_local = NULL; - afr_self_heal_t * sh = NULL; - afr_self_heal_t * rw_sh = NULL; - afr_sh_algo_diff_private_t * sh_priv = NULL; - call_frame_t *rw_frame = NULL; - uint32_t cookie = 0; - int loop_index = 0; - struct sh_diff_loop_state *loop_state = NULL; - int32_t op_errno = 0; - int call_count = 0; - int i = 0; - - priv = this->private; - local = frame->local; - sh = &local->self_heal; - - sh_priv = sh->private; - - rw_frame = copy_frame (frame); - if (!rw_frame) - goto out; - - ALLOC_OR_GOTO (rw_local, afr_local_t, out); + afr_private_t *priv = NULL; + afr_local_t *loop_local = NULL; + afr_self_heal_t *loop_sh = NULL; + afr_sh_algo_private_t *loop_sh_priv = NULL; + int call_count = 0; + int i = 0; - rw_frame->local = rw_local; - rw_sh = &rw_local->self_heal; - - rw_sh->offset = sh->offset; - rw_sh->sh_frame = frame; - - call_count = sh->active_sinks + 1; /* sinks and source */ - - rw_local->call_count = call_count; - - loop_index = sh_diff_find_unused_loop (sh_priv, priv->data_self_heal_window_size); + priv = this->private; + loop_local = loop_frame->local; + loop_sh = &loop_local->self_heal; - loop_state = sh_priv->loops[loop_index]; - loop_state->offset = offset; + loop_sh_priv = loop_sh->private; - /* we need to send both the loop index and child index, - so squeeze them both into a 32-bit number */ + call_count = loop_sh->active_sinks + 1; /* sinks and source */ - cookie = __make_cookie (loop_index, sh->source); + loop_local->call_count = call_count; - STACK_WIND_COOKIE (rw_frame, sh_diff_checksum_cbk, - (void *) (long) cookie, - priv->children[sh->source], - priv->children[sh->source]->fops->rchecksum, - sh->healing_fd, - offset, sh_priv->block_size); + STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk, + (void *) (long) loop_sh->source, + priv->children[loop_sh->source], + priv->children[loop_sh->source]->fops->rchecksum, + loop_sh->healing_fd, + loop_sh->offset, loop_sh->block_size); for (i = 0; i < priv->child_count; i++) { - if (sh->sources[i] || !local->child_up[i]) + if (loop_sh->sources[i] || !loop_local->child_up[i]) continue; - cookie = __make_cookie (loop_index, i); - - STACK_WIND_COOKIE (rw_frame, sh_diff_checksum_cbk, - (void *) (long) cookie, + STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk, + (void *) (long) i, priv->children[i], priv->children[i]->fops->rchecksum, - sh->healing_fd, - offset, sh_priv->block_size); + loop_sh->healing_fd, + loop_sh->offset, loop_sh->block_size); if (!--call_count) break; } return 0; - -out: - sh->op_failed = 1; - - sh_diff_loop_driver (frame, this, _gf_false, loop_state); - - return 0; } - static int -sh_diff_loop_driver (call_frame_t *frame, xlator_t *this, - gf_boolean_t is_first_call, - struct sh_diff_loop_state *loop_state) +sh_full_read_write_to_sinks (call_frame_t *loop_frame, xlator_t *this) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - afr_self_heal_t * sh = NULL; - afr_sh_algo_diff_private_t *sh_priv = NULL; - gf_boolean_t is_driver_done = _gf_false; - blksize_t block_size = 0; - int loop = 0; - off_t offset = 0; - char sh_type_str[256] = {0,}; - - priv = this->private; - local = frame->local; - sh = &local->self_heal; - sh_priv = sh->private; - - afr_self_heal_type_str_get(sh, sh_type_str, sizeof(sh_type_str)); - - LOCK (&sh_priv->lock); - { - if (loop_state) - sh_diff_loop_state_reset (loop_state, priv->child_count); - if (_gf_false == is_first_call) - sh_priv->loops_running--; - offset = sh_priv->offset; - block_size = sh_priv->block_size; - while ((0 == sh->op_failed) && - (sh_priv->loops_running < priv->data_self_heal_window_size) - && (sh_priv->offset < sh->file_size)) { - - loop++; - gf_log (this->name, GF_LOG_TRACE, - "spawning a loop for offset %"PRId64, - sh_priv->offset); - - sh_priv->offset += sh_priv->block_size; - sh_priv->loops_running++; - - if (_gf_false == is_first_call) - break; + afr_private_t *priv = NULL; + afr_local_t *loop_local = NULL; + afr_self_heal_t *loop_sh = NULL; + int i = 0; - } - if (0 == sh_priv->loops_running) { - is_driver_done = _gf_true; - } - } - UNLOCK (&sh_priv->lock); - - while (loop--) { - if (sh->op_failed) { - // op failed in other loop, stop spawning more loops - sh_diff_loop_driver (frame, this, _gf_false, NULL); - } else { - sh_diff_checksum (frame, this, offset); - offset += block_size; - } - } + priv = this->private; + loop_local = loop_frame->local; + loop_sh = &loop_local->self_heal; - if (is_driver_done) { - sh_diff_loop_driver_done (frame, this); + for (i = 0; i < priv->child_count; i++) { + if (loop_sh->sources[i] || !loop_local->child_up[i]) + continue; + loop_sh->write_needed[i] = 1; } + sh_loop_read (loop_frame, this); return 0; } +static int +sh_do_nothing (call_frame_t *frame, xlator_t *this) +{ + return 0; +} int -afr_sh_algo_diff (call_frame_t *frame, xlator_t *this) +afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this, + afr_sh_algo_fn sh_data_algo_start) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - afr_self_heal_t * sh = NULL; - afr_sh_algo_diff_private_t *sh_priv = NULL; - int i = 0; + afr_local_t *sh_local = NULL; + afr_self_heal_t *sh = NULL; + afr_sh_algo_private_t *sh_priv = NULL; - priv = this->private; - local = frame->local; - sh = &local->self_heal; + sh_local = sh_frame->local; + sh = &sh_local->self_heal; sh_priv = GF_CALLOC (1, sizeof (*sh_priv), gf_afr_mt_afr_private_t); if (!sh_priv) - goto err; - - sh_priv->block_size = this->ctx->page_size; - - sh->private = sh_priv; + goto out; LOCK_INIT (&sh_priv->lock); - local->call_count = 0; - - sh_priv->loops = GF_CALLOC (priv->data_self_heal_window_size, - sizeof (*sh_priv->loops), - gf_afr_mt_sh_diff_loop_state); - if (!sh_priv->loops) - goto err; - - for (i = 0; i < priv->data_self_heal_window_size; i++) { - sh_priv->loops[i] = GF_CALLOC (1, sizeof (*sh_priv->loops[i]), - gf_afr_mt_sh_diff_loop_state); - if (!sh_priv->loops[i]) - goto err; - - sh_priv->loops[i]->checksum = GF_CALLOC (priv->child_count, - MD5_DIGEST_LEN, gf_afr_mt_uint8_t); - if (!sh_priv->loops[i]->checksum) - goto err; - - sh_priv->loops[i]->write_needed = GF_CALLOC (priv->child_count, - sizeof (*sh_priv->loops[i]->write_needed), - gf_afr_mt_char); - if (!sh_priv->loops[i]->write_needed) - goto err; - - } + sh->private = sh_priv; + sh->sh_data_algo_start = sh_data_algo_start; - sh_diff_loop_driver (frame, this, _gf_true, NULL); + sh_local->call_count = 0; + sh->loop_completion_cbk = sh_do_nothing; + sh_loop_driver (sh_frame, this, _gf_true, sh_frame); +out: return 0; -err: - if (sh_priv) { - if (sh_priv->loops) { - for (i = 0; i < priv->data_self_heal_window_size; i++) { - if (sh_priv->loops[i]->write_needed) - GF_FREE (sh_priv->loops[i]->write_needed); - if (sh_priv->loops[i]->checksum) - GF_FREE (sh_priv->loops[i]->checksum); - if (sh_priv->loops[i]) - GF_FREE (sh_priv->loops[i]); - } - - GF_FREE (sh_priv->loops); - } +} - GF_FREE (sh_priv); - } +int +afr_sh_algo_diff (call_frame_t *sh_frame, xlator_t *this) +{ + afr_sh_start_loops (sh_frame, this, sh_diff_checksum); return 0; } +int +afr_sh_algo_full (call_frame_t *sh_frame, xlator_t *this) +{ + afr_sh_start_loops (sh_frame, this, sh_full_read_write_to_sinks); + return 0; +} struct afr_sh_algorithm afr_self_heal_algorithms[] = { {.name = "full", .fn = afr_sh_algo_full}, |