diff options
| field | value | date |
|---|---|---|
| author | Pranith Kumar K <pranithk@gluster.com> | 2011-06-09 04:00:41 +0000 |
| committer | Anand Avati <avati@gluster.com> | 2011-06-09 07:41:34 -0700 |
| commit | 5462cbb9c483addf5288e44bbc6eae147bd9d442 (patch) | |
| tree | 43b7829e553aaa295c37bf445864656ce86414ab | |
| parent | 272d43e6721d559594375e385b42e88122b42bd9 (diff) | |
cluster/afr: Read-dir should wind to the read-child first
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Signed-off-by: Anand Avati <avati@gluster.com>
BUG: 2840 (files not getting self-healed when the first child goes down)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2840
-rw-r--r-- | xlators/cluster/afr/src/afr-dir-read.c | 97 |
1 files changed, 53 insertions, 44 deletions
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index 19ddcbda762..3b1385377ec 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -521,30 +521,30 @@ int32_t afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, gf_dirent_t *entries) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - xlator_t ** children = NULL; - ino_t inum = 0; - - int call_child = 0; - int ret = 0; - - gf_dirent_t * entry = NULL; - gf_dirent_t * tmp = NULL; - - int child_index = -1; - - uint64_t ctx = 0; - afr_fd_ctx_t *fd_ctx = NULL; - - off_t offset = 0; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + xlator_t **children = NULL; + ino_t inum = 0; + int call_child = 0; + int first_call_child = 0; + int ret = 0; + gf_dirent_t *entry = NULL; + gf_dirent_t *tmp = NULL; + uint64_t ctx = 0; + afr_fd_ctx_t *fd_ctx = NULL; + off_t offset = 0; priv = this->private; children = priv->children; local = frame->local; - child_index = (long) cookie; + first_call_child = (long) cookie; + if (local->cont.readdir.last_tried == -1) { + call_child = (long) cookie; + } else { + call_child = local->cont.readdir.last_tried; + } if (priv->strict_readdir) { ret = fd_ctx_get (local->fd, this, &ctx); @@ -559,11 +559,13 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, fd_ctx = (afr_fd_ctx_t *)(long) ctx; if (child_went_down (op_ret, op_errno)) { - if (all_tried (child_index, priv->child_count)) { + if ((call_child + 1) % priv->child_count + == first_call_child) { goto out; } - call_child = ++child_index; + call_child = (call_child + 1) % priv->child_count; + local->cont.readdir.last_tried = call_child; gf_log (this->name, GF_LOG_TRACE, "starting readdir afresh on child %d, offset %"PRId64, @@ -572,7 +574,7 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, fd_ctx->failed_over = _gf_true; STACK_WIND_COOKIE 
(frame, afr_readdirp_cbk, - (void *) (long) call_child, + (void *) (long) first_call_child, children[call_child], children[call_child]->fops->readdirp, local->fd, local->cont.readdir.size, 0); @@ -583,10 +585,10 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret != -1) { list_for_each_entry_safe (entry, tmp, &entries->list, list) { inum = afr_itransform (entry->d_ino, priv->child_count, - child_index); + call_child); entry->d_ino = inum; inum = afr_itransform (entry->d_stat.ia_ino, - priv->child_count, child_index); + priv->child_count, call_child); entry->d_stat.ia_ino = inum; if ((local->fd->inode == local->fd->inode->table->root) @@ -614,13 +616,14 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, try to get more entries */ gf_log (this->name, GF_LOG_TRACE, - "trying to fetch non-duplicate entries from offset %"PRId64", child %s", - offset, children[child_index]->name); + "trying to fetch non-duplicate entries " + "from offset %"PRId64", child %s", + offset, children[call_child]->name); STACK_WIND_COOKIE (frame, afr_readdirp_cbk, - (void *) (long) child_index, - children[child_index], - children[child_index]->fops->readdirp, + (void *) (long) first_call_child, + children[call_child], + children[call_child]->fops->readdirp, local->fd, local->cont.readdir.size, offset); return 0; } @@ -640,18 +643,16 @@ int32_t afr_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, int whichop) { - afr_private_t * priv = NULL; - xlator_t ** children = NULL; + afr_private_t *priv = NULL; + xlator_t **children = NULL; int call_child = 0; afr_local_t *local = NULL; - - uint64_t ctx; - afr_fd_ctx_t *fd_ctx; - - int ret = -1; - - int32_t op_ret = -1; - int32_t op_errno = 0; + uint64_t ctx; + afr_fd_ctx_t *fd_ctx; + int ret = -1; + int32_t op_ret = -1; + int32_t op_errno = 0; + int32_t read_child = -1; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -669,12 +670,20 @@ afr_do_readdir 
(call_frame_t *frame, xlator_t *this, frame->local = local; - call_child = afr_first_up_child (priv); - if (call_child == -1) { - op_errno = ENOTCONN; - gf_log (this->name, GF_LOG_DEBUG, - "no child is up"); - goto out; + read_child = afr_read_child (this, fd->inode); + + if ((read_child >= 0) && (priv->child_up[read_child])) { + call_child = read_child; + local->cont.readdir.last_tried = -1; + } else { + call_child = afr_first_up_child (priv); + if (call_child == -1) { + op_errno = ENOTCONN; + gf_log (this->name, GF_LOG_INFO, + "no child is up"); + goto out; + } + local->cont.readdir.last_tried = call_child; } local->fd = fd_ref (fd); |