diff options
| author | Pranith Kumar K <pranithk@gluster.com> | 2011-06-08 09:30:01 +0000 | 
|---|---|---|
| committer | Anand Avati <avati@gluster.com> | 2011-06-08 11:19:01 -0700 | 
| commit | d979696eae8e73bcca9eb6b8b6cdaecf2e19700a (patch) | |
| tree | ee85db96f64c7d7bdc64fadcaf1e6087a2a80ecc | |
| parent | 43368cffd23b9dc4b1f98cf595b0d486b9e6dec9 (diff) | |
cluster/afr: Read-dir should wind to the read-child first
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Signed-off-by: Anand Avati <avati@gluster.com>
BUG: 2840 (files not getting self-healed when the first child goes down)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2840
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-read.c | 50 | 
1 files changed, 33 insertions, 17 deletions
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index b2a001a198a..65adc566465 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -522,10 +522,10 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          xlator_t **      children    = NULL;          ino_t            inum        = 0;          int              call_child  = 0; +        int              first_call_child  = 0;          int              ret         = 0;          gf_dirent_t *    entry       = NULL;          gf_dirent_t *    tmp         = NULL; -        int              child_index = -1;          uint64_t         ctx         = 0;          afr_fd_ctx_t    *fd_ctx      = NULL;          off_t            offset      = 0; @@ -535,7 +535,12 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          local = frame->local; -        child_index = (long) cookie; +        first_call_child = (long) cookie; +        if (local->cont.readdir.last_tried == -1) { +                call_child = (long) cookie; +        } else { +                call_child = local->cont.readdir.last_tried; +        }          if (priv->strict_readdir) {                  ret = fd_ctx_get (local->fd, this, &ctx); @@ -550,13 +555,15 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                  fd_ctx = (afr_fd_ctx_t *)(long) ctx;                  if (child_went_down (op_ret, op_errno)) { -                        if (all_tried (child_index, priv->child_count)) { +                        if ((call_child + 1) % priv->child_count +                                       == first_call_child) {                                  gf_log (this->name, GF_LOG_INFO,                                          "all options tried going out");                                  goto out;                          } -                        call_child = ++child_index; +                        call_child = (call_child + 1) % priv->child_count; +                        local->cont.readdir.last_tried = call_child;                          gf_log (this->name, GF_LOG_TRACE,                                  "starting readdir afresh on child %d, offset %"PRId64, @@ -565,7 +572,7 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          fd_ctx->failed_over = _gf_true;                          STACK_WIND_COOKIE (frame, afr_readdirp_cbk, -                                           (void *) (long) call_child, +                                           (void *) (long) first_call_child,                                             children[call_child],                                             children[call_child]->fops->readdirp, local->fd,                                             local->cont.readdir.size, 0); @@ -576,10 +583,10 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          if (op_ret != -1) {                  list_for_each_entry_safe (entry, tmp, &entries->list, list) {                          inum = afr_itransform (entry->d_ino, priv->child_count, -                                               child_index); +                                               call_child);                          entry->d_ino = inum;                          inum  = afr_itransform (entry->d_stat.ia_ino, -                                                priv->child_count, child_index); +                                                priv->child_count, call_child);                          entry->d_stat.ia_ino = inum;                          if ((local->fd->inode == local->fd->inode->table->root) @@ -611,12 +618,12 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                                  gf_log (this->name, GF_LOG_TRACE,                                          "trying to fetch non-duplicate entries "                                          "from offset %"PRId64", child %s", -                                        offset, children[child_index]->name); +                                        offset, children[call_child]->name);                                  STACK_WIND_COOKIE (frame, afr_readdirp_cbk, -                                                   (void *) (long) child_index, -                                                   children[child_index], -                                                   children[child_index]->fops->readdirp, +                                                   (void *) (long) first_call_child, +                                                   children[call_child], +                                                   children[call_child]->fops->readdirp,                                                     local->fd, local->cont.readdir.size, offset);                                  return 0;                          } @@ -645,6 +652,7 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,          int              ret        = -1;          int32_t          op_ret     = -1;          int32_t          op_errno   = 0; +        int32_t          read_child = -1;          VALIDATE_OR_GOTO (frame, out);          VALIDATE_OR_GOTO (this, out); @@ -662,12 +670,20 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,          frame->local = local; -        call_child = afr_first_up_child (priv); -        if (call_child == -1) { -                op_errno = ENOTCONN; -                gf_log (this->name, GF_LOG_INFO, -                        "no child is up"); -                goto out; +        read_child = afr_read_child (this, fd->inode); + +        if ((read_child >= 0) && (priv->child_up[read_child])) { +                call_child = read_child; +                local->cont.readdir.last_tried = -1; +        } else { +                call_child = afr_first_up_child (priv); +                if (call_child == -1) { +                        op_errno = ENOTCONN; +                        gf_log (this->name, GF_LOG_INFO, +                                "no child is up"); +                        goto out; +                } +                local->cont.readdir.last_tried = call_child;          }          local->fd                  = fd_ref (fd);  | 
