From b0a3a3fda3f0993cd8c0e1b135bb569b6543e7c0 Mon Sep 17 00:00:00 2001
From: Pranith K
Date: Thu, 14 Jul 2011 08:07:46 +0000
Subject: cluster/afr: Choose next call child from fresh-children for inode-read-fops

Signed-off-by: Pranith Kumar K
Signed-off-by: Anand Avati

BUG: 2840 (files not getting self-healed when the first child goes down)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2840
---
 xlators/cluster/afr/src/afr-common.c      |  37 +-
 xlators/cluster/afr/src/afr-dir-read.c    |  95 ++++--
 xlators/cluster/afr/src/afr-dir-read.h    |   5 -
 xlators/cluster/afr/src/afr-inode-read.c  | 548 ++++++++++++++----------------
 xlators/cluster/afr/src/afr-transaction.c |   4 +-
 xlators/cluster/afr/src/afr.h             |  64 +---
 xlators/cluster/afr/src/pump.c            |  80 ++---
 7 files changed, 391 insertions(+), 442 deletions(-)

diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index e8afc6d8..21f7b4e4 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -452,8 +452,9 @@ afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
  * in execution there is a chance for inode's read_ctx to change.
*/ int32_t -afr_next_call_child (int32_t *fresh_children, size_t child_count, - int32_t *last_index, int32_t read_child) +afr_next_call_child (int32_t *fresh_children, unsigned char *child_up, + size_t child_count, int32_t *last_index, + int32_t read_child) { int next_index = 0; int32_t next_call_child = -1; @@ -463,12 +464,12 @@ afr_next_call_child (int32_t *fresh_children, size_t child_count, next_index = *last_index; retry: next_index++; - if (next_index >= child_count) + if ((next_index >= child_count) || + (fresh_children[next_index] == -1)) goto out; - if (fresh_children[next_index] == read_child) + if ((fresh_children[next_index] == read_child) || + (!child_up[fresh_children[next_index]])) goto retry; - if (fresh_children[next_index] == -1) - goto out; *last_index = next_index; next_call_child = fresh_children[next_index]; out: @@ -1475,7 +1476,8 @@ afr_lookup (call_frame_t *frame, xlator_t *this, if (ret == 0) { /* lookup is a revalidate */ - local->read_child_index = afr_inode_get_read_ctx (this, loc->inode, + local->read_child_index = afr_inode_get_read_ctx (this, + loc->inode, NULL); } else { LOCK (&priv->read_child_lock); @@ -3069,6 +3071,24 @@ out: return ret; } +int +afr_first_up_child (unsigned char *child_up, size_t child_count) +{ + int ret = -1; + int i = 0; + + GF_ASSERT (child_up); + + for (i = 0; i < child_count; i++) { + if (child_up[i]) { + ret = i; + break; + } + } + + return ret; +} + int AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv) { @@ -3147,7 +3167,8 @@ afr_transaction_local_init (afr_local_t *local, afr_private_t *priv) if (priv->optimistic_change_log && child_up_count == priv->child_count) local->optimistic_change_log = 1; - local->first_up_child = afr_first_up_child (priv); + local->first_up_child = afr_first_up_child (local->child_up, + priv->child_count); local->child_errno = GF_CALLOC (sizeof (*local->child_errno), priv->child_count, diff --git a/xlators/cluster/afr/src/afr-dir-read.c 
b/xlators/cluster/afr/src/afr-dir-read.c index 8593d0c1..ce941f01 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -49,7 +49,6 @@ #include "afr-self-heal.h" #include "afr-self-heal-common.h" - int afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this) { @@ -517,24 +516,38 @@ int32_t afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, gf_dirent_t *entries) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - xlator_t ** children = NULL; - int call_child = 0; - int ret = 0; - gf_dirent_t * entry = NULL; - gf_dirent_t * tmp = NULL; - int child_index = -1; - uint64_t ctx = 0; - afr_fd_ctx_t *fd_ctx = NULL; - off_t offset = 0; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + int32_t next_call_child = -1; + int ret = 0; + gf_dirent_t * entry = NULL; + gf_dirent_t * tmp = NULL; + int32_t *last_index = NULL; + int32_t read_child = -1; + int32_t *fresh_children = NULL; + uint64_t ctx = 0; + afr_fd_ctx_t *fd_ctx = NULL; + off_t offset = 0; + int32_t call_child = -1; priv = this->private; children = priv->children; local = frame->local; - child_index = (long) cookie; + read_child = (long) cookie; + last_index = &local->cont.readdir.last_index; + fresh_children = local->fresh_children; + + /* the value of the last_index changes if afr_next_call_child is + * called. So to find the call_child of this callback use last_index + * before the next_call_child call. 
+ */ + if (*last_index == -1) + call_child = read_child; + else + call_child = fresh_children[*last_index]; if (priv->strict_readdir) { ret = fd_ctx_get (local->fd, this, &ctx); @@ -548,25 +561,25 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, fd_ctx = (afr_fd_ctx_t *)(long) ctx; - if (child_went_down (op_ret, op_errno)) { - if (all_tried (child_index, priv->child_count)) { - gf_log (this->name, GF_LOG_INFO, - "all options tried going out"); + if (op_ret == -1) { + next_call_child = afr_next_call_child (fresh_children, + local->child_up, + priv->child_count, + last_index, + read_child); + if (next_call_child < 0) goto out; - } - - call_child = ++child_index; - gf_log (this->name, GF_LOG_TRACE, "starting readdir afresh on child %d, offset %"PRId64, - call_child, (uint64_t) 0); + next_call_child, (uint64_t) 0); fd_ctx->failed_over = _gf_true; STACK_WIND_COOKIE (frame, afr_readdirp_cbk, - (void *) (long) call_child, - children[call_child], - children[call_child]->fops->readdirp, local->fd, + (void *) (long) read_child, + children[next_call_child], + children[next_call_child]->fops->readdirp, + local->fd, local->cont.readdir.size, 0); return 0; } @@ -603,12 +616,12 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, gf_log (this->name, GF_LOG_TRACE, "trying to fetch non-duplicate entries " "from offset %"PRId64", child %s", - offset, children[child_index]->name); + offset, children[call_child]->name); STACK_WIND_COOKIE (frame, afr_readdirp_cbk, - (void *) (long) child_index, - children[child_index], - children[child_index]->fops->readdirp, + (void *) (long) read_child, + children[call_child], + children[call_child]->fops->readdirp, local->fd, local->cont.readdir.size, offset); return 0; } @@ -623,7 +636,6 @@ out: return 0; } - int32_t afr_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, int whichop) @@ -637,6 +649,7 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this, int ret = -1; 
int32_t op_ret = -1; int32_t op_errno = 0; + uint64_t read_child = 0; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -646,19 +659,29 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this, children = priv->children; ALLOC_OR_GOTO (local, afr_local_t, out); + frame->local = local; + ret = AFR_LOCAL_INIT (local, priv); if (ret < 0) { op_errno = -ret; goto out; } - frame->local = local; + local->fresh_children = afr_fresh_children_create (priv->child_count); + if (!local->fresh_children) { + op_errno = ENOMEM; + goto out; + } - call_child = afr_first_up_child (priv); - if (call_child == -1) { - op_errno = ENOTCONN; - gf_log (this->name, GF_LOG_INFO, - "no child is up"); + read_child = afr_inode_get_read_ctx (this, fd->inode, + local->fresh_children); + op_ret = afr_get_call_child (this, local->child_up, read_child, + local->fresh_children, + &call_child, + &local->cont.readdir.last_index); + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; goto out; } diff --git a/xlators/cluster/afr/src/afr-dir-read.h b/xlators/cluster/afr/src/afr-dir-read.h index 40c7b6ae..3143cb97 100644 --- a/xlators/cluster/afr/src/afr-dir-read.h +++ b/xlators/cluster/afr/src/afr-dir-read.h @@ -37,11 +37,6 @@ int32_t afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset); -int32_t -afr_getdents (call_frame_t *frame, xlator_t *this, - fd_t *fd, size_t size, off_t offset, int32_t flag); - - int32_t afr_checksum (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags); diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index f2507f07..caac56f6 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -63,13 +63,14 @@ int32_t afr_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - xlator_t ** children = NULL; - int unwind = 1; - int 
last_tried = -1; - int this_try = -1; - int read_child = -1; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + int unwind = 1; + int32_t *last_index = NULL; + int32_t next_call_child = -1; + int32_t read_child = -1; + int32_t *fresh_children = NULL; priv = this->private; children = priv->children; @@ -79,27 +80,21 @@ afr_access_cbk (call_frame_t *frame, void *cookie, read_child = (long) cookie; if (op_ret == -1) { - retry: - last_tried = local->cont.access.last_tried; - - if (all_tried (last_tried, priv->child_count)) { - gf_log (this->name, GF_LOG_DEBUG, - "%s: all subvolumes tried, going out", - local->loc.path); + last_index = &local->cont.access.last_index; + fresh_children = local->fresh_children; + next_call_child = afr_next_call_child (fresh_children, + local->child_up, + priv->child_count, + last_index, read_child); + if (next_call_child < 0) goto out; - } - this_try = ++local->cont.access.last_tried; - - if (this_try == read_child) { - goto retry; - } unwind = 0; STACK_WIND_COOKIE (frame, afr_access_cbk, (void *) (long) read_child, - children[this_try], - children[this_try]->fops->access, + children[next_call_child], + children[next_call_child]->fops->access, &local->loc, local->cont.access.mask); } @@ -115,13 +110,13 @@ out: int32_t afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask) { - afr_private_t * priv = NULL; - xlator_t ** children = NULL; - int call_child = 0; - afr_local_t *local = NULL; - int32_t read_child = -1; - int32_t op_ret = -1; - int32_t op_errno = 0; + afr_private_t *priv = NULL; + xlator_t **children = NULL; + int call_child = 0; + afr_local_t *local = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + int32_t read_child = -1; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -133,32 +128,31 @@ afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask) children = priv->children; ALLOC_OR_GOTO (local, afr_local_t, out); + frame->local = 
local; - local->fresh_children = GF_CALLOC (priv->child_count, - sizeof (*local->fresh_children), - gf_afr_mt_int32_t); - if (local->fresh_children) { - op_errno = ENOMEM; + op_ret = AFR_LOCAL_INIT (local, priv); + if (op_ret < 0) { + op_errno = -op_ret; goto out; } - read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children); - - if ((read_child >= 0) && (priv->child_up[read_child])) { - call_child = read_child; - - local->cont.access.last_tried = -1; + local->fresh_children = afr_fresh_children_create (priv->child_count); + if (!local->fresh_children) { + op_errno = ENOMEM; + goto out; + } - } else { - call_child = afr_first_up_child (priv); - if (call_child == -1) { - op_errno = ENOTCONN; - gf_log (this->name, GF_LOG_INFO, - "%s: no child is up", loc->path); - goto out; - } - local->cont.access.last_tried = call_child; + read_child = afr_inode_get_read_ctx (this, loc->inode, + local->fresh_children); + op_ret = afr_get_call_child (this, local->child_up, read_child, + local->fresh_children, + &call_child, + &local->cont.access.last_index); + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + goto out; } loc_copy (&local->loc, loc); @@ -166,7 +160,8 @@ afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask) STACK_WIND_COOKIE (frame, afr_access_cbk, (void *) (long) call_child, - children[call_child], children[call_child]->fops->access, + children[call_child], + children[call_child]->fops->access, loc, mask); op_ret = 0; @@ -187,13 +182,14 @@ afr_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *buf) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - xlator_t ** children = NULL; - int unwind = 1; - int last_tried = -1; - int this_try = -1; - int read_child = -1; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + int unwind = 1; + int32_t *last_index = NULL; + int32_t next_call_child = -1; + int32_t read_child 
= -1; + int32_t *fresh_children = NULL; priv = this->private; children = priv->children; @@ -203,27 +199,21 @@ afr_stat_cbk (call_frame_t *frame, void *cookie, local = frame->local; if (op_ret == -1) { - retry: - last_tried = local->cont.stat.last_tried; - - if (all_tried (last_tried, priv->child_count)) { - gf_log (this->name, GF_LOG_DEBUG, - "%s: all subvolumes tried, going out", - local->loc.path); + last_index = &local->cont.stat.last_index; + fresh_children = local->fresh_children; + next_call_child = afr_next_call_child (fresh_children, + local->child_up, + priv->child_count, + last_index, read_child); + if (next_call_child < 0) goto out; - } - this_try = ++local->cont.stat.last_tried; - - if (this_try == read_child) { - goto retry; - } unwind = 0; STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) read_child, - children[this_try], - children[this_try]->fops->stat, + children[next_call_child], + children[next_call_child]->fops->stat, &local->loc); } @@ -239,13 +229,13 @@ out: int32_t afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - xlator_t ** children = NULL; - int32_t read_child = -1; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + xlator_t **children = NULL; int call_child = 0; int32_t op_ret = -1; int32_t op_errno = 0; + int32_t read_child = -1; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -257,35 +247,30 @@ afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) children = priv->children; ALLOC_OR_GOTO (local, afr_local_t, out); - frame->local = local; + op_ret = AFR_LOCAL_INIT (local, priv); + if (op_ret < 0) { + op_errno = -op_ret; + goto out; + } - local->fresh_children = GF_CALLOC (priv->child_count, - sizeof (*local->fresh_children), - gf_afr_mt_int32_t); - if (local->fresh_children) { + local->fresh_children = afr_fresh_children_create (priv->child_count); + if (!local->fresh_children) { op_errno = ENOMEM; goto out; } - read_child = 
afr_inode_get_read_ctx (this, loc->inode, local->fresh_children); - - if ((read_child >= 0) && (priv->child_up[read_child])) { - call_child = read_child; - - local->cont.stat.last_tried = -1; - - } else { - call_child = afr_first_up_child (priv); - if (call_child == -1) { - op_errno = ENOTCONN; - gf_log (this->name, GF_LOG_INFO, - "%s: no child is up", loc->path); - goto out; - } - local->cont.stat.last_tried = call_child; + read_child = afr_inode_get_read_ctx (this, loc->inode, + local->fresh_children); + op_ret = afr_get_call_child (this, local->child_up, read_child, + local->fresh_children, + &call_child, + &local->cont.stat.last_index); + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + goto out; } - loc_copy (&local->loc, loc); local->cont.stat.ino = loc->inode->ino; @@ -313,13 +298,14 @@ int32_t afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *buf) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - xlator_t ** children = NULL; - int unwind = 1; - int last_tried = -1; - int this_try = -1; - int read_child = -1; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + xlator_t **children = NULL; + int unwind = 1; + int32_t *last_index = NULL; + int32_t next_call_child = -1; + int32_t read_child = -1; + int32_t *fresh_children = NULL; priv = this->private; children = priv->children; @@ -329,27 +315,21 @@ afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, read_child = (long) cookie; if (op_ret == -1) { - retry: - last_tried = local->cont.fstat.last_tried; - - if (all_tried (last_tried, priv->child_count)) { - gf_log (this->name, GF_LOG_DEBUG, - "%p: all subvolumes tried, going out", - local->fd); + last_index = &local->cont.fstat.last_index; + fresh_children = local->fresh_children; + next_call_child = afr_next_call_child (fresh_children, + local->child_up, + priv->child_count, + last_index, read_child); + if (next_call_child < 0) goto out; - } - this_try = 
++local->cont.fstat.last_tried; - - if (this_try == read_child) { - goto retry; - } unwind = 0; STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) read_child, - children[this_try], - children[this_try]->fops->fstat, + children[next_call_child], + children[next_call_child]->fops->fstat, local->fd); } @@ -366,13 +346,13 @@ int32_t afr_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - xlator_t ** children = NULL; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + xlator_t **children = NULL; int call_child = 0; - int32_t read_child = -1; int32_t op_ret = -1; int32_t op_errno = 0; + int32_t read_child = 0; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -384,36 +364,36 @@ afr_fstat (call_frame_t *frame, xlator_t *this, children = priv->children; - ALLOC_OR_GOTO (local, afr_local_t, out); + VALIDATE_OR_GOTO (fd->inode, out); + ALLOC_OR_GOTO (local, afr_local_t, out); frame->local = local; - VALIDATE_OR_GOTO (fd->inode, out); + op_ret = AFR_LOCAL_INIT (local, priv); + if (op_ret < 0) { + op_errno = -op_ret; + goto out; + } - local->fresh_children = GF_CALLOC (priv->child_count, - sizeof (*local->fresh_children), - gf_afr_mt_int32_t); - if (local->fresh_children) { + local->fresh_children = afr_fresh_children_create (priv->child_count); + if (!local->fresh_children) { op_errno = ENOMEM; goto out; } - read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children); - if ((read_child >= 0) && (priv->child_up[read_child])) { - call_child = read_child; + read_child = afr_inode_get_read_ctx (this, fd->inode, + local->fresh_children); - local->cont.fstat.last_tried = -1; - } else { - call_child = afr_first_up_child (priv); - if (call_child == -1) { - op_errno = ENOTCONN; - gf_log (this->name, GF_LOG_INFO, - "%p: no child is up", fd); - goto out; - } - local->cont.fstat.last_tried = call_child; + op_ret = afr_get_call_child (this, local->child_up, read_child, + 
local->fresh_children, + &call_child, + &local->cont.fstat.last_index); + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + goto out; } local->cont.fstat.ino = fd->inode->ino; @@ -442,13 +422,14 @@ afr_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, const char *buf, struct iatt *sbuf) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - xlator_t ** children = NULL; - int unwind = 1; - int last_tried = -1; - int this_try = -1; - int read_child = -1; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + int unwind = 1; + int32_t *last_index = NULL; + int32_t next_call_child = -1; + int32_t read_child = -1; + int32_t *fresh_children = NULL; priv = this->private; children = priv->children; @@ -458,26 +439,20 @@ afr_readlink_cbk (call_frame_t *frame, void *cookie, read_child = (long) cookie; if (op_ret == -1) { - retry: - last_tried = local->cont.readlink.last_tried; - - if (all_tried (last_tried, priv->child_count)) { - gf_log (this->name, GF_LOG_DEBUG, - "%s: all subvolumes tried, going out", - local->loc.path); + last_index = &local->cont.readlink.last_index; + fresh_children = local->fresh_children; + next_call_child = afr_next_call_child (fresh_children, + local->child_up, + priv->child_count, + last_index, read_child); + if (next_call_child < 0) goto out; - } - this_try = ++local->cont.readlink.last_tried; - - if (this_try == read_child) { - goto retry; - } unwind = 0; STACK_WIND_COOKIE (frame, afr_readlink_cbk, (void *) (long) read_child, - children[this_try], - children[this_try]->fops->readlink, + children[next_call_child], + children[next_call_child]->fops->readlink, &local->loc, local->cont.readlink.size); } @@ -495,13 +470,13 @@ int32_t afr_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size) { - afr_private_t * priv = NULL; - xlator_t ** children = NULL; - int call_child = 0; - afr_local_t *local = NULL; - int32_t read_child = -1; 
- int32_t op_ret = -1; - int32_t op_errno = 0; + afr_private_t *priv = NULL; + xlator_t **children = NULL; + int call_child = 0; + afr_local_t *local = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + int32_t read_child = -1; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -513,34 +488,28 @@ afr_readlink (call_frame_t *frame, xlator_t *this, children = priv->children; ALLOC_OR_GOTO (local, afr_local_t, out); - frame->local = local; + op_ret = AFR_LOCAL_INIT (local, priv); + if (op_ret < 0) { + op_errno = -op_ret; + goto out; + } - local->fresh_children = GF_CALLOC (priv->child_count, - sizeof (*local->fresh_children), - gf_afr_mt_int32_t); - if (local->fresh_children) { + local->fresh_children = afr_fresh_children_create (priv->child_count); + if (!local->fresh_children) { op_errno = ENOMEM; goto out; } - read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children); - - if ((read_child >= 0) && (priv->child_up[read_child])) { - call_child = read_child; - - local->cont.readlink.last_tried = -1; - - } else { - call_child = afr_first_up_child (priv); - - if (call_child == -1) { - op_errno = ENOTCONN; - gf_log (this->name, GF_LOG_INFO, - "%s: no child is up", loc->path); - goto out; - } - - local->cont.readlink.last_tried = call_child; + read_child = afr_inode_get_read_ctx (this, loc->inode, + local->fresh_children); + op_ret = afr_get_call_child (this, local->child_up, read_child, + local->fresh_children, + &call_child, + &local->cont.readlink.last_index); + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + goto out; } loc_copy (&local->loc, loc); @@ -550,7 +519,8 @@ afr_readlink (call_frame_t *frame, xlator_t *this, STACK_WIND_COOKIE (frame, afr_readlink_cbk, (void *) (long) call_child, - children[call_child], children[call_child]->fops->readlink, + children[call_child], + children[call_child]->fops->readlink, loc, size); op_ret = 0; @@ -622,13 +592,14 @@ afr_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t 
*this, int32_t op_ret, int32_t op_errno, dict_t *dict) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - xlator_t ** children = NULL; - int unwind = 1; - int last_tried = -1; - int this_try = -1; - int read_child = -1; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + int unwind = 1; + int32_t *last_index = NULL; + int32_t next_call_child = -1; + int32_t read_child = -1; + int32_t *fresh_children = NULL; priv = this->private; children = priv->children; @@ -638,26 +609,20 @@ afr_getxattr_cbk (call_frame_t *frame, void *cookie, read_child = (long) cookie; if (op_ret == -1) { - retry: - last_tried = local->cont.getxattr.last_tried; - - if (all_tried (last_tried, priv->child_count)) { - gf_log (this->name, GF_LOG_DEBUG, - "%s: all subvolumes tried, going out", - local->loc.path); + last_index = &local->cont.getxattr.last_index; + fresh_children = local->fresh_children; + next_call_child = afr_next_call_child (fresh_children, + local->child_up, + priv->child_count, + last_index, read_child); + if (next_call_child < 0) goto out; - } - this_try = ++local->cont.getxattr.last_tried; - - if (this_try == read_child) { - goto retry; - } unwind = 0; STACK_WIND_COOKIE (frame, afr_getxattr_cbk, (void *) (long) read_child, - children[this_try], - children[this_try]->fops->getxattr, + children[next_call_child], + children[next_call_child]->fops->getxattr, &local->loc, local->cont.getxattr.name); } @@ -790,16 +755,16 @@ int32_t afr_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name) { - afr_private_t * priv = NULL; - xlator_t ** children = NULL; - int call_child = 0; - afr_local_t * local = NULL; - xlator_list_t * trav = NULL; - xlator_t ** sub_volumes = NULL; - int read_child = -1; - int i = 0; - int32_t op_ret = -1; - int32_t op_errno = 0; + afr_private_t *priv = NULL; + xlator_t **children = NULL; + int call_child = 0; + afr_local_t *local = NULL; + xlator_list_t *trav = NULL; + xlator_t 
**sub_volumes = NULL; + int i = 0; + int32_t op_ret = -1; + int32_t op_errno = 0; + int32_t read_child = -1; VALIDATE_OR_GOTO (frame, out); @@ -814,6 +779,12 @@ afr_getxattr (call_frame_t *frame, xlator_t *this, ALLOC_OR_GOTO (local, afr_local_t, out); frame->local = local; + op_ret = AFR_LOCAL_INIT (local, priv); + if (op_ret < 0) { + op_errno = -op_ret; + goto out; + } + loc_copy (&local->loc, loc); if (name) local->cont.getxattr.name = gf_strdup (name); @@ -908,36 +879,27 @@ afr_getxattr (call_frame_t *frame, xlator_t *this, } } - local->fresh_children = GF_CALLOC (priv->child_count, - sizeof (*local->fresh_children), - gf_afr_mt_int32_t); - if (local->fresh_children) { + local->fresh_children = afr_fresh_children_create (priv->child_count); + if (!local->fresh_children) { op_errno = ENOMEM; goto out; } - read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children); - - if ((read_child >= 0) && (priv->child_up[read_child])) { - call_child = read_child; - - local->cont.getxattr.last_tried = -1; - } else { - call_child = afr_first_up_child (priv); - - if (call_child == -1) { - op_errno = ENOTCONN; - gf_log (this->name, GF_LOG_INFO, - "%s: no child is up", loc->path); - goto out; - } - local->cont.getxattr.last_tried = call_child; + read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children); + op_ret = afr_get_call_child (this, local->child_up, read_child, + local->fresh_children, + &call_child, + &local->cont.getxattr.last_index); + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + goto out; } - STACK_WIND_COOKIE (frame, afr_getxattr_cbk, (void *) (long) call_child, - children[call_child], children[call_child]->fops->getxattr, + children[call_child], + children[call_child]->fops->getxattr, loc, name); op_ret = 0; @@ -971,13 +933,14 @@ afr_readv_cbk (call_frame_t *frame, void *cookie, struct iovec *vector, int32_t count, struct iatt *buf, struct iobref *iobref) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; 
- xlator_t ** children = NULL; - int unwind = 1; - int last_tried = -1; - int this_try = -1; - int read_child = -1; + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + xlator_t ** children = NULL; + int unwind = 1; + int32_t *last_index = NULL; + int32_t next_call_child = -1; + int32_t *fresh_children = NULL; + int32_t read_child = -1; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -993,31 +956,21 @@ afr_readv_cbk (call_frame_t *frame, void *cookie, read_child = (long) cookie; if (op_ret == -1) { - retry: - last_tried = local->cont.readv.last_tried; - - if (all_tried (last_tried, priv->child_count)) { - gf_log (this->name, GF_LOG_DEBUG, - "%p: all subvolumes tried, going out", - local->fd); + last_index = &local->cont.readv.last_index; + fresh_children = local->fresh_children; + next_call_child = afr_next_call_child (fresh_children, + local->child_up, + priv->child_count, + last_index, read_child); + if (next_call_child < 0) goto out; - } - this_try = ++local->cont.readv.last_tried; - - if (this_try == read_child) { - /* - skip the read child since if we are here - we must have already tried that child - */ - goto retry; - } unwind = 0; STACK_WIND_COOKIE (frame, afr_readv_cbk, (void *) (long) read_child, - children[this_try], - children[this_try]->fops->readv, + children[next_call_child], + children[next_call_child]->fops->readv, local->fd, local->cont.readv.size, local->cont.readv.offset); } @@ -1039,10 +992,10 @@ afr_readv (call_frame_t *frame, xlator_t *this, afr_private_t * priv = NULL; afr_local_t * local = NULL; xlator_t ** children = NULL; - int32_t read_child = -1; int call_child = 0; int32_t op_ret = -1; int32_t op_errno = 0; + int32_t read_child = -1; VALIDATE_OR_GOTO (frame, out); VALIDATE_OR_GOTO (this, out); @@ -1053,37 +1006,28 @@ afr_readv (call_frame_t *frame, xlator_t *this, children = priv->children; ALLOC_OR_GOTO (local, afr_local_t, out); - frame->local = local; + op_ret = AFR_LOCAL_INIT (local, priv); + if (op_ret < 
0) { + op_errno = -op_ret; + goto out; + } - local->fresh_children = GF_CALLOC (priv->child_count, - sizeof (*local->fresh_children), - gf_afr_mt_int32_t); - if (local->fresh_children) { + local->fresh_children = afr_fresh_children_create (priv->child_count); + if (!local->fresh_children) { op_errno = ENOMEM; goto out; } - read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children); - - if ((read_child >= 0) && (priv->child_up[read_child])) { - call_child = read_child; - - /* - if read fails from the read child, we try - all children starting with the first one - */ - local->cont.readv.last_tried = -1; - - } else { - call_child = afr_first_up_child (priv); - if (call_child == -1) { - op_errno = ENOTCONN; - gf_log (this->name, GF_LOG_DEBUG, - "%p: no child is up", fd); - goto out; - } - local->cont.readv.last_tried = call_child; + read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children); + op_ret = afr_get_call_child (this, local->child_up, read_child, + local->fresh_children, + &call_child, + &local->cont.readv.last_index); + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + goto out; } local->fd = fd_ref (fd); diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index b8d2e27a..2e2c5726 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -428,10 +428,10 @@ afr_update_read_child (call_frame_t *frame, xlator_t *this, inode_t *inode, if (pending[curr_read_child][idx] != 0) goto out; - fresh_children = GF_CALLOC (priv->child_count, sizeof (*fresh_children), - gf_afr_mt_int32_t); + fresh_children = afr_fresh_children_create (priv->child_count); if (!fresh_children) goto out; + for (new_read_child = 0; new_read_child < priv->child_count; new_read_child++) { diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 0b8f96ec..c6d26314 100644 --- a/xlators/cluster/afr/src/afr.h +++ 
b/xlators/cluster/afr/src/afr.h @@ -395,28 +395,28 @@ typedef struct _afr_local { struct { int32_t mask; - int last_tried; /* index of the child we tried previously */ + int last_index; /* index of the child we tried previously */ } access; struct { - int last_tried; + int last_index; ino_t ino; } stat; struct { - int last_tried; + int last_index; ino_t ino; } fstat; struct { size_t size; - int last_tried; + int last_index; ino_t ino; } readlink; struct { char *name; - int last_tried; + int last_index; long pathinfo_len; } getxattr; @@ -424,7 +424,7 @@ typedef struct _afr_local { ino_t ino; size_t size; off_t offset; - int last_tried; + int last_index; } readv; /* dir read */ @@ -444,20 +444,8 @@ typedef struct _afr_local { off_t offset; gf_boolean_t failed; - int last_tried; + int last_index; } readdir; - - struct { - int32_t op_ret; - int32_t op_errno; - - size_t size; - off_t offset; - int32_t flag; - - int last_tried; - } getdents; - /* inode write */ struct { @@ -860,6 +848,10 @@ AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv); int afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count, transaction_lk_type_t lk_type); + +int +afr_first_up_child (unsigned char *child_up, size_t child_count); + int afr_select_read_child_from_policy (int32_t *fresh_children, int32_t child_count, int32_t prev_read_child, @@ -870,35 +862,15 @@ afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode, int32_t *fresh_children, int32_t prev_read_child, int32_t config_read_child); -/** - * first_up_child - return the index of the first child that is up - */ - -static inline int -afr_first_up_child (afr_private_t *priv) -{ - xlator_t ** children = NULL; - int ret = -1; - int i = 0; - - LOCK (&priv->lock); - { - children = priv->children; - for (i = 0; i < priv->child_count; i++) { - if (priv->child_up[i]) { - ret = i; - break; - } - } - } - UNLOCK (&priv->lock); - - return ret; -} +int32_t +afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t 
read_child, + int32_t *fresh_children, + int32_t *call_child, int32_t *last_index); int32_t -afr_next_call_child (int32_t *fresh_children, size_t child_count, - int32_t *last_index, int32_t read_child); +afr_next_call_child (int32_t *fresh_children, unsigned char *child_up, + size_t child_count, int32_t *last_index, + int32_t read_child); void afr_get_fresh_children (int32_t *success_children, int32_t *sources, int32_t *fresh_children, unsigned int child_count); diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c index 300b0850..e7ff4651 100644 --- a/xlators/cluster/afr/src/pump.c +++ b/xlators/cluster/afr/src/pump.c @@ -1435,14 +1435,15 @@ pump_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *dict) { - afr_private_t * priv = NULL; - afr_local_t * local = NULL; - xlator_t ** children = NULL; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + xlator_t **children = NULL; + int unwind = 1; + int32_t *last_index = NULL; + int32_t next_call_child = -1; + int32_t read_child = -1; + int32_t *fresh_children = NULL; - int unwind = 1; - int last_tried = -1; - int this_try = -1; - int read_child = -1; priv = this->private; children = priv->children; @@ -1452,23 +1453,20 @@ pump_getxattr_cbk (call_frame_t *frame, void *cookie, read_child = (long) cookie; if (op_ret == -1) { - retry: - last_tried = local->cont.getxattr.last_tried; - - if (all_tried (last_tried, priv->child_count)) { - goto out; - } - this_try = ++local->cont.getxattr.last_tried; - - if (this_try == read_child) { - goto retry; - } + last_index = &local->cont.getxattr.last_index; + fresh_children = local->fresh_children; + next_call_child = afr_next_call_child (fresh_children, + local->child_up, + priv->child_count, + last_index, read_child); + if (next_call_child < 0) + goto out; unwind = 0; STACK_WIND_COOKIE (frame, pump_getxattr_cbk, (void *) (long) read_child, - children[this_try], - 
children[this_try]->fops->getxattr, + children[next_call_child], + children[next_call_child]->fops->getxattr, &local->loc, local->cont.getxattr.name); } @@ -1491,12 +1489,10 @@ pump_getxattr (call_frame_t *frame, xlator_t *this, afr_private_t * priv = NULL; xlator_t ** children = NULL; int call_child = 0; - afr_local_t * local = NULL; - - int read_child = -1; - - int32_t op_ret = -1; - int32_t op_errno = 0; + afr_local_t *local = NULL; + int32_t op_ret = -1; + int32_t op_errno = 0; + uint64_t read_child = 0; VALIDATE_OR_GOTO (frame, out); @@ -1511,6 +1507,12 @@ pump_getxattr (call_frame_t *frame, xlator_t *this, ALLOC_OR_GOTO (local, afr_local_t, out); frame->local = local; + op_ret = AFR_LOCAL_INIT (local, priv); + if (op_ret < 0) { + op_errno = -op_ret; + goto out; + } + if (name) { if (!strncmp (name, AFR_XATTR_PREFIX, strlen (AFR_XATTR_PREFIX))) { @@ -1543,25 +1545,17 @@ pump_getxattr (call_frame_t *frame, xlator_t *this, op_errno = ENOMEM; goto out; } - read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children); - if (read_child >= 0) { - call_child = read_child; - - local->cont.getxattr.last_tried = -1; - } else { - call_child = afr_first_up_child (priv); - - if (call_child == -1) { - op_errno = ENOTCONN; - gf_log (this->name, GF_LOG_DEBUG, - "no child is up"); - goto out; - } - - local->cont.getxattr.last_tried = call_child; + read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children); + op_ret = afr_get_call_child (this, local->child_up, read_child, + local->fresh_children, + &call_child, + &local->cont.getxattr.last_index); + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + goto out; } - loc_copy (&local->loc, loc); if (name) local->cont.getxattr.name = gf_strdup (name); -- cgit