diff options
| author | Pranith Kumar K <pranithk@gluster.com> | 2011-06-09 04:00:41 +0000 | 
|---|---|---|
| committer | Anand Avati <avati@gluster.com> | 2011-06-09 07:41:34 -0700 | 
| commit | 5462cbb9c483addf5288e44bbc6eae147bd9d442 (patch) | |
| tree | 43b7829e553aaa295c37bf445864656ce86414ab | |
| parent | 272d43e6721d559594375e385b42e88122b42bd9 (diff) | |
cluster/afr: Read-dir should wind to the read-child first
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Signed-off-by: Anand Avati <avati@gluster.com>
BUG: 2840 (files not getting self-healed when the first child goes down)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2840
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-read.c | 97 | 
1 files changed, 53 insertions, 44 deletions
```diff
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 19ddcbda762..3b1385377ec 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -521,30 +521,30 @@ int32_t
 afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
                   int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
 {
-        afr_private_t * priv     = NULL;
-        afr_local_t *   local    = NULL;
-        xlator_t **     children = NULL;
-        ino_t           inum = 0;
-
-        int call_child = 0;
-        int ret        = 0;
-
-        gf_dirent_t * entry = NULL;
-        gf_dirent_t * tmp   = NULL;
-
-        int child_index = -1;
-
-        uint64_t      ctx    = 0;
-        afr_fd_ctx_t *fd_ctx = NULL;
-
-	off_t offset = 0;
+        afr_private_t   *priv            = NULL;
+        afr_local_t     *local           = NULL;
+        xlator_t        **children       = NULL;
+        ino_t           inum             = 0;
+        int             call_child       = 0;
+        int             first_call_child = 0;
+        int             ret              = 0;
+        gf_dirent_t     *entry           = NULL;
+        gf_dirent_t     *tmp             = NULL;
+        uint64_t        ctx              = 0;
+        afr_fd_ctx_t    *fd_ctx          = NULL;
+	off_t           offset           = 0;
 
         priv     = this->private;
         children = priv->children;
 
         local = frame->local;
 
-        child_index = (long) cookie;
+        first_call_child = (long) cookie;
+        if (local->cont.readdir.last_tried == -1) {
+                call_child = (long) cookie;
+        } else {
+                call_child = local->cont.readdir.last_tried;
+        }
 
 	if (priv->strict_readdir) {
 		ret = fd_ctx_get (local->fd, this, &ctx);
@@ -559,11 +559,13 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 		fd_ctx = (afr_fd_ctx_t *)(long) ctx;
 
 		if (child_went_down (op_ret, op_errno)) {
-			if (all_tried (child_index, priv->child_count)) {
+			if ((call_child + 1) % priv->child_count
+                                        == first_call_child) {
 				goto out;
 			}
 
-			call_child = ++child_index;
+                        call_child = (call_child + 1) % priv->child_count;
+                        local->cont.readdir.last_tried = call_child;
 
 			gf_log (this->name, GF_LOG_TRACE,
 				"starting readdir afresh on child %d, offset %"PRId64,
@@ -572,7 +574,7 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 			fd_ctx->failed_over = _gf_true;
 
 			STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
-					   (void *) (long) call_child,
+					   (void *) (long) first_call_child,
 					   children[call_child],
 					   children[call_child]->fops->readdirp, local->fd,
 					   local->cont.readdir.size, 0);
@@ -583,10 +585,10 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 	if (op_ret != -1) {
 		list_for_each_entry_safe (entry, tmp, &entries->list, list) {
 			inum = afr_itransform (entry->d_ino, priv->child_count,
-					       child_index);
+					       call_child);
 			entry->d_ino = inum;
 			inum  = afr_itransform (entry->d_stat.ia_ino,
-						priv->child_count, child_index);
+						priv->child_count, call_child);
 			entry->d_stat.ia_ino = inum;
 
 			if ((local->fd->inode == local->fd->inode->table->root)
@@ -614,13 +616,14 @@ afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 				   try to get more entries */
 
 				gf_log (this->name, GF_LOG_TRACE,
-					"trying to fetch non-duplicate entries from offset %"PRId64", child %s",
-					offset, children[child_index]->name);
+                                        "trying to fetch non-duplicate entries "
+                                        "from offset %"PRId64", child %s",
+					offset, children[call_child]->name);
 
 				STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
-						   (void *) (long) child_index,
-						   children[child_index],
-						   children[child_index]->fops->readdirp,
+						   (void *) (long) first_call_child,
+						   children[call_child],
+						   children[call_child]->fops->readdirp,
 						   local->fd, local->cont.readdir.size, offset);
 				return 0;
 			}
@@ -640,18 +643,16 @@ int32_t
 afr_do_readdir (call_frame_t *frame, xlator_t *this,
 	        fd_t *fd, size_t size, off_t offset, int whichop)
 {
-	afr_private_t * priv       = NULL;
-	xlator_t **     children   = NULL;
+	afr_private_t   *priv       = NULL;
+	xlator_t        **children   = NULL;
 	int             call_child = 0;
 	afr_local_t     *local     = NULL;
-
-        uint64_t      ctx;
-        afr_fd_ctx_t *fd_ctx;
-
-	int ret = -1;
-
-	int32_t op_ret   = -1;
-	int32_t op_errno = 0;
+        uint64_t        ctx;
+        afr_fd_ctx_t    *fd_ctx;
+	int             ret = -1;
+	int32_t         op_ret   = -1;
+	int32_t         op_errno = 0;
+        int32_t         read_child = -1;
 
 	VALIDATE_OR_GOTO (frame, out);
 	VALIDATE_OR_GOTO (this, out);
@@ -669,12 +670,20 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,
 
 	frame->local = local;
 
-	call_child = afr_first_up_child (priv);
-	if (call_child == -1) {
-		op_errno = ENOTCONN;
-		gf_log (this->name, GF_LOG_DEBUG,
-			"no child is up");
-		goto out;
+        read_child = afr_read_child (this, fd->inode);
+
+        if ((read_child >= 0) && (priv->child_up[read_child])) {
+                call_child = read_child;
+                local->cont.readdir.last_tried = -1;
+        } else {
+                call_child = afr_first_up_child (priv);
+                if (call_child == -1) {
+                        op_errno = ENOTCONN;
+                        gf_log (this->name, GF_LOG_INFO,
+                                "no child is up");
+                        goto out;
+                }
+                local->cont.readdir.last_tried = call_child;
 	}
 
         local->fd                  = fd_ref (fd);
```
