From 1bcb009cf65e57117653bbbb5cdf673f9d9142e9 Mon Sep 17 00:00:00 2001 From: Vikas Gorur Date: Wed, 25 Nov 2009 07:51:23 +0000 Subject: cluster/afr: Do self-heal on unopened fds. This patch completes the previous patch for self-heal of open fds in replicate. If an fd was never opened on a subvolume, we remember that and do the open after we've done self-heal on that fd. Signed-off-by: Vikas Gorur Signed-off-by: Anand V. Avati BUG: 170 (Auto-heal fails on files that are open()-ed/mmap()-ed) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=170 --- xlators/cluster/afr/src/afr-dir-write.c | 17 +++ xlators/cluster/afr/src/afr-inode-write.c | 2 + xlators/cluster/afr/src/afr-open.c | 185 ++++++++++++++++++++++++++---- xlators/cluster/afr/src/afr-transaction.c | 41 ++++++- xlators/cluster/afr/src/afr.c | 9 ++ xlators/cluster/afr/src/afr.h | 4 + 6 files changed, 232 insertions(+), 26 deletions(-) (limited to 'xlators/cluster/afr') diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 9bea54c44..55fc1a006 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -126,6 +126,9 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, afr_local_t * local = NULL; afr_private_t * priv = NULL; + uint64_t ctx; + afr_fd_ctx_t *fd_ctx; + int ret = 0; int call_count = -1; @@ -154,6 +157,20 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->op_errno = -ret; } + ret = fd_ctx_get (fd, this, &ctx); + + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "could not get fd ctx for fd=%p", fd); + local->op_ret = -1; + local->op_errno = -ret; + } + + fd_ctx = (afr_fd_ctx_t *)(long) ctx; + + fd_ctx->opened_on[child_index] = 1; + fd_ctx->flags = local->cont.create.flags; + if (local->success_count == 0) { local->cont.create.buf = *buf; diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 
5f35aa26f..32e93e6e2 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -228,6 +228,8 @@ afr_do_writev (call_frame_t *frame, xlator_t *this) local->op = GF_FOP_WRITE; + local->success_count = 0; + local->transaction.fop = afr_writev_wind; local->transaction.done = afr_writev_done; local->transaction.unwind = afr_writev_unwind; diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c index 1918dee81..539e258a1 100644 --- a/xlators/cluster/afr/src/afr-open.c +++ b/xlators/cluster/afr/src/afr-open.c @@ -62,14 +62,6 @@ afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, struct stat *postbuf) { afr_local_t * local = frame->local; - int ret = 0; - - ret = afr_fd_ctx_set (this, local->fd); - - if (ret < 0) { - local->op_ret = -1; - local->op_errno = -ret; - } AFR_STACK_UNWIND (open, frame, local->op_ret, local->op_errno, local->fd); @@ -85,6 +77,11 @@ afr_open_cbk (call_frame_t *frame, void *cookie, afr_local_t * local = NULL; afr_private_t * priv = NULL; + int child_index = (long) cookie; + + uint64_t ctx; + afr_fd_ctx_t *fd_ctx; + int ret = 0; int call_count = -1; @@ -101,6 +98,32 @@ afr_open_cbk (call_frame_t *frame, void *cookie, if (op_ret >= 0) { local->op_ret = op_ret; local->success_count++; + + ret = afr_fd_ctx_set (this, fd); + + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "could not set fd ctx for fd=%p", + fd); + + local->op_ret = -1; + local->op_errno = -ret; + } + + ret = fd_ctx_get (fd, this, &ctx); + + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "could not get fd ctx for fd=%p", fd); + local->op_ret = -1; + local->op_errno = -ret; + } + + fd_ctx = (afr_fd_ctx_t *)(long) ctx; + + fd_ctx->opened_on[child_index] = 1; + fd_ctx->flags = local->cont.open.flags; + fd_ctx->wbflags = local->cont.open.wbflags; } } UNLOCK (&frame->lock); @@ -114,17 +137,6 @@ afr_open_cbk (call_frame_t *frame, void *cookie, this, this->fops->ftruncate, fd, 0); } 
else { - ret = afr_fd_ctx_set (this, fd); - - if (ret < 0) { - gf_log (this->name, GF_LOG_DEBUG, - "could not set fd ctx for fd=%p", - fd); - - local->op_ret = -1; - local->op_errno = -ret; - } - AFR_STACK_UNWIND (open, frame, local->op_ret, local->op_errno, local->fd); } @@ -173,7 +185,11 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, frame->local = local; call_count = local->call_count; - local->cont.open.flags = flags; + loc_copy (&local->loc, loc); + + local->cont.open.flags = flags; + local->cont.open.wbflags = wbflags; + local->fd = fd_ref (fd); for (i = 0; i < priv->child_count; i++) { @@ -198,14 +214,134 @@ out: } +int +afr_up_down_flush_open_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + fd_t *fd) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + int ret = 0; + + uint64_t ctx; + afr_fd_ctx_t *fd_ctx; + + int call_count = 0; + int child_index = (long) cookie; + + priv = this->private; + local = frame->local; + + LOCK (&frame->lock); + { + if (op_ret >= 0) { + ret = fd_ctx_get (fd, this, &ctx); + + if (ret < 0) { + goto out; + } + + fd_ctx = (afr_fd_ctx_t *)(long) ctx; + + fd_ctx->opened_on[child_index] = 1; + + gf_log (this->name, GF_LOG_TRACE, + "fd for %s opened successfully on subvolume %s", + local->loc.path, priv->children[child_index]->name); + } + } +out: + UNLOCK (&frame->lock); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.post_post_op (frame, this); + } + + return 0; +} + + +static int +__unopened_count (int child_count, unsigned char *opened_on, unsigned char *child_up) +{ + int i; + int count = 0; + + for (i = 0; i < child_count; i++) { + if (!opened_on[i] && child_up[i]) + count++; + } + + return count; +} + + int afr_up_down_flush_sh_unwind (call_frame_t *frame, xlator_t *this) { - afr_local_t *local = NULL; + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + + uint64_t ctx; + afr_fd_ctx_t *fd_ctx; 
+ int abandon = 0; + int ret = 0; + int i; + int call_count = 0; + + priv = this->private; local = frame->local; - local->transaction.post_post_op (frame, this); + /* + * Some subvolumes might have come up on which we never + * opened this fd in the first place. Re-open fd's on those + * subvolumes now. + */ + + ret = fd_ctx_get (local->fd, this, &ctx); + + if (ret < 0) { + abandon = 1; + goto out; + } + + fd_ctx = (afr_fd_ctx_t *)(long) ctx; + + call_count = __unopened_count (priv->child_count, fd_ctx->opened_on, + local->child_up); + + if (call_count == 0) { + abandon = 1; + goto out; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (!fd_ctx->opened_on[i] && local->child_up[i]) { + gf_log (this->name, GF_LOG_TRACE, + "opening fd for %s on subvolume %s", + local->loc.path, priv->children[i]->name); + + STACK_WIND_COOKIE (frame, afr_up_down_flush_open_cbk, + (void *)(long) i, + priv->children[i], + priv->children[i]->fops->open, + &local->loc, fd_ctx->flags, local->fd, + fd_ctx->wbflags); + + if (!--call_count) + break; + } + } + +out: + if (abandon) + local->transaction.post_post_op (frame, this); return 0; } @@ -227,6 +363,11 @@ afr_up_down_flush_post_post_op (call_frame_t *frame, xlator_t *this) local->loc.inode = inode_ref (local->fd->inode); local->loc.parent = inode_parent (local->fd->inode, 0, NULL); + /* forcibly trigger missing-entries self-heal */ + + local->success_count = 1; + local->enoent_count = 1; + sh->data_lock_held = _gf_true; sh->need_data_self_heal = _gf_true; sh->mode = local->fd->inode->st_mode; diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 94f0972a1..1b43c6d81 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -1201,8 +1201,13 @@ int afr_lock_rec (call_frame_t *frame, xlator_t *this, int child_index) afr_local_t * local = NULL; afr_private_t * priv = NULL; + uint64_t ctx; + afr_fd_ctx_t 
*fd_ctx; + struct flock flock; + int ret = 0; + loc_t * lower = NULL; loc_t * higher = NULL; @@ -1216,10 +1221,38 @@ int afr_lock_rec (call_frame_t *frame, xlator_t *this, int child_index) flock.l_len = local->transaction.len; flock.l_type = F_WRLCK; - /* skip over children that are down */ - while ((child_index < priv->child_count) - && !local->child_up[child_index]) - child_index++; + if (local->fd) { + ret = fd_ctx_get (local->fd, this, &ctx); + + if (ret < 0) { + gf_log (this->name, GF_LOG_DEBUG, + "unable to get fd ctx for fd=%p", + local->fd); + + local->op_ret = -1; + local->op_errno = EINVAL; + + afr_unlock (frame, this); + + return 0; + } + + fd_ctx = (afr_fd_ctx_t *)(long) ctx; + + /* skip over children that are down + or don't have the fd open */ + + while ((child_index < priv->child_count) + && (!local->child_up[child_index] + || !fd_ctx->opened_on[child_index])) + + child_index++; + } else { + /* skip over children that are down */ + while ((child_index < priv->child_count) + && !local->child_up[child_index]) + child_index++; + } if ((child_index == priv->child_count) && local->transaction.lock_count == 0) { diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 994588c87..2d0138aba 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -856,6 +856,15 @@ afr_fd_ctx_set (xlator_t *this, fd_t *fd) goto unlock; } + fd_ctx->opened_on = CALLOC (sizeof (*fd_ctx->opened_on), + priv->child_count); + if (!fd_ctx->opened_on) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory"); + op_ret = -ENOMEM; + goto unlock; + } + fd_ctx->child_failed = CALLOC (sizeof (*fd_ctx->child_failed), priv->child_count); diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 4de1f2820..6a1c53cee 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -261,6 +261,7 @@ typedef struct _afr_local { struct { int32_t flags; + int32_t wbflags; } open; struct { @@ -534,7 +535,10 @@ 
typedef struct _afr_local { typedef struct { unsigned char *pre_op_done; + unsigned char *opened_on; /* which subvolumes the fd is open on */ unsigned char *child_failed; + int flags; + int32_t wbflags; uint64_t up_count; /* number of CHILD_UPs this fd has seen */ uint64_t down_count; /* number of CHILD_DOWNs this fd has seen */ } afr_fd_ctx_t; -- cgit