From 74612a456ad1602f8038fae79fee654eb427602a Mon Sep 17 00:00:00 2001
From: Vikas Gorur
Date: Tue, 24 Nov 2009 08:45:09 +0000
Subject: cluster/afr: Do self-heal on reopened fds.

This patch brings in partial support for self-heal of open fds.

The precondition is that the fd should have been opened successfully
during the initial open() (or create()), and we assume that
protocol/client has successfully reopened the fd when the subvolume
comes back up.

It works by doing an "up/down flush" (a dummy flush transaction to do
post-op wherever necessary) and then triggering data self-heal on the
file in the post-post-op hook of the dummy flush transaction. This
ensures that any writes that come in during self-heal will wait until
self-heal completes.

The up/down flush is also done when a subvolume goes down, so that
post-op is done on all subvolumes where pre-op was done.

Signed-off-by: Vikas Gorur
Signed-off-by: Anand V. Avati

BUG: 170 (Auto-heal fails on files that are open()-ed/mmap()-ed)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=170
---
 xlators/cluster/afr/src/afr-inode-write.c | 196 +++++++++++++++++++++++-------
 1 file changed, 152 insertions(+), 44 deletions(-)

(limited to 'xlators/cluster/afr/src/afr-inode-write.c')

diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 7dcc067089b..5f35aa26f66 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -204,6 +204,65 @@ afr_writev_done (call_frame_t *frame, xlator_t *this)
 }
 
 
+/*
+ * Second stage of writev: starts the AFR_DATA_TRANSACTION for the write
+ * whose arguments afr_writev() stored in frame->local.  afr_writev()
+ * calls this directly or installs it as local->up_down_flush_cbk before
+ * issuing an up/down flush.  The local moves to a copy of the frame and
+ * the caller's frame becomes main_frame; unwinds with ENOMEM on failure.
+ */
+int
+afr_do_writev (call_frame_t *frame, xlator_t *this)
+{
+        call_frame_t * transaction_frame = NULL;
+        afr_local_t * local = NULL;
+
+        int op_ret = -1;
+        int op_errno = 0;
+
+        local = frame->local;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Out of memory.");
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+        frame->local = NULL;
+
+        local->op = GF_FOP_WRITE;
+
+        local->transaction.fop = afr_writev_wind;
+        local->transaction.done = afr_writev_done;
+        local->transaction.unwind = afr_writev_unwind;
+
+        local->transaction.main_frame = frame;
+        if (local->fd->flags & O_APPEND) {
+                local->transaction.start = 0;
+                local->transaction.len = 0;
+        } else {
+                local->transaction.start = local->cont.writev.offset;
+                local->transaction.len = iov_length (local->cont.writev.vector,
+                                                     local->cont.writev.count);
+        }
+
+        afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL);
+        }
+
+        return 0;
+}
+
+
 int
 afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
             struct iovec *vector, int32_t count, off_t offset,
@@ -211,26 +270,21 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
 {
         afr_private_t * priv = NULL;
         afr_local_t * local = NULL;
-        call_frame_t *transaction_frame = NULL;
 
         int ret = -1;
 
         int op_ret = -1;
         int op_errno = 0;
 
+        uint64_t ctx;
+        afr_fd_ctx_t *fd_ctx = NULL;
+
         VALIDATE_OR_GOTO (frame, out);
         VALIDATE_OR_GOTO (this, out);
         VALIDATE_OR_GOTO (this->private, out);
 
         priv = this->private;
 
-        transaction_frame = copy_frame (frame);
-        if (!transaction_frame) {
-                gf_log (this->name, GF_LOG_ERROR,
-                        "Out of memory.");
-                goto out;
-        }
-
         ALLOC_OR_GOTO (local, afr_local_t, out);
 
         ret = AFR_LOCAL_INIT (local, priv);
@@ -239,37 +293,39 @@ afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
                 goto out;
         }
 
-        transaction_frame->local = local;
+        frame->local = local;
 
-        local->op = GF_FOP_WRITE;
         local->cont.writev.vector = iov_dup (vector, count);
         local->cont.writev.count = count;
         local->cont.writev.offset = offset;
         local->cont.writev.ino = fd->inode->ino;
         local->cont.writev.iobref = iobref_ref (iobref);
 
-        local->transaction.fop = afr_writev_wind;
-        local->transaction.done = afr_writev_done;
-        local->transaction.unwind = afr_writev_unwind;
-
         local->fd = fd_ref (fd);
 
-        local->transaction.main_frame = frame;
-        if (fd->flags & O_APPEND) {
-                local->transaction.start = 0;
-                local->transaction.len = 0;
-        } else {
-                local->transaction.start = offset;
-                local->transaction.len = iov_length (vector, count);
-        }
+        ret = fd_ctx_get (fd, this, &ctx);
+        if (ret < 0) {
+                op_errno = EBADF;
+                goto out;
+        }
 
-        afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+        fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+
+        if (fd_ctx->down_count < priv->down_count) {
+                local->up_down_flush_cbk = afr_do_writev;
+                afr_up_down_flush (frame, this, fd, AFR_CHILD_DOWN_FLUSH);
+
+        } else if (fd_ctx->up_count < priv->up_count) {
+                local->up_down_flush_cbk = afr_do_writev;
+                afr_up_down_flush (frame, this, fd, AFR_CHILD_UP_FLUSH);
+
+        } else {
+                afr_do_writev (frame, this);
+        }
 
         op_ret = 0;
 out:
         if (op_ret == -1) {
-                if (transaction_frame)
-                        AFR_STACK_DESTROY (transaction_frame);
                 AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL);
         }
 
@@ -647,6 +703,60 @@ afr_ftruncate_done (call_frame_t *frame, xlator_t *this)
 }
 
 
+/*
+ * Second stage of ftruncate: starts the AFR_DATA_TRANSACTION for the
+ * truncate whose offset afr_ftruncate() stored in frame->local.  Called
+ * directly or installed as local->up_down_flush_cbk before a child-down
+ * flush.  The local moves to a copy of the frame and the caller's frame
+ * becomes main_frame; unwinds with ENOMEM if copy_frame() fails.
+ */
+int
+afr_do_ftruncate (call_frame_t *frame, xlator_t *this)
+{
+        call_frame_t * transaction_frame = NULL;
+        afr_local_t * local = NULL;
+
+        int op_ret = -1;
+        int op_errno = 0;
+
+        local = frame->local;
+
+        transaction_frame = copy_frame (frame);
+        if (!transaction_frame) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Out of memory.");
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        transaction_frame->local = local;
+        frame->local = NULL;
+
+        local->op = GF_FOP_FTRUNCATE;
+
+        local->transaction.fop = afr_ftruncate_wind;
+        local->transaction.done = afr_ftruncate_done;
+        local->transaction.unwind = afr_ftruncate_unwind;
+
+        local->transaction.main_frame = frame;
+
+        local->transaction.start = 0;
+        local->transaction.len = local->cont.ftruncate.offset;
+
+        afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+
+        op_ret = 0;
+out:
+        if (op_ret == -1) {
+                if (transaction_frame)
+                        AFR_STACK_DESTROY (transaction_frame);
+                AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, NULL);
+        }
+
+        return 0;
+}
+
+
 int
 afr_ftruncate (call_frame_t *frame, xlator_t *this,
                fd_t *fd, off_t offset)
@@ -660,19 +770,15 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this,
         int op_ret = -1;
         int op_errno = 0;
 
+        uint64_t ctx;
+        afr_fd_ctx_t *fd_ctx = NULL;
+
         VALIDATE_OR_GOTO (frame, out);
         VALIDATE_OR_GOTO (this, out);
         VALIDATE_OR_GOTO (this->private, out);
 
         priv = this->private;
 
-        transaction_frame = copy_frame (frame);
-        if (!transaction_frame) {
-                gf_log (this->name, GF_LOG_ERROR,
-                        "Out of memory.");
-                goto out;
-        }
-
         ALLOC_OR_GOTO (local, afr_local_t, out);
 
         ret = AFR_LOCAL_INIT (local, priv);
@@ -681,25 +787,27 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this,
                 goto out;
         }
 
-        transaction_frame->local = local;
-
-        local->op = GF_FOP_FTRUNCATE;
-        local->op_ret = -1;
+        frame->local = local;
 
         local->cont.ftruncate.offset = offset;
         local->cont.ftruncate.ino = fd->inode->ino;
 
-        local->transaction.fop = afr_ftruncate_wind;
-        local->transaction.done = afr_ftruncate_done;
-        local->transaction.unwind = afr_ftruncate_unwind;
-
         local->fd = fd_ref (fd);
 
-        local->transaction.main_frame = frame;
-        local->transaction.start = 0;
-        local->transaction.len = offset;
+        ret = fd_ctx_get (fd, this, &ctx);
+        if (ret < 0) {
+                op_errno = EBADF;
+                goto out;
+        }
 
-        afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+        fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+
+        if (fd_ctx->down_count < priv->down_count) {
+                local->up_down_flush_cbk = afr_do_ftruncate;
+                afr_up_down_flush (frame, this, fd, AFR_CHILD_DOWN_FLUSH);
+        } else {
+                afr_do_ftruncate (frame, this);
+        }
 
         op_ret = 0;
 out:
-- 
cgit