From 5634986f594fe75d0cd2e69cadf002a2c701f366 Mon Sep 17 00:00:00 2001 From: Vikas Gorur Date: Tue, 7 Apr 2009 06:55:43 -0700 Subject: Fix in changelog logic. If a writev fails, remember it by marking it in the fd context. Signed-off-by: Anand V. Avati --- xlators/cluster/afr/src/afr-dir-write.c | 37 ++++++--- xlators/cluster/afr/src/afr-inode-write.c | 16 ++-- xlators/cluster/afr/src/afr-transaction.c | 126 +++++++++++++++++++++++++----- xlators/cluster/afr/src/afr-transaction.h | 2 +- xlators/cluster/afr/src/afr.c | 105 ++++++++++++++++++++++++- xlators/cluster/afr/src/afr.h | 9 +++ 6 files changed, 254 insertions(+), 41 deletions(-) diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 89d38caacf3..2a3d1548a8c 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -108,24 +108,37 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, { afr_local_t * local = NULL; afr_private_t * priv = NULL; - + + int ret = 0; + int call_count = -1; int child_index = -1; local = frame->local; - priv = this->private; + priv = this->private; child_index = (long) cookie; LOCK (&frame->lock); { if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_child_died (frame, this, child_index); + afr_transaction_fop_failed (frame, this, child_index); + + ret = afr_fd_ctx_set (this, fd); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, + "could not set ctx on fd=%p", fd); + + local->op_ret = -1; + local->op_errno = -ret; + } + + if (op_ret != -1) { local->op_ret = op_ret; - if (local->success_count == 0) { + if (local->success_count == 0) { local->cont.create.buf = *buf; local->cont.create.buf.st_ino = afr_itransform (buf->st_ino, @@ -349,7 +362,7 @@ afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_child_died (frame, this, child_index); + afr_transaction_fop_failed (frame, this, child_index); if (op_ret != -1) { local->op_ret = op_ret; @@ -574,7 +587,7 @@ afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_child_died (frame, this, child_index); + afr_transaction_fop_failed (frame, this, child_index); if (op_ret != -1) { local->op_ret = op_ret; @@ -801,7 +814,7 @@ afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_child_died (frame, this, child_index); + afr_transaction_fop_failed (frame, this, child_index); if (op_ret != -1) { local->op_ret = op_ret; @@ -1026,7 +1039,7 @@ afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_child_died (frame, this, child_index); + afr_transaction_fop_failed (frame, this, child_index); if (op_ret != -1) { local->op_ret = op_ret; @@ -1251,7 +1264,7 @@ afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_child_died (frame, this, child_index); + afr_transaction_fop_failed (frame, this, child_index); if ((op_ret != -1) && (local->success_count == 0)) { local->op_ret = op_ret; @@ -1444,7 +1457,7 @@ afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_child_died (frame, this, child_index); + afr_transaction_fop_failed (frame, this, child_index); if (op_ret != -1) { if (local->success_count == 0) { @@ -1632,7 +1645,7 @@ afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_child_died (frame, this, child_index); + afr_transaction_fop_failed (frame, this, child_index); if (op_ret != -1) { if (local->success_count == 0) { @@ -1792,7 +1805,7 @@ afr_setdents_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_child_died (frame, this, child_index); + afr_transaction_fop_failed (frame, this, child_index); if ((op_ret != -1) && (local->success_count == 0)) { local->op_ret = op_ret; diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 025bf8af4c1..8d33d5f62ef 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -95,7 +95,7 @@ afr_chmod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_child_died (frame, this, child_index); + afr_transaction_fop_failed (frame, this, child_index); if (op_ret != -1) { if (local->success_count == 0) { @@ -290,7 +290,7 @@ afr_fchmod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_child_died (frame, this, child_index); + afr_transaction_fop_failed (frame, this, child_index); if (op_ret != -1) { if (local->success_count == 0) { @@ -484,7 +484,7 @@ afr_chown_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_child_died (frame, this, child_index); + afr_transaction_fop_failed (frame, this, child_index); if (op_ret != -1) { if (local->success_count == 0) { @@ -682,7 +682,7 @@ afr_fchown_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_child_died (frame, this, child_index); + afr_transaction_fop_failed (frame, this, child_index); if (op_ret != -1) { if (local->success_count == 0) { @@ -879,7 +879,7 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_child_died (frame, this, child_index); + afr_transaction_fop_failed (frame, this, child_index); if (op_ret != -1) { if (local->success_count == 0) { @@ -1093,7 +1093,7 @@ afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_child_died (frame, this, child_index); + afr_transaction_fop_failed (frame, this, child_index); if (op_ret != -1) { if (local->success_count == 0) { @@ -1292,7 +1292,7 @@ afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_child_died (frame, this, child_index); + afr_transaction_fop_failed (frame, this, child_index); if (op_ret != -1) { if (local->success_count == 0) { @@ -1490,7 +1490,7 @@ afr_utimens_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { if (afr_fop_failed (op_ret, op_errno)) - afr_transaction_child_died (frame, this, child_index); + afr_transaction_fop_failed (frame, this, child_index); if (op_ret != -1) { if (local->success_count == 0) { diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 0590b598405..ba66332827b 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -43,6 +43,55 @@ __mark_child_dead (int32_t *pending, int child_count, int child) } +static void +__mark_fop_failed_on_fd (fd_t *fd, xlator_t *this, + int child_index) +{ + uint64_t ctx; + afr_fd_ctx_t * fd_ctx = NULL; + + int ret = 0; + + ret = fd_ctx_get (fd, this, &ctx); + + if (ret < 0) + goto out; + + fd_ctx = (afr_fd_ctx_t *)(long) ctx; + + fd_ctx->child_failed[child_index] = 1; +out: + return; +} + + +static void +__mark_failed_children (int32_t *pending, int child_count, + xlator_t *this, fd_t *fd) +{ + uint64_t ctx; + afr_fd_ctx_t * fd_ctx = NULL; + + int ret = 0; + int i = 0; + + ret = fd_ctx_get (fd, this, &ctx); + + if (ret < 0) + goto out; + + fd_ctx = (afr_fd_ctx_t *)(long) ctx; + + for (i = 0; i < child_count; i++) { + if (fd_ctx->child_failed[i]) + pending[i] = 0; + } + +out: + return; +} + + static void __mark_down_children (int32_t *pending, int child_count, unsigned char *child_up) { @@ -70,39 +119,58 @@ __is_first_write_on_fd (xlator_t *this, fd_t *fd) int op_ret = 0; int _ret = -1; - LOCK (&fd->inode->lock); + uint64_t ctx; + afr_fd_ctx_t * fd_ctx = NULL; + + LOCK (&fd->lock); { - _ret = fd_ctx_get (fd, this, NULL); + _ret = __fd_ctx_get (fd, this, &ctx); + if (_ret < 0) { gf_log (this->name, GF_LOG_DEBUG, - "first writev() on fd=%p, writing changelog", + "could not get fd ctx on fd=%p", fd); + goto out; + } - _ret = fd_ctx_set (fd, this, 0xaf1); + fd_ctx = (afr_fd_ctx_t *)(long) ctx; + + if (fd_ctx->pre_op_done == 0) { + fd_ctx->pre_op_done = 1; op_ret = 1; } } - UNLOCK (&fd->inode->lock); +out: + UNLOCK (&fd->lock); return op_ret; } static int -__unset_fd_ctx_if_set (xlator_t *this, fd_t *fd) +__if_fd_pre_op_done (xlator_t *this, fd_t *fd) { int op_ret = 0; int _ret = -1; - LOCK (&fd->inode->lock); + uint64_t ctx; + afr_fd_ctx_t * fd_ctx = NULL; + + LOCK (&fd->lock); { - _ret = fd_ctx_get (fd, this, NULL); - if (_ret == 0) { - fd_ctx_del (fd, this, NULL); - op_ret = 1; + _ret = __fd_ctx_get (fd, this, &ctx); + + if (_ret < 0) { + goto out; } + + fd_ctx = (afr_fd_ctx_t *)(long) ctx; + + if (fd_ctx->pre_op_done) + op_ret = 1; } - UNLOCK (&fd->inode->lock); +out: + UNLOCK (&fd->lock); return op_ret; } @@ -206,7 +274,7 @@ __changelog_needed_post_op (call_frame_t *frame, xlator_t *this) break; case GF_FOP_FLUSH: - op_ret = __unset_fd_ctx_if_set (this, local->fd); + op_ret = __if_fd_pre_op_done (this, local->fd); break; default: @@ -413,8 +481,13 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this) local = frame->local; - __mark_all_success (local->pending_array, priv->child_count); - __mark_down_children (local->pending_array, priv->child_count, local->child_up); + __mark_down_children (local->pending_array, priv->child_count, + local->child_up); + + if (local->op == GF_FOP_FLUSH) { + __mark_failed_children (local->pending_array, priv->child_count, + this, local->fd); + } call_count = afr_up_children_count (priv->child_count, local->child_up); @@ -559,6 +632,9 @@ afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this, (local->op_errno == ENOTSUP)) { local->transaction.resume (frame, this); } else { + __mark_all_success (local->pending_array, + priv->child_count); + local->transaction.fop (frame, this); } } @@ -826,6 +902,9 @@ int afr_lock_rec (call_frame_t *frame, xlator_t *this, int child_index) if (__changelog_needed_pre_op (frame, this)) { afr_changelog_pre_op (frame, this); } else { + __mark_all_success (local->pending_array, + priv->child_count); + local->transaction.fop (frame, this); } @@ -958,11 +1037,11 @@ afr_transaction_resume (call_frame_t *frame, xlator_t *this) /** - * afr_transaction_child_died - inform that a child died during an fop + * afr_transaction_fop_failed - inform that an fop failed */ void -afr_transaction_child_died (call_frame_t *frame, xlator_t *this, int child_index) +afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index) { afr_local_t * local = NULL; afr_private_t * priv = NULL; @@ -970,7 +1049,15 @@ afr_transaction_child_died (call_frame_t *frame, xlator_t *this, int child_index local = frame->local; priv = this->private; - __mark_child_dead (local->pending_array, priv->child_count, child_index); + switch (local->op) { + case GF_FOP_WRITE: + __mark_fop_failed_on_fd (local->fd, this, child_index); + break; + default: + __mark_child_dead (local->pending_array, priv->child_count, + child_index); + break; + } } @@ -992,6 +1079,9 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type) if (__changelog_needed_pre_op (frame, this)) { afr_changelog_pre_op (frame, this); } else { + __mark_all_success (local->pending_array, + priv->child_count); + local->transaction.fop (frame, this); } } else { diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h index 77d7a813c46..95e29773902 100644 --- a/xlators/cluster/afr/src/afr-transaction.h +++ b/xlators/cluster/afr/src/afr-transaction.h @@ -27,7 +27,7 @@ #define AFR_ENTRY_PENDING "trusted.glusterfs.afr.entry-pending" void -afr_transaction_child_died (call_frame_t *frame, xlator_t *this, +afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index); int32_t diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index 76e28cc4c52..54a1d71af8f 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -44,6 +44,8 @@ #include "compat.h" #include "byte-order.h" +#include "fd.h" + #include "afr-inode-read.h" #include "afr-inode-write.h" #include "afr-dir-read.h" @@ -697,11 +699,71 @@ out: /* {{{ open */ +int +afr_fd_ctx_set (xlator_t *this, fd_t *fd) +{ + afr_private_t * priv = NULL; + + int op_ret = 0; + int ret = 0; + + uint64_t ctx; + afr_fd_ctx_t * fd_ctx = NULL; + + priv = this->private; + + LOCK (&fd->lock); + { + ret = __fd_ctx_get (fd, this, &ctx); + + if (ret == 0) + goto out; + + fd_ctx = CALLOC (1, sizeof (afr_fd_ctx_t)); + if (!fd_ctx) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + + op_ret = -ENOMEM; + goto out; + } + + fd_ctx->child_failed = CALLOC (sizeof (*fd_ctx->child_failed), + priv->child_count); + + if (!fd_ctx->child_failed) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + + op_ret = -ENOMEM; + goto out; + } + + ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx); + if (ret < 0) { + op_ret = ret; + } + } +out: + UNLOCK (&fd->lock); + + return ret; +} + + int afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct stat *buf) { afr_local_t * local = frame->local; + int ret = 0; + + ret = afr_fd_ctx_set (this, local->fd); + + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + } AFR_STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd); @@ -717,6 +779,8 @@ afr_open_cbk (call_frame_t *frame, void *cookie, afr_local_t * local = NULL; afr_private_t * priv = NULL; + int ret = 0; + int call_count = -1; priv = this->private; @@ -743,8 +807,15 @@ afr_open_cbk (call_frame_t *frame, void *cookie, this, this->fops->ftruncate, fd, 0); } else { - AFR_STACK_UNWIND (frame, local->op_ret, - local->op_errno, local->fd); + ret = afr_fd_ctx_set (this, fd); + + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + + AFR_STACK_UNWIND (frame, local->op_ret, + local->op_errno, local->fd); + } } } @@ -955,6 +1026,34 @@ out: /* }}} */ + +int +afr_release (xlator_t *this, fd_t *fd) +{ + uint64_t ctx; + afr_fd_ctx_t * fd_ctx; + + int ret = 0; + + ret = fd_ctx_get (fd, this, &ctx); + + if (ret < 0) + goto out; + + fd_ctx = (afr_fd_ctx_t *)(long) ctx; + + if (fd_ctx) { + if (fd_ctx->child_failed) + FREE (fd_ctx->child_failed); + + FREE (fd_ctx); + } + +out: + return 0; +} + + /* {{{ fsync */ int @@ -2376,8 +2475,10 @@ struct xlator_mops mops = { struct xlator_cbks cbks = { + .release = afr_release, }; + struct volume_options options[] = { { .key = {"read-subvolume" }, .type = GF_OPTION_TYPE_XLATOR diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 76ccec2ae0b..10e50c967ca 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -394,6 +394,12 @@ typedef struct _afr_local { } afr_local_t; +typedef struct { + unsigned char pre_op_done; + unsigned char *child_failed; +} afr_fd_ctx_t; + + /* try alloc and if it fails, goto label */ #define ALLOC_OR_GOTO(var, type, label) do { \ var = CALLOC (sizeof (type), 1); \ @@ -416,6 +422,9 @@ typedef struct _afr_local { /* have we tried all children? */ #define all_tried(i, count) ((i) == (count) - 1) +int +afr_fd_ctx_set (xlator_t *this, fd_t *fd); + uint64_t afr_read_child (xlator_t *this, inode_t *inode); -- cgit