From b8c42e5cef817bdd0270b1991c0a365aa321f7cc Mon Sep 17 00:00:00 2001 From: Anand Avati Date: Wed, 29 Sep 2010 06:53:03 +0000 Subject: replicate: keep read_child in inode ctx as up-to-date as possible In every transaction check if the currently set read child in the inode context failed in the fop and set it to another subvol on which the latest fop has passed. This will prevent read fops landing on subvols which have witnessed a failure. Signed-off-by: Anand V. Avati Signed-off-by: Vijay Bellur BUG: 1172 (ls -lh on NFS mount of 2-mirror replicate gives incorrect file size) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=1172 --- xlators/cluster/afr/src/afr-common.c | 3 -- xlators/cluster/afr/src/afr-transaction.c | 50 ++++++++++++++++++++++++++++++- xlators/cluster/afr/src/afr.h | 1 + 3 files changed, 50 insertions(+), 4 deletions(-) diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index b5f060a8730..01341526097 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1433,9 +1433,6 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd) fd_ctx = (afr_fd_ctx_t *)(long) ctx; - gf_log (this->name, GF_LOG_TRACE, - "hits=%d, miss=%d", fd_ctx->hit, fd_ctx->miss); - if (fd_ctx) { if (fd_ctx->pre_op_done) GF_FREE (fd_ctx->pre_op_done); diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index e31f0c1df31..3cdd1829d91 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -404,7 +404,51 @@ afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } -int +void +afr_update_read_child (call_frame_t *frame, xlator_t *this, inode_t *inode, + afr_transaction_type type) +{ /* Keep the read child recorded for `inode` usable after a transaction of the given type: if the current read child's pending[][idx] entry is zero (treated below as "op just failed"), switch to the first child that is up and has a non-zero entry; if no child qualifies, the read child is left unchanged. */ + int curr_read_child = -1; + int new_read_child = -1; + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + int **pending = NULL; + int idx = 0; + + idx = afr_index_for_transaction_type (type); /* second index into pending[][] for this transaction type */ + 
priv = this->private; + local = frame->local; + curr_read_child = afr_read_child (this, inode); /* NOTE(review): used below as an index into pending[] without a range check -- confirm afr_read_child cannot return a negative or out-of-range value */ + pending = local->pending; + + if (pending[curr_read_child][idx] != 0) + return; /* non-zero entry: the current read child is kept */ + + /* current read child has a zero entry for this transaction type; need to set new read_child */ + for (new_read_child = 0; new_read_child < priv->child_count; + new_read_child++) { + + if (!priv->child_up[new_read_child]) + /* child is down */ + continue; + + if (pending[new_read_child][idx] == 0) + /* op just failed */ + continue; + + break; + } + + if (new_read_child == priv->child_count) + /* all children ineligible. leave as-is */ + return; + + afr_set_read_child (this, inode, new_read_child); +} + + +int afr_changelog_post_op (call_frame_t *frame, xlator_t *this) { afr_private_t * priv = this->private; @@ -426,6 +470,10 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this) __mark_down_children (local->pending, priv->child_count, local->child_up, local->transaction.type); + if (local->fd) + afr_update_read_child (frame, this, local->fd->inode, + local->transaction.type); + xattr = alloca (priv->child_count * sizeof (*xattr)); memset (xattr, 0, (priv->child_count * sizeof (*xattr))); for (i = 0; i < priv->child_count; i++) { diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 749264a8d65..e64a6fe1529 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -609,6 +609,7 @@ typedef struct { unsigned int *pre_op_done; unsigned int *opened_on; /* which subvolumes the fd is open on */ unsigned int *pre_op_piggyback; + int flags; int32_t wbflags; uint64_t up_count; /* number of CHILD_UPs this fd has seen */ -- cgit