diff options
author | Anand Avati <avati@gluster.com> | 2010-09-29 06:53:03 +0000 |
---|---|---|
committer | Vijay Bellur <vijay@dev.gluster.com> | 2010-09-29 06:34:35 -0700 |
commit | b8c42e5cef817bdd0270b1991c0a365aa321f7cc (patch) | |
tree | 629473e5e85a810f8518bee099472a655589ae06 /xlators/cluster | |
parent | 87697e7ed88ca650fe70e26d59b67254ed2393f9 (diff) |
replicate: keep read_child in inode ctx as up-to-date as possible

In every transaction check if the currently set read child in the
inode context failed in the fop and set it to another subvol on
which the latest fop has passed. This will prevent read fops landing
on subvols which have witnessed a failure.

Signed-off-by: Anand V. Avati <avati@amp.gluster.com>
Signed-off-by: Vijay Bellur <vijay@dev.gluster.com>

BUG: 1172 (ls -lh on NFS mount of 2-mirror replicate gives incorrect file size)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=1172
Diffstat (limited to 'xlators/cluster')
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 3 |
-rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 50 |
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 1 |
3 files changed, 50 insertions, 4 deletions
```diff
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index b5f060a8730..01341526097 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1433,9 +1433,6 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
 
         fd_ctx = (afr_fd_ctx_t *)(long) ctx;
 
-        gf_log (this->name, GF_LOG_TRACE,
-                "hits=%d, miss=%d", fd_ctx->hit, fd_ctx->miss);
-
         if (fd_ctx) {
                 if (fd_ctx->pre_op_done)
                         GF_FREE (fd_ctx->pre_op_done);
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index e31f0c1df31..3cdd1829d91 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -404,7 +404,51 @@ afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 }
 
 
-int
+void
+afr_update_read_child (call_frame_t *frame, xlator_t *this, inode_t *inode,
+                       afr_transaction_type type)
+{
+        int curr_read_child = -1;
+        int new_read_child = -1;
+        afr_private_t *priv = NULL;
+        afr_local_t *local = NULL;
+        int **pending = NULL;
+        int idx = 0;
+
+        idx = afr_index_for_transaction_type (type);
+
+        priv = this->private;
+        local = frame->local;
+        curr_read_child = afr_read_child (this, inode);
+        pending = local->pending;
+
+        if (pending[curr_read_child][idx] != 0)
+                return;
+
+        /* need to set new read_child */
+        for (new_read_child = 0; new_read_child < priv->child_count;
+             new_read_child++) {
+
+                if (!priv->child_up[new_read_child])
+                        /* child is down */
+                        continue;
+
+                if (pending[new_read_child][idx] == 0)
+                        /* op just failed */
+                        continue;
+
+                break;
+        }
+
+        if (new_read_child == priv->child_count)
+                /* all children uneligible. leave as-is */
+                return;
+
+        afr_set_read_child (this, inode, new_read_child);
+}
+
+
+int
 afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
 {
         afr_private_t * priv = this->private;
@@ -426,6 +470,10 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
         __mark_down_children (local->pending, priv->child_count,
                               local->child_up, local->transaction.type);
 
+        if (local->fd)
+                afr_update_read_child (frame, this, local->fd->inode,
+                                       local->transaction.type);
+
         xattr = alloca (priv->child_count * sizeof (*xattr));
         memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
         for (i = 0; i < priv->child_count; i++) {
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 749264a8d65..e64a6fe1529 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -609,6 +609,7 @@ typedef struct {
         unsigned int *pre_op_done;
         unsigned int *opened_on; /* which subvolumes the fd is open on */
         unsigned int *pre_op_piggyback;
+        int flags;
         int32_t wbflags;
         uint64_t up_count; /* number of CHILD_UPs this fd has seen */
 
```