summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Anand Avati <avati@gluster.com> 2010-09-29 06:53:03 +0000
committer Vijay Bellur <vijay@dev.gluster.com> 2010-09-29 06:34:35 -0700
commit b8c42e5cef817bdd0270b1991c0a365aa321f7cc (patch)
tree 629473e5e85a810f8518bee099472a655589ae06
parent 87697e7ed88ca650fe70e26d59b67254ed2393f9 (diff)
replicate: keep read_child in inode ctx as up-to-date as possible
In every transaction, check whether the read child currently set in the inode context failed the fop and, if so, set it to another subvol on which the latest fop passed. This prevents read fops from landing on subvols that have witnessed a failure. Signed-off-by: Anand V. Avati <avati@amp.gluster.com> Signed-off-by: Vijay Bellur <vijay@dev.gluster.com> BUG: 1172 (ls -lh on NFS mount of 2-mirror replicate gives incorrect file size) URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=1172
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 3
-rw-r--r-- xlators/cluster/afr/src/afr-transaction.c 50
-rw-r--r-- xlators/cluster/afr/src/afr.h 1
3 files changed, 50 insertions, 4 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index b5f060a8730..01341526097 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1433,9 +1433,6 @@ afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- gf_log (this->name, GF_LOG_TRACE,
- "hits=%d, miss=%d", fd_ctx->hit, fd_ctx->miss);
-
if (fd_ctx) {
if (fd_ctx->pre_op_done)
GF_FREE (fd_ctx->pre_op_done);
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index e31f0c1df31..3cdd1829d91 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -404,7 +404,51 @@ afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
-int
+void
+afr_update_read_child (call_frame_t *frame, xlator_t *this, inode_t *inode,
+ afr_transaction_type type)
+{ /* Keep the inode's read child current: if its pending entry for this transaction type is zero, switch to an up child whose entry is non-zero. */
+ int curr_read_child = -1;
+ int new_read_child = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int **pending = NULL;
+ int idx = 0;
+ /* idx selects the column of pending[][] that corresponds to this transaction type */
+ idx = afr_index_for_transaction_type (type);
+
+ priv = this->private;
+ local = frame->local;
+ curr_read_child = afr_read_child (this, inode);
+ pending = local->pending;
+ /* NOTE(review): curr_read_child is used as an index without a range check -- confirm afr_read_child() always returns a valid child index */
+ if (pending[curr_read_child][idx] != 0)
+ return; /* non-zero entry: presumably the op did not fail on the current read child (zero is treated as "op just failed" below), so keep it */
+
+ /* need to set new read_child: take the first child that is up and whose pending entry is non-zero. NOTE(review): a zero entry may also come from __mark_down_children() in the caller -- confirm */
+ for (new_read_child = 0; new_read_child < priv->child_count;
+ new_read_child++) {
+
+ if (!priv->child_up[new_read_child])
+ /* child is down */
+ continue;
+
+ if (pending[new_read_child][idx] == 0)
+ /* op just failed */
+ continue;
+
+ break;
+ }
+
+ if (new_read_child == priv->child_count)
+ /* loop ran to completion: all children ineligible. leave as-is */
+ return;
+
+ afr_set_read_child (this, inode, new_read_child);
+}
+
+
+int
afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
{
afr_private_t * priv = this->private;
@@ -426,6 +470,10 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
__mark_down_children (local->pending, priv->child_count,
local->child_up, local->transaction.type);
+ if (local->fd)
+ afr_update_read_child (frame, this, local->fd->inode,
+ local->transaction.type);
+
xattr = alloca (priv->child_count * sizeof (*xattr));
memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
for (i = 0; i < priv->child_count; i++) {
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 749264a8d65..e64a6fe1529 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -609,6 +609,7 @@ typedef struct {
unsigned int *pre_op_done;
unsigned int *opened_on; /* which subvolumes the fd is open on */
unsigned int *pre_op_piggyback;
+
int flags;
int32_t wbflags;
uint64_t up_count; /* number of CHILD_UPs this fd has seen */