summary | refs | log | tree | commit | diff | stats
path: root/xlators/cluster/afr/src/afr-transaction.c
diff options
context:
space:
mode:
author: Vikas Gorur <vikas@gluster.com> 2009-11-24 08:45:09 +0000
committer: Anand V. Avati <avati@dev.gluster.com> 2009-11-24 06:40:08 -0800
commit: 74612a456ad1602f8038fae79fee654eb427602a (patch)
tree: a8c57ae1b5919688faa00985aad3677e0df9ea1b /xlators/cluster/afr/src/afr-transaction.c
parent: 218959e0597b16755a98b19786ed6a42cd15cbc4 (diff)
cluster/afr: Do self-heal on reopened fds.
This patch brings in partial support for self-heal of open fds. The precondition is that the fd should have been opened successfully during the initial open() (or create()), and we assume that protocol/client has successfully reopened the fd when the subvolume comes back up. It works by doing an "up/down flush" (a dummy flush transaction to do post-op wherever necessary) and then triggering data self-heal on the file in the post-post-op hook of the dummy flush transaction. This ensures that any writes that come in during self-heal will wait until self-heal completes. The up/down flush is also done when a subvolume goes down, so that post-op is done on all subvolumes where pre-op was done.

Signed-off-by: Vikas Gorur <vikas@gluster.com>
Signed-off-by: Anand V. Avati <avati@dev.gluster.com>

BUG: 170 (Auto-heal fails on files that are open()-ed/mmap()-ed)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=170
Diffstat (limited to 'xlators/cluster/afr/src/afr-transaction.c')
-rw-r--r-- xlators/cluster/afr/src/afr-transaction.c 130
1 files changed, 112 insertions, 18 deletions
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index f7604dbf6..94f0972a1 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -134,6 +134,34 @@ out:
static void
+__mark_pre_op_done_on_fd (call_frame_t *frame, xlator_t *this, int child_index)
+{ /* Flag child_index in the fd context's pre_op_done[] for write/ftruncate ops. */
+ afr_local_t *local = NULL;
+ /* Invoked from afr_changelog_pre_op_cbk, with frame->lock held, when op_ret == 0. */
+ uint64_t ctx; /* raw fd-context value filled in by fd_ctx_get() */
+ afr_fd_ctx_t * fd_ctx = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ /* Look up the context stored on local->fd for this xlator. */
+ ret = fd_ctx_get (local->fd, this, &ctx);
+ /* A negative return is treated as failure: leave without marking anything. */
+ if (ret < 0)
+ goto out;
+ /* The context value is a pointer carried in a uint64_t. */
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ /* Any op other than GF_FOP_WRITE / GF_FOP_FTRUNCATE leaves the flag untouched. */
+ if ((local->op == GF_FOP_WRITE)
+ || (local->op == GF_FOP_FTRUNCATE)) {
+ fd_ctx->pre_op_done[child_index] = 1; /* NOTE(review): store is not inside LOCK (&fd->lock), unlike the read in afr_pre_op_done_count - confirm frame->lock is sufficient */
+ }
+
+out:
+ return;
+}
+
+
+static void
__mark_down_children (int32_t *pending[], int child_count,
unsigned char *child_up, afr_transaction_type type)
{
@@ -168,10 +196,15 @@ __is_first_write_on_fd (xlator_t *this, fd_t *fd)
{
int op_ret = 0;
int _ret = -1;
+ int i = 0;
uint64_t ctx;
afr_fd_ctx_t * fd_ctx = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
LOCK (&fd->lock);
{
_ret = __fd_ctx_get (fd, this, &ctx);
@@ -185,9 +218,12 @@ __is_first_write_on_fd (xlator_t *this, fd_t *fd)
fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- if (fd_ctx->pre_op_done == 0) {
- fd_ctx->pre_op_done = 1;
- op_ret = 1;
+ op_ret = 1;
+ for (i = 0; i < priv->child_count; i++) {
+ if (fd_ctx->pre_op_done[i] == 0)
+ continue;
+
+ op_ret = 0;
}
}
out:
@@ -198,7 +234,7 @@ out:
static int
-__if_fd_pre_op_done (xlator_t *this, fd_t *fd)
+__if_fd_pre_op_done (xlator_t *this, fd_t *fd, int child_index)
{
int op_ret = 0;
int _ret = -1;
@@ -216,8 +252,7 @@ __if_fd_pre_op_done (xlator_t *this, fd_t *fd)
fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- if (fd_ctx->pre_op_done) {
- fd_ctx->pre_op_done = 0;
+ if (fd_ctx->pre_op_done[child_index]) {
op_ret = 1;
}
}
@@ -229,6 +264,43 @@ out:
static int
+afr_pre_op_done_count (xlator_t *this, fd_t *fd, unsigned char *child_up)
+{ /* Count children that have pre_op_done set on this fd AND are marked up in child_up[]. */
+ int i = 0;
+ int count = 0; /* return value; stays 0 if the fd context lookup fails */
+ /* afr_changelog_post_op uses the result as call_count when local->op is GF_FOP_FLUSH. */
+ int _ret = 0;
+ uint64_t ctx; /* raw fd-context value filled in by __fd_ctx_get() */
+ afr_fd_ctx_t * fd_ctx = NULL;
+
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ /* fd->lock is held across both the context lookup and the scan of pre_op_done[]. */
+ LOCK (&fd->lock);
+ {
+ _ret = __fd_ctx_get (fd, this, &ctx);
+ /* On a negative return, jump to 'out' so the lock is still released. */
+ if (_ret < 0) {
+ goto out;
+ }
+ /* The context value is a pointer carried in a uint64_t. */
+ fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ /* Both arrays are indexed by child number, 0 .. priv->child_count - 1. */
+ for (i = 0; i < priv->child_count; i++) {
+ if (fd_ctx->pre_op_done[i] && child_up[i]) {
+ count++;
+ }
+ }
+ }
+out: /* shared exit: reached by fall-through on success and by goto on lookup failure */
+ UNLOCK (&fd->lock);
+
+ return count;
+}
+
+
+static int
__changelog_enabled (afr_private_t *priv, afr_transaction_type type)
{
int ret = 0;
@@ -326,7 +398,7 @@ __changelog_needed_post_op (call_frame_t *frame, xlator_t *this)
break;
case GF_FOP_FLUSH:
- op_ret = __if_fd_pre_op_done (this, local->fd);
+ op_ret = 1;
break;
default:
@@ -665,11 +737,15 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
dict_ref (xattr[i]);
}
- call_count = afr_up_children_count (priv->child_count, local->child_up);
+ if (local->op == GF_FOP_FLUSH) {
+ call_count = afr_pre_op_done_count (this, local->fd, local->child_up);
+ } else {
+ call_count = afr_up_children_count (priv->child_count, local->child_up);
- if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
- call_count *= 2;
- }
+ if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ call_count *= 2;
+ }
+ }
local->call_count = call_count;
@@ -696,20 +772,33 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
- case AFR_FLUSH_TRANSACTION:
{
if (local->fd)
STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
+ priv->children[i],
priv->children[i]->fops->fxattrop,
- local->fd,
+ local->fd,
GF_XATTROP_ADD_ARRAY, xattr[i]);
- else
+ else
STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
+ priv->children[i],
priv->children[i]->fops->xattrop,
- &local->loc,
+ &local->loc,
GF_XATTROP_ADD_ARRAY, xattr[i]);
+ call_count--;
+ }
+ break;
+
+ case AFR_FLUSH_TRANSACTION:
+ {
+ if (__if_fd_pre_op_done (this, local->fd, i)) {
+ STACK_WIND (frame, afr_changelog_post_op_cbk,
+ priv->children[i],
+ priv->children[i]->fops->fxattrop,
+ local->fd,
+ GF_XATTROP_ADD_ARRAY, xattr[i]);
+ call_count--;
+ }
}
break;
@@ -756,11 +845,12 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
priv->children[i]->fops->xattrop,
&local->transaction.parent_loc,
GF_XATTROP_ADD_ARRAY, xattr[i]);
+ call_count--;
}
break;
}
- if (!--call_count)
+ if (!call_count)
break;
}
}
@@ -789,6 +879,10 @@ afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
LOCK (&frame->lock);
{
+ if (op_ret == 0) {
+ __mark_pre_op_done_on_fd (frame, this, child_index);
+ }
+
if (op_ret == -1) {
local->child_up[child_index] = 0;