summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 220
-rw-r--r-- xlators/cluster/afr/src/afr-dir-read.c 2
-rw-r--r-- xlators/cluster/afr/src/afr-inode-read.c 12
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.c 10
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-data.c 2
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-entry.c 2
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-metadata.c 4
-rw-r--r-- xlators/cluster/afr/src/afr-transaction.c 69
-rw-r--r-- xlators/cluster/afr/src/afr.h 30
9 files changed, 236 insertions, 115 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index ca470716aee..94335bd0298 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -197,9 +197,9 @@ afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
if (!ctx)
goto unlock;
- switch (params->mask_type) {
- case AFR_ICTX_READ_CHILD_MASK:
- fresh_children = params->u.read_ctx.fresh_children;
+ switch (params->op) {
+ case AFR_INODE_GET_READ_CTX:
+ fresh_children = params->u.read_ctx.children;
read_child = (int32_t)(ctx->masks &
AFR_ICTX_READ_CHILD_MASK);
params->u.read_ctx.read_child = read_child;
@@ -208,13 +208,16 @@ afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
for (i = 0; i < priv->child_count; i++)
fresh_children[i] = ctx->fresh_children[i];
break;
- case AFR_ICTX_OPENDIR_DONE_MASK:
+ case AFR_INODE_GET_OPENDIR_DONE:
params->u.value = ctx->masks &
AFR_ICTX_OPENDIR_DONE_MASK;
break;
- case AFR_ICTX_SPLIT_BRAIN_MASK:
+ case AFR_INODE_GET_SPLIT_BRAIN:
params->u.value = ctx->masks & AFR_ICTX_SPLIT_BRAIN_MASK;
break;
+ default:
+ GF_ASSERT (0);
+ break;
}
}
unlock:
@@ -226,7 +229,7 @@ afr_is_split_brain (xlator_t *this, inode_t *inode)
{
afr_inode_params_t params = {0};
- params.mask_type = AFR_ICTX_SPLIT_BRAIN_MASK;
+ params.op = AFR_INODE_GET_SPLIT_BRAIN;
afr_inode_get_ctx (this, inode, &params);
return params.u.value;
}
@@ -236,7 +239,7 @@ afr_is_opendir_done (xlator_t *this, inode_t *inode)
{
afr_inode_params_t params = {0};
- params.mask_type = AFR_ICTX_OPENDIR_DONE_MASK;
+ params.op = AFR_INODE_GET_OPENDIR_DONE;
afr_inode_get_ctx (this, inode, &params);
return params.u.value;
}
@@ -247,26 +250,31 @@ afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children)
{
afr_inode_params_t params = {0};
- params.mask_type = AFR_ICTX_READ_CHILD_MASK;
- params.u.read_ctx.fresh_children = fresh_children;
+ params.op = AFR_INODE_GET_READ_CTX;
+ params.u.read_ctx.children = fresh_children;
afr_inode_get_ctx (this, inode, &params);
return params.u.read_ctx.read_child;
}
void
-afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child,
- int32_t *fresh_children, int32_t child_count)
+afr_inode_ctx_set_read_child (afr_inode_ctx_t *ctx, int32_t read_child)
{
- uint64_t rest_of_mask = 0;
+ uint64_t remaining_mask = 0;
uint64_t mask = 0;
- int i = 0;
- rest_of_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks);
+ GF_ASSERT (read_child >= 0);
+ remaining_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks);
mask = (AFR_ICTX_READ_CHILD_MASK & read_child);
- ctx->masks = rest_of_mask | mask;
+ ctx->masks = remaining_mask | mask;
+}
- /* avoid memcpy as int, int32_t are used interchangeably
- */
+void
+afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child,
+ int32_t *fresh_children, int32_t child_count)
+{
+ int i = 0;
+
+ afr_inode_ctx_set_read_child (ctx, read_child);
for (i = 0; i < child_count; i++) {
if (fresh_children)
ctx->fresh_children[i] = fresh_children[i];
@@ -276,26 +284,42 @@ afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child,
}
void
+afr_inode_ctx_rm_stale_children (afr_inode_ctx_t *ctx, int32_t read_child,
+ int32_t *stale_children, int32_t child_count)
+{
+ int i = 0;
+
+ GF_ASSERT (stale_children);
+ afr_inode_ctx_set_read_child (ctx, read_child);
+ for (i = 0; i < child_count; i++) {
+ if ((ctx->fresh_children[i] == -1) || (stale_children[i] == -1))
+ break;
+ afr_children_rm_child (ctx->fresh_children,
+ stale_children[i], child_count);
+ }
+}
+
+void
afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx)
{
- uint64_t rest_of_mask = 0;
+ uint64_t remaining_mask = 0;
uint64_t mask = 0;
- rest_of_mask = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks);
+ remaining_mask = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks);
mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
- ctx->masks = rest_of_mask | mask;
+ ctx->masks = remaining_mask | mask;
}
void
afr_inode_ctx_set_splitbrain (afr_inode_ctx_t *ctx, gf_boolean_t set)
{
- uint64_t rest_of_mask = 0;
+ uint64_t remaining_mask = 0;
uint64_t mask = 0;
if (set) {
- rest_of_mask = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
+ remaining_mask = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK);
- ctx->masks = rest_of_mask | mask;
+ ctx->masks = remaining_mask | mask;
} else {
ctx->masks = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
}
@@ -314,6 +338,7 @@ afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
gf_boolean_t set = _gf_false;
int32_t read_child = -1;
int32_t *fresh_children = NULL;
+ int32_t *stale_children = NULL;
priv = this->private;
LOCK (&inode->lock);
@@ -324,21 +349,31 @@ afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
if (!ctx)
goto unlock;
- switch (params->mask_type) {
- case AFR_ICTX_READ_CHILD_MASK:
+ switch (params->op) {
+ case AFR_INODE_SET_READ_CTX:
read_child = params->u.read_ctx.read_child;
- fresh_children = params->u.read_ctx.fresh_children;
+ fresh_children = params->u.read_ctx.children;
afr_inode_ctx_set_read_ctx (ctx, read_child,
fresh_children,
priv->child_count);
break;
- case AFR_ICTX_OPENDIR_DONE_MASK:
+ case AFR_INODE_RM_STALE_CHILDREN:
+ read_child = params->u.read_ctx.read_child;
+ stale_children = params->u.read_ctx.children;
+ afr_inode_ctx_rm_stale_children (ctx, read_child,
+ stale_children,
+ priv->child_count);
+ break;
+ case AFR_INODE_SET_OPENDIR_DONE:
afr_inode_ctx_set_opendir_done (ctx);
break;
- case AFR_ICTX_SPLIT_BRAIN_MASK:
+ case AFR_INODE_SET_SPLIT_BRAIN:
set = params->u.value;
afr_inode_ctx_set_splitbrain (ctx, set);
break;
+ default:
+ GF_ASSERT (0);
+ break;
}
ret = __inode_ctx_put (inode, this, (uint64_t)ctx);
if (ret) {
@@ -356,7 +391,7 @@ afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set)
{
afr_inode_params_t params = {0};
- params.mask_type = AFR_ICTX_SPLIT_BRAIN_MASK;
+ params.op = AFR_INODE_SET_SPLIT_BRAIN;
params.u.value = set;
afr_inode_set_ctx (this, inode, &params);
}
@@ -366,7 +401,7 @@ afr_set_opendir_done (xlator_t *this, inode_t *inode)
{
afr_inode_params_t params = {0};
- params.mask_type = AFR_ICTX_OPENDIR_DONE_MASK;
+ params.op = AFR_INODE_SET_OPENDIR_DONE;
afr_inode_set_ctx (this, inode, &params);
}
@@ -375,13 +410,34 @@ afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
int32_t *fresh_children)
{
afr_inode_params_t params = {0};
+ afr_private_t *priv = NULL;
+ priv = this->private;
GF_ASSERT (read_child >= 0);
GF_ASSERT (fresh_children);
+ GF_ASSERT (afr_is_child_present (fresh_children, priv->child_count,
+ read_child));
+
+ params.op = AFR_INODE_SET_READ_CTX;
+ params.u.read_ctx.read_child = read_child;
+ params.u.read_ctx.children = fresh_children;
+ afr_inode_set_ctx (this, inode, &params);
+}
+
+void
+afr_inode_rm_stale_children (xlator_t *this, inode_t *inode, int32_t read_child,
+ int32_t *stale_children)
+{
+ afr_inode_params_t params = {0};
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ GF_ASSERT (read_child >= 0);
+ GF_ASSERT (stale_children);
- params.mask_type = AFR_ICTX_READ_CHILD_MASK;
+ params.op = AFR_INODE_RM_STALE_CHILDREN;
params.u.read_ctx.read_child = read_child;
- params.u.read_ctx.fresh_children = fresh_children;
+ params.u.read_ctx.children = stale_children;
afr_inode_set_ctx (this, inode, &params);
}
@@ -885,13 +941,14 @@ afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
uuid_copy (loc->pargfid, postparent->ia_gfid);
}
-void
+int
afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
{
int32_t read_child = -1;
struct iatt *buf = NULL;
struct iatt *postparent = NULL;
dict_t **xattr = NULL;
+ int ret = 0;
GF_ASSERT (local);
@@ -901,6 +958,12 @@ afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
read_child = afr_inode_get_read_ctx (this, local->cont.lookup.inode,
NULL);
+ if (read_child < 0) {
+ ret = -EIO;
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
gf_log (this->name, GF_LOG_DEBUG, "Building lookup response from %d",
read_child);
*xattr = dict_ref (local->cont.lookup.xattrs[read_child]);
@@ -911,6 +974,8 @@ afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
/* fix for RT #602 */
local->cont.lookup.inode->ia_type = buf->ia_type;
}
+out:
+ return ret;
}
static void
@@ -1101,9 +1166,9 @@ out:
}
static inline gf_boolean_t
-afr_is_self_heal_running (afr_local_t *local)
+afr_is_transaction_running (afr_local_t *local)
{
- GF_ASSERT (local);
+ GF_ASSERT (local->fop == GF_FOP_LOOKUP);
return ((local->inodelk_count > 0) || (local->entrylk_count > 0));
}
@@ -1397,7 +1462,7 @@ afr_lookup_perform_self_heal_if_needed (call_frame_t *frame, xlator_t *this,
afr_lookup_set_self_heal_data (local, this);
if (afr_can_self_heal_proceed (&local->self_heal, priv)) {
- if (afr_is_self_heal_running (local))
+ if (afr_is_transaction_running (local))
goto out;
afr_launch_self_heal (frame, this, local->cont.lookup.inode,
@@ -1478,21 +1543,25 @@ afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
goto out;
}
- ret = afr_lookup_select_read_child (local, this, &read_child);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = EIO;
- goto out;
- }
+ if (!afr_is_transaction_running (local)) {
+ ret = afr_lookup_select_read_child (local, this, &read_child);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
- ret = afr_lookup_set_read_ctx (local, this, read_child);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = EIO;
- goto out;
+ ret = afr_lookup_set_read_ctx (local, this, read_child);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
}
- afr_lookup_build_response_params (local, this);
+ ret = afr_lookup_build_response_params (local, this);
+ if (ret)
+ goto out;
if (afr_is_fresh_lookup (&local->loc, this)) {
afr_update_loc_gfids (&local->loc,
&local->cont.lookup.buf,
@@ -1740,12 +1809,12 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
goto out;
local->cont.lookup.bufs = iatts;
- success_children = afr_fresh_children_create (child_count);
+ success_children = afr_children_create (child_count);
if (NULL == success_children)
goto out;
local->cont.lookup.success_children = success_children;
- local->fresh_children = afr_fresh_children_create (child_count);
+ local->fresh_children = afr_children_create (child_count);
if (NULL == local->fresh_children)
goto out;
@@ -1774,6 +1843,7 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
local->op_ret = -1;
frame->local = local;
+ local->fop = GF_FOP_LOOKUP;
if (!strcmp (loc->path, "/" GF_REPLICATE_TRASH_DIR)) {
op_errno = ENOENT;
@@ -3493,7 +3563,7 @@ afr_transaction_local_init (afr_local_t *local, afr_private_t *priv)
if (!local->pending)
goto out;
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children)
goto out;
@@ -3525,52 +3595,72 @@ afr_reset_children (int32_t *fresh_children, int32_t child_count)
}
int32_t*
-afr_fresh_children_create (int32_t child_count)
+afr_children_create (int32_t child_count)
{
- int32_t *fresh_children = NULL;
+ int32_t *children = NULL;
int i = 0;
GF_ASSERT (child_count > 0);
- fresh_children = GF_CALLOC (child_count, sizeof (*fresh_children),
- gf_afr_mt_int32_t);
- if (NULL == fresh_children)
+ children = GF_CALLOC (child_count, sizeof (*children),
+ gf_afr_mt_int32_t);
+ if (NULL == children)
goto out;
for (i = 0; i < child_count; i++)
- fresh_children[i] = -1;
+ children[i] = -1;
out:
- return fresh_children;
+ return children;
}
void
-afr_fresh_children_add_child (int32_t *fresh_children, int32_t child,
- int32_t child_count)
+afr_children_add_child (int32_t *children, int32_t child,
+ int32_t child_count)
{
gf_boolean_t child_found = _gf_false;
int i = 0;
for (i = 0; i < child_count; i++) {
- if (fresh_children[i] == -1)
+ if (children[i] == -1)
break;
- if (fresh_children[i] == child) {
+ if (children[i] == child) {
child_found = _gf_true;
break;
}
}
+
if (!child_found) {
GF_ASSERT (i < child_count);
- fresh_children[i] = child;
+ children[i] = child;
+ }
+}
+
+void
+afr_children_rm_child (int32_t *children, int32_t child, int32_t child_count)
+{
+ int i = 0;
+
+ GF_ASSERT ((child >= 0) && (child < child_count));
+ for (i = 0; i < child_count; i++) {
+ if (children[i] == -1)
+ break;
+ if (children[i] == child) {
+ if (i != (child_count - 1))
+ memmove (children + i, children + i + 1,
+ sizeof (*children)*(child_count - i - 1));
+ children[child_count - 1] = -1;
+ break;
+ }
}
}
int
-afr_get_children_count (int32_t *fresh_children, unsigned int child_count)
+afr_get_children_count (int32_t *children, unsigned int child_count)
{
int count = 0;
int i = 0;
for (i = 0; i < child_count; i++) {
- if (fresh_children[i] == -1)
+ if (children[i] == -1)
break;
count++;
}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 0fac7324c78..645da2a6c57 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -668,7 +668,7 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this,
goto out;
}
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 7d7cc3d80c8..f8157482758 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -136,7 +136,7 @@ afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
goto out;
}
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
@@ -254,7 +254,7 @@ afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
goto out;
}
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
@@ -375,7 +375,7 @@ afr_fstat (call_frame_t *frame, xlator_t *this,
goto out;
}
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
@@ -495,7 +495,7 @@ afr_readlink (call_frame_t *frame, xlator_t *this,
goto out;
}
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
@@ -879,7 +879,7 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
}
}
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
@@ -1013,7 +1013,7 @@ afr_readv (call_frame_t *frame, xlator_t *this,
goto out;
}
- local->fresh_children = afr_fresh_children_create (priv->child_count);
+ local->fresh_children = afr_children_create (priv->child_count);
if (!local->fresh_children) {
op_errno = ENOMEM;
goto out;
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 15b659fa837..f66bdff8446 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1468,8 +1468,8 @@ afr_sh_purge_stale_entry (call_frame_t *frame, xlator_t *this)
sh->entrybuf.ia_gfid)))
continue;
- afr_fresh_children_add_child (sh->fresh_children,
- i, priv->child_count);
+ afr_children_add_child (sh->fresh_children, i,
+ priv->child_count);
}
afr_sh_purge_entry_common (frame, this,
@@ -2094,9 +2094,9 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
priv->child_count,
gf_afr_mt_int32_t);
}
- sh->success_children = afr_fresh_children_create (priv->child_count);
- sh->fresh_children = afr_fresh_children_create (priv->child_count);
- sh->fresh_parent_dirs = afr_fresh_children_create (priv->child_count);
+ sh->success_children = afr_children_create (priv->child_count);
+ sh->fresh_children = afr_children_create (priv->child_count);
+ sh->fresh_parent_dirs = afr_children_create (priv->child_count);
FRAME_SU_DO (sh_frame, afr_local_t);
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 74a1bf35371..dcaad9c8b47 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -310,7 +310,7 @@ afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
sh = &local->self_heal;
i = (long)cookie;
- afr_fresh_children_add_child (sh->fresh_children, i, priv->child_count);
+ afr_children_add_child (sh->fresh_children, i, priv->child_count);
call_count = afr_frame_return (frame);
if (call_count == 0) {
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 556ea8027a3..9e80cb3d5a5 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -130,7 +130,7 @@ afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie,
i = (long)cookie;
- afr_fresh_children_add_child (sh->fresh_children, i, priv->child_count);
+ afr_children_add_child (sh->fresh_children, i, priv->child_count);
if (op_ret == -1) {
gf_log (this->name, GF_LOG_INFO,
"%s: failed to erase pending xattrs on %s (%s)",
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index bacf2758880..5445132ab8c 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -160,8 +160,8 @@ afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
if ((!IA_ISREG (sh->buf[sh->source].ia_type)) &&
(!IA_ISDIR (sh->buf[sh->source].ia_type))) {
- afr_fresh_children_add_child (sh->fresh_children, i,
- priv->child_count);
+ afr_children_add_child (sh->fresh_children, i,
+ priv->child_count);
}
call_count = afr_frame_return (frame);
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 1fb0781d8b7..fc030433b69 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -404,53 +404,67 @@ afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
void
-afr_update_read_child (call_frame_t *frame, xlator_t *this, inode_t *inode,
- afr_transaction_type type)
+afr_transaction_rm_stale_children (call_frame_t *frame, xlator_t *this,
+ inode_t *inode, afr_transaction_type type)
{
- int curr_read_child = -1;
- int new_read_child = -1;
+ int i = -1;
+ int count = 0;
+ int read_child = -1;
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
int **pending = NULL;
int idx = 0;
+ int32_t *stale_children = NULL;
int32_t *fresh_children = NULL;
- size_t success_count = 0;
+ gf_boolean_t rm_stale_children = _gf_false;
idx = afr_index_for_transaction_type (type);
priv = this->private;
local = frame->local;
- curr_read_child = afr_inode_get_read_ctx (this, inode, NULL);
pending = local->pending;
- GF_ASSERT (curr_read_child >= 0);
-
- if (pending[curr_read_child][idx] != 0)
+ stale_children = afr_children_create (priv->child_count);
+ if (!stale_children)
goto out;
- fresh_children = afr_fresh_children_create (priv->child_count);
- if (!fresh_children)
- goto out;
+ fresh_children = local->fresh_children;
+ read_child = afr_inode_get_read_ctx (this, inode, fresh_children);
- for (new_read_child = 0; new_read_child < priv->child_count;
- new_read_child++) {
+ GF_ASSERT (read_child >= 0);
- if (!priv->child_up[new_read_child])
- /* child is down */
- continue;
+ if (pending[read_child][idx] == 0)
+ read_child = -1;
- if (pending[new_read_child][idx] == 0)
- /* op just failed */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!afr_is_child_present (fresh_children,
+ priv->child_count, i))
continue;
- fresh_children[success_count] = new_read_child;
- success_count++;
+ if ((!priv->child_up[i]) || (pending[i][idx] == 0)) {
+ /* child is down or op failed on it */
+ rm_stale_children = _gf_true;
+ afr_children_rm_child (fresh_children, i,
+ priv->child_count);
+ stale_children[count++] = i;
+ }
+ }
+
+ if (!rm_stale_children) {
+ GF_ASSERT (read_child >= 0);
+ goto out;
+ }
+
+ if (fresh_children[0] == -1) {
+ //All children failed. leave as-is
+ goto out;
}
- afr_inode_set_read_ctx (this, inode, fresh_children[0],
- fresh_children);
+ if (read_child == -1)
+ read_child = fresh_children[0];
+ afr_inode_rm_stale_children (this, inode, read_child, stale_children);
out:
- if (fresh_children)
- GF_FREE (fresh_children);
+ if (stale_children)
+ GF_FREE (stale_children);
return;
}
@@ -478,8 +492,9 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
local->child_up, local->transaction.type);
if (local->fd)
- afr_update_read_child (frame, this, local->fd->inode,
- local->transaction.type);
+ afr_transaction_rm_stale_children (frame, this,
+ local->fd->inode,
+ local->transaction.type);
xattr = alloca (priv->child_count * sizeof (*xattr));
memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 2ca13078ef7..236a24a6057 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -48,13 +48,23 @@ typedef int (*afr_post_remove_call_t) (call_frame_t *frame, xlator_t *this);
typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
+typedef enum {
+ AFR_INODE_SET_READ_CTX = 1,
+ AFR_INODE_RM_STALE_CHILDREN,
+ AFR_INODE_SET_OPENDIR_DONE,
+ AFR_INODE_SET_SPLIT_BRAIN,
+ AFR_INODE_GET_READ_CTX,
+ AFR_INODE_GET_OPENDIR_DONE,
+ AFR_INODE_GET_SPLIT_BRAIN,
+} afr_inode_op_t;
+
typedef struct afr_inode_params_ {
- uint64_t mask_type;
+ afr_inode_op_t op;
union {
gf_boolean_t value;
struct {
int32_t read_child;
- int32_t *fresh_children;
+ int32_t *children;
} read_ctx;
} u;
} afr_inode_params_t;
@@ -869,7 +879,7 @@ afr_marker_getxattr (call_frame_t *frame, xlator_t *this,
loc_t *loc, const char *name,afr_local_t *local, afr_private_t *priv );
int32_t *
-afr_fresh_children_create (int32_t child_count);
+afr_children_create (int32_t child_count);
int
AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv);
@@ -902,19 +912,22 @@ afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
int32_t read_child);
void
afr_get_fresh_children (int32_t *success_children, int32_t *sources,
- int32_t *fresh_children, unsigned int child_count);
+ int32_t *children, unsigned int child_count);
void
-afr_fresh_children_add_child (int32_t *fresh_children, int32_t child,
+afr_children_add_child (int32_t *children, int32_t child,
int32_t child_count);
void
-afr_reset_children (int32_t *fresh_children, int32_t child_count);
+afr_children_rm_child (int32_t *children, int32_t child,
+ int32_t child_count);
+void
+afr_reset_children (int32_t *children, int32_t child_count);
gf_boolean_t
afr_error_more_important (int32_t old_errno, int32_t new_errno);
int
afr_errno_count (int32_t *children, int *child_errno,
unsigned int child_count, int32_t op_errno);
int
-afr_get_children_count (int32_t *fresh_children, unsigned int child_count);
+afr_get_children_count (int32_t *children, unsigned int child_count);
gf_boolean_t
afr_is_child_present (int32_t *success_children, int32_t child_count,
int32_t child);
@@ -941,4 +954,7 @@ afr_transaction_type_get (ia_type_t ia_type);
int32_t
afr_resultant_errno_get (int32_t *children,
int *child_errno, unsigned int child_count);
+void
+afr_inode_rm_stale_children (xlator_t *this, inode_t *inode, int32_t read_child,
+ int32_t *stale_children);
#endif /* __AFR_H__ */