diff options
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 220 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-dir-read.c | 2 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-inode-read.c | 12 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 10 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 2 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-entry.c | 2 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 4 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 69 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 30 |
9 files changed, 236 insertions, 115 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index ca470716aee..94335bd0298 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -197,9 +197,9 @@ afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params) ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count); if (!ctx) goto unlock; - switch (params->mask_type) { - case AFR_ICTX_READ_CHILD_MASK: - fresh_children = params->u.read_ctx.fresh_children; + switch (params->op) { + case AFR_INODE_GET_READ_CTX: + fresh_children = params->u.read_ctx.children; read_child = (int32_t)(ctx->masks & AFR_ICTX_READ_CHILD_MASK); params->u.read_ctx.read_child = read_child; @@ -208,13 +208,16 @@ afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params) for (i = 0; i < priv->child_count; i++) fresh_children[i] = ctx->fresh_children[i]; break; - case AFR_ICTX_OPENDIR_DONE_MASK: + case AFR_INODE_GET_OPENDIR_DONE: params->u.value = ctx->masks & AFR_ICTX_OPENDIR_DONE_MASK; break; - case AFR_ICTX_SPLIT_BRAIN_MASK: + case AFR_INODE_GET_SPLIT_BRAIN: params->u.value = ctx->masks & AFR_ICTX_SPLIT_BRAIN_MASK; break; + default: + GF_ASSERT (0); + break; } } unlock: @@ -226,7 +229,7 @@ afr_is_split_brain (xlator_t *this, inode_t *inode) { afr_inode_params_t params = {0}; - params.mask_type = AFR_ICTX_SPLIT_BRAIN_MASK; + params.op = AFR_INODE_GET_SPLIT_BRAIN; afr_inode_get_ctx (this, inode, &params); return params.u.value; } @@ -236,7 +239,7 @@ afr_is_opendir_done (xlator_t *this, inode_t *inode) { afr_inode_params_t params = {0}; - params.mask_type = AFR_ICTX_OPENDIR_DONE_MASK; + params.op = AFR_INODE_GET_OPENDIR_DONE; afr_inode_get_ctx (this, inode, &params); return params.u.value; } @@ -247,26 +250,31 @@ afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children) { afr_inode_params_t params = {0}; - params.mask_type = AFR_ICTX_READ_CHILD_MASK; - params.u.read_ctx.fresh_children = fresh_children; + 
params.op = AFR_INODE_GET_READ_CTX; + params.u.read_ctx.children = fresh_children; afr_inode_get_ctx (this, inode, &params); return params.u.read_ctx.read_child; } void -afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child, - int32_t *fresh_children, int32_t child_count) +afr_inode_ctx_set_read_child (afr_inode_ctx_t *ctx, int32_t read_child) { - uint64_t rest_of_mask = 0; + uint64_t remaining_mask = 0; uint64_t mask = 0; - int i = 0; - rest_of_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks); + GF_ASSERT (read_child >= 0); + remaining_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks); mask = (AFR_ICTX_READ_CHILD_MASK & read_child); - ctx->masks = rest_of_mask | mask; + ctx->masks = remaining_mask | mask; +} - /* avoid memcpy as int, int32_t are used interchangeably - */ +void +afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child, + int32_t *fresh_children, int32_t child_count) +{ + int i = 0; + + afr_inode_ctx_set_read_child (ctx, read_child); for (i = 0; i < child_count; i++) { if (fresh_children) ctx->fresh_children[i] = fresh_children[i]; @@ -276,26 +284,42 @@ afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child, } void +afr_inode_ctx_rm_stale_children (afr_inode_ctx_t *ctx, int32_t read_child, + int32_t *stale_children, int32_t child_count) +{ + int i = 0; + + GF_ASSERT (stale_children); + afr_inode_ctx_set_read_child (ctx, read_child); + for (i = 0; i < child_count; i++) { + if ((ctx->fresh_children[i] == -1) || (stale_children[i] == -1)) + break; + afr_children_rm_child (ctx->fresh_children, + stale_children[i], child_count); + } +} + +void afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx) { - uint64_t rest_of_mask = 0; + uint64_t remaining_mask = 0; uint64_t mask = 0; - rest_of_mask = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks); + remaining_mask = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks); mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK); - ctx->masks = rest_of_mask | mask; + ctx->masks = 
remaining_mask | mask; } void afr_inode_ctx_set_splitbrain (afr_inode_ctx_t *ctx, gf_boolean_t set) { - uint64_t rest_of_mask = 0; + uint64_t remaining_mask = 0; uint64_t mask = 0; if (set) { - rest_of_mask = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks); + remaining_mask = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks); mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK); - ctx->masks = rest_of_mask | mask; + ctx->masks = remaining_mask | mask; } else { ctx->masks = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks); } @@ -314,6 +338,7 @@ afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params) gf_boolean_t set = _gf_false; int32_t read_child = -1; int32_t *fresh_children = NULL; + int32_t *stale_children = NULL; priv = this->private; LOCK (&inode->lock); @@ -324,21 +349,31 @@ afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params) ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count); if (!ctx) goto unlock; - switch (params->mask_type) { - case AFR_ICTX_READ_CHILD_MASK: + switch (params->op) { + case AFR_INODE_SET_READ_CTX: read_child = params->u.read_ctx.read_child; - fresh_children = params->u.read_ctx.fresh_children; + fresh_children = params->u.read_ctx.children; afr_inode_ctx_set_read_ctx (ctx, read_child, fresh_children, priv->child_count); break; - case AFR_ICTX_OPENDIR_DONE_MASK: + case AFR_INODE_RM_STALE_CHILDREN: + read_child = params->u.read_ctx.read_child; + stale_children = params->u.read_ctx.children; + afr_inode_ctx_rm_stale_children (ctx, read_child, + stale_children, + priv->child_count); + break; + case AFR_INODE_SET_OPENDIR_DONE: afr_inode_ctx_set_opendir_done (ctx); break; - case AFR_ICTX_SPLIT_BRAIN_MASK: + case AFR_INODE_SET_SPLIT_BRAIN: set = params->u.value; afr_inode_ctx_set_splitbrain (ctx, set); break; + default: + GF_ASSERT (0); + break; } ret = __inode_ctx_put (inode, this, (uint64_t)ctx); if (ret) { @@ -356,7 +391,7 @@ afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set) { 
afr_inode_params_t params = {0}; - params.mask_type = AFR_ICTX_SPLIT_BRAIN_MASK; + params.op = AFR_INODE_SET_SPLIT_BRAIN; params.u.value = set; afr_inode_set_ctx (this, inode, &params); } @@ -366,7 +401,7 @@ afr_set_opendir_done (xlator_t *this, inode_t *inode) { afr_inode_params_t params = {0}; - params.mask_type = AFR_ICTX_OPENDIR_DONE_MASK; + params.op = AFR_INODE_SET_OPENDIR_DONE; afr_inode_set_ctx (this, inode, &params); } @@ -375,13 +410,34 @@ afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child, int32_t *fresh_children) { afr_inode_params_t params = {0}; + afr_private_t *priv = NULL; + priv = this->private; GF_ASSERT (read_child >= 0); GF_ASSERT (fresh_children); + GF_ASSERT (afr_is_child_present (fresh_children, priv->child_count, + read_child)); + + params.op = AFR_INODE_SET_READ_CTX; + params.u.read_ctx.read_child = read_child; + params.u.read_ctx.children = fresh_children; + afr_inode_set_ctx (this, inode, &params); +} + +void +afr_inode_rm_stale_children (xlator_t *this, inode_t *inode, int32_t read_child, + int32_t *stale_children) +{ + afr_inode_params_t params = {0}; + afr_private_t *priv = NULL; + + priv = this->private; + GF_ASSERT (read_child >= 0); + GF_ASSERT (stale_children); - params.mask_type = AFR_ICTX_READ_CHILD_MASK; + params.op = AFR_INODE_RM_STALE_CHILDREN; params.u.read_ctx.read_child = read_child; - params.u.read_ctx.fresh_children = fresh_children; + params.u.read_ctx.children = stale_children; afr_inode_set_ctx (this, inode, &params); } @@ -885,13 +941,14 @@ afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent) uuid_copy (loc->pargfid, postparent->ia_gfid); } -void +int afr_lookup_build_response_params (afr_local_t *local, xlator_t *this) { int32_t read_child = -1; struct iatt *buf = NULL; struct iatt *postparent = NULL; dict_t **xattr = NULL; + int ret = 0; GF_ASSERT (local); @@ -901,6 +958,12 @@ afr_lookup_build_response_params (afr_local_t *local, xlator_t *this) read_child = afr_inode_get_read_ctx 
(this, local->cont.lookup.inode, NULL); + if (read_child < 0) { + ret = -EIO; + local->op_ret = -1; + local->op_errno = EIO; + goto out; + } gf_log (this->name, GF_LOG_DEBUG, "Building lookup response from %d", read_child); *xattr = dict_ref (local->cont.lookup.xattrs[read_child]); @@ -911,6 +974,8 @@ afr_lookup_build_response_params (afr_local_t *local, xlator_t *this) /* fix for RT #602 */ local->cont.lookup.inode->ia_type = buf->ia_type; } +out: + return ret; } static void @@ -1101,9 +1166,9 @@ out: } static inline gf_boolean_t -afr_is_self_heal_running (afr_local_t *local) +afr_is_transaction_running (afr_local_t *local) { - GF_ASSERT (local); + GF_ASSERT (local->fop == GF_FOP_LOOKUP); return ((local->inodelk_count > 0) || (local->entrylk_count > 0)); } @@ -1397,7 +1462,7 @@ afr_lookup_perform_self_heal_if_needed (call_frame_t *frame, xlator_t *this, afr_lookup_set_self_heal_data (local, this); if (afr_can_self_heal_proceed (&local->self_heal, priv)) { - if (afr_is_self_heal_running (local)) + if (afr_is_transaction_running (local)) goto out; afr_launch_self_heal (frame, this, local->cont.lookup.inode, @@ -1478,21 +1543,25 @@ afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this, goto out; } - ret = afr_lookup_select_read_child (local, this, &read_child); - if (ret) { - local->op_ret = -1; - local->op_errno = EIO; - goto out; - } + if (!afr_is_transaction_running (local)) { + ret = afr_lookup_select_read_child (local, this, &read_child); + if (ret) { + local->op_ret = -1; + local->op_errno = EIO; + goto out; + } - ret = afr_lookup_set_read_ctx (local, this, read_child); - if (ret) { - local->op_ret = -1; - local->op_errno = EIO; - goto out; + ret = afr_lookup_set_read_ctx (local, this, read_child); + if (ret) { + local->op_ret = -1; + local->op_errno = EIO; + goto out; + } } - afr_lookup_build_response_params (local, this); + ret = afr_lookup_build_response_params (local, this); + if (ret) + goto out; if (afr_is_fresh_lookup (&local->loc, this)) { 
afr_update_loc_gfids (&local->loc, &local->cont.lookup.buf, @@ -1740,12 +1809,12 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count) goto out; local->cont.lookup.bufs = iatts; - success_children = afr_fresh_children_create (child_count); + success_children = afr_children_create (child_count); if (NULL == success_children) goto out; local->cont.lookup.success_children = success_children; - local->fresh_children = afr_fresh_children_create (child_count); + local->fresh_children = afr_children_create (child_count); if (NULL == local->fresh_children) goto out; @@ -1774,6 +1843,7 @@ afr_lookup (call_frame_t *frame, xlator_t *this, local->op_ret = -1; frame->local = local; + local->fop = GF_FOP_LOOKUP; if (!strcmp (loc->path, "/" GF_REPLICATE_TRASH_DIR)) { op_errno = ENOENT; @@ -3493,7 +3563,7 @@ afr_transaction_local_init (afr_local_t *local, afr_private_t *priv) if (!local->pending) goto out; - local->fresh_children = afr_fresh_children_create (priv->child_count); + local->fresh_children = afr_children_create (priv->child_count); if (!local->fresh_children) goto out; @@ -3525,52 +3595,72 @@ afr_reset_children (int32_t *fresh_children, int32_t child_count) } int32_t* -afr_fresh_children_create (int32_t child_count) +afr_children_create (int32_t child_count) { - int32_t *fresh_children = NULL; + int32_t *children = NULL; int i = 0; GF_ASSERT (child_count > 0); - fresh_children = GF_CALLOC (child_count, sizeof (*fresh_children), - gf_afr_mt_int32_t); - if (NULL == fresh_children) + children = GF_CALLOC (child_count, sizeof (*children), + gf_afr_mt_int32_t); + if (NULL == children) goto out; for (i = 0; i < child_count; i++) - fresh_children[i] = -1; + children[i] = -1; out: - return fresh_children; + return children; } void -afr_fresh_children_add_child (int32_t *fresh_children, int32_t child, - int32_t child_count) +afr_children_add_child (int32_t *children, int32_t child, + int32_t child_count) { gf_boolean_t child_found = _gf_false; int i = 0; for (i 
= 0; i < child_count; i++) { - if (fresh_children[i] == -1) + if (children[i] == -1) break; - if (fresh_children[i] == child) { + if (children[i] == child) { child_found = _gf_true; break; } } + if (!child_found) { GF_ASSERT (i < child_count); - fresh_children[i] = child; + children[i] = child; + } +} + +void +afr_children_rm_child (int32_t *children, int32_t child, int32_t child_count) +{ + int i = 0; + + GF_ASSERT ((child >= 0) && (child < child_count)); + for (i = 0; i < child_count; i++) { + if (children[i] == -1) + break; + if (children[i] == child) { + if (i != (child_count - 1)) + memmove (children + i, children + i + 1, + sizeof (*children)*(child_count - i - 1)); + children[child_count - 1] = -1; + break; + } } } int -afr_get_children_count (int32_t *fresh_children, unsigned int child_count) +afr_get_children_count (int32_t *children, unsigned int child_count) { int count = 0; int i = 0; for (i = 0; i < child_count; i++) { - if (fresh_children[i] == -1) + if (children[i] == -1) break; count++; } diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c index 0fac7324c78..645da2a6c57 100644 --- a/xlators/cluster/afr/src/afr-dir-read.c +++ b/xlators/cluster/afr/src/afr-dir-read.c @@ -668,7 +668,7 @@ afr_do_readdir (call_frame_t *frame, xlator_t *this, goto out; } - local->fresh_children = afr_fresh_children_create (priv->child_count); + local->fresh_children = afr_children_create (priv->child_count); if (!local->fresh_children) { op_errno = ENOMEM; goto out; diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c index 7d7cc3d80c8..f8157482758 100644 --- a/xlators/cluster/afr/src/afr-inode-read.c +++ b/xlators/cluster/afr/src/afr-inode-read.c @@ -136,7 +136,7 @@ afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask) goto out; } - local->fresh_children = afr_fresh_children_create (priv->child_count); + local->fresh_children = afr_children_create 
(priv->child_count); if (!local->fresh_children) { op_errno = ENOMEM; goto out; @@ -254,7 +254,7 @@ afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) goto out; } - local->fresh_children = afr_fresh_children_create (priv->child_count); + local->fresh_children = afr_children_create (priv->child_count); if (!local->fresh_children) { op_errno = ENOMEM; goto out; @@ -375,7 +375,7 @@ afr_fstat (call_frame_t *frame, xlator_t *this, goto out; } - local->fresh_children = afr_fresh_children_create (priv->child_count); + local->fresh_children = afr_children_create (priv->child_count); if (!local->fresh_children) { op_errno = ENOMEM; goto out; @@ -495,7 +495,7 @@ afr_readlink (call_frame_t *frame, xlator_t *this, goto out; } - local->fresh_children = afr_fresh_children_create (priv->child_count); + local->fresh_children = afr_children_create (priv->child_count); if (!local->fresh_children) { op_errno = ENOMEM; goto out; @@ -879,7 +879,7 @@ afr_getxattr (call_frame_t *frame, xlator_t *this, } } - local->fresh_children = afr_fresh_children_create (priv->child_count); + local->fresh_children = afr_children_create (priv->child_count); if (!local->fresh_children) { op_errno = ENOMEM; goto out; @@ -1013,7 +1013,7 @@ afr_readv (call_frame_t *frame, xlator_t *this, goto out; } - local->fresh_children = afr_fresh_children_create (priv->child_count); + local->fresh_children = afr_children_create (priv->child_count); if (!local->fresh_children) { op_errno = ENOMEM; goto out; diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 15b659fa837..f66bdff8446 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -1468,8 +1468,8 @@ afr_sh_purge_stale_entry (call_frame_t *frame, xlator_t *this) sh->entrybuf.ia_gfid))) continue; - afr_fresh_children_add_child (sh->fresh_children, - i, priv->child_count); + afr_children_add_child (sh->fresh_children, i, + 
priv->child_count); } afr_sh_purge_entry_common (frame, this, @@ -2094,9 +2094,9 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode) priv->child_count, gf_afr_mt_int32_t); } - sh->success_children = afr_fresh_children_create (priv->child_count); - sh->fresh_children = afr_fresh_children_create (priv->child_count); - sh->fresh_parent_dirs = afr_fresh_children_create (priv->child_count); + sh->success_children = afr_children_create (priv->child_count); + sh->fresh_children = afr_children_create (priv->child_count); + sh->fresh_parent_dirs = afr_children_create (priv->child_count); FRAME_SU_DO (sh_frame, afr_local_t); diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 74a1bf35371..dcaad9c8b47 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -310,7 +310,7 @@ afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie, sh = &local->self_heal; i = (long)cookie; - afr_fresh_children_add_child (sh->fresh_children, i, priv->child_count); + afr_children_add_child (sh->fresh_children, i, priv->child_count); call_count = afr_frame_return (frame); if (call_count == 0) { diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c index 556ea8027a3..9e80cb3d5a5 100644 --- a/xlators/cluster/afr/src/afr-self-heal-entry.c +++ b/xlators/cluster/afr/src/afr-self-heal-entry.c @@ -130,7 +130,7 @@ afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie, i = (long)cookie; - afr_fresh_children_add_child (sh->fresh_children, i, priv->child_count); + afr_children_add_child (sh->fresh_children, i, priv->child_count); if (op_ret == -1) { gf_log (this->name, GF_LOG_INFO, "%s: failed to erase pending xattrs on %s (%s)", diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index bacf2758880..5445132ab8c 100644 --- 
a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -160,8 +160,8 @@ afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie, if ((!IA_ISREG (sh->buf[sh->source].ia_type)) && (!IA_ISDIR (sh->buf[sh->source].ia_type))) { - afr_fresh_children_add_child (sh->fresh_children, i, - priv->child_count); + afr_children_add_child (sh->fresh_children, i, + priv->child_count); } call_count = afr_frame_return (frame); diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 1fb0781d8b7..fc030433b69 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -404,53 +404,67 @@ afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this, void -afr_update_read_child (call_frame_t *frame, xlator_t *this, inode_t *inode, - afr_transaction_type type) +afr_transaction_rm_stale_children (call_frame_t *frame, xlator_t *this, + inode_t *inode, afr_transaction_type type) { - int curr_read_child = -1; - int new_read_child = -1; + int i = -1; + int count = 0; + int read_child = -1; afr_private_t *priv = NULL; afr_local_t *local = NULL; int **pending = NULL; int idx = 0; + int32_t *stale_children = NULL; int32_t *fresh_children = NULL; - size_t success_count = 0; + gf_boolean_t rm_stale_children = _gf_false; idx = afr_index_for_transaction_type (type); priv = this->private; local = frame->local; - curr_read_child = afr_inode_get_read_ctx (this, inode, NULL); pending = local->pending; - GF_ASSERT (curr_read_child >= 0); - - if (pending[curr_read_child][idx] != 0) + stale_children = afr_children_create (priv->child_count); + if (!stale_children) goto out; - fresh_children = afr_fresh_children_create (priv->child_count); - if (!fresh_children) - goto out; + fresh_children = local->fresh_children; + read_child = afr_inode_get_read_ctx (this, inode, fresh_children); - for (new_read_child = 0; new_read_child < 
priv->child_count; - new_read_child++) { + GF_ASSERT (read_child >= 0); - if (!priv->child_up[new_read_child]) - /* child is down */ - continue; + if (pending[read_child][idx] == 0) + read_child = -1; - if (pending[new_read_child][idx] == 0) - /* op just failed */ + for (i = 0; i < priv->child_count; i++) { + if (!afr_is_child_present (fresh_children, + priv->child_count, i)) continue; - fresh_children[success_count] = new_read_child; - success_count++; + if ((!priv->child_up[i]) || (pending[i][idx] == 0)) { + /* child is down or op failed on it */ + rm_stale_children = _gf_true; + afr_children_rm_child (fresh_children, i, + priv->child_count); + stale_children[count++] = i; + } + } + + if (!rm_stale_children) { + GF_ASSERT (read_child >= 0); + goto out; + } + + if (fresh_children[0] == -1) { + //All children failed. leave as-is + goto out; } - afr_inode_set_read_ctx (this, inode, fresh_children[0], - fresh_children); + if (read_child == -1) + read_child = fresh_children[0]; + afr_inode_rm_stale_children (this, inode, read_child, stale_children); out: - if (fresh_children) - GF_FREE (fresh_children); + if (stale_children) + GF_FREE (stale_children); return; } @@ -478,8 +492,9 @@ afr_changelog_post_op (call_frame_t *frame, xlator_t *this) local->child_up, local->transaction.type); if (local->fd) - afr_update_read_child (frame, this, local->fd->inode, - local->transaction.type); + afr_transaction_rm_stale_children (frame, this, + local->fd->inode, + local->transaction.type); xattr = alloca (priv->child_count * sizeof (*xattr)); memset (xattr, 0, (priv->child_count * sizeof (*xattr))); diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 2ca13078ef7..236a24a6057 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -48,13 +48,23 @@ typedef int (*afr_post_remove_call_t) (call_frame_t *frame, xlator_t *this); typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this); +typedef enum { + 
AFR_INODE_SET_READ_CTX = 1, + AFR_INODE_RM_STALE_CHILDREN, + AFR_INODE_SET_OPENDIR_DONE, + AFR_INODE_SET_SPLIT_BRAIN, + AFR_INODE_GET_READ_CTX, + AFR_INODE_GET_OPENDIR_DONE, + AFR_INODE_GET_SPLIT_BRAIN, +} afr_inode_op_t; + typedef struct afr_inode_params_ { - uint64_t mask_type; + afr_inode_op_t op; union { gf_boolean_t value; struct { int32_t read_child; - int32_t *fresh_children; + int32_t *children; } read_ctx; } u; } afr_inode_params_t; @@ -869,7 +879,7 @@ afr_marker_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,afr_local_t *local, afr_private_t *priv ); int32_t * -afr_fresh_children_create (int32_t child_count); +afr_children_create (int32_t child_count); int AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv); @@ -902,19 +912,22 @@ afr_next_call_child (int32_t *fresh_children, unsigned char *child_up, int32_t read_child); void afr_get_fresh_children (int32_t *success_children, int32_t *sources, - int32_t *fresh_children, unsigned int child_count); + int32_t *children, unsigned int child_count); void -afr_fresh_children_add_child (int32_t *fresh_children, int32_t child, +afr_children_add_child (int32_t *children, int32_t child, int32_t child_count); void -afr_reset_children (int32_t *fresh_children, int32_t child_count); +afr_children_rm_child (int32_t *children, int32_t child, + int32_t child_count); +void +afr_reset_children (int32_t *children, int32_t child_count); gf_boolean_t afr_error_more_important (int32_t old_errno, int32_t new_errno); int afr_errno_count (int32_t *children, int *child_errno, unsigned int child_count, int32_t op_errno); int -afr_get_children_count (int32_t *fresh_children, unsigned int child_count); +afr_get_children_count (int32_t *children, unsigned int child_count); gf_boolean_t afr_is_child_present (int32_t *success_children, int32_t child_count, int32_t child); @@ -941,4 +954,7 @@ afr_transaction_type_get (ia_type_t ia_type); int32_t afr_resultant_errno_get (int32_t *children, int *child_errno, 
unsigned int child_count); +void +afr_inode_rm_stale_children (xlator_t *this, inode_t *inode, int32_t read_child, + int32_t *stale_children); #endif /* __AFR_H__ */ |