diff options
author | Pranith K <pranithk@gluster.com> | 2011-07-14 08:07:04 +0000 |
---|---|---|
committer | Anand Avati <avati@gluster.com> | 2011-07-17 07:45:05 -0700 |
commit | bfc0e16e43815ab6d6e67f4bd26694ebd72b3360 (patch) | |
tree | 221eff40a09ce8e42ab6460bc9040d5b5f9f41fb /xlators/cluster/afr/src/afr-common.c | |
parent | 64b2a56ad0f8ddae9ece8696f7d50a3129c145a3 (diff) |
cluster/afr: Add fresh children along with read-child to inode context
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Signed-off-by: Anand Avati <avati@gluster.com>
BUG: 2840 (files not getting self-healed when the first child goes down)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2840
Diffstat (limited to 'xlators/cluster/afr/src/afr-common.c')
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 680 |
1 files changed, 522 insertions, 158 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index b753cbfa85c..e8afc6d8de6 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -87,187 +87,444 @@ out: return ret; } -uint64_t -afr_is_split_brain (xlator_t *this, inode_t *inode) +afr_inode_ctx_t* +afr_inode_ctx_get_from_addr (uint64_t addr, int32_t child_count) { - int ret = 0; + int ret = -1; + afr_inode_ctx_t *ctx = NULL; + size_t size = 0; - uint64_t ctx = 0; - uint64_t split_brain = 0; + GF_ASSERT (child_count > 0); - VALIDATE_OR_GOTO (inode, out); + if (!addr) { + ctx = GF_CALLOC (1, sizeof (*ctx), + gf_afr_mt_inode_ctx_t); + if (!ctx) + goto out; + size = sizeof (*ctx->fresh_children); + ctx->fresh_children = GF_CALLOC (child_count, size, + gf_afr_mt_int32_t); + if (!ctx->fresh_children) + goto out; + } else { + ctx = (afr_inode_ctx_t*) (long) addr; + } + ret = 0; +out: + if (ret && ctx) { + if (ctx->fresh_children) + GF_FREE (ctx->fresh_children); + GF_FREE (ctx); + ctx = NULL; + } + return ctx; +} + +void +afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params) +{ + GF_ASSERT (inode); + GF_ASSERT (params); + int ret = 0; + afr_inode_ctx_t *ctx = NULL; + afr_private_t *priv = NULL; + int i = 0; + uint64_t ctx_addr = 0; + int32_t read_child = -1; + int32_t *fresh_children = NULL; + + priv = this->private; LOCK (&inode->lock); { - ret = __inode_ctx_get (inode, this, &ctx); - + ret = __inode_ctx_get (inode, this, &ctx_addr); if (ret < 0) goto unlock; - - split_brain = ctx & AFR_ICTX_SPLIT_BRAIN_MASK; + ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count); + if (!ctx) + goto unlock; + switch (params->mask_type) { + case AFR_ICTX_READ_CHILD_MASK: + fresh_children = params->u.read_ctx.fresh_children; + read_child = (int32_t)(ctx->masks & + AFR_ICTX_READ_CHILD_MASK); + params->u.read_ctx.read_child = read_child; + if (!fresh_children) + goto unlock; + for (i = 0; i < priv->child_count; i++) + 
fresh_children[i] = ctx->fresh_children[i]; + break; + case AFR_ICTX_OPENDIR_DONE_MASK: + params->u.value = ctx->masks & + AFR_ICTX_OPENDIR_DONE_MASK; + break; + case AFR_ICTX_SPLIT_BRAIN_MASK: + params->u.value = ctx->masks & AFR_ICTX_SPLIT_BRAIN_MASK; + break; + } } unlock: UNLOCK (&inode->lock); +} -out: - return split_brain; +uint64_t +afr_is_split_brain (xlator_t *this, inode_t *inode) +{ + afr_inode_params_t params = {0}; + + params.mask_type = AFR_ICTX_SPLIT_BRAIN_MASK; + afr_inode_get_ctx (this, inode, &params); + return params.u.value; +} + +gf_boolean_t +afr_is_opendir_done (xlator_t *this, inode_t *inode) +{ + afr_inode_params_t params = {0}; + + params.mask_type = AFR_ICTX_OPENDIR_DONE_MASK; + afr_inode_get_ctx (this, inode, &params); + return params.u.value; } +int32_t +afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children) +{ + afr_inode_params_t params = {0}; + + params.mask_type = AFR_ICTX_READ_CHILD_MASK; + params.u.read_ctx.fresh_children = fresh_children; + afr_inode_get_ctx (this, inode, &params); + return params.u.read_ctx.read_child; +} + void -afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set) +afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child, + int32_t *fresh_children, int32_t child_count) { - uint64_t ctx = 0; - int ret = 0; + uint64_t rest_of_mask = 0; + uint64_t mask = 0; + int i = 0; - VALIDATE_OR_GOTO (inode, out); + rest_of_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks); + mask = (AFR_ICTX_READ_CHILD_MASK & read_child); + ctx->masks = rest_of_mask | mask; - LOCK (&inode->lock); - { - ret = __inode_ctx_get (inode, this, &ctx); + /* avoid memcpy as int, int32_t are used interchangeably + */ + for (i = 0; i < child_count; i++) { + if (fresh_children) + ctx->fresh_children[i] = fresh_children[i]; + else + ctx->fresh_children[i] = -1; + } +} - if (ret < 0) { - ctx = 0; - } +void +afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx) +{ + uint64_t rest_of_mask = 0; + uint64_t mask = 0; - 
if (set) { - ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx) - | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK); - } else { - ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx); - } + rest_of_mask = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks); + mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK); + ctx->masks = rest_of_mask | mask; +} - ret = __inode_ctx_put (inode, this, ctx); - if (ret) { - gf_log_callingfn (this->name, GF_LOG_INFO, - "failed to set the inode ctx (%s)", - uuid_utoa (inode->gfid)); - } +void +afr_inode_ctx_set_splitbrain (afr_inode_ctx_t *ctx, gf_boolean_t set) +{ + uint64_t rest_of_mask = 0; + uint64_t mask = 0; + + if (set) { + rest_of_mask = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks); + mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK); + ctx->masks = rest_of_mask | mask; + } else { + ctx->masks = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks); } - UNLOCK (&inode->lock); -out: - return; } - -uint64_t -afr_is_opendir_done (xlator_t *this, inode_t *inode) +void +afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params) { - int ret = 0; - uint64_t ctx = 0; - uint64_t opendir_done = 0; + GF_ASSERT (inode); + GF_ASSERT (params); - VALIDATE_OR_GOTO (inode, out); + int ret = 0; + afr_inode_ctx_t *ctx = NULL; + afr_private_t *priv = NULL; + uint64_t ctx_addr = 0; + gf_boolean_t set = _gf_false; + int32_t read_child = -1; + int32_t *fresh_children = NULL; + priv = this->private; LOCK (&inode->lock); { - ret = __inode_ctx_get (inode, this, &ctx); - + ret = __inode_ctx_get (inode, this, &ctx_addr); if (ret < 0) + ctx_addr = 0; + ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count); + if (!ctx) goto unlock; - - opendir_done = ctx & AFR_ICTX_OPENDIR_DONE_MASK; + switch (params->mask_type) { + case AFR_ICTX_READ_CHILD_MASK: + read_child = params->u.read_ctx.read_child; + fresh_children = params->u.read_ctx.fresh_children; + afr_inode_ctx_set_read_ctx (ctx, read_child, + fresh_children, + priv->child_count); + break; + case 
AFR_ICTX_OPENDIR_DONE_MASK: + afr_inode_ctx_set_opendir_done (ctx); + break; + case AFR_ICTX_SPLIT_BRAIN_MASK: + set = params->u.value; + afr_inode_ctx_set_splitbrain (ctx, set); + break; + } + ret = __inode_ctx_put (inode, this, (uint64_t)ctx); + if (ret) { + gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to " + "set the inode ctx (%s)", + uuid_utoa (inode->gfid)); + } } unlock: UNLOCK (&inode->lock); - -out: - return opendir_done; } +void +afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set) +{ + afr_inode_params_t params = {0}; + + params.mask_type = AFR_ICTX_SPLIT_BRAIN_MASK; + params.u.value = set; + afr_inode_set_ctx (this, inode, &params); +} void afr_set_opendir_done (xlator_t *this, inode_t *inode) { - uint64_t ctx = 0; - int ret = 0; + afr_inode_params_t params = {0}; - VALIDATE_OR_GOTO (inode, out); + params.mask_type = AFR_ICTX_OPENDIR_DONE_MASK; + afr_inode_set_ctx (this, inode, &params); +} - LOCK (&inode->lock); - { - ret = __inode_ctx_get (inode, this, &ctx); +void +afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child, + int32_t *fresh_children) +{ + afr_inode_params_t params = {0}; - if (ret < 0) { - ctx = 0; - } + GF_ASSERT (read_child >= 0); + GF_ASSERT (fresh_children); - ctx = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx) - | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK); + params.mask_type = AFR_ICTX_READ_CHILD_MASK; + params.u.read_ctx.read_child = read_child; + params.u.read_ctx.fresh_children = fresh_children; + afr_inode_set_ctx (this, inode, &params); +} - ret = __inode_ctx_put (inode, this, ctx); - if (ret) { - gf_log_callingfn (this->name, GF_LOG_INFO, - "failed to set the inode ctx (%s)", - uuid_utoa (inode->gfid)); - } +gf_boolean_t +afr_is_source_child (int32_t *sources, int32_t child_count, int32_t child) +{ + gf_boolean_t source_xattrs = _gf_false; + + GF_ASSERT (child < child_count); + + if ((child >= 0) && (child < child_count) && + sources[child]) { + source_xattrs = _gf_true; } - UNLOCK 
(&inode->lock); -out: - return; + return source_xattrs; } +gf_boolean_t +afr_is_success_child (int32_t *success_children, int32_t child_count, + int32_t child) +{ + gf_boolean_t success_child = _gf_false; + int i = 0; -uint64_t -afr_read_child (xlator_t *this, inode_t *inode) + GF_ASSERT (child < child_count); + + for (i = 0; i < child_count; i++) { + if (success_children[i] == -1) + break; + if (child == success_children[i]) { + success_child = _gf_true; + break; + } + } + return success_child; +} + +gf_boolean_t +afr_is_read_child (int32_t *success_children, int32_t *sources, + int32_t child_count, int32_t child) { - int ret = 0; + gf_boolean_t success_child = _gf_false; + gf_boolean_t source = _gf_false; - uint64_t ctx = 0; - uint64_t read_child = 0; + GF_ASSERT (success_children); + GF_ASSERT (child_count > 0); - VALIDATE_OR_GOTO (inode, out); + success_child = afr_is_success_child (success_children, child_count, + child); + if (!success_child) + goto out; + if (NULL == sources) { + source = _gf_true; + goto out; + } + source = afr_is_source_child (sources, child_count, child); +out: + return (success_child && source); +} - LOCK (&inode->lock); - { - ret = __inode_ctx_get (inode, this, &ctx); +/* If sources is NULL the xattrs are assumed to be of source for all + * success_children. 
+ */ +int +afr_select_read_child_from_policy (int32_t *success_children, int32_t child_count, + int32_t prev_read_child, + int32_t config_read_child, int32_t *sources) +{ + int32_t read_child = -1; + int i = 0; - if (ret < 0) - goto unlock; + GF_ASSERT (success_children); + + read_child = prev_read_child; + if (afr_is_read_child (success_children, sources, child_count, + read_child)) + goto out; + + read_child = config_read_child; + if (afr_is_read_child (success_children, sources, child_count, + read_child)) + goto out; - read_child = ctx & AFR_ICTX_READ_CHILD_MASK; + for (i = 0; i < child_count; i++) { + read_child = success_children[i]; + if (read_child < 0) + break; + if (afr_is_read_child (success_children, sources, child_count, + read_child)) + goto out; } -unlock: - UNLOCK (&inode->lock); + read_child = -1; out: return read_child; } - +/* This function should be used when all the success_children are sources + */ void -afr_set_read_child (xlator_t *this, inode_t *inode, int32_t read_child) +afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode, + int32_t *fresh_children, int32_t prev_read_child, + int32_t config_read_child) { - uint64_t ctx = 0; - int ret = 0; + int read_child = -1; + afr_private_t *priv = NULL; - VALIDATE_OR_GOTO (inode, out); + priv = this->private; + read_child = afr_select_read_child_from_policy (fresh_children, + priv->child_count, + prev_read_child, + config_read_child, + NULL); + afr_inode_set_read_ctx (this, inode, read_child, fresh_children); +} + +/* afr_next_call_child () + * This is a common function used by all the read-type fops + * This function should not be called with the inode's read_children array. + * The fop's handler should make a copy of the inode's read_children, + * preferred read_child into the local vars, because while this function is + * in execution there is a chance for inode's read_ctx to change. 
+ */ +int32_t +afr_next_call_child (int32_t *fresh_children, size_t child_count, + int32_t *last_index, int32_t read_child) +{ + int next_index = 0; + int32_t next_call_child = -1; - LOCK (&inode->lock); - { - ret = __inode_ctx_get (inode, this, &ctx); + GF_ASSERT (last_index); - if (ret < 0) { - ctx = 0; - } + next_index = *last_index; +retry: + next_index++; + if (next_index >= child_count) + goto out; + if (fresh_children[next_index] == read_child) + goto retry; + if (fresh_children[next_index] == -1) + goto out; + *last_index = next_index; + next_call_child = fresh_children[next_index]; +out: + return next_call_child; +} - ctx = (~AFR_ICTX_READ_CHILD_MASK & ctx) - | (AFR_ICTX_READ_CHILD_MASK & read_child); + /* This function should not be called with the inode's read_children array. + * The fop's handler should make a copy of the inode's read_children, + * preferred read_child into the local vars, because while this function is + * in execution there is a chance for inode's read_ctx to change. 
+ */ +int32_t +afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child, + int32_t *fresh_children, + int32_t *call_child, int32_t *last_index) +{ + int ret = 0; + afr_private_t *priv = NULL; + int i = 0; - ret = __inode_ctx_put (inode, this, ctx); - if (ret) { - gf_log_callingfn (this->name, GF_LOG_INFO, - "failed to set the inode ctx (%s)", - uuid_utoa (inode->gfid)); + GF_ASSERT (child_up); + GF_ASSERT (call_child); + GF_ASSERT (last_index); + GF_ASSERT (fresh_children); + GF_ASSERT (read_child >= 0); + + priv = this->private; + *call_child = -1; + *last_index = -1; + + if (child_up[read_child]) { + *call_child = read_child; + } else { + for (i = 0; i < priv->child_count; i++) { + if (fresh_children[i] == -1) + break; + if (child_up[fresh_children[i]]) { + *call_child = fresh_children[i]; + ret = 0; + break; + } } - } - UNLOCK (&inode->lock); + if (*call_child == -1) { + ret = -ENOTCONN; + goto out; + } + + *last_index = i; + } out: - return; + gf_log (this->name, GF_LOG_DEBUG, "Returning %d, call_child: %d, " + "last_index: %d", ret, *call_child, *last_index); + return ret; } - void afr_local_sh_cleanup (afr_local_t *local, xlator_t *this) { @@ -325,8 +582,12 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this) if (sh->linkname) GF_FREE ((char *)sh->linkname); - if (sh->child_success) - GF_FREE (sh->child_success); + + if (sh->success_children) + GF_FREE (sh->success_children); + + if (sh->fresh_children) + GF_FREE (sh->fresh_children); loc_wipe (&sh->parent_loc); } @@ -398,6 +659,9 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this) if (local->child_up) GF_FREE (local->child_up); + if (local->fresh_children) + GF_FREE (local->fresh_children); + { /* lookup */ if (local->cont.lookup.xattrs) { for (i = 0; i < priv->child_count; i++) { @@ -424,8 +688,8 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this) if (local->cont.lookup.bufs) GF_FREE (local->cont.lookup.bufs); - if (local->cont.lookup.child_success) - GF_FREE 
(local->cont.lookup.child_success); + if (local->cont.lookup.success_children) + GF_FREE (local->cont.lookup.success_children); if (local->cont.lookup.sources) GF_FREE (local->cont.lookup.sources); @@ -734,20 +998,21 @@ int afr_lookup_select_read_child (afr_local_t *local, xlator_t *this, int32_t *read_child) { - int32_t source = -1; - ia_type_t ia_type = 0; - int ret = -1; - afr_transaction_type type = AFR_METADATA_TRANSACTION; - dict_t **xattrs = NULL; - int32_t *child_success = NULL; - struct iatt *bufs = NULL; + int32_t source = -1; + ia_type_t ia_type = 0; + int ret = -1; + afr_transaction_type type = AFR_METADATA_TRANSACTION; + dict_t **xattrs = NULL; + int32_t *success_children = NULL; + struct iatt *bufs = NULL; GF_ASSERT (local); GF_ASSERT (this); + GF_ASSERT (local->success_count > 0); bufs = local->cont.lookup.bufs; - child_success = local->cont.lookup.child_success; - ia_type = local->cont.lookup.bufs[child_success[0]].ia_type; + success_children = local->cont.lookup.success_children; + ia_type = local->cont.lookup.bufs[success_children[0]].ia_type; if (IA_ISDIR (ia_type)) { type = AFR_ENTRY_TRANSACTION; } else if (IA_ISREG (ia_type)) { @@ -773,7 +1038,7 @@ afr_is_self_heal_running (afr_local_t *local) } static void -afr_launch_self_heal (call_frame_t *frame, xlator_t *this, +afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode, gf_boolean_t is_background, ia_type_t ia_type, int (*unwind) (call_frame_t *frame, xlator_t *this)) { @@ -782,6 +1047,7 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this, GF_ASSERT (frame); GF_ASSERT (this); + GF_ASSERT (inode); local = frame->local; local->self_heal.background = is_background; @@ -796,7 +1062,7 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this, "background %s self-heal triggered. 
path: %s", sh_type_str, local->loc.path); - afr_self_heal (frame, this); + afr_self_heal (frame, this, inode); } static void @@ -813,8 +1079,8 @@ afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this) bufs = local->cont.lookup.bufs; for (i = 1; i < local->success_count; i++) { - child1 = local->cont.lookup.child_success[i-1]; - child2 = local->cont.lookup.child_success[i];; + child1 = local->cont.lookup.success_children[i-1]; + child2 = local->cont.lookup.success_children[i]; afr_detect_self_heal_by_iatt (local, this, &bufs[child1], &bufs[child2]); } @@ -822,7 +1088,7 @@ afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this) xattr = local->cont.lookup.xattrs; priv = this->private; for (i = 0; i < local->success_count; i++) { - child1 = local->cont.lookup.child_success[i];; + child1 = local->cont.lookup.success_children[i]; afr_lookup_detect_self_heal_by_xattr (local, this, xattr[child1]); } @@ -860,8 +1126,8 @@ afr_lookup_perform_self_heal_if_needed (call_frame_t *frame, xlator_t *this, goto out; } - afr_launch_self_heal (frame, this, _gf_true, - local->cont.lookup.buf.ia_type, + afr_launch_self_heal (frame, this, local->cont.lookup.inode, + _gf_true, local->cont.lookup.buf.ia_type, afr_self_heal_lookup_unwind); *sh_launched = _gf_true; } @@ -875,22 +1141,22 @@ afr_lookup_split_brain (afr_local_t *local, xlator_t *this) int i = 0; gf_boolean_t symptom = _gf_false; struct iatt *bufs = NULL; - int32_t *child_success = NULL; + int32_t *success_children = NULL; struct iatt *child1 = NULL; struct iatt *child2 = NULL; const char *path = NULL; bufs = local->cont.lookup.bufs; - child_success = local->cont.lookup.child_success; + success_children = local->cont.lookup.success_children; for (i = 1; i < local->success_count; i++) { - child1 = &bufs[child_success[i-1]]; - child2 = &bufs[child_success[i]]; + child1 = &bufs[success_children[i-1]]; + child2 = &bufs[success_children[i]]; /* * TODO: gfid self-heal * if (uuid_compare (child1->ia_gfid, 
child2->ia_gfid)) { * gf_log (this->name, GF_LOG_WARNING, "%s: gfid differs" * " on subvolumes (%d, %d)", local->loc.path, - * child_success[i-1], child_success[i]); + * success_children[i-1], success_children[i]); * symptom = _gf_true; * } */ @@ -899,7 +1165,7 @@ afr_lookup_split_brain (afr_local_t *local, xlator_t *this) path = local->loc.path; gf_log (this->name, GF_LOG_WARNING, "%s: filetype " "differs on subvolumes (%d, %d)", path, - child_success[i-1], child_success[i]); + success_children[i-1], success_children[i]); symptom = _gf_true; local->govinda_gOvinda = 1; } @@ -909,13 +1175,42 @@ afr_lookup_split_brain (afr_local_t *local, xlator_t *this) return symptom; } +void +afr_get_fresh_children (int32_t *success_children, int32_t *sources, + int32_t *fresh_children, unsigned int child_count) +{ + unsigned int i = 0; + unsigned int j = 0; + + GF_ASSERT (success_children); + GF_ASSERT (sources); + GF_ASSERT (fresh_children); + + for (i = 0; i < child_count; i++) { + if (success_children[i] == -1) + break; + if (afr_is_read_child (success_children, sources, child_count, + success_children[i])) { + fresh_children[j] = success_children[i]; + j++; + } + } +} + static int -afr_lookup_set_read_child (afr_local_t *local, xlator_t *this, int32_t read_child) +afr_lookup_set_read_ctx (afr_local_t *local, xlator_t *this, int32_t read_child) { + afr_private_t *priv = NULL; + GF_ASSERT (read_child >= 0); - afr_set_read_child (this, local->cont.lookup.inode, read_child); + priv = this->private; local->cont.lookup.read_child = read_child; + afr_get_fresh_children (local->cont.lookup.success_children, + local->cont.lookup.sources, + local->fresh_children, priv->child_count); + afr_inode_set_read_ctx (this, local->cont.lookup.inode, read_child, + local->fresh_children); return 0; } @@ -949,7 +1244,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this) goto unwind; } - ret = afr_lookup_set_read_child (local, this, read_child); + ret = afr_lookup_set_read_ctx (local, this, 
read_child); if (ret) goto unwind; @@ -1070,7 +1365,7 @@ afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_ind afr_lookup_cache_args (local, child_index, xattr, buf, postparent); - local->cont.lookup.child_success[local->success_count] = child_index; + local->cont.lookup.success_children[local->success_count] = child_index; local->success_count++; } @@ -1114,9 +1409,8 @@ int afr_lookup_cont_init (afr_local_t *local, unsigned int child_count) { int ret = -ENOMEM; - int32_t *child_success = NULL; struct iatt *iatts = NULL; - int i = 0; + int32_t *success_children = NULL; GF_ASSERT (local); local->cont.lookup.xattrs = GF_CALLOC (child_count, @@ -1135,14 +1429,14 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count) goto out; local->cont.lookup.bufs = iatts; - child_success = GF_CALLOC (child_count, sizeof (*child_success), - gf_afr_mt_char); - if (NULL == child_success) + success_children = afr_fresh_children_create (child_count); + if (NULL == success_children) goto out; - for (i = 0; i < child_count; i++) - child_success[i] = -1; + local->cont.lookup.success_children = success_children; - local->cont.lookup.child_success = child_success; + local->fresh_children = afr_fresh_children_create (child_count); + if (NULL == local->fresh_children) + goto out; local->cont.lookup.read_child = -1; ret = 0; @@ -1181,8 +1475,8 @@ afr_lookup (call_frame_t *frame, xlator_t *this, if (ret == 0) { /* lookup is a revalidate */ - local->read_child_index = afr_read_child (this, - loc->inode); + local->read_child_index = afr_inode_get_read_ctx (this, loc->inode, + NULL); } else { LOCK (&priv->read_child_lock); { @@ -1611,7 +1905,7 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; - read_child = afr_read_child (this, local->fd->inode); + read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL); LOCK (&frame->lock); { @@ -2533,6 +2827,25 @@ out: } int +afr_forget (xlator_t *this, inode_t *inode) 
+{ + uint64_t ctx_addr = 0; + afr_inode_ctx_t *ctx = NULL; + + inode_ctx_get (inode, this, &ctx_addr); + + if (!ctx_addr) + goto out; + + ctx = (afr_inode_ctx_t *)(long)ctx_addr; + if (ctx->fresh_children) + GF_FREE (ctx->fresh_children); + GF_FREE (ctx); +out: + return 0; +} + +int afr_priv_dump (xlator_t *this) { afr_private_t *priv = NULL; @@ -2759,6 +3072,16 @@ out: int AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv) { + local->op_ret = -1; + local->op_errno = EUCLEAN; + local->call_count = afr_up_children_count (priv->child_count, + priv->child_up); + if (local->call_count == 0) { + gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up"); + return -ENOTCONN; + } + + local->child_up = GF_CALLOC (sizeof (*local->child_up), priv->child_count, gf_afr_mt_char); @@ -2769,16 +3092,6 @@ AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv) memcpy (local->child_up, priv->child_up, sizeof (*local->child_up) * priv->child_count); - local->call_count = afr_up_children_count (priv->child_count, - local->child_up); - local->op_ret = -1; - local->op_errno = EUCLEAN; - - if (local->call_count == 0) { - gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up"); - return -ENOTCONN; - } - return 0; } @@ -2849,6 +3162,10 @@ afr_transaction_local_init (afr_local_t *local, afr_private_t *priv) if (!local->pending) goto out; + local->fresh_children = afr_fresh_children_create (priv->child_count); + if (!local->fresh_children) + goto out; + for (i = 0; i < priv->child_count; i++) { local->pending[i] = GF_CALLOC (sizeof (*local->pending[i]), 3, /* data + metadata + entry */ @@ -2867,3 +3184,50 @@ afr_transaction_local_init (afr_local_t *local, afr_private_t *priv) out: return ret; } + +void +afr_reset_children (int32_t *fresh_children, int32_t child_count) +{ + unsigned int i = 0; + for (i = 0; i < child_count; i++) + fresh_children[i] = -1; +} + +int32_t* +afr_fresh_children_create (int32_t child_count) +{ + int32_t *fresh_children = NULL; + int i = 0; + + GF_ASSERT (child_count 
> 0); + + fresh_children = GF_CALLOC (child_count, sizeof (*fresh_children), + gf_afr_mt_int32_t); + if (NULL == fresh_children) + goto out; + for (i = 0; i < child_count; i++) + fresh_children[i] = -1; +out: + return fresh_children; +} + +void +afr_fresh_children_add_child (int32_t *fresh_children, int32_t child, + int32_t child_count) +{ + gf_boolean_t child_found = _gf_false; + int i = 0; + + for (i = 0; i < child_count; i++) { + if (fresh_children[i] == -1) + break; + if (fresh_children[i] == child) { + child_found = _gf_true; + break; + } + } + if (!child_found) { + GF_ASSERT (i < child_count); + fresh_children[i] = child; + } +} |