summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src
diff options
context:
space:
mode:
author: Pranith K <pranithk@gluster.com> 2011-07-14 08:07:04 +0000
committer: Anand Avati <avati@gluster.com> 2011-07-17 07:45:05 -0700
commit: bfc0e16e43815ab6d6e67f4bd26694ebd72b3360 (patch)
tree: 221eff40a09ce8e42ab6460bc9040d5b5f9f41fb /xlators/cluster/afr/src
parent: 64b2a56ad0f8ddae9ece8696f7d50a3129c145a3 (diff)
cluster/afr: Add fresh children along with read-child to inode context
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Signed-off-by: Anand Avati <avati@gluster.com>

BUG: 2840 (files not getting self-healed when the first child goes down)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=2840
Diffstat (limited to 'xlators/cluster/afr/src')
-rw-r--r-- xlators/cluster/afr/src/afr-common.c | 680
-rw-r--r-- xlators/cluster/afr/src/afr-dir-read.c | 2
-rw-r--r-- xlators/cluster/afr/src/afr-dir-write.c | 134
-rw-r--r-- xlators/cluster/afr/src/afr-inode-read.c | 55
-rw-r--r-- xlators/cluster/afr/src/afr-inode-write.c | 12
-rw-r--r-- xlators/cluster/afr/src/afr-mem-types.h | 1
-rw-r--r-- xlators/cluster/afr/src/afr-open.c | 2
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.c | 18
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-data.c | 88
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-entry.c | 30
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-metadata.c | 38
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal.h | 5
-rw-r--r-- xlators/cluster/afr/src/afr-transaction.c | 38
-rw-r--r-- xlators/cluster/afr/src/afr.c | 1
-rw-r--r-- xlators/cluster/afr/src/afr.h | 57
-rw-r--r-- xlators/cluster/afr/src/pump.c | 9
16 files changed, 817 insertions, 353 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index b753cbfa85c..e8afc6d8de6 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -87,187 +87,444 @@ out:
return ret;
}
-uint64_t
-afr_is_split_brain (xlator_t *this, inode_t *inode)
+afr_inode_ctx_t*
+afr_inode_ctx_get_from_addr (uint64_t addr, int32_t child_count)
{
- int ret = 0;
+ int ret = -1;
+ afr_inode_ctx_t *ctx = NULL;
+ size_t size = 0;
- uint64_t ctx = 0;
- uint64_t split_brain = 0;
+ GF_ASSERT (child_count > 0);
- VALIDATE_OR_GOTO (inode, out);
+ if (!addr) {
+ ctx = GF_CALLOC (1, sizeof (*ctx),
+ gf_afr_mt_inode_ctx_t);
+ if (!ctx)
+ goto out;
+ size = sizeof (*ctx->fresh_children);
+ ctx->fresh_children = GF_CALLOC (child_count, size,
+ gf_afr_mt_int32_t);
+ if (!ctx->fresh_children)
+ goto out;
+ } else {
+ ctx = (afr_inode_ctx_t*) (long) addr;
+ }
+ ret = 0;
+out:
+ if (ret && ctx) {
+ if (ctx->fresh_children)
+ GF_FREE (ctx->fresh_children);
+ GF_FREE (ctx);
+ ctx = NULL;
+ }
+ return ctx;
+}
+
+void
+afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
+{
+ GF_ASSERT (inode);
+ GF_ASSERT (params);
+ int ret = 0;
+ afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ uint64_t ctx_addr = 0;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
+
+ priv = this->private;
LOCK (&inode->lock);
{
- ret = __inode_ctx_get (inode, this, &ctx);
-
+ ret = __inode_ctx_get (inode, this, &ctx_addr);
if (ret < 0)
goto unlock;
-
- split_brain = ctx & AFR_ICTX_SPLIT_BRAIN_MASK;
+ ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
+ if (!ctx)
+ goto unlock;
+ switch (params->mask_type) {
+ case AFR_ICTX_READ_CHILD_MASK:
+ fresh_children = params->u.read_ctx.fresh_children;
+ read_child = (int32_t)(ctx->masks &
+ AFR_ICTX_READ_CHILD_MASK);
+ params->u.read_ctx.read_child = read_child;
+ if (!fresh_children)
+ goto unlock;
+ for (i = 0; i < priv->child_count; i++)
+ fresh_children[i] = ctx->fresh_children[i];
+ break;
+ case AFR_ICTX_OPENDIR_DONE_MASK:
+ params->u.value = ctx->masks &
+ AFR_ICTX_OPENDIR_DONE_MASK;
+ break;
+ case AFR_ICTX_SPLIT_BRAIN_MASK:
+ params->u.value = ctx->masks & AFR_ICTX_SPLIT_BRAIN_MASK;
+ break;
+ }
}
unlock:
UNLOCK (&inode->lock);
+}
-out:
- return split_brain;
+/* Report the split-brain bits kept in the inode's AFR context.
+ *
+ * Builds a request for AFR_ICTX_SPLIT_BRAIN_MASK, hands it to
+ * afr_inode_get_ctx() and returns whatever that call stored in u.value.
+ * The request starts out zeroed, so 0 is returned when afr_inode_get_ctx()
+ * stores nothing.
+ */
+uint64_t
+afr_is_split_brain (xlator_t *this, inode_t *inode)
+{
+        afr_inode_params_t request = {
+                .mask_type = AFR_ICTX_SPLIT_BRAIN_MASK,
+        };
+
+        afr_inode_get_ctx (this, inode, &request);
+
+        return request.u.value;
+}
+
+gf_boolean_t
+afr_is_opendir_done (xlator_t *this, inode_t *inode)
+{
+ afr_inode_params_t params = {0};
+
+ params.mask_type = AFR_ICTX_OPENDIR_DONE_MASK;
+ afr_inode_get_ctx (this, inode, &params);
+ return params.u.value;
}
+/* Fetch the read child stored in the inode's AFR context.
+ *
+ * fresh_children is passed through to afr_inode_get_ctx() as the output
+ * array for the inode's fresh children; callers in this file pass NULL when
+ * they only want the read child. The return value is the read_child field
+ * of the request after afr_inode_get_ctx() has run.
+ */
+int32_t
+afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children)
+{
+        afr_inode_params_t request = {0};
+
+        request.u.read_ctx.fresh_children = fresh_children;
+        request.mask_type = AFR_ICTX_READ_CHILD_MASK;
+
+        afr_inode_get_ctx (this, inode, &request);
+
+        return request.u.read_ctx.read_child;
+}
+
void
-afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set)
+afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child,
+ int32_t *fresh_children, int32_t child_count)
{
- uint64_t ctx = 0;
- int ret = 0;
+ uint64_t rest_of_mask = 0;
+ uint64_t mask = 0;
+ int i = 0;
- VALIDATE_OR_GOTO (inode, out);
+ rest_of_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks);
+ mask = (AFR_ICTX_READ_CHILD_MASK & read_child);
+ ctx->masks = rest_of_mask | mask;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
+ /* avoid memcpy as int, int32_t are used interchangeably
+ */
+ for (i = 0; i < child_count; i++) {
+ if (fresh_children)
+ ctx->fresh_children[i] = fresh_children[i];
+ else
+ ctx->fresh_children[i] = -1;
+ }
+}
- if (ret < 0) {
- ctx = 0;
- }
+void
+afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx)
+{
+ uint64_t rest_of_mask = 0;
+ uint64_t mask = 0;
- if (set) {
- ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx)
- | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK);
- } else {
- ctx = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx);
- }
+ rest_of_mask = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks);
+ mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
+ ctx->masks = rest_of_mask | mask;
+}
- ret = __inode_ctx_put (inode, this, ctx);
- if (ret) {
- gf_log_callingfn (this->name, GF_LOG_INFO,
- "failed to set the inode ctx (%s)",
- uuid_utoa (inode->gfid));
- }
+void
+afr_inode_ctx_set_splitbrain (afr_inode_ctx_t *ctx, gf_boolean_t set)
+{
+ uint64_t rest_of_mask = 0;
+ uint64_t mask = 0;
+
+ if (set) {
+ rest_of_mask = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
+ mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK);
+ ctx->masks = rest_of_mask | mask;
+ } else {
+ ctx->masks = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
}
- UNLOCK (&inode->lock);
-out:
- return;
}
-
-uint64_t
-afr_is_opendir_done (xlator_t *this, inode_t *inode)
+void
+afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
{
- int ret = 0;
- uint64_t ctx = 0;
- uint64_t opendir_done = 0;
+ GF_ASSERT (inode);
+ GF_ASSERT (params);
- VALIDATE_OR_GOTO (inode, out);
+ int ret = 0;
+ afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
+ uint64_t ctx_addr = 0;
+ gf_boolean_t set = _gf_false;
+ int32_t read_child = -1;
+ int32_t *fresh_children = NULL;
+ priv = this->private;
LOCK (&inode->lock);
{
- ret = __inode_ctx_get (inode, this, &ctx);
-
+ ret = __inode_ctx_get (inode, this, &ctx_addr);
if (ret < 0)
+ ctx_addr = 0;
+ ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
+ if (!ctx)
goto unlock;
-
- opendir_done = ctx & AFR_ICTX_OPENDIR_DONE_MASK;
+ switch (params->mask_type) {
+ case AFR_ICTX_READ_CHILD_MASK:
+ read_child = params->u.read_ctx.read_child;
+ fresh_children = params->u.read_ctx.fresh_children;
+ afr_inode_ctx_set_read_ctx (ctx, read_child,
+ fresh_children,
+ priv->child_count);
+ break;
+ case AFR_ICTX_OPENDIR_DONE_MASK:
+ afr_inode_ctx_set_opendir_done (ctx);
+ break;
+ case AFR_ICTX_SPLIT_BRAIN_MASK:
+ set = params->u.value;
+ afr_inode_ctx_set_splitbrain (ctx, set);
+ break;
+ }
+ ret = __inode_ctx_put (inode, this, (uint64_t)ctx);
+ if (ret) {
+ gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to "
+ "set the inode ctx (%s)",
+ uuid_utoa (inode->gfid));
+ }
}
unlock:
UNLOCK (&inode->lock);
-
-out:
- return opendir_done;
}
+/* Set or clear the split-brain bits in the inode's AFR context.
+ *
+ * Wraps afr_inode_set_ctx() with an AFR_ICTX_SPLIT_BRAIN_MASK request whose
+ * value is the caller's `set` flag.
+ */
+void
+afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set)
+{
+        afr_inode_params_t request = {
+                .mask_type = AFR_ICTX_SPLIT_BRAIN_MASK,
+        };
+
+        request.u.value = set;
+        afr_inode_set_ctx (this, inode, &request);
+}
void
afr_set_opendir_done (xlator_t *this, inode_t *inode)
{
- uint64_t ctx = 0;
- int ret = 0;
+ afr_inode_params_t params = {0};
- VALIDATE_OR_GOTO (inode, out);
+ params.mask_type = AFR_ICTX_OPENDIR_DONE_MASK;
+ afr_inode_set_ctx (this, inode, &params);
+}
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
+void
+afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
+ int32_t *fresh_children)
+{
+ afr_inode_params_t params = {0};
- if (ret < 0) {
- ctx = 0;
- }
+ GF_ASSERT (read_child >= 0);
+ GF_ASSERT (fresh_children);
- ctx = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx)
- | (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
+ params.mask_type = AFR_ICTX_READ_CHILD_MASK;
+ params.u.read_ctx.read_child = read_child;
+ params.u.read_ctx.fresh_children = fresh_children;
+ afr_inode_set_ctx (this, inode, &params);
+}
- ret = __inode_ctx_put (inode, this, ctx);
- if (ret) {
- gf_log_callingfn (this->name, GF_LOG_INFO,
- "failed to set the inode ctx (%s)",
- uuid_utoa (inode->gfid));
- }
+gf_boolean_t
+afr_is_source_child (int32_t *sources, int32_t child_count, int32_t child)
+{
+ gf_boolean_t source_xattrs = _gf_false;
+
+ GF_ASSERT (child < child_count);
+
+ if ((child >= 0) && (child < child_count) &&
+ sources[child]) {
+ source_xattrs = _gf_true;
}
- UNLOCK (&inode->lock);
-out:
- return;
+ return source_xattrs;
}
+gf_boolean_t
+afr_is_success_child (int32_t *success_children, int32_t child_count,
+ int32_t child)
+{
+ gf_boolean_t success_child = _gf_false;
+ int i = 0;
-uint64_t
-afr_read_child (xlator_t *this, inode_t *inode)
+ GF_ASSERT (child < child_count);
+
+ for (i = 0; i < child_count; i++) {
+ if (success_children[i] == -1)
+ break;
+ if (child == success_children[i]) {
+ success_child = _gf_true;
+ break;
+ }
+ }
+ return success_child;
+}
+
+gf_boolean_t
+afr_is_read_child (int32_t *success_children, int32_t *sources,
+ int32_t child_count, int32_t child)
{
- int ret = 0;
+ gf_boolean_t success_child = _gf_false;
+ gf_boolean_t source = _gf_false;
- uint64_t ctx = 0;
- uint64_t read_child = 0;
+ GF_ASSERT (success_children);
+ GF_ASSERT (child_count > 0);
- VALIDATE_OR_GOTO (inode, out);
+ success_child = afr_is_success_child (success_children, child_count,
+ child);
+ if (!success_child)
+ goto out;
+ if (NULL == sources) {
+ source = _gf_true;
+ goto out;
+ }
+ source = afr_is_source_child (sources, child_count, child);
+out:
+ return (success_child && source);
+}
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
+/* If sources is NULL the xattrs are assumed to be of source for all
+ * success_children.
+ */
+int
+afr_select_read_child_from_policy (int32_t *success_children, int32_t child_count,
+ int32_t prev_read_child,
+ int32_t config_read_child, int32_t *sources)
+{
+ int32_t read_child = -1;
+ int i = 0;
- if (ret < 0)
- goto unlock;
+ GF_ASSERT (success_children);
+
+ read_child = prev_read_child;
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child))
+ goto out;
+
+ read_child = config_read_child;
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child))
+ goto out;
- read_child = ctx & AFR_ICTX_READ_CHILD_MASK;
+ for (i = 0; i < child_count; i++) {
+ read_child = success_children[i];
+ if (read_child < 0)
+ break;
+ if (afr_is_read_child (success_children, sources, child_count,
+ read_child))
+ goto out;
}
-unlock:
- UNLOCK (&inode->lock);
+ read_child = -1;
out:
return read_child;
}
-
+/* This function should be used when all the success_children are sources
+ */
void
-afr_set_read_child (xlator_t *this, inode_t *inode, int32_t read_child)
+afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
+ int32_t *fresh_children, int32_t prev_read_child,
+ int32_t config_read_child)
{
- uint64_t ctx = 0;
- int ret = 0;
+ int read_child = -1;
+ afr_private_t *priv = NULL;
- VALIDATE_OR_GOTO (inode, out);
+ priv = this->private;
+ read_child = afr_select_read_child_from_policy (fresh_children,
+ priv->child_count,
+ prev_read_child,
+ config_read_child,
+ NULL);
+ afr_inode_set_read_ctx (this, inode, read_child, fresh_children);
+}
+
+/* afr_next_call_child ()
+ * This is a common function used by all the read-type fops.
+ * This function should not be called with the inode's fresh_children array.
+ * The fop's handler should copy the inode's fresh_children and preferred
+ * read_child into local variables first, because the inode's read_ctx may
+ * change while this function is executing.
+ */
+int32_t
+afr_next_call_child (int32_t *fresh_children, size_t child_count,
+ int32_t *last_index, int32_t read_child)
+{
+ int next_index = 0;
+ int32_t next_call_child = -1;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx);
+ GF_ASSERT (last_index);
- if (ret < 0) {
- ctx = 0;
- }
+ next_index = *last_index;
+retry:
+ next_index++;
+ if (next_index >= child_count)
+ goto out;
+ if (fresh_children[next_index] == read_child)
+ goto retry;
+ if (fresh_children[next_index] == -1)
+ goto out;
+ *last_index = next_index;
+ next_call_child = fresh_children[next_index];
+out:
+ return next_call_child;
+}
- ctx = (~AFR_ICTX_READ_CHILD_MASK & ctx)
- | (AFR_ICTX_READ_CHILD_MASK & read_child);
+ /* This function should not be called with the inode's fresh_children array.
+ * The fop's handler should copy the inode's fresh_children and preferred
+ * read_child into local variables first, because the inode's read_ctx may
+ * change while this function is executing.
+ */
+int32_t
+afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
+ int32_t *fresh_children,
+ int32_t *call_child, int32_t *last_index)
+{
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
- ret = __inode_ctx_put (inode, this, ctx);
- if (ret) {
- gf_log_callingfn (this->name, GF_LOG_INFO,
- "failed to set the inode ctx (%s)",
- uuid_utoa (inode->gfid));
+ GF_ASSERT (child_up);
+ GF_ASSERT (call_child);
+ GF_ASSERT (last_index);
+ GF_ASSERT (fresh_children);
+ GF_ASSERT (read_child >= 0);
+
+ priv = this->private;
+ *call_child = -1;
+ *last_index = -1;
+
+ if (child_up[read_child]) {
+ *call_child = read_child;
+ } else {
+ for (i = 0; i < priv->child_count; i++) {
+ if (fresh_children[i] == -1)
+ break;
+ if (child_up[fresh_children[i]]) {
+ *call_child = fresh_children[i];
+ ret = 0;
+ break;
+ }
}
- }
- UNLOCK (&inode->lock);
+ if (*call_child == -1) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ *last_index = i;
+ }
out:
- return;
+ gf_log (this->name, GF_LOG_DEBUG, "Returning %d, call_child: %d, "
+ "last_index: %d", ret, *call_child, *last_index);
+ return ret;
}
-
void
afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
{
@@ -325,8 +582,12 @@ afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
if (sh->linkname)
GF_FREE ((char *)sh->linkname);
- if (sh->child_success)
- GF_FREE (sh->child_success);
+
+ if (sh->success_children)
+ GF_FREE (sh->success_children);
+
+ if (sh->fresh_children)
+ GF_FREE (sh->fresh_children);
loc_wipe (&sh->parent_loc);
}
@@ -398,6 +659,9 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
if (local->child_up)
GF_FREE (local->child_up);
+ if (local->fresh_children)
+ GF_FREE (local->fresh_children);
+
{ /* lookup */
if (local->cont.lookup.xattrs) {
for (i = 0; i < priv->child_count; i++) {
@@ -424,8 +688,8 @@ afr_local_cleanup (afr_local_t *local, xlator_t *this)
if (local->cont.lookup.bufs)
GF_FREE (local->cont.lookup.bufs);
- if (local->cont.lookup.child_success)
- GF_FREE (local->cont.lookup.child_success);
+ if (local->cont.lookup.success_children)
+ GF_FREE (local->cont.lookup.success_children);
if (local->cont.lookup.sources)
GF_FREE (local->cont.lookup.sources);
@@ -734,20 +998,21 @@ int
afr_lookup_select_read_child (afr_local_t *local, xlator_t *this,
int32_t *read_child)
{
- int32_t source = -1;
- ia_type_t ia_type = 0;
- int ret = -1;
- afr_transaction_type type = AFR_METADATA_TRANSACTION;
- dict_t **xattrs = NULL;
- int32_t *child_success = NULL;
- struct iatt *bufs = NULL;
+ int32_t source = -1;
+ ia_type_t ia_type = 0;
+ int ret = -1;
+ afr_transaction_type type = AFR_METADATA_TRANSACTION;
+ dict_t **xattrs = NULL;
+ int32_t *success_children = NULL;
+ struct iatt *bufs = NULL;
GF_ASSERT (local);
GF_ASSERT (this);
+ GF_ASSERT (local->success_count > 0);
bufs = local->cont.lookup.bufs;
- child_success = local->cont.lookup.child_success;
- ia_type = local->cont.lookup.bufs[child_success[0]].ia_type;
+ success_children = local->cont.lookup.success_children;
+ ia_type = local->cont.lookup.bufs[success_children[0]].ia_type;
if (IA_ISDIR (ia_type)) {
type = AFR_ENTRY_TRANSACTION;
} else if (IA_ISREG (ia_type)) {
@@ -773,7 +1038,7 @@ afr_is_self_heal_running (afr_local_t *local)
}
static void
-afr_launch_self_heal (call_frame_t *frame, xlator_t *this,
+afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
gf_boolean_t is_background, ia_type_t ia_type,
int (*unwind) (call_frame_t *frame, xlator_t *this))
{
@@ -782,6 +1047,7 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this,
GF_ASSERT (frame);
GF_ASSERT (this);
+ GF_ASSERT (inode);
local = frame->local;
local->self_heal.background = is_background;
@@ -796,7 +1062,7 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this,
"background %s self-heal triggered. path: %s",
sh_type_str, local->loc.path);
- afr_self_heal (frame, this);
+ afr_self_heal (frame, this, inode);
}
static void
@@ -813,8 +1079,8 @@ afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this)
bufs = local->cont.lookup.bufs;
for (i = 1; i < local->success_count; i++) {
- child1 = local->cont.lookup.child_success[i-1];
- child2 = local->cont.lookup.child_success[i];;
+ child1 = local->cont.lookup.success_children[i-1];
+ child2 = local->cont.lookup.success_children[i];
afr_detect_self_heal_by_iatt (local, this,
&bufs[child1], &bufs[child2]);
}
@@ -822,7 +1088,7 @@ afr_lookup_detect_self_heal (afr_local_t *local, xlator_t *this)
xattr = local->cont.lookup.xattrs;
priv = this->private;
for (i = 0; i < local->success_count; i++) {
- child1 = local->cont.lookup.child_success[i];;
+ child1 = local->cont.lookup.success_children[i];
afr_lookup_detect_self_heal_by_xattr (local, this,
xattr[child1]);
}
@@ -860,8 +1126,8 @@ afr_lookup_perform_self_heal_if_needed (call_frame_t *frame, xlator_t *this,
goto out;
}
- afr_launch_self_heal (frame, this, _gf_true,
- local->cont.lookup.buf.ia_type,
+ afr_launch_self_heal (frame, this, local->cont.lookup.inode,
+ _gf_true, local->cont.lookup.buf.ia_type,
afr_self_heal_lookup_unwind);
*sh_launched = _gf_true;
}
@@ -875,22 +1141,22 @@ afr_lookup_split_brain (afr_local_t *local, xlator_t *this)
int i = 0;
gf_boolean_t symptom = _gf_false;
struct iatt *bufs = NULL;
- int32_t *child_success = NULL;
+ int32_t *success_children = NULL;
struct iatt *child1 = NULL;
struct iatt *child2 = NULL;
const char *path = NULL;
bufs = local->cont.lookup.bufs;
- child_success = local->cont.lookup.child_success;
+ success_children = local->cont.lookup.success_children;
for (i = 1; i < local->success_count; i++) {
- child1 = &bufs[child_success[i-1]];
- child2 = &bufs[child_success[i]];
+ child1 = &bufs[success_children[i-1]];
+ child2 = &bufs[success_children[i]];
/*
* TODO: gfid self-heal
* if (uuid_compare (child1->ia_gfid, child2->ia_gfid)) {
* gf_log (this->name, GF_LOG_WARNING, "%s: gfid differs"
* " on subvolumes (%d, %d)", local->loc.path,
- * child_success[i-1], child_success[i]);
+ * success_children[i-1], success_children[i]);
* symptom = _gf_true;
* }
*/
@@ -899,7 +1165,7 @@ afr_lookup_split_brain (afr_local_t *local, xlator_t *this)
path = local->loc.path;
gf_log (this->name, GF_LOG_WARNING, "%s: filetype "
"differs on subvolumes (%d, %d)", path,
- child_success[i-1], child_success[i]);
+ success_children[i-1], success_children[i]);
symptom = _gf_true;
local->govinda_gOvinda = 1;
}
@@ -909,13 +1175,42 @@ afr_lookup_split_brain (afr_local_t *local, xlator_t *this)
return symptom;
}
+/* Build the list of fresh children from the children that succeeded.
+ *
+ * success_children is scanned for at most child_count entries, stopping at
+ * the first -1. Every child that afr_is_read_child() accepts for the given
+ * sources is appended to fresh_children in scan order.
+ *
+ * The unused tail of fresh_children is then reset to -1. Readers of these
+ * lists stop at the first -1, so without the reset a reused array would
+ * still expose children left over from an earlier, longer result.
+ *
+ * fresh_children must have room for child_count entries.
+ */
+void
+afr_get_fresh_children (int32_t *success_children, int32_t *sources,
+                        int32_t *fresh_children, unsigned int child_count)
+{
+        unsigned int i = 0;
+        unsigned int j = 0;
+
+        GF_ASSERT (success_children);
+        GF_ASSERT (sources);
+        GF_ASSERT (fresh_children);
+
+        for (i = 0; i < child_count; i++) {
+                if (success_children[i] == -1)
+                        break;
+                if (afr_is_read_child (success_children, sources, child_count,
+                                       success_children[i])) {
+                        fresh_children[j] = success_children[i];
+                        j++;
+                }
+        }
+
+        /* terminate the list: nothing past the last match is valid */
+        for (; j < child_count; j++)
+                fresh_children[j] = -1;
+}
+
static int
-afr_lookup_set_read_child (afr_local_t *local, xlator_t *this, int32_t read_child)
+afr_lookup_set_read_ctx (afr_local_t *local, xlator_t *this, int32_t read_child)
{
+ afr_private_t *priv = NULL;
+
GF_ASSERT (read_child >= 0);
- afr_set_read_child (this, local->cont.lookup.inode, read_child);
+ priv = this->private;
local->cont.lookup.read_child = read_child;
+ afr_get_fresh_children (local->cont.lookup.success_children,
+ local->cont.lookup.sources,
+ local->fresh_children, priv->child_count);
+ afr_inode_set_read_ctx (this, local->cont.lookup.inode, read_child,
+ local->fresh_children);
return 0;
}
@@ -949,7 +1244,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
goto unwind;
}
- ret = afr_lookup_set_read_child (local, this, read_child);
+ ret = afr_lookup_set_read_ctx (local, this, read_child);
if (ret)
goto unwind;
@@ -1070,7 +1365,7 @@ afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_ind
afr_lookup_cache_args (local, child_index, xattr,
buf, postparent);
- local->cont.lookup.child_success[local->success_count] = child_index;
+ local->cont.lookup.success_children[local->success_count] = child_index;
local->success_count++;
}
@@ -1114,9 +1409,8 @@ int
afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
{
int ret = -ENOMEM;
- int32_t *child_success = NULL;
struct iatt *iatts = NULL;
- int i = 0;
+ int32_t *success_children = NULL;
GF_ASSERT (local);
local->cont.lookup.xattrs = GF_CALLOC (child_count,
@@ -1135,14 +1429,14 @@ afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
goto out;
local->cont.lookup.bufs = iatts;
- child_success = GF_CALLOC (child_count, sizeof (*child_success),
- gf_afr_mt_char);
- if (NULL == child_success)
+ success_children = afr_fresh_children_create (child_count);
+ if (NULL == success_children)
goto out;
- for (i = 0; i < child_count; i++)
- child_success[i] = -1;
+ local->cont.lookup.success_children = success_children;
- local->cont.lookup.child_success = child_success;
+ local->fresh_children = afr_fresh_children_create (child_count);
+ if (NULL == local->fresh_children)
+ goto out;
local->cont.lookup.read_child = -1;
ret = 0;
@@ -1181,8 +1475,8 @@ afr_lookup (call_frame_t *frame, xlator_t *this,
if (ret == 0) {
/* lookup is a revalidate */
- local->read_child_index = afr_read_child (this,
- loc->inode);
+ local->read_child_index = afr_inode_get_read_ctx (this, loc->inode,
+ NULL);
} else {
LOCK (&priv->read_child_lock);
{
@@ -1611,7 +1905,7 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
- read_child = afr_read_child (this, local->fd->inode);
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
LOCK (&frame->lock);
{
@@ -2533,6 +2827,25 @@ out:
}
int
+afr_forget (xlator_t *this, inode_t *inode)
+{
+        /* Release the AFR context attached to an inode: read the stored
+         * context value, and if it is non-zero free the fresh_children
+         * array and then the context itself. Always returns 0.
+         */
+        uint64_t         stored = 0;
+        afr_inode_ctx_t *ictx   = NULL;
+
+        inode_ctx_get (inode, this, &stored);
+
+        if (stored != 0) {
+                ictx = (afr_inode_ctx_t *)(long)stored;
+                if (ictx->fresh_children)
+                        GF_FREE (ictx->fresh_children);
+                GF_FREE (ictx);
+        }
+
+        return 0;
+}
+
+int
afr_priv_dump (xlator_t *this)
{
afr_private_t *priv = NULL;
@@ -2759,6 +3072,16 @@ out:
int
AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv)
{
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+ local->call_count = afr_up_children_count (priv->child_count,
+ priv->child_up);
+ if (local->call_count == 0) {
+ gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up");
+ return -ENOTCONN;
+ }
+
+
local->child_up = GF_CALLOC (sizeof (*local->child_up),
priv->child_count,
gf_afr_mt_char);
@@ -2769,16 +3092,6 @@ AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv)
memcpy (local->child_up, priv->child_up,
sizeof (*local->child_up) * priv->child_count);
- local->call_count = afr_up_children_count (priv->child_count,
- local->child_up);
- local->op_ret = -1;
- local->op_errno = EUCLEAN;
-
- if (local->call_count == 0) {
- gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up");
- return -ENOTCONN;
- }
-
return 0;
}
@@ -2849,6 +3162,10 @@ afr_transaction_local_init (afr_local_t *local, afr_private_t *priv)
if (!local->pending)
goto out;
+ local->fresh_children = afr_fresh_children_create (priv->child_count);
+ if (!local->fresh_children)
+ goto out;
+
for (i = 0; i < priv->child_count; i++) {
local->pending[i] = GF_CALLOC (sizeof (*local->pending[i]),
3, /* data + metadata + entry */
@@ -2867,3 +3184,50 @@ afr_transaction_local_init (afr_local_t *local, afr_private_t *priv)
out:
return ret;
}
+
+/* Mark the first child_count slots of a child list as unused (-1).
+ *
+ * The index is signed like child_count: with an unsigned index a negative
+ * count would be converted to a huge bound and the loop would write far
+ * past the array. A count <= 0 now leaves the array untouched.
+ */
+void
+afr_reset_children (int32_t *fresh_children, int32_t child_count)
+{
+        int32_t i = 0;
+
+        for (i = 0; i < child_count; i++)
+                fresh_children[i] = -1;
+}
+
+/* Allocate a child list with child_count slots, all set to -1 (unused).
+ *
+ * Returns the new array, or NULL when the allocation fails. The array is
+ * obtained with GF_CALLOC; code elsewhere in this file releases such lists
+ * with GF_FREE.
+ */
+int32_t*
+afr_fresh_children_create (int32_t child_count)
+{
+        int32_t *list = NULL;
+        int      slot = 0;
+
+        GF_ASSERT (child_count > 0);
+
+        list = GF_CALLOC (child_count, sizeof (*list), gf_afr_mt_int32_t);
+        if (list != NULL) {
+                while (slot < child_count)
+                        list[slot++] = -1;
+        }
+
+        return list;
+}
+
+/* Append `child` to a -1 terminated child list unless it is already there.
+ *
+ * The list is scanned for at most child_count entries. If `child` is found
+ * nothing changes; otherwise it is stored in the first free (-1) slot.
+ *
+ * When the list is full and does not contain `child` there is no free slot.
+ * That case is still flagged with GF_ASSERT, but the store is also guarded
+ * explicitly so it can never land one element past the end of the array
+ * (NOTE(review): GF_ASSERT presumably does not abort in every build --
+ * confirm).
+ */
+void
+afr_fresh_children_add_child (int32_t *fresh_children, int32_t child,
+                              int32_t child_count)
+{
+        int i = 0;
+
+        for (i = 0; i < child_count; i++) {
+                if (fresh_children[i] == -1)
+                        break;
+                if (fresh_children[i] == child)
+                        return; /* already listed */
+        }
+
+        GF_ASSERT (i < child_count);
+        if (i < child_count)
+                fresh_children[i] = child;
+}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 1bd2cc96392..8593d0c14c5 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -177,7 +177,7 @@ out:
" forced merge option set",
sh_type_str, local->loc.path);
- afr_self_heal (frame, this);
+ afr_self_heal (frame, this, local->fd->inode);
} else {
afr_set_opendir_done (this, local->fd->inode);
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 88c3f728f49..6da666804de 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -119,13 +119,14 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
fd_t *fd, inode_t *inode, struct iatt *buf,
struct iatt *preparent, struct iatt *postparent)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = 0;
- int call_count = -1;
- int child_index = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = 0;
+ int call_count = -1;
+ int child_index = -1;
+ int32_t *fresh_children = NULL;
local = frame->local;
priv = this->private;
@@ -166,18 +167,9 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
fd_ctx->opened_on[child_index] = 1;
fd_ctx->flags = local->cont.create.flags;
- if (local->success_count == 0) {
+ if (local->success_count == 0)
local->cont.create.buf = *buf;
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
if (child_index == local->read_child_index) {
local->cont.create.read_child_buf = *buf;
local->cont.create.preparent = *preparent;
@@ -186,6 +178,8 @@ afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->cont.create.inode = inode;
+ fresh_children = local->fresh_children;
+ fresh_children[local->success_count] = child_index;
local->success_count++;
}
@@ -198,6 +192,10 @@ unlock:
call_count = afr_frame_return (frame);
if (call_count == 0) {
+ afr_set_read_ctx_from_policy (this, inode,
+ local->fresh_children,
+ local->read_child_index,
+ priv->read_child);
local->transaction.unwind (frame, this);
local->transaction.resume (frame, this);
@@ -382,10 +380,11 @@ afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *buf, struct iatt *preparent,
struct iatt *postparent)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int child_index = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int child_index = -1;
+ int32_t *fresh_children = NULL;
local = frame->local;
priv = this->private;
@@ -400,18 +399,9 @@ afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret != -1) {
local->op_ret = op_ret;
- if (local->success_count == 0){
+ if (local->success_count == 0)
local->cont.mknod.buf = *buf;
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
if (child_index == local->read_child_index) {
local->cont.mknod.read_child_buf = *buf;
local->cont.mknod.preparent = *preparent;
@@ -420,6 +410,8 @@ afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->cont.mknod.inode = inode;
+ fresh_children = local->fresh_children;
+ fresh_children[local->success_count] = child_index;
local->success_count++;
}
@@ -430,6 +422,10 @@ afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
+ afr_set_read_ctx_from_policy (this, inode,
+ local->fresh_children,
+ local->read_child_index,
+ priv->read_child);
local->transaction.unwind (frame, this);
local->transaction.resume (frame, this);
@@ -609,10 +605,11 @@ afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *buf, struct iatt *preparent,
struct iatt *postparent)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int child_index = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int child_index = -1;
+ int32_t *fresh_children = NULL;
local = frame->local;
priv = this->private;
@@ -627,18 +624,9 @@ afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret != -1) {
local->op_ret = op_ret;
- if (local->success_count == 0) {
+ if (local->success_count == 0)
local->cont.mkdir.buf = *buf;
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
-
if (child_index == local->read_child_index) {
local->cont.mkdir.read_child_buf = *buf;
local->cont.mkdir.preparent = *preparent;
@@ -647,6 +635,8 @@ afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->cont.mkdir.inode = inode;
+ fresh_children = local->fresh_children;
+ fresh_children[local->success_count] = child_index;
local->success_count++;
}
@@ -657,6 +647,10 @@ afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
+ afr_set_read_ctx_from_policy (this, inode,
+ local->fresh_children,
+ local->read_child_index,
+ priv->read_child);
local->transaction.unwind (frame, this);
local->transaction.resume (frame, this);
@@ -837,10 +831,11 @@ afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *buf, struct iatt *preparent,
struct iatt *postparent)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int child_index = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int child_index = -1;
+ int32_t *fresh_children = NULL;
local = frame->local;
priv = this->private;
@@ -857,14 +852,6 @@ afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (local->success_count == 0) {
local->cont.link.buf = *buf;
-
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
}
if (child_index == local->read_child_index) {
@@ -875,6 +862,8 @@ afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->cont.link.inode = inode;
+ fresh_children = local->fresh_children;
+ fresh_children[local->success_count] = child_index;
local->success_count++;
}
@@ -885,6 +874,10 @@ afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
+ afr_set_read_ctx_from_policy (this, inode,
+ local->fresh_children,
+ local->read_child_index,
+ priv->read_child);
local->transaction.unwind (frame, this);
local->transaction.resume (frame, this);
@@ -1062,10 +1055,11 @@ afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *buf, struct iatt *preparent,
struct iatt *postparent)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int child_index = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int child_index = -1;
+ int32_t *fresh_children = NULL;
local = frame->local;
priv = this->private;
@@ -1080,16 +1074,8 @@ afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (op_ret != -1) {
local->op_ret = op_ret;
- if (local->success_count == 0) {
+ if (local->success_count == 0)
local->cont.symlink.buf = *buf;
- if (priv->read_child >= 0) {
- afr_set_read_child (this, inode,
- priv->read_child);
- } else {
- afr_set_read_child (this, inode,
- local->read_child_index);
- }
- }
if (child_index == local->read_child_index) {
local->cont.symlink.read_child_buf = *buf;
@@ -1099,6 +1085,8 @@ afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->cont.symlink.inode = inode;
+ fresh_children = local->fresh_children;
+ fresh_children[local->success_count] = child_index;
local->success_count++;
}
@@ -1109,6 +1097,10 @@ afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
+ afr_set_read_ctx_from_policy (this, inode,
+ local->fresh_children,
+ local->read_child_index,
+ priv->read_child);
local->transaction.unwind (frame, this);
local->transaction.resume (frame, this);
@@ -1424,7 +1416,7 @@ afr_rename (call_frame_t *frame, xlator_t *this,
loc_copy (&local->loc, oldloc);
loc_copy (&local->newloc, newloc);
- local->read_child_index = afr_read_child (this, oldloc->inode);
+ local->read_child_index = afr_inode_get_read_ctx (this, oldloc->inode, NULL);
local->cont.rename.ino = oldloc->inode->ino;
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 742d3687ceb..f2507f07ef4 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -134,7 +134,15 @@ afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask)
ALLOC_OR_GOTO (local, afr_local_t, out);
- read_child = afr_read_child (this, loc->inode);
+ local->fresh_children = GF_CALLOC (priv->child_count,
+ sizeof (*local->fresh_children),
+ gf_afr_mt_int32_t);
+ if (!local->fresh_children) { /* allocation failed */
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
if ((read_child >= 0) && (priv->child_up[read_child])) {
call_child = read_child;
@@ -252,7 +260,14 @@ afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc)
frame->local = local;
- read_child = afr_read_child (this, loc->inode);
+ local->fresh_children = GF_CALLOC (priv->child_count,
+ sizeof (*local->fresh_children),
+ gf_afr_mt_int32_t);
+ if (!local->fresh_children) { /* allocation failed */
+ op_errno = ENOMEM;
+ goto out;
+ }
+ read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
if ((read_child >= 0) && (priv->child_up[read_child])) {
call_child = read_child;
@@ -375,7 +390,14 @@ afr_fstat (call_frame_t *frame, xlator_t *this,
VALIDATE_OR_GOTO (fd->inode, out);
- read_child = afr_read_child (this, fd->inode);
+ local->fresh_children = GF_CALLOC (priv->child_count,
+ sizeof (*local->fresh_children),
+ gf_afr_mt_int32_t);
+ if (!local->fresh_children) { /* allocation failed */
+ op_errno = ENOMEM;
+ goto out;
+ }
+ read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children);
if ((read_child >= 0) && (priv->child_up[read_child])) {
call_child = read_child;
@@ -494,7 +516,14 @@ afr_readlink (call_frame_t *frame, xlator_t *this,
frame->local = local;
- read_child = afr_read_child (this, loc->inode);
+ local->fresh_children = GF_CALLOC (priv->child_count,
+ sizeof (*local->fresh_children),
+ gf_afr_mt_int32_t);
+ if (!local->fresh_children) { /* allocation failed */
+ op_errno = ENOMEM;
+ goto out;
+ }
+ read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
if ((read_child >= 0) && (priv->child_up[read_child])) {
call_child = read_child;
@@ -879,7 +908,14 @@ afr_getxattr (call_frame_t *frame, xlator_t *this,
}
}
- read_child = afr_read_child (this, loc->inode);
+ local->fresh_children = GF_CALLOC (priv->child_count,
+ sizeof (*local->fresh_children),
+ gf_afr_mt_int32_t);
+ if (!local->fresh_children) { /* allocation failed */
+ op_errno = ENOMEM;
+ goto out;
+ }
+ read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
if ((read_child >= 0) && (priv->child_up[read_child])) {
call_child = read_child;
@@ -1020,7 +1056,14 @@ afr_readv (call_frame_t *frame, xlator_t *this,
frame->local = local;
- read_child = afr_read_child (this, fd->inode);
+ local->fresh_children = GF_CALLOC (priv->child_count,
+ sizeof (*local->fresh_children),
+ gf_afr_mt_int32_t);
+ if (!local->fresh_children) { /* allocation failed */
+ op_errno = ENOMEM;
+ goto out;
+ }
+ read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children);
if ((read_child >= 0) && (priv->child_up[read_child])) {
call_child = read_child;
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 8b82add6276..564bb953a52 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -87,7 +87,7 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
- read_child = afr_read_child (this, local->fd->inode);
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
LOCK (&frame->lock);
{
@@ -343,7 +343,7 @@ afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
priv = this->private;
- read_child = afr_read_child (this, local->loc.inode);
+ read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
LOCK (&frame->lock);
{
@@ -550,7 +550,7 @@ afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
priv = this->private;
- read_child = afr_read_child (this, local->fd->inode);
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
LOCK (&frame->lock);
{
@@ -712,8 +712,8 @@ afr_ftruncate (call_frame_t *frame, xlator_t *this,
priv = this->private;
ALLOC_OR_GOTO (local, afr_local_t, out);
-
ret = AFR_LOCAL_INIT (local, priv);
+
if (ret < 0) {
op_errno = -ret;
goto out;
@@ -797,7 +797,7 @@ afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
priv = this->private;
- read_child = afr_read_child (this, local->loc.inode);
+ read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
LOCK (&frame->lock);
{
@@ -1004,7 +1004,7 @@ afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
priv = this->private;
- read_child = afr_read_child (this, local->fd->inode);
+ read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
LOCK (&frame->lock);
{
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index 14064ebcd76..de2049589e7 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -42,6 +42,7 @@ enum gf_afr_mem_types_ {
gf_afr_mt_entry_name,
gf_afr_mt_pump_priv,
gf_afr_mt_locked_fd,
+ gf_afr_mt_inode_ctx_t,
gf_afr_mt_end
};
#endif
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index e6304a5ea7d..4aa587399b5 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -434,7 +434,7 @@ afr_openfd_sh (call_frame_t *frame, xlator_t *this)
"path: %s, reason: Replicate up down flush, data lock is held",
sh_type_str, local->loc.path);
- afr_self_heal (frame, this);
+ afr_self_heal (frame, this, local->fd->inode);
return 0;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index abc9ccb0fbe..16345bee738 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1640,18 +1640,15 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
afr_local_t * local = NULL;
afr_self_heal_t * sh = NULL;
char sh_type_str[256] = {0,};
+ gf_boolean_t split_brain = _gf_false;
priv = this->private;
local = bgsh_frame->local;
sh = &local->self_heal;
- if (local->govinda_gOvinda) {
- afr_set_split_brain (this, local->cont.lookup.inode,
- _gf_true);
- } else {
- afr_set_split_brain (this, local->cont.lookup.inode,
- _gf_false);
- }
+ if (local->govinda_gOvinda)
+ split_brain = _gf_true;
+ afr_set_split_brain (this, sh->inode, split_brain);
afr_self_heal_type_str_get (sh, sh_type_str,
sizeof(sh_type_str));
@@ -1683,7 +1680,7 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
}
int
-afr_self_heal (call_frame_t *frame, xlator_t *this)
+afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
{
afr_local_t *local = NULL;
afr_self_heal_t *sh = NULL;
@@ -1726,6 +1723,7 @@ afr_self_heal (call_frame_t *frame, xlator_t *this)
sh_local = afr_local_copy (local, this);
sh_frame->local = sh_local;
sh = &sh_local->self_heal;
+ sh->inode = inode;
sh->orig_frame = frame;
@@ -1761,8 +1759,8 @@ afr_self_heal (call_frame_t *frame, xlator_t *this)
priv->child_count,
gf_afr_mt_int32_t);
}
- sh->child_success = GF_CALLOC (sizeof (*sh->child_success),
- priv->child_count, gf_afr_mt_int32_t);
+ sh->success_children = afr_fresh_children_create (priv->child_count);
+ sh->fresh_children = afr_fresh_children_create (priv->child_count);
FRAME_SU_DO (sh_frame, afr_local_t);
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 3ee1db0e726..f9a25797275 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -299,12 +299,25 @@ afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xattr)
{
+ afr_local_t *local = NULL;
int call_count = 0;
+ long i = 0;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ local = frame->local;
+ priv = this->private;
+ sh = &local->self_heal;
+ i = (long)cookie;
+
+ afr_fresh_children_add_child (sh->fresh_children, i, priv->child_count);
call_count = afr_frame_return (frame);
- if (call_count == 0)
+ if (call_count == 0) {
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
afr_sh_data_finish (frame, this);
+ }
return 0;
}
@@ -602,7 +615,7 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
nsources = afr_mark_sources (sh->sources, sh->pending_matrix, sh->buf,
priv->child_count, AFR_SELF_HEAL_DATA,
- sh->child_success, this->name);
+ sh->success_children, this->name);
if (nsources == 0) {
gf_log (this->name, GF_LOG_TRACE,
@@ -670,7 +683,11 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
sh->sources[i] = 0;
}
- afr_set_read_child (this, local->loc.inode, sh->source);
+ afr_reset_children (sh->fresh_children, priv->child_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, priv->child_count);
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
/*
quick-read might have read the file, so send xattr from
@@ -691,56 +708,6 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
return 0;
}
-gf_boolean_t
-afr_is_fresh_read_child (int32_t *sources, int32_t child_count,
- int32_t read_child)
-{
- gf_boolean_t is_fresh_child = _gf_false;
-
- GF_ASSERT (read_child < child_count);
-
- if ((read_child >= 0) && (read_child < child_count) &&
- sources[read_child]) {
- is_fresh_child = _gf_true;
- }
- return is_fresh_child;
-}
-
-static int
-afr_select_read_child_from_policy (int32_t *sources, int32_t child_count,
- int32_t prev_read_child,
- int32_t config_read_child,
- int32_t *valid_children)
-{
- int32_t read_child = -1;
- int i = 0;
-
- GF_ASSERT (sources);
-
- read_child = prev_read_child;
- if (_gf_true == afr_is_fresh_read_child (sources, child_count,
- read_child))
- goto out;
-
- read_child = config_read_child;
- if (_gf_true == afr_is_fresh_read_child (sources, child_count,
- read_child))
- goto out;
-
- for (i = 0; i < child_count; i++) {
- read_child = valid_children[i];
- if (read_child < 0)
- break;
- if (_gf_true == afr_is_fresh_read_child (sources, child_count,
- read_child))
- goto out;
- }
- read_child = -1;
-
-out:
- return read_child;
-}
-
static void
afr_destroy_pending_matrix (int32_t **pending_matrix, int32_t child_count)
{
@@ -796,7 +763,7 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
int32_t **pending_matrix = NULL;
int32_t *sources = NULL;
- int32_t *valid_children = NULL;
+ int32_t *success_children = NULL;
struct iatt *bufs = NULL;
int32_t nsources = 0;
int32_t prev_read_child = -1;
@@ -805,7 +772,7 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
priv = this->private;
bufs = local->cont.lookup.bufs;
- valid_children = local->cont.lookup.child_success;
+ success_children = local->cont.lookup.success_children;
sh = &local->self_heal;
pending_matrix = afr_create_pending_matrix (priv->child_count);
@@ -826,7 +793,7 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
nsources = afr_mark_sources (sources, pending_matrix, bufs,
priv->child_count, sh_type,
- valid_children, this->name);
+ success_children, this->name);
if (nsources < 0) {
ret = -1;
goto out;
@@ -834,11 +801,11 @@ afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
prev_read_child = local->read_child_index;
config_read_child = priv->read_child;
- read_child = afr_select_read_child_from_policy (sources,
+ read_child = afr_select_read_child_from_policy (success_children,
priv->child_count,
prev_read_child,
config_read_child,
- valid_children);
+ sources);
ret = 0;
local->cont.lookup.sources = sources;
out:
@@ -875,7 +842,7 @@ afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie,
priv->children[child_index]->name);
sh->buf[child_index] = *buf;
- sh->child_success[sh->success_count] = child_index;
+ sh->success_children[sh->success_count] = child_index;
sh->success_count++;
}
}
@@ -909,8 +876,7 @@ afr_sh_data_fstat (call_frame_t *frame, xlator_t *this)
local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++)
- sh->child_success[i] = -1;
+ afr_reset_children (sh->success_children, priv->child_count);
sh->success_count = 0;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index 0425644b3fc..50870afb204 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -125,12 +125,16 @@ afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie,
afr_local_t *orig_local = NULL;
call_frame_t *orig_frame = NULL;
afr_private_t *priv = NULL;
+ int32_t read_child = -1;
local = frame->local;
priv = this->private;
+ sh = &local->self_heal;
+ i = (long)cookie;
+
+ afr_fresh_children_add_child (sh->fresh_children, i, priv->child_count);
if (op_ret == -1) {
- i = (long)cookie;
gf_log (this->name, GF_LOG_INFO,
"%s: failed to erase pending xattrs on %s (%s)",
local->loc.path, priv->children[i]->name,
@@ -140,8 +144,14 @@ afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie,
call_count = afr_frame_return (frame);
if (call_count == 0) {
- sh = &local->self_heal;
-
+ if (sh->source == -1) {
+ //this happens if the forced merge option is set
+ read_child = sh->fresh_children[0];
+ } else {
+ read_child = sh->source;
+ }
+ afr_inode_set_read_ctx (this, sh->inode, read_child,
+ sh->fresh_children);
orig_frame = sh->orig_frame;
orig_local = orig_frame->local;
@@ -2165,7 +2175,7 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this)
nsources = afr_mark_sources (sh->sources, sh->pending_matrix, sh->buf,
priv->child_count, AFR_SELF_HEAL_ENTRY,
- sh->child_success, this->name);
+ sh->success_children, this->name);
if (nsources == 0) {
gf_log (this->name, GF_LOG_TRACE,
@@ -2180,6 +2190,13 @@ afr_sh_entry_fix (call_frame_t *frame, xlator_t *this)
sh->source = source;
+ afr_reset_children (sh->fresh_children, priv->child_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, priv->child_count);
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
+
+
heal:
afr_sh_entry_sync_prepare (frame, this);
@@ -2208,7 +2225,7 @@ afr_sh_entry_lookup_cbk (call_frame_t *frame, void *cookie,
if (op_ret != -1) {
sh->xattr[child_index] = dict_ref (xattr);
sh->buf[child_index] = *buf;
- sh->child_success[sh->success_count] = child_index;
+ sh->success_children[sh->success_count] = child_index;
sh->success_count++;
}
}
@@ -2258,8 +2275,7 @@ afr_sh_entry_lookup (call_frame_t *frame, xlator_t *this)
}
}
- for (i = 0; i < priv->child_count; i++)
- sh->child_success[i] = -1;
+ afr_reset_children (sh->success_children, priv->child_count);
sh->success_count = 0;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index fe1db60e2e4..5993e9596ba 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -147,15 +147,32 @@ afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
xlator_t *this, int32_t op_ret,
int32_t op_errno, dict_t *xattr)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
int call_count = 0;
+ long i = 0;
+ afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
local = frame->local;
+ priv = this->private;
+ sh = &local->self_heal;
+ i = (long)cookie;
+ if ((!IA_ISREG (sh->buf[sh->source].ia_type)) &&
+ (!IA_ISDIR (sh->buf[sh->source].ia_type))) {
+ afr_fresh_children_add_child (sh->fresh_children, i,
+ priv->child_count);
+ }
call_count = afr_frame_return (frame);
- if (call_count == 0)
+ if (call_count == 0) {
+ if ((!IA_ISREG (sh->buf[sh->source].ia_type)) &&
+ (!IA_ISDIR (sh->buf[sh->source].ia_type))) {
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
+ }
afr_sh_metadata_finish (frame, this);
+ }
return 0;
}
@@ -483,7 +500,7 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)
nsources = afr_mark_sources (sh->sources, sh->pending_matrix, sh->buf,
priv->child_count, AFR_SELF_HEAL_METADATA,
- sh->child_success, this->name);
+ sh->success_children, this->name);
if (nsources == 0) {
gf_log (this->name, GF_LOG_TRACE,
@@ -545,6 +562,16 @@ afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this)
sh->sources[i] = 0;
}
+ if ((!IA_ISREG (sh->buf[source].ia_type)) &&
+ (!IA_ISDIR (sh->buf[source].ia_type))) {
+ afr_reset_children (sh->fresh_children,
+ priv->child_count);
+ afr_get_fresh_children (sh->success_children, sh->sources,
+ sh->fresh_children, priv->child_count);
+ afr_inode_set_read_ctx (this, sh->inode, sh->source,
+ sh->fresh_children);
+ }
+
afr_sh_metadata_sync_prepare (frame, this);
return 0;
@@ -582,7 +609,7 @@ afr_sh_metadata_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
sh->buf[child_index] = *buf;
if (xattr)
sh->xattr[child_index] = dict_ref (xattr);
- sh->child_success[sh->success_count] = child_index;
+ sh->success_children[sh->success_count] = child_index;
sh->success_count++;
} else {
gf_log (this->name, GF_LOG_INFO,
@@ -637,8 +664,7 @@ afr_sh_metadata_lookup (call_frame_t *frame, xlator_t *this)
}
}
- for (i = 0; i < priv->child_count; i++)
- sh->child_success[i] = -1;
+ afr_reset_children (sh->success_children, priv->child_count);
sh->success_count = 0;
for (i = 0; i < priv->child_count; i++) {
if (local->child_up[i]) {
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 976dae4754d..1056a366223 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -49,11 +49,8 @@ int
afr_self_heal_get_source (xlator_t *this, afr_local_t *local, dict_t **xattr);
int
-afr_self_heal (call_frame_t *frame, xlator_t *this);
+afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode);
-gf_boolean_t
-afr_is_fresh_read_child (int32_t *sources, int32_t child_count,
- int32_t read_child);
int
afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
dict_t **xattr,
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 7652d3d1e35..b8d2e27a448 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -407,24 +407,31 @@ void
afr_update_read_child (call_frame_t *frame, xlator_t *this, inode_t *inode,
afr_transaction_type type)
{
- int curr_read_child = -1;
- int new_read_child = -1;
+ int curr_read_child = -1;
+ int new_read_child = -1;
afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int **pending = NULL;
- int idx = 0;
+ afr_local_t *local = NULL;
+ int **pending = NULL;
+ int idx = 0;
+ int32_t *fresh_children = NULL;
+ size_t success_count = 0;
idx = afr_index_for_transaction_type (type);
priv = this->private;
local = frame->local;
- curr_read_child = afr_read_child (this, inode);
+ curr_read_child = afr_inode_get_read_ctx (this, inode, NULL);
pending = local->pending;
+ GF_ASSERT (curr_read_child >= 0);
+
if (pending[curr_read_child][idx] != 0)
- return;
+ goto out;
- /* need to set new read_child */
+ fresh_children = GF_CALLOC (priv->child_count, sizeof (*fresh_children),
+ gf_afr_mt_int32_t);
+ if (!fresh_children)
+ goto out;
for (new_read_child = 0; new_read_child < priv->child_count;
new_read_child++) {
@@ -435,15 +442,16 @@ afr_update_read_child (call_frame_t *frame, xlator_t *this, inode_t *inode,
if (pending[new_read_child][idx] == 0)
/* op just failed */
continue;
-
- break;
+ fresh_children[success_count] = new_read_child;
+ success_count++;
}
- if (new_read_child == priv->child_count)
- /* all children uneligible. leave as-is */
- return;
-
- afr_set_read_child (this, inode, new_read_child);
+ afr_inode_set_read_ctx (this, inode, fresh_children[0],
+ fresh_children);
+out:
+ if (fresh_children)
+ GF_FREE (fresh_children);
+ return;
}
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index d8939ab4d62..c6705fc68d0 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -814,6 +814,7 @@ struct xlator_dumpops dumpops = {
struct xlator_cbks cbks = {
.release = afr_release,
.releasedir = afr_releasedir,
+ .forget = afr_forget,
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 55c485f14d8..0b8f96ec8b9 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -37,6 +37,22 @@
struct _pump_private;
+typedef struct afr_inode_params_ {
+ uint64_t mask_type;
+ union {
+ gf_boolean_t value;
+ struct {
+ int32_t read_child;
+ int32_t *fresh_children;
+ } read_ctx;
+ } u;
+} afr_inode_params_t;
+
+typedef struct afr_inode_ctx_ {
+ uint64_t masks;
+ int32_t *fresh_children;//increasing order of latency
+} afr_inode_ctx_t;
+
typedef struct _afr_private {
gf_lock_t lock; /* to guard access to child_count, etc */
unsigned int child_count; /* total number of children */
@@ -121,6 +137,8 @@ typedef struct {
ia_type_t type; /* st_mode of the entry we're doing
self-heal on */
+ inode_t *inode; /* inode on which the self-heal is
+ performed */
/* Function to call to unwind. If self-heal is being done in the
background, this function will be called as soon as possible. */
@@ -140,8 +158,10 @@ typedef struct {
/* array containing if the lookups succeeded in the order of response
*/
- int32_t *child_success;
+ int32_t *success_children;
int success_count;
+ /* array containing the fresh children found in the self-heal process */
+ int32_t *fresh_children;
/* array of errno's, one for each child */
int *child_errno;
@@ -311,6 +331,7 @@ typedef struct _afr_local {
glusterfs_fop_t fop;
unsigned char *child_up;
+ int32_t *fresh_children; //in the order of response
int32_t *child_errno;
@@ -354,8 +375,8 @@ typedef struct _afr_local {
struct iatt *postparents;
struct iatt *bufs;
int32_t read_child;
- int32_t *child_success;//in the order of response
int32_t *sources;
+ int32_t *success_children;
} lookup;
struct {
@@ -732,11 +753,12 @@ int pump_start (call_frame_t *frame, xlator_t *this);
int
afr_fd_ctx_set (xlator_t *this, fd_t *fd);
-uint64_t
-afr_read_child (xlator_t *this, inode_t *inode);
+int32_t
+afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children);
void
-afr_set_read_child (xlator_t *this, inode_t *inode, int32_t read_child);
+afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
+ int32_t *fresh_children);
void
afr_build_parent_loc (loc_t *parent, loc_t *child);
@@ -772,7 +794,7 @@ afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
void
afr_set_opendir_done (xlator_t *this, inode_t *inode);
-uint64_t
+gf_boolean_t
afr_is_opendir_done (xlator_t *this, inode_t *inode);
void
@@ -829,12 +851,24 @@ int32_t
afr_marker_getxattr (call_frame_t *frame, xlator_t *this,
loc_t *loc, const char *name,afr_local_t *local, afr_private_t *priv );
+int32_t *
+afr_fresh_children_create (int32_t child_count);
+
int
AFR_LOCAL_INIT (afr_local_t *local, afr_private_t *priv);
int
afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
transaction_lk_type_t lk_type);
+int
+afr_select_read_child_from_policy (int32_t *fresh_children, int32_t child_count,
+ int32_t prev_read_child,
+ int32_t config_read_child, int32_t *sources);
+
+void
+afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
+ int32_t *fresh_children, int32_t prev_read_child,
+ int32_t config_read_child);
/**
* first_up_child - return the index of the first child that is up
@@ -862,4 +896,15 @@ afr_first_up_child (afr_private_t *priv)
return ret;
}
+int32_t
+afr_next_call_child (int32_t *fresh_children, size_t child_count,
+ int32_t *last_index, int32_t read_child);
+void
+afr_get_fresh_children (int32_t *success_children, int32_t *sources,
+ int32_t *fresh_children, unsigned int child_count);
+void
+afr_fresh_children_add_child (int32_t *fresh_children, int32_t child,
+ int32_t child_count);
+void
+afr_reset_children (int32_t *fresh_children, int32_t child_count);
#endif /* __AFR_H__ */
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c
index 48ce2c94568..300b0850443 100644
--- a/xlators/cluster/afr/src/pump.c
+++ b/xlators/cluster/afr/src/pump.c
@@ -1536,7 +1536,14 @@ pump_getxattr (call_frame_t *frame, xlator_t *this,
return 0;
}
- read_child = afr_read_child (this, loc->inode);
+ local->fresh_children = GF_CALLOC (priv->child_count,
+ sizeof (*local->fresh_children),
+ gf_afr_mt_int32_t);
+ if (!local->fresh_children) { /* allocation failed */
+ op_errno = ENOMEM;
+ goto out;
+ }
+ read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
if (read_child >= 0) {
call_child = read_child;