summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Pranith Kumar K <pranithk@gluster.com> 2012-04-03 08:12:34 +0530
committer Vijay Bellur <vijay@gluster.com> 2012-04-05 05:07:19 -0700
commit 7d62749f14663ea6c0000a4aab2c32041cbb4e75 (patch)
tree a7b8228ece64b1e842a4f29b56a23b80d8173019
parent 1f3a0dd4742a2fcd3215aee4a5e22125d7ea4f4d (diff)
self-heald: Find self-heal failures, split-brain
Change-Id: Ib967f0fe0b537fe60e51d7d05462b58a7f16596e
BUG: 806745
Signed-off-by: Pranith Kumar K <pranithk@gluster.com>
Reviewed-on: http://review.gluster.com/3077
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-by: Vijay Bellur <vijay@gluster.com>
-rw-r--r-- xlators/cluster/afr/src/afr-common.c 15
-rw-r--r-- xlators/cluster/afr/src/afr-dir-read.c 2
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-common.c 5
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-data.c 3
-rw-r--r-- xlators/cluster/afr/src/afr-self-heald.c 19
-rw-r--r-- xlators/cluster/afr/src/afr.h 5
6 files changed, 36 insertions, 13 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 2cfe92acfca..9874b2619b0 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1301,7 +1301,8 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
xlator_t *this),
int (*unwind) (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno))
+ int32_t op_ret, int32_t op_errno,
+ int32_t sh_failed))
{
afr_local_t *local = NULL;
char sh_type_str[256] = {0,};
@@ -1527,9 +1528,12 @@ afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)
int
afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+ int32_t op_ret, int32_t op_errno,
+ int32_t sh_failed)
{
afr_local_t *local = NULL;
+ int ret = -1;
+ dict_t *xattr = NULL;
local = frame->local;
@@ -1544,6 +1548,13 @@ afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
}
afr_lookup_done_success_action (frame, this, _gf_true);
+ xattr = local->cont.lookup.xattr;
+ if (xattr) {
+ ret = dict_set_int32 (xattr, "sh-failed", sh_failed);
+ if (ret)
+ gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
+ "sh-failed to %d", local->loc.path, sh_failed);
+ }
out:
AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
local->cont.lookup.inode, &local->cont.lookup.buf,
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index 5abbd9c138a..87282256575 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -51,7 +51,7 @@
int
afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+ int32_t op_errno, int32_t sh_failed)
{
afr_local_t *local = NULL;
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index bd0e0462616..af5aadc3c96 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -2048,7 +2048,8 @@ afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
FRAME_SU_UNDO (bgsh_frame, afr_local_t);
if (!sh->unwound && sh->unwind) {
- sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno);
+ sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
+ sh->op_failed);
}
if (sh->background) {
@@ -2187,7 +2188,7 @@ afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
out:
if (op_errno) {
- orig_sh->unwind (frame, this, -1, op_errno);
+ orig_sh->unwind (frame, this, -1, op_errno, 1);
if (sh_frame)
AFR_STACK_DESTROY (sh_frame);
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index d362d5add6f..69494157cbb 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -769,7 +769,8 @@ afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
}
if (sh->background && sh->unwind) {
- sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno);
+ sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
+ sh->op_failed);
sh->unwound = _gf_true;
}
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 3068d5c46d0..55ede78ad63 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -277,7 +277,7 @@ out:
void
_crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child,
- int32_t op_ret, int32_t op_errno,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr_rsp,
afr_crawl_data_t *crawl_data)
{
int ret = 0;
@@ -286,6 +286,8 @@ _crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child,
eh_t *eh = NULL;
char *path = NULL;
shd_event_t *event = NULL;
+ int32_t sh_failed = 0;
+ gf_boolean_t split_brain = 0;
priv = this->private;
shd = &priv->shd;
@@ -307,9 +309,12 @@ _crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child,
}
}
- if (op_ret < 0 && op_errno == EIO)
+ if (xattr_rsp)
+ ret = dict_get_int32 (xattr_rsp, "sh-failed", &sh_failed);
+ split_brain = afr_is_split_brain (this, child->inode);
+ if ((op_ret < 0 && op_errno == EIO) || split_brain)
eh = shd->split_brain;
- else if (op_ret < 0)
+ else if ((op_ret < 0) || sh_failed)
eh = shd->heal_failed;
else
eh = shd->healed;
@@ -338,6 +343,7 @@ _self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *ent
{
struct iatt parentbuf = {0};
int ret = 0;
+ dict_t *xattr_rsp = NULL;
if (uuid_is_null (child->gfid))
gf_log (this->name, GF_LOG_DEBUG, "lookup %s", child->path);
@@ -346,8 +352,11 @@ _self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *ent
uuid_utoa (child->gfid));
ret = syncop_lookup (this, child, NULL,
- iattr, NULL, &parentbuf);
- _crawl_post_sh_action (this, parent, child, ret, errno, crawl_data);
+ iattr, &xattr_rsp, &parentbuf);
+ _crawl_post_sh_action (this, parent, child, ret, errno, xattr_rsp,
+ crawl_data);
+ if (xattr_rsp)
+ dict_unref (xattr_rsp);
return ret;
}
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 815f4667c0b..fccb39b1a86 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -191,7 +191,7 @@ typedef struct {
background, this function will be called as soon as possible. */
int (*unwind) (call_frame_t *frame, xlator_t *this, int32_t op_ret,
- int32_t op_errno);
+ int32_t op_errno, int32_t sh_failed);
/* End of external interface members */
@@ -1016,7 +1016,8 @@ afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
xlator_t *this),
int (*unwind) (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno));
+ int32_t op_ret, int32_t op_errno,
+ int32_t sh_failed));
int
afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx,
int need_open_count, int *need_open);