summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Ashish Pandey <aspandey@redhat.com> 2016-02-04 12:07:36 +0530
committer: Xavier Hernandez <xhernandez@datalab.es> 2016-02-08 23:20:43 -0800
commit: 12fbcd22d80be9cdd7e60341a173741d1d00f711 (patch)
tree: bd6588b3a0270f5fb01376f70897c4515a0e3405
parent: dd8306b2f2e38f5de1386461d7f33ac0b64f2886 (diff)
cluster/ec: Automate heal for replace brick
Problem: After a replace-brick command, the newly added brick does not contain the data that existed on the old brick.
Solution: Do a getxattr after all the bricks have been initialized. This triggers heal for the brick root as soon as it finds the version mismatch on the newly added brick.
Also remove the steps from ec-new-entry.t that were required to simulate automation of heal after replace-brick.
Change-Id: I08e3dfa565374097f6c08856325ea77727437e11
BUG: 1304686
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
Reviewed-on: http://review.gluster.org/13353
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
-rw-r--r-- tests/basic/ec/ec-new-entry.t 10
-rw-r--r-- tests/basic/ec/ec-read-policy.t 2
-rw-r--r-- xlators/cluster/ec/src/ec-heal.c 60
-rw-r--r-- xlators/cluster/ec/src/ec-helpers.h 3
-rw-r--r-- xlators/cluster/ec/src/ec.c 5
5 files changed, 69 insertions, 11 deletions
diff --git a/tests/basic/ec/ec-new-entry.t b/tests/basic/ec/ec-new-entry.t
index a08bae7a810..3a5c2ee11ec 100644
--- a/tests/basic/ec/ec-new-entry.t
+++ b/tests/basic/ec/ec-new-entry.t
@@ -23,16 +23,6 @@ touch $M0/11
for i in {1..10}; do dd if=/dev/zero of=$M0/$i bs=1M count=1; done
TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}5 $H0:$B0/${V0}6 commit force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
-TEST kill_brick $V0 $H0 $B0/${V0}6
-#simulate pending heal on just the root directory
-TEST touch $M0/a
-TEST rm -f $M0/a
-EXPECT_WITHIN $HEAL_TIMEOUT "^5$" get_pending_heal_count $V0
-TEST $CLI volume start $V0 force
-EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count_shd $V0 0
-EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
-TEST $CLI volume heal $V0
EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
#ls -l gives "Total" line so number of lines will be 1 more
EXPECT "^12$" num_entries $B0/${V0}6
diff --git a/tests/basic/ec/ec-read-policy.t b/tests/basic/ec/ec-read-policy.t
index 891508063e6..4a85bf6ac53 100644
--- a/tests/basic/ec/ec-read-policy.t
+++ b/tests/basic/ec/ec-read-policy.t
@@ -37,7 +37,7 @@ TEST dd if=/dev/zero of=$M0/1 bs=1M count=4
#Perform reads now from file on the mount, this only tests dispatch_min
TEST dd if=$M0/1 of=/dev/null bs=1M count=4
#TEST that reads are executed on all bricks
-rr_reads=$($CLI volume profile $V0 info cumulative| grep READ | wc -l)
+rr_reads=$($CLI volume profile $V0 info cumulative| grep -w READ | wc -l)
EXPECT "^6$" echo $rr_reads
TEST $CLI volume profile $V0 info clear
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index d80cdabc5ab..94ff4757b4d 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -2554,3 +2554,63 @@ fail:
if (func)
func (frame, NULL, this, -1, err, 0, 0, 0, NULL);
}
+/* Completion callback that ec_launch_replace_heal() hands to synctask_new(); it only logs the result at debug level. */
+int
+ec_replace_heal_done (int ret, call_frame_t *heal, void *opaque)
+{
+ ec_t *ec = opaque; /* the ec_t that ec_launch_replace_heal() passed as the opaque argument */
+
+ gf_msg_debug (ec->xl->name, 0,
+ "getxattr on bricks is done ret %d", ret); /* ret is presumably the task function's return value -- confirm against synctask_new() */
+ return 0; /* always 0, regardless of ret; the heal frame argument is not used here */
+}
+/* Issue a getxattr for EC_XATTR_HEAL on the given inode through ec->xl and return the syncop_getxattr() result (negative is logged as failure). */
+int32_t
+ec_replace_heal (ec_t *ec, inode_t *inode)
+{
+ loc_t loc = {0};
+ int ret = 0;
+
+ loc.inode = inode_ref (inode); /* the loc holds its own reference on the inode */
+ gf_uuid_copy (loc.gfid, inode->gfid);
+ ret = syncop_getxattr (ec->xl, &loc, NULL, EC_XATTR_HEAL,
+ NULL, NULL); /* only the return code is used; no output dict is requested */
+ if (ret < 0)
+ gf_msg_debug (ec->xl->name, 0,
+ "Heal failed for replace brick ret = %d", ret); /* failure is only reported at debug level */
+
+ loc_wipe (&loc); /* presumably drops the inode reference taken above -- confirm */
+ return ret;
+}
+/* Task function given to synctask_new(): runs ec_replace_heal() on the root inode of ec->xl's inode table. */
+int32_t
+ec_replace_brick_heal_wrap (void *opaque)
+{
+ ec_t *ec = opaque; /* the ec_t that ec_launch_replace_heal() passed as the opaque argument */
+ inode_table_t *itable = NULL;
+ int32_t ret = -1; /* value returned when there is no inode table */
+
+ if (ec->xl->itable)
+ itable = ec->xl->itable;
+ else
+ goto out; /* no inode table on this xlator: nothing to heal from, return -1 */
+ ret = ec_replace_heal (ec, itable->root);
+out:
+ return ret;
+}
+/* Schedule ec_replace_brick_heal_wrap() as a synctask with ec_replace_heal_done() as its callback; returns -1 if ec is NULL, otherwise the synctask_new() result. */
+int32_t
+ec_launch_replace_heal (ec_t *ec)
+{
+ int ret = -1;
+
+ if (!ec)
+ return ret; /* NULL ec: return -1 without scheduling anything */
+ ret = synctask_new (ec->xl->ctx->env, ec_replace_brick_heal_wrap,
+ ec_replace_heal_done, NULL, ec); /* NULL is passed in the argument slot before the opaque ec pointer */
+ if (ret < 0) {
+ gf_msg_debug (ec->xl->name, 0,
+ "Heal failed for replace brick ret = %d", ret); /* NOTE(review): same text as the log in ec_replace_heal(), but here it is synctask_new() that failed */
+ }
+ return ret;
+}
diff --git a/xlators/cluster/ec/src/ec-helpers.h b/xlators/cluster/ec/src/ec-helpers.h
index 14243df54f3..1f39da2c09f 100644
--- a/xlators/cluster/ec/src/ec-helpers.h
+++ b/xlators/cluster/ec/src/ec-helpers.h
@@ -62,6 +62,9 @@ ec_filter_internal_xattrs (dict_t *xattr);
gf_boolean_t
ec_is_data_fop (glusterfs_fop_t fop);
+
+int32_t
+ec_launch_replace_heal (ec_t *ec);
/*
gf_boolean_t
ec_is_metadata_fop (glusterfs_fop_t fop);
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index aad24e4dab7..1f15af10dab 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -515,6 +515,11 @@ unlock:
if (propagate) {
error = default_notify (this, event, data);
}
+
+ if (ec->shd.iamshd &&
+ ec->xl_notify_count == ec->nodes) {
+ ec_launch_replace_heal (ec);
+ }
out:
return error;
}