diff options
author | Ashish Pandey <aspandey@redhat.com> | 2016-02-04 12:07:36 +0530 |
---|---|---|
committer | Xavier Hernandez <xhernandez@datalab.es> | 2016-02-08 23:20:43 -0800 |
commit | 12fbcd22d80be9cdd7e60341a173741d1d00f711 (patch) | |
tree | bd6588b3a0270f5fb01376f70897c4515a0e3405 | |
parent | dd8306b2f2e38f5de1386461d7f33ac0b64f2886 (diff) |
cluster/ec: Automate heal for replace brick
Problem:
After a replace-brick command, the newly added
brick does not contain the data that existed
on the old brick.
Solution:
Issue a getxattr on the root inode once all the
bricks have been initialized. This triggers a heal
of the brick root as soon as a version mismatch is
found on the newly added brick.
Also remove the steps from ec-new-entry.t that were
only needed to simulate an automatic heal after
a replace brick.
Change-Id: I08e3dfa565374097f6c08856325ea77727437e11
BUG: 1304686
Signed-off-by: Ashish Pandey <aspandey@redhat.com>
Reviewed-on: http://review.gluster.org/13353
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
-rw-r--r-- | tests/basic/ec/ec-new-entry.t | 10 | ||||
-rw-r--r-- | tests/basic/ec/ec-read-policy.t | 2 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-heal.c | 60 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec-helpers.h | 3 | ||||
-rw-r--r-- | xlators/cluster/ec/src/ec.c | 5 |
5 files changed, 69 insertions, 11 deletions
diff --git a/tests/basic/ec/ec-new-entry.t b/tests/basic/ec/ec-new-entry.t index a08bae7a810..3a5c2ee11ec 100644 --- a/tests/basic/ec/ec-new-entry.t +++ b/tests/basic/ec/ec-new-entry.t @@ -23,16 +23,6 @@ touch $M0/11 for i in {1..10}; do dd if=/dev/zero of=$M0/$i bs=1M count=1; done TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}5 $H0:$B0/${V0}6 commit force EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 -TEST kill_brick $V0 $H0 $B0/${V0}6 -#simulate pending heal on just the root directory -TEST touch $M0/a -TEST rm -f $M0/a -EXPECT_WITHIN $HEAL_TIMEOUT "^5$" get_pending_heal_count $V0 -TEST $CLI volume start $V0 force -EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid -EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count_shd $V0 0 -EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 -TEST $CLI volume heal $V0 EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 #ls -l gives "Total" line so number of lines will be 1 more EXPECT "^12$" num_entries $B0/${V0}6 diff --git a/tests/basic/ec/ec-read-policy.t b/tests/basic/ec/ec-read-policy.t index 891508063e6..4a85bf6ac53 100644 --- a/tests/basic/ec/ec-read-policy.t +++ b/tests/basic/ec/ec-read-policy.t @@ -37,7 +37,7 @@ TEST dd if=/dev/zero of=$M0/1 bs=1M count=4 #Perform reads now from file on the mount, this only tests dispatch_min TEST dd if=$M0/1 of=/dev/null bs=1M count=4 #TEST that reads are executed on all bricks -rr_reads=$($CLI volume profile $V0 info cumulative| grep READ | wc -l) +rr_reads=$($CLI volume profile $V0 info cumulative| grep -w READ | wc -l) EXPECT "^6$" echo $rr_reads TEST $CLI volume profile $V0 info clear diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index d80cdabc5ab..94ff4757b4d 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -2554,3 +2554,63 @@ fail: if (func) func (frame, NULL, this, -1, err, 0, 0, 0, NULL); } + +int +ec_replace_heal_done (int ret, call_frame_t *heal, 
void *opaque) +{ + ec_t *ec = opaque; + + gf_msg_debug (ec->xl->name, 0, + "getxattr on bricks is done ret %d", ret); + return 0; +} + +int32_t +ec_replace_heal (ec_t *ec, inode_t *inode) +{ + loc_t loc = {0}; + int ret = 0; + + loc.inode = inode_ref (inode); + gf_uuid_copy (loc.gfid, inode->gfid); + ret = syncop_getxattr (ec->xl, &loc, NULL, EC_XATTR_HEAL, + NULL, NULL); + if (ret < 0) + gf_msg_debug (ec->xl->name, 0, + "Heal failed for replace brick ret = %d", ret); + + loc_wipe (&loc); + return ret; +} + +int32_t +ec_replace_brick_heal_wrap (void *opaque) +{ + ec_t *ec = opaque; + inode_table_t *itable = NULL; + int32_t ret = -1; + + if (ec->xl->itable) + itable = ec->xl->itable; + else + goto out; + ret = ec_replace_heal (ec, itable->root); +out: + return ret; +} + +int32_t +ec_launch_replace_heal (ec_t *ec) +{ + int ret = -1; + + if (!ec) + return ret; + ret = synctask_new (ec->xl->ctx->env, ec_replace_brick_heal_wrap, + ec_replace_heal_done, NULL, ec); + if (ret < 0) { + gf_msg_debug (ec->xl->name, 0, + "Heal failed for replace brick ret = %d", ret); + } + return ret; +} diff --git a/xlators/cluster/ec/src/ec-helpers.h b/xlators/cluster/ec/src/ec-helpers.h index 14243df54f3..1f39da2c09f 100644 --- a/xlators/cluster/ec/src/ec-helpers.h +++ b/xlators/cluster/ec/src/ec-helpers.h @@ -62,6 +62,9 @@ ec_filter_internal_xattrs (dict_t *xattr); gf_boolean_t ec_is_data_fop (glusterfs_fop_t fop); + +int32_t +ec_launch_replace_heal (ec_t *ec); /* gf_boolean_t ec_is_metadata_fop (glusterfs_fop_t fop); diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c index aad24e4dab7..1f15af10dab 100644 --- a/xlators/cluster/ec/src/ec.c +++ b/xlators/cluster/ec/src/ec.c @@ -515,6 +515,11 @@ unlock: if (propagate) { error = default_notify (this, event, data); } + + if (ec->shd.iamshd && + ec->xl_notify_count == ec->nodes) { + ec_launch_replace_heal (ec); + } out: return error; } |