diff options
-rwxr-xr-x | tests/basic/ec/ec-data-heal.t | 87
-rw-r--r-- | tests/include.rc | 1
-rw-r--r-- | xlators/cluster/ec/src/ec-common.c | 11
-rw-r--r-- | xlators/cluster/ec/src/ec-heal.c | 17
4 files changed, 107 insertions, 9 deletions
diff --git a/tests/basic/ec/ec-data-heal.t b/tests/basic/ec/ec-data-heal.t new file mode 100755 index 00000000000..4599c8a336b --- /dev/null +++ b/tests/basic/ec/ec-data-heal.t @@ -0,0 +1,87 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +#This test checks data corruption after heal while IO is going on + +cleanup +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2} +TEST $CLI volume start $V0 + +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; +EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 + +############ Start IO ########### +TEST touch $M0/file +#start background IO on file +dd if=/dev/urandom of=$M0/file conv=fdatasync & +iopid=$(echo $!) + + +############ Kill and start brick0 for heal ########### +brick0=$(ps -p $(get_brick_pid $V0 $H0 $B0/${V0}0) -o args) +WORDTOREMOVE=COMMAND +brick0=${brick0//$WORDTOREMOVE/} +TEST kill_brick $V0 $H0 $B0/${V0}0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0 +#sleep so that data can be written which will be healed later +sleep 10 +TEST eval $brick0 +##wait for heal info to become 0 and kill IO +EXPECT_WITHIN $IO_HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 +kill $iopid +EXPECT_WITHIN $IO_HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 + +############### Check md5sum ######################### + +## unmount and mount get md5sum after killing brick0 + +brick0=$(ps -p $(get_brick_pid $V0 $H0 $B0/${V0}0) -o args) +WORDTOREMOVE=COMMAND +brick0=${brick0//$WORDTOREMOVE/} +TEST kill_brick $V0 $H0 $B0/${V0}0 + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; +EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0 +mdsum0=`md5sum $M0/file | awk '{print $1}'` +TEST eval $brick0 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 + +## unmount and mount get md5sum after killing brick1 + +brick1=$(ps -p $(get_brick_pid $V0 $H0 $B0/${V0}1) -o 
args) +WORDTOREMOVE=COMMAND +brick1=${brick1//$WORDTOREMOVE/} +TEST kill_brick $V0 $H0 $B0/${V0}1 + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; +EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0 +mdsum1=`md5sum $M0/file | awk '{print $1}'` +TEST eval $brick1 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 + +## unmount and mount get md5sum after killing brick2 + +brick2=$(ps -p $(get_brick_pid $V0 $H0 $B0/${V0}2) -o args) +WORDTOREMOVE=COMMAND +brick2=${brick2//$WORDTOREMOVE/} +TEST kill_brick $V0 $H0 $B0/${V0}2 + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; +EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0 +mdsum2=`md5sum $M0/file | awk '{print $1}'` +TEST eval $brick2 +EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 + +# compare all the three md5sums +EXPECT "$mdsum0" echo $mdsum1 +EXPECT "$mdsum0" echo $mdsum2 +EXPECT "$mdsum1" echo $mdsum2 + +cleanup diff --git a/tests/include.rc b/tests/include.rc index d92361006eb..ed8775107d7 100644 --- a/tests/include.rc +++ b/tests/include.rc @@ -76,6 +76,7 @@ PROBE_TIMEOUT=60 REBALANCE_TIMEOUT=360 REOPEN_TIMEOUT=20 HEAL_TIMEOUT=80 +IO_HEAL_TIMEOUT=120 MARKER_UPDATE_TIMEOUT=20 JANITOR_TIMEOUT=60 UMOUNT_TIMEOUT=5 diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c index 2231a8da1f5..284c2cd5a62 100644 --- a/xlators/cluster/ec/src/ec-common.c +++ b/xlators/cluster/ec/src/ec-common.c @@ -1753,6 +1753,9 @@ ec_lock_next_owner(ec_lock_link_t *link, ec_cbk_data_t *cbk, } } + if (fop->healing) { + lock->healing = fop->healing & (fop->good | fop->remaining); + } ec_lock_update_good(lock, fop); lock->exclusive -= (fop->flags & EC_FLAG_LOCK_SHARED) == 0; @@ -1950,6 +1953,8 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version, ec_lock_t *lock; ec_inode_t *ctx; dict_t * dict; + uintptr_t update_on = 0; + int32_t err = 
-ENOMEM; fop = link->fop; @@ -2003,12 +2008,14 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version, fop->frame->root->uid = 0; fop->frame->root->gid = 0; + update_on = lock->good_mask | lock->healing; + if (link->lock->fd == NULL) { - ec_xattrop(fop->frame, fop->xl, lock->good_mask, EC_MINIMUM_MIN, + ec_xattrop(fop->frame, fop->xl, update_on, EC_MINIMUM_MIN, ec_update_size_version_done, link, &link->lock->loc, GF_XATTROP_ADD_ARRAY64, dict, NULL); } else { - ec_fxattrop(fop->frame, fop->xl, lock->good_mask, EC_MINIMUM_MIN, + ec_fxattrop(fop->frame, fop->xl, update_on, EC_MINIMUM_MIN, ec_update_size_version_done, link, link->lock->fd, GF_XATTROP_ADD_ARRAY64, dict, NULL); } diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index afe7833f385..fae31778532 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -1905,7 +1905,7 @@ ec_rebuild_data (call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size, heal->fd = fd_ref (fd); heal->xl = ec->xl; heal->data = &barrier; - syncbarrier_init (heal->data); + size = ec_adjust_size (ec, size, 0); heal->total_size = size; heal->size = (128 * GF_UNIT_KB * (ec->self_heal_window_size)); /* We need to adjust the size to a multiple of the stripe size of the @@ -1943,13 +1943,15 @@ ec_rebuild_data (call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size, } int -__ec_heal_trim_sinks (call_frame_t *frame, ec_t *ec, fd_t *fd, - unsigned char *healed_sinks, unsigned char *trim) +__ec_heal_trim_sinks (call_frame_t *frame, ec_t *ec, + fd_t *fd, unsigned char *healed_sinks, + unsigned char *trim, uint64_t size) { default_args_cbk_t *replies = NULL; unsigned char *output = NULL; int ret = 0; int i = 0; + off_t trim_offset = 0; EC_REPLIES_ALLOC (replies, ec->nodes); output = alloca0 (ec->nodes); @@ -1958,9 +1960,9 @@ __ec_heal_trim_sinks (call_frame_t *frame, ec_t *ec, fd_t *fd, ret = 0; goto out; } - + trim_offset = ec_adjust_size (ec, size, 1); ret = cluster_ftruncate 
(ec->xl_list, trim, ec->nodes, replies, output, - frame, ec->xl, fd, 0, NULL); + frame, ec->xl, fd, trim_offset, NULL); for (i = 0; i < ec->nodes; i++) { if (!output[i] && trim[i]) healed_sinks[i] = 0; @@ -2014,7 +2016,7 @@ ec_data_undo_pending (call_frame_t *frame, ec_t *ec, fd_t *fd, dict_t *xattr, if ((memcmp (versions_xattr, allzero, sizeof (allzero)) == 0) && (memcmp (dirty_xattr, allzero, sizeof (allzero)) == 0) && - (size == 0)) { + (size_xattr == 0)) { ret = 0; goto out; } @@ -2220,7 +2222,8 @@ __ec_heal_data (call_frame_t *frame, ec_t *ec, fd_t *fd, unsigned char *heal_on, if (ret < 0) goto unlock; - ret = __ec_heal_trim_sinks (frame, ec, fd, healed_sinks, trim); + ret = __ec_heal_trim_sinks (frame, ec, fd, healed_sinks, + trim, size[source]); } unlock: cluster_uninodelk (ec->xl_list, locked_on, ec->nodes, replies, output, |