summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-x tests/basic/ec/ec-data-heal.t 87
-rw-r--r-- tests/include.rc 1
-rw-r--r-- xlators/cluster/ec/src/ec-common.c 11
-rw-r--r-- xlators/cluster/ec/src/ec-heal.c 17
4 files changed, 107 insertions, 9 deletions
diff --git a/tests/basic/ec/ec-data-heal.t b/tests/basic/ec/ec-data-heal.t
new file mode 100755
index 00000000000..4599c8a336b
--- /dev/null
+++ b/tests/basic/ec/ec-data-heal.t
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks that a file is not corrupted when it is healed while I/O on it is still going on
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+############ Start IO ###########
+TEST touch $M0/file
+#start background IO on file
+dd if=/dev/urandom of=$M0/file conv=fdatasync &
+iopid=$(echo $!)
+
+
+############ Kill and start brick0 for heal ###########
+brick0=$(ps -p $(get_brick_pid $V0 $H0 $B0/${V0}0) -o args)
+WORDTOREMOVE=COMMAND
+brick0=${brick0//$WORDTOREMOVE/}
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+# Sleep so that data is written while brick0 is down; that data will have to be healed later
+sleep 10
+TEST eval $brick0
+## Wait for the pending heal count to become 0, then kill the background I/O
+EXPECT_WITHIN $IO_HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+kill $iopid
+EXPECT_WITHIN $IO_HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############### Check md5sum #########################
+
+## Kill brick0, then unmount, remount and take the md5sum while brick0 is down
+
+brick0=$(ps -p $(get_brick_pid $V0 $H0 $B0/${V0}0) -o args)
+WORDTOREMOVE=COMMAND
+brick0=${brick0//$WORDTOREMOVE/}
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+mdsum0=`md5sum $M0/file | awk '{print $1}'`
+TEST eval $brick0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+## Kill brick1, then unmount, remount and take the md5sum while brick1 is down
+
+brick1=$(ps -p $(get_brick_pid $V0 $H0 $B0/${V0}1) -o args)
+WORDTOREMOVE=COMMAND
+brick1=${brick1//$WORDTOREMOVE/}
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+mdsum1=`md5sum $M0/file | awk '{print $1}'`
+TEST eval $brick1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+## Kill brick2, then unmount, remount and take the md5sum while brick2 is down
+
+brick2=$(ps -p $(get_brick_pid $V0 $H0 $B0/${V0}2) -o args)
+WORDTOREMOVE=COMMAND
+brick2=${brick2//$WORDTOREMOVE/}
+TEST kill_brick $V0 $H0 $B0/${V0}2
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+mdsum2=`md5sum $M0/file | awk '{print $1}'`
+TEST eval $brick2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+# Compare all three md5sums with each other
+EXPECT "$mdsum0" echo $mdsum1
+EXPECT "$mdsum0" echo $mdsum2
+EXPECT "$mdsum1" echo $mdsum2
+
+cleanup
diff --git a/tests/include.rc b/tests/include.rc
index d92361006eb..ed8775107d7 100644
--- a/tests/include.rc
+++ b/tests/include.rc
@@ -76,6 +76,7 @@ PROBE_TIMEOUT=60
REBALANCE_TIMEOUT=360
REOPEN_TIMEOUT=20
HEAL_TIMEOUT=80
+IO_HEAL_TIMEOUT=120
MARKER_UPDATE_TIMEOUT=20
JANITOR_TIMEOUT=60
UMOUNT_TIMEOUT=5
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
index 2231a8da1f5..284c2cd5a62 100644
--- a/xlators/cluster/ec/src/ec-common.c
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -1753,6 +1753,9 @@ ec_lock_next_owner(ec_lock_link_t *link, ec_cbk_data_t *cbk,
}
}
+ if (fop->healing) {
+ lock->healing = fop->healing & (fop->good | fop->remaining);
+ }
ec_lock_update_good(lock, fop);
lock->exclusive -= (fop->flags & EC_FLAG_LOCK_SHARED) == 0;
@@ -1950,6 +1953,8 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version,
ec_lock_t *lock;
ec_inode_t *ctx;
dict_t * dict;
+ uintptr_t update_on = 0;
+
int32_t err = -ENOMEM;
fop = link->fop;
@@ -2003,12 +2008,14 @@ ec_update_size_version(ec_lock_link_t *link, uint64_t *version,
fop->frame->root->uid = 0;
fop->frame->root->gid = 0;
+ update_on = lock->good_mask | lock->healing;
+
if (link->lock->fd == NULL) {
- ec_xattrop(fop->frame, fop->xl, lock->good_mask, EC_MINIMUM_MIN,
+ ec_xattrop(fop->frame, fop->xl, update_on, EC_MINIMUM_MIN,
ec_update_size_version_done, link, &link->lock->loc,
GF_XATTROP_ADD_ARRAY64, dict, NULL);
} else {
- ec_fxattrop(fop->frame, fop->xl, lock->good_mask, EC_MINIMUM_MIN,
+ ec_fxattrop(fop->frame, fop->xl, update_on, EC_MINIMUM_MIN,
ec_update_size_version_done, link, link->lock->fd,
GF_XATTROP_ADD_ARRAY64, dict, NULL);
}
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index afe7833f385..fae31778532 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -1905,7 +1905,7 @@ ec_rebuild_data (call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
heal->fd = fd_ref (fd);
heal->xl = ec->xl;
heal->data = &barrier;
- syncbarrier_init (heal->data);
+ size = ec_adjust_size (ec, size, 0);
heal->total_size = size;
heal->size = (128 * GF_UNIT_KB * (ec->self_heal_window_size));
/* We need to adjust the size to a multiple of the stripe size of the
@@ -1943,13 +1943,15 @@ ec_rebuild_data (call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
}
int
-__ec_heal_trim_sinks (call_frame_t *frame, ec_t *ec, fd_t *fd,
- unsigned char *healed_sinks, unsigned char *trim)
+__ec_heal_trim_sinks (call_frame_t *frame, ec_t *ec,
+ fd_t *fd, unsigned char *healed_sinks,
+ unsigned char *trim, uint64_t size)
{
default_args_cbk_t *replies = NULL;
unsigned char *output = NULL;
int ret = 0;
int i = 0;
+ off_t trim_offset = 0;
EC_REPLIES_ALLOC (replies, ec->nodes);
output = alloca0 (ec->nodes);
@@ -1958,9 +1960,9 @@ __ec_heal_trim_sinks (call_frame_t *frame, ec_t *ec, fd_t *fd,
ret = 0;
goto out;
}
-
+ trim_offset = ec_adjust_size (ec, size, 1);
ret = cluster_ftruncate (ec->xl_list, trim, ec->nodes, replies, output,
- frame, ec->xl, fd, 0, NULL);
+ frame, ec->xl, fd, trim_offset, NULL);
for (i = 0; i < ec->nodes; i++) {
if (!output[i] && trim[i])
healed_sinks[i] = 0;
@@ -2014,7 +2016,7 @@ ec_data_undo_pending (call_frame_t *frame, ec_t *ec, fd_t *fd, dict_t *xattr,
if ((memcmp (versions_xattr, allzero, sizeof (allzero)) == 0) &&
(memcmp (dirty_xattr, allzero, sizeof (allzero)) == 0) &&
- (size == 0)) {
+ (size_xattr == 0)) {
ret = 0;
goto out;
}
@@ -2220,7 +2222,8 @@ __ec_heal_data (call_frame_t *frame, ec_t *ec, fd_t *fd, unsigned char *heal_on,
if (ret < 0)
goto unlock;
- ret = __ec_heal_trim_sinks (frame, ec, fd, healed_sinks, trim);
+ ret = __ec_heal_trim_sinks (frame, ec, fd, healed_sinks,
+ trim, size[source]);
}
unlock:
cluster_uninodelk (ec->xl_list, locked_on, ec->nodes, replies, output,