cluster/ec: Fix write size in self-heal

Self-heal always used a fixed block size to heal a file. This was incorrect for dispersed volumes whose number of data bricks is not a power of 2. This patch adjusts the block size to a multiple of the stripe size of the volume. It also propagates errors detected during the data heal, so that healing of the file stops and the file is not marked as healed. Change-Id: I9ee3fde98a9e5d6116fd096ceef88686fd1d28e2 BUG: 1251446 Signed-off-by: Xavier Hernandez <xhernandez@datalab.es> Reviewed-on: http://review.gluster.org/11862 Tested-by: NetBSD Build System <jenkins@build.gluster.org> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
author: Xavier Hernandez <xhernandez@datalab.es> 2015-08-07 12:37:52 +0200
committer: Pranith Kumar Karampuri <pkarampu@redhat.com> 2015-08-14 02:09:46 -0700
commit: 289d00369f0ddb78f534735f7d3bf86268adac60 (patch)
tree: cbc347422a3871c8ea1a24ce8b21ec1ac7902962
parent: 7641eb8b469a6dd4db6db59d2a5ef4d5a65e1a61 (diff)
3 files changed, 60 insertions, 0 deletions
diff --git a/tests/bugs/disperse/bug-1251446.t b/tests/bugs/disperse/bug-1251446.t
new file mode 100644
index 00000000000..f805539b946
--- /dev/null
+++ b/tests/bugs/disperse/bug-1251446.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 4 redundancy 1 $H0:$B0/${V0}{0..3}
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+TEST dd if=/dev/urandom of=$M0/test1 bs=1024k count=2
+cs=$(sha1sum $M0/test1 | awk '{ print $1 }')
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT '3' online_brick_count
+
+TEST cp $M0/test1 $M0/test2
+EXPECT "$cs" echo $(sha1sum $M0/test2 | awk '{ print $1 }')
+
+TEST $CLI volume start $V0 force
+EXPECT '4' online_brick_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid
+TEST $CLI volume heal $V0 full
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+EXPECT "699392" stat -c "%s" $B0/${V0}0/test2
+
+# force cache clear
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+TEST kill_brick $V0 $H0 $B0/${V0}3
+EXPECT '3' online_brick_count
+
+EXPECT "$cs" echo $(sha1sum $M0/test2 | awk '{ print $1 }')
+
+## cleanup
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
index 285f71e702d..8a48a7ca824 100644
--- a/xlators/cluster/ec/src/ec-data.h
+++ b/xlators/cluster/ec/src/ec-data.h
@@ -283,6 +283,7 @@ struct _ec_heal
     fd_t             *fd;
     int32_t           partial;
     int32_t           done;
+    int32_t           error;
     gf_boolean_t      nameheal;
     uintptr_t         available;
     uintptr_t         good;
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index d095aceec00..f76839db38f 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -1777,6 +1777,7 @@ ec_heal_block_done (call_frame_t *frame, void *cookie, xlator_t *this,
 
         fop->heal = NULL;
         heal->fop = NULL;
+        heal->error = op_ret < 0 ? op_errno : 0;
         syncbarrier_wake (heal->data);
         return 0;
 }
@@ -1787,6 +1788,9 @@ ec_sync_heal_block (call_frame_t *frame, xlator_t *this, ec_heal_t *heal)
         ec_heal_block (frame, this, heal->bad|heal->good, EC_MINIMUM_ONE,
                        ec_heal_block_done, heal);
         syncbarrier_wait (heal->data, 1);
+        if (heal->error != 0) {
+                return -heal->error;
+        }
         if (heal->bad == 0)
                 return -ENOTCONN;
         return 0;
@@ -1812,6 +1816,11 @@ ec_rebuild_data (call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
         pool = ec->xl->ctx->iobuf_pool;
         heal->total_size = size;
         heal->size = iobpool_default_pagesize (pool);
+        /* We need to adjust the size to a multiple of the stripe size of the
+         * volume. Otherwise writes would need to fill gaps (head and/or tail)
+         * with existent data from the bad bricks. This could be garbage on a
+         * damaged file or it could fail if there aren't enough bricks. */
+        heal->size -= heal->size % ec->stripe_size;
         heal->bad       = ec_char_array_to_mask (healed_sinks, ec->nodes);
         heal->good      = ec_char_array_to_mask (sources, ec->nodes);
         heal->iatt.ia_type = IA_IFREG;
author	Xavier Hernandez <xhernandez@datalab.es>	2015-08-07 12:37:52 +0200
committer	Pranith Kumar Karampuri <pkarampu@redhat.com>	2015-08-14 02:09:46 -0700
commit	289d00369f0ddb78f534735f7d3bf86268adac60 (patch)
tree	cbc347422a3871c8ea1a24ce8b21ec1ac7902962
parent	7641eb8b469a6dd4db6db59d2a5ef4d5a65e1a61 (diff)