summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2015-01-23 11:12:54 +0530
committerRaghavendra Bhat <raghavendra@redhat.com>2015-02-03 05:24:56 -0800
commitf397d7edb85c1e4b78c4cac176dc8a0afe8cf9a8 (patch)
tree964e187c2c8ff5e073306dd5b90f751348eafd10
parent90f35bc8e806fc615d5e2a2657a389dbdd7e2672 (diff)
afr: Don't write to sparse regions of sink.
Backport of http://review.gluster.org/9480 Problem: When data-self-heal-algorithm is set to 'full', shd just reads from source and writes to sink. If source file happened to be sparse (VM workloads), we end up actually writing 0s to the corresponding regions of the sink causing it to lose its sparseness. Fix: If the source file is sparse, and the data read from source and sink are both zeros for that range, skip writing that range to the sink. Change-Id: Id23d953fe2c8c64cde5ce3530b52ef91a7583891 BUG: 1187547 Signed-off-by: Ravishankar N <ravishankar@redhat.com> Reviewed-on: http://review.gluster.org/9515 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com> Reviewed-by: Raghavendra Bhat <raghavendra@redhat.com>
-rw-r--r--tests/basic/afr/sparse-file-self-heal.t17
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c41
2 files changed, 56 insertions, 2 deletions
diff --git a/tests/basic/afr/sparse-file-self-heal.t b/tests/basic/afr/sparse-file-self-heal.t
index fa8375b1c8d..01775bd3ed5 100644
--- a/tests/basic/afr/sparse-file-self-heal.t
+++ b/tests/basic/afr/sparse-file-self-heal.t
@@ -17,6 +17,7 @@ TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0
TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k
TEST dd if=/dev/urandom of=$M0/bigger2big count=1 bs=2048k
TEST dd if=/dev/urandom of=$M0/big2bigger count=1 bs=1024k
+TEST truncate -s 1G $M0/FILE
TEST kill_brick $V0 $H0 $B0/${V0}0
@@ -38,6 +39,10 @@ bigger2big_md5sum=$(md5sum $M0/bigger2big | awk '{print $1}')
TEST truncate -s 2M $M0/big2bigger
big2bigger_md5sum=$(md5sum $M0/big2bigger | awk '{print $1}')
+#Write data to file and restore its sparseness
+TEST dd if=/dev/urandom of=$M0/FILE count=1 bs=131072
+TEST truncate -s 1G $M0/FILE
+
$CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
@@ -66,6 +71,9 @@ EXPECT "0" has_holes $B0/${V0}0/small
EXPECT "0" has_holes $B0/${V0}0/bigger2big
EXPECT "1" has_holes $B0/${V0}0/big2bigger
+#Check that self-heal has not written 0s to sink and made it non-sparse.
+USED_KB=`du -s $B0/${V0}0/FILE|cut -f1`
+TEST [ $USED_KB -lt 1000000 ]
TEST rm -f $M0/*
#check the same tests with diff self-heal
@@ -74,6 +82,7 @@ TEST $CLI volume set $V0 data-self-heal-algorithm diff
TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k
TEST dd if=/dev/urandom of=$M0/big2bigger count=1 bs=1024k
TEST dd if=/dev/urandom of=$M0/bigger2big count=1 bs=2048k
+TEST truncate -s 1G $M0/FILE
TEST kill_brick $V0 $H0 $B0/${V0}0
@@ -95,6 +104,10 @@ bigger2big_md5sum=$(md5sum $M0/bigger2big | awk '{print $1}')
TEST truncate -s 2M $M0/big2bigger
big2bigger_md5sum=$(md5sum $M0/big2bigger | awk '{print $1}')
+#Write data to file and restore its sparseness
+TEST dd if=/dev/urandom of=$M0/FILE count=1 bs=131072
+TEST truncate -s 1G $M0/FILE
+
$CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
@@ -118,4 +131,8 @@ EXPECT "1" has_holes $B0/${V0}0/big2bigger
EXPECT "0" has_holes $B0/${V0}0/bigger2big
EXPECT "0" has_holes $B0/${V0}0/small
+#Check that self-heal has not written 0s to sink and made it non-sparse.
+USED_KB=`du -s $B0/${V0}0/FILE|cut -f1`
+TEST [ $USED_KB -lt 1000000 ]
+
cleanup
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index a434b9e6ba1..5637de365e0 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -111,11 +111,38 @@ __afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this,
}
+static gf_boolean_t
+__afr_is_sink_zero_filled (xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, int sink)
+{
+ afr_private_t *priv = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec *iovec = NULL;
+ int count = 0;
+ int ret = 0;
+ gf_boolean_t zero_filled = _gf_false;
+
+ priv = this->private;
+ ret = syncop_readv (priv->children[sink], fd, size, offset, 0, &iovec,
+ &count, &iobref);
+ if (ret < 0)
+ goto out;
+ ret = iov_0filled (iovec, count);
+ if (!ret)
+ zero_filled = _gf_true;
+out:
+ if (iovec)
+ GF_FREE (iovec);
+ if (iobref)
+ iobref_unref (iobref);
+ return zero_filled;
+}
+
static int
__afr_selfheal_data_read_write (call_frame_t *frame, xlator_t *this, fd_t *fd,
int source, unsigned char *healed_sinks,
off_t offset, size_t size,
- struct afr_reply *replies)
+ struct afr_reply *replies, int type)
{
struct iovec *iovec = NULL;
int count = 0;
@@ -165,6 +192,16 @@ __afr_selfheal_data_read_write (call_frame_t *frame, xlator_t *this, fd_t *fd,
(iov_0filled (iovec, count) == 0))
continue;
+ /* Avoid filling up sparse regions of the sink with 0-filled
+ * writes.*/
+ if (type == AFR_SELFHEAL_DATA_FULL &&
+ HAS_HOLES ((&replies[source].poststat)) &&
+ ((offset + size) <= replies[i].poststat.ia_size) &&
+ (iov_0filled (iovec, count) == 0) &&
+ __afr_is_sink_zero_filled (this, fd, size, offset, i)) {
+ continue;
+ }
+
ret = syncop_writev (priv->children[i], fd, iovec, count,
offset, iobref, 0);
if (ret != iov_length (iovec, count)) {
@@ -216,7 +253,7 @@ afr_selfheal_data_block (call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = __afr_selfheal_data_read_write (frame, this, fd, source,
healed_sinks, offset, size,
- replies);
+ replies, type);
}
unlock:
afr_selfheal_uninodelk (frame, this, fd->inode, this->name,