From f397d7edb85c1e4b78c4cac176dc8a0afe8cf9a8 Mon Sep 17 00:00:00 2001
From: Ravishankar N
Date: Fri, 23 Jan 2015 11:12:54 +0530
Subject: afr: Don't write to sparse regions of sink.

Backport of http://review.gluster.org/9480

Problem:
When data-self-heal-algorithm is set to 'full', shd just reads from
source and writes to sink. If source file happened to be sparse (VM
workloads), we end up actually writing 0s to the corresponding regions
of the sink causing it to lose its sparseness.

Fix:
If the source file is sparse, and the data read from source and sink
are both zeros for that range, skip writing that range to the sink.

Change-Id: Id23d953fe2c8c64cde5ce3530b52ef91a7583891
BUG: 1187547
Signed-off-by: Ravishankar N
Reviewed-on: http://review.gluster.org/9515
Tested-by: Gluster Build System
Reviewed-by: Pranith Kumar Karampuri
Reviewed-by: Raghavendra Bhat
---
 tests/basic/afr/sparse-file-self-heal.t      | 17 ++++++++++++
 xlators/cluster/afr/src/afr-self-heal-data.c | 41 ++++++++++++++++++++++++++--
 2 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/tests/basic/afr/sparse-file-self-heal.t b/tests/basic/afr/sparse-file-self-heal.t
index fa8375b1c8d..01775bd3ed5 100644
--- a/tests/basic/afr/sparse-file-self-heal.t
+++ b/tests/basic/afr/sparse-file-self-heal.t
@@ -17,6 +17,7 @@ TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0
 TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k
 TEST dd if=/dev/urandom of=$M0/bigger2big count=1 bs=2048k
 TEST dd if=/dev/urandom of=$M0/big2bigger count=1 bs=1024k
+TEST truncate -s 1G $M0/FILE
 
 TEST kill_brick $V0 $H0 $B0/${V0}0
 
@@ -38,6 +39,10 @@ bigger2big_md5sum=$(md5sum $M0/bigger2big | awk '{print $1}')
 TEST truncate -s 2M $M0/big2bigger
 big2bigger_md5sum=$(md5sum $M0/big2bigger | awk '{print $1}')
 
+#Write data to file and restore its sparseness
+TEST dd if=/dev/urandom of=$M0/FILE count=1 bs=131072
+TEST truncate -s 1G $M0/FILE
+
 $CLI volume start $V0 force
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
@@ -66,6 +71,9 @@ EXPECT "0" has_holes $B0/${V0}0/small
 EXPECT "0" has_holes $B0/${V0}0/bigger2big
 EXPECT "1" has_holes $B0/${V0}0/big2bigger
 
+#Check that self-heal has not written 0s to sink and made it non-sparse.
+USED_KB=`du -s $B0/${V0}0/FILE|cut -f1`
+TEST [ $USED_KB -lt 1000000 ]
 TEST rm -f $M0/*
 
 #check the same tests with diff self-heal
@@ -74,6 +82,7 @@ TEST $CLI volume set $V0 data-self-heal-algorithm diff
 TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k
 TEST dd if=/dev/urandom of=$M0/big2bigger count=1 bs=1024k
 TEST dd if=/dev/urandom of=$M0/bigger2big count=1 bs=2048k
+TEST truncate -s 1G $M0/FILE
 
 TEST kill_brick $V0 $H0 $B0/${V0}0
 
@@ -95,6 +104,10 @@ bigger2big_md5sum=$(md5sum $M0/bigger2big | awk '{print $1}')
 TEST truncate -s 2M $M0/big2bigger
 big2bigger_md5sum=$(md5sum $M0/big2bigger | awk '{print $1}')
 
+#Write data to file and restore its sparseness
+TEST dd if=/dev/urandom of=$M0/FILE count=1 bs=131072
+TEST truncate -s 1G $M0/FILE
+
 $CLI volume start $V0 force
 EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
@@ -118,4 +131,8 @@ EXPECT "1" has_holes $B0/${V0}0/big2bigger
 EXPECT "0" has_holes $B0/${V0}0/bigger2big
 EXPECT "0" has_holes $B0/${V0}0/small
 
+#Check that self-heal has not written 0s to sink and made it non-sparse.
+USED_KB=`du -s $B0/${V0}0/FILE|cut -f1`
+TEST [ $USED_KB -lt 1000000 ]
+
 cleanup
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index a434b9e6ba1..5637de365e0 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -111,11 +111,38 @@ __afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this,
 }
 
 
+static gf_boolean_t
+__afr_is_sink_zero_filled (xlator_t *this, fd_t *fd, size_t size,
+                           off_t offset, int sink)
+{
+        afr_private_t *priv = NULL;
+        struct iobref *iobref = NULL;
+        struct iovec *iovec = NULL;
+        int count = 0;
+        int ret = 0;
+        gf_boolean_t zero_filled = _gf_false;
+
+        priv = this->private;
+        ret = syncop_readv (priv->children[sink], fd, size, offset, 0, &iovec,
+                            &count, &iobref);
+        if (ret < 0)
+                goto out;
+        ret = iov_0filled (iovec, count);
+        if (!ret)
+                zero_filled = _gf_true;
+out:
+        if (iovec)
+                GF_FREE (iovec);
+        if (iobref)
+                iobref_unref (iobref);
+        return zero_filled;
+}
+
 static int
 __afr_selfheal_data_read_write (call_frame_t *frame, xlator_t *this, fd_t *fd,
                                 int source, unsigned char *healed_sinks,
                                 off_t offset, size_t size,
-                                struct afr_reply *replies)
+                                struct afr_reply *replies, int type)
 {
         struct iovec *iovec = NULL;
         int count = 0;
@@ -165,6 +192,16 @@ __afr_selfheal_data_read_write (call_frame_t *frame, xlator_t *this, fd_t *fd,
                     (iov_0filled (iovec, count) == 0))
                         continue;
 
+                /* Avoid filling up sparse regions of the sink with 0-filled
+                 * writes.*/
+                if (type == AFR_SELFHEAL_DATA_FULL &&
+                    HAS_HOLES ((&replies[source].poststat)) &&
+                    ((offset + size) <= replies[i].poststat.ia_size) &&
+                    (iov_0filled (iovec, count) == 0) &&
+                    __afr_is_sink_zero_filled (this, fd, size, offset, i)) {
+                        continue;
+                }
+
                 ret = syncop_writev (priv->children[i], fd, iovec, count,
                                      offset, iobref, 0);
                 if (ret != iov_length (iovec, count)) {
@@ -216,7 +253,7 @@ afr_selfheal_data_block (call_frame_t *frame, xlator_t *this, fd_t *fd,
                 ret = __afr_selfheal_data_read_write (frame, this, fd,
                                                       source, healed_sinks,
                                                       offset, size,
-                                                      replies);
+                                                      replies, type);
         }
 unlock:
         afr_selfheal_uninodelk (frame, this, fd->inode, this->name,
-- 
cgit
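For readers skimming the patch, the sketch below restates the heal-time rule in plain, standalone C (libc only; the helper names region_is_zero and skip_sparse_write are hypothetical, not GlusterFS APIs): a zero block read from a sparse source is skipped only when the corresponding sink region also reads back as zeros, so a sink that still holds stale non-zero data is always rewritten. The real code additionally restricts the check to the 'full' algorithm and to regions within the sink's current size (replies[i].poststat.ia_size); this is a minimal illustration under those assumptions, not the AFR implementation.

/*
 * Illustrative sketch only -- not GlusterFS code.  It mimics the rule this
 * patch adds to full self-heal, using plain libc and hypothetical helpers:
 * a zero block from a sparse source is written to the sink only if the
 * sink region currently holds non-zero data.
 */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Return 1 if every byte in buf[0..len) is zero (cf. iov_0filled()). */
static int region_is_zero(const char *buf, size_t len)
{
        for (size_t i = 0; i < len; i++)
                if (buf[i] != 0)
                        return 0;
        return 1;
}

/*
 * Decide whether a full-heal copy may skip this block.  source_is_sparse
 * stands in for HAS_HOLES() on the source iatt; the pread() plays the role
 * of __afr_is_sink_zero_filled() reading the sink brick.
 */
static int skip_sparse_write(int sink_fd, const char *src_buf, size_t size,
                             off_t offset, int source_is_sparse)
{
        char    *sink_buf = NULL;
        ssize_t  nr       = 0;
        int      skip     = 0;

        if (!source_is_sparse || !region_is_zero(src_buf, size))
                return 0;               /* real data: must be written */

        sink_buf = calloc(1, size);
        if (!sink_buf)
                return 0;               /* when in doubt, write */

        nr = pread(sink_fd, sink_buf, size, offset);
        if (nr >= 0 && region_is_zero(sink_buf, (size_t)nr))
                skip = 1;               /* both sides zero: keep the hole */

        free(sink_buf);
        return skip;
}

/* Tiny demo: a zero block over an empty sink is skipped, real data is not. */
int main(void)
{
        char zero_block[4096] = {0};
        char data_block[4096];
        int  fd;

        memset(data_block, 'x', sizeof(data_block));

        fd = open("sink-demo.img", O_RDWR | O_CREAT | O_TRUNC, 0644);
        if (fd < 0) {
                perror("open");
                return 1;
        }

        printf("skip zero block: %d\n",
               skip_sparse_write(fd, zero_block, sizeof(zero_block), 0, 1));
        printf("skip data block: %d\n",
               skip_sparse_write(fd, data_block, sizeof(data_block), 0, 1));

        close(fd);
        unlink("sink-demo.img");
        return 0;
}

Reading the sink before deciding is what keeps the optimization safe: skipping is purely an I/O saving and never changes the healed content, which is also why the test above only asserts on disk usage (du) after verifying the md5sums still match.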