diff options
author | Ravishankar N <ravishankar@redhat.com> | 2015-01-23 11:12:54 +0530 |
---|---|---|
committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2015-01-30 04:02:43 -0800 |
commit | 0f84f8e8048367737a2dd6ddf0c57403e757441d (patch) | |
tree | ec5a1e4764cefb06d6c4d85367750fb219720121 | |
parent | e8eff5cf350b24ad628de8092d7927eab1e23c32 (diff) |
afr: Don't write to sparse regions of sink.
Problem:
When data-self-heal-algorithm is set to 'full', the self-heal daemon (shd)
simply reads from the source and writes to the sink. If the source file
happens to be sparse (e.g. VM workloads), we end up actually writing 0s to
the corresponding regions of the sink, causing it to lose its sparseness.
Fix:
If the source file is sparse, and the data read from both the source and
the sink is all zeros for a given range, skip writing that range to the sink.
Change-Id: I787b06a553803247f43a40c00139cb483a22f9ca
BUG: 1166020
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/9480
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
-rw-r--r-- | tests/basic/afr/sparse-file-self-heal.t | 17 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 41 |
2 files changed, 56 insertions, 2 deletions
diff --git a/tests/basic/afr/sparse-file-self-heal.t b/tests/basic/afr/sparse-file-self-heal.t index 726af0710d5..1bc915e062c 100644 --- a/tests/basic/afr/sparse-file-self-heal.t +++ b/tests/basic/afr/sparse-file-self-heal.t @@ -17,6 +17,7 @@ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0; TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k TEST dd if=/dev/urandom of=$M0/bigger2big count=1 bs=2048k TEST dd if=/dev/urandom of=$M0/big2bigger count=1 bs=1024k +TEST truncate -s 1G $M0/FILE TEST kill_brick $V0 $H0 $B0/${V0}0 @@ -38,6 +39,10 @@ bigger2big_md5sum=$(md5sum $M0/bigger2big | awk '{print $1}') TEST truncate -s 2M $M0/big2bigger big2bigger_md5sum=$(md5sum $M0/big2bigger | awk '{print $1}') +#Write data to file and restore its sparseness +TEST dd if=/dev/urandom of=$M0/FILE count=1 bs=131072 +TEST truncate -s 1G $M0/FILE + $CLI volume start $V0 force EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status @@ -66,6 +71,9 @@ EXPECT "0" has_holes $B0/${V0}0/small EXPECT "0" has_holes $B0/${V0}0/bigger2big EXPECT "1" has_holes $B0/${V0}0/big2bigger +#Check that self-heal has not written 0s to sink and made it non-sparse. 
+USED_KB=`du -s $B0/${V0}0/FILE|cut -f1` +TEST [ $USED_KB -lt 1000000 ] TEST rm -f $M0/* #check the same tests with diff self-heal @@ -74,6 +82,7 @@ TEST $CLI volume set $V0 data-self-heal-algorithm diff TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k TEST dd if=/dev/urandom of=$M0/big2bigger count=1 bs=1024k TEST dd if=/dev/urandom of=$M0/bigger2big count=1 bs=2048k +TEST truncate -s 1G $M0/FILE TEST kill_brick $V0 $H0 $B0/${V0}0 @@ -95,6 +104,10 @@ bigger2big_md5sum=$(md5sum $M0/bigger2big | awk '{print $1}') TEST truncate -s 2M $M0/big2bigger big2bigger_md5sum=$(md5sum $M0/big2bigger | awk '{print $1}') +#Write data to file and restore its sparseness +TEST dd if=/dev/urandom of=$M0/FILE count=1 bs=131072 +TEST truncate -s 1G $M0/FILE + $CLI volume start $V0 force EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status @@ -118,4 +131,8 @@ EXPECT "1" has_holes $B0/${V0}0/big2bigger EXPECT "0" has_holes $B0/${V0}0/bigger2big EXPECT "0" has_holes $B0/${V0}0/small +#Check that self-heal has not written 0s to sink and made it non-sparse. 
+USED_KB=`du -s $B0/${V0}0/FILE|cut -f1` +TEST [ $USED_KB -lt 1000000 ] + cleanup diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 45a099cec86..1b1d57d0048 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -112,11 +112,38 @@ __afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this, } +static gf_boolean_t +__afr_is_sink_zero_filled (xlator_t *this, fd_t *fd, size_t size, + off_t offset, int sink) +{ + afr_private_t *priv = NULL; + struct iobref *iobref = NULL; + struct iovec *iovec = NULL; + int count = 0; + int ret = 0; + gf_boolean_t zero_filled = _gf_false; + + priv = this->private; + ret = syncop_readv (priv->children[sink], fd, size, offset, 0, &iovec, + &count, &iobref); + if (ret < 0) + goto out; + ret = iov_0filled (iovec, count); + if (!ret) + zero_filled = _gf_true; +out: + if (iovec) + GF_FREE (iovec); + if (iobref) + iobref_unref (iobref); + return zero_filled; +} + static int __afr_selfheal_data_read_write (call_frame_t *frame, xlator_t *this, fd_t *fd, int source, unsigned char *healed_sinks, off_t offset, size_t size, - struct afr_reply *replies) + struct afr_reply *replies, int type) { struct iovec *iovec = NULL; int count = 0; @@ -166,6 +193,16 @@ __afr_selfheal_data_read_write (call_frame_t *frame, xlator_t *this, fd_t *fd, (iov_0filled (iovec, count) == 0)) continue; + /* Avoid filling up sparse regions of the sink with 0-filled + * writes.*/ + if (type == AFR_SELFHEAL_DATA_FULL && + HAS_HOLES ((&replies[source].poststat)) && + ((offset + size) <= replies[i].poststat.ia_size) && + (iov_0filled (iovec, count) == 0) && + __afr_is_sink_zero_filled (this, fd, size, offset, i)) { + continue; + } + ret = syncop_writev (priv->children[i], fd, iovec, count, offset, iobref, 0); if (ret != iov_length (iovec, count)) { @@ -217,7 +254,7 @@ afr_selfheal_data_block (call_frame_t *frame, xlator_t *this, fd_t *fd, ret = 
__afr_selfheal_data_read_write (frame, this, fd, source, healed_sinks, offset, size, - replies); + replies, type); } unlock: afr_selfheal_uninodelk (frame, this, fd->inode, this->name, |