summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Ravishankar N <ravishankar@redhat.com> 2015-01-23 11:12:54 +0530
committer: Pranith Kumar Karampuri <pkarampu@redhat.com> 2015-01-30 04:02:43 -0800
commit: 0f84f8e8048367737a2dd6ddf0c57403e757441d (patch)
tree: ec5a1e4764cefb06d6c4d85367750fb219720121
parent: e8eff5cf350b24ad628de8092d7927eab1e23c32 (diff)
afr: Don't write to sparse regions of sink.
Problem:
When data-self-heal-algorithm is set to 'full', shd just reads from source and writes to sink. If the source file happened to be sparse (VM workloads), we end up actually writing 0s to the corresponding regions of the sink, causing it to lose its sparseness.

Fix:
If the source file is sparse, and the data read from source and sink are both zeros for that range, skip writing that range to the sink.

Change-Id: I787b06a553803247f43a40c00139cb483a22f9ca
BUG: 1166020
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/9480
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Tested-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
-rw-r--r-- tests/basic/afr/sparse-file-self-heal.t 17
-rw-r--r-- xlators/cluster/afr/src/afr-self-heal-data.c 41
2 files changed, 56 insertions, 2 deletions
diff --git a/tests/basic/afr/sparse-file-self-heal.t b/tests/basic/afr/sparse-file-self-heal.t
index 726af0710d5..1bc915e062c 100644
--- a/tests/basic/afr/sparse-file-self-heal.t
+++ b/tests/basic/afr/sparse-file-self-heal.t
@@ -17,6 +17,7 @@ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k
TEST dd if=/dev/urandom of=$M0/bigger2big count=1 bs=2048k
TEST dd if=/dev/urandom of=$M0/big2bigger count=1 bs=1024k
+TEST truncate -s 1G $M0/FILE
TEST kill_brick $V0 $H0 $B0/${V0}0
@@ -38,6 +39,10 @@ bigger2big_md5sum=$(md5sum $M0/bigger2big | awk '{print $1}')
TEST truncate -s 2M $M0/big2bigger
big2bigger_md5sum=$(md5sum $M0/big2bigger | awk '{print $1}')
+#Write data to file and restore its sparseness
+TEST dd if=/dev/urandom of=$M0/FILE count=1 bs=131072
+TEST truncate -s 1G $M0/FILE
+
$CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
@@ -66,6 +71,9 @@ EXPECT "0" has_holes $B0/${V0}0/small
EXPECT "0" has_holes $B0/${V0}0/bigger2big
EXPECT "1" has_holes $B0/${V0}0/big2bigger
+#Check that self-heal has not written 0s to sink and made it non-sparse.
+USED_KB=`du -s $B0/${V0}0/FILE|cut -f1`
+TEST [ $USED_KB -lt 1000000 ]
TEST rm -f $M0/*
#check the same tests with diff self-heal
@@ -74,6 +82,7 @@ TEST $CLI volume set $V0 data-self-heal-algorithm diff
TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k
TEST dd if=/dev/urandom of=$M0/big2bigger count=1 bs=1024k
TEST dd if=/dev/urandom of=$M0/bigger2big count=1 bs=2048k
+TEST truncate -s 1G $M0/FILE
TEST kill_brick $V0 $H0 $B0/${V0}0
@@ -95,6 +104,10 @@ bigger2big_md5sum=$(md5sum $M0/bigger2big | awk '{print $1}')
TEST truncate -s 2M $M0/big2bigger
big2bigger_md5sum=$(md5sum $M0/big2bigger | awk '{print $1}')
+#Write data to file and restore its sparseness
+TEST dd if=/dev/urandom of=$M0/FILE count=1 bs=131072
+TEST truncate -s 1G $M0/FILE
+
$CLI volume start $V0 force
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
@@ -118,4 +131,8 @@ EXPECT "1" has_holes $B0/${V0}0/big2bigger
EXPECT "0" has_holes $B0/${V0}0/bigger2big
EXPECT "0" has_holes $B0/${V0}0/small
+#Check that self-heal has not written 0s to sink and made it non-sparse.
+USED_KB=`du -s $B0/${V0}0/FILE|cut -f1`
+TEST [ $USED_KB -lt 1000000 ]
+
cleanup
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 45a099cec86..1b1d57d0048 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -112,11 +112,38 @@ __afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this,
}
+static gf_boolean_t
+__afr_is_sink_zero_filled (xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, int sink)
+{
+ afr_private_t *priv = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec *iovec = NULL;
+ int count = 0;
+ int ret = 0;
+ gf_boolean_t zero_filled = _gf_false;
+
+ priv = this->private;
+ ret = syncop_readv (priv->children[sink], fd, size, offset, 0, &iovec,
+ &count, &iobref);
+ if (ret < 0)
+ goto out;
+ ret = iov_0filled (iovec, count);
+ if (!ret)
+ zero_filled = _gf_true;
+out:
+ if (iovec)
+ GF_FREE (iovec);
+ if (iobref)
+ iobref_unref (iobref);
+ return zero_filled;
+}
+
static int
__afr_selfheal_data_read_write (call_frame_t *frame, xlator_t *this, fd_t *fd,
int source, unsigned char *healed_sinks,
off_t offset, size_t size,
- struct afr_reply *replies)
+ struct afr_reply *replies, int type)
{
struct iovec *iovec = NULL;
int count = 0;
@@ -166,6 +193,16 @@ __afr_selfheal_data_read_write (call_frame_t *frame, xlator_t *this, fd_t *fd,
(iov_0filled (iovec, count) == 0))
continue;
+ /* Avoid filling up sparse regions of the sink with 0-filled
+ * writes.*/
+ if (type == AFR_SELFHEAL_DATA_FULL &&
+ HAS_HOLES ((&replies[source].poststat)) &&
+ ((offset + size) <= replies[i].poststat.ia_size) &&
+ (iov_0filled (iovec, count) == 0) &&
+ __afr_is_sink_zero_filled (this, fd, size, offset, i)) {
+ continue;
+ }
+
ret = syncop_writev (priv->children[i], fd, iovec, count,
offset, iobref, 0);
if (ret != iov_length (iovec, count)) {
@@ -217,7 +254,7 @@ afr_selfheal_data_block (call_frame_t *frame, xlator_t *this, fd_t *fd,
ret = __afr_selfheal_data_read_write (frame, this, fd, source,
healed_sinks, offset, size,
- replies);
+ replies, type);
}
unlock:
afr_selfheal_uninodelk (frame, this, fd->inode, this->name,