diff options
author | Ravishankar N <ravishankar@redhat.com> | 2015-10-21 21:05:46 +0530 |
---|---|---|
committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2015-10-29 01:40:19 -0700 |
commit | 50646435b4076cfb30d7ebabf2d688f91c957cec (patch) | |
tree | 9ee11abc73bdb94492d42b0ed36e0ae11cf37ff2 /xlators/cluster | |
parent | 0ce29bbd6a1cc459d4f4ffc50a4658988ef52039 (diff) |
afr: write zeros to sink for non-sparse files
Backport of http://review.gluster.org/#/c/12371/
Problem: If a file is created with zeroes ('dd', 'fallocate' etc.) when
a brick is down, the self-heal does not write the zeroes to the sink
after it comes up. Consequently, there is a mismatch in disk-usage
amongst the bricks of the replica.
Fix: If we definitely know that the file is not sparse, then write the
zeroes to the sink even if the checksums match.
Change-Id: Ic739b3da5dbf47d99801c0e1743bb13aeb3af864
BUG: 1275921
Signed-off-by: Ravishankar N <ravishankar@redhat.com>
Reviewed-on: http://review.gluster.org/12436
Tested-by: NetBSD Build System <jenkins@build.gluster.org>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'xlators/cluster')
-rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 57 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 2 |
2 files changed, 43 insertions, 16 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 279f2faaaaf..6a3d6e13b1b 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -33,13 +33,18 @@ __checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this, dict_t *xdata) { afr_local_t *local = NULL; + struct afr_reply *replies = NULL; int i = (long) cookie; local = frame->local; - - local->replies[i].valid = 1; - local->replies[i].op_ret = op_ret; - local->replies[i].op_errno = op_errno; + replies = local->replies; + + replies[i].valid = 1; + replies[i].op_ret = op_ret; + replies[i].op_errno = op_errno; + if (xdata) + replies[i].buf_has_zeroes = dict_get_str_boolean (xdata, + "buf-has-zeroes", _gf_false); if (strong) memcpy (local->replies[i].checksum, strong, MD5_DIGEST_LENGTH); @@ -75,19 +80,23 @@ attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, static gf_boolean_t -__afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this, - fd_t *fd, int source, - unsigned char *healed_sinks, - off_t offset, size_t size) +__afr_can_skip_data_block_heal (call_frame_t *frame, xlator_t *this, fd_t *fd, + int source, unsigned char *healed_sinks, + off_t offset, size_t size, + struct iatt *poststat) { afr_private_t *priv = NULL; afr_local_t *local = NULL; unsigned char *wind_subvols = NULL; + gf_boolean_t checksum_match = _gf_true; + dict_t *xdata = NULL; int i = 0; priv = this->private; local = frame->local; - + xdata = dict_new(); + if (xdata) + i = dict_set_int32 (xdata, "check-zero-filled", 1); wind_subvols = alloca0 (priv->child_count); for (i = 0; i < priv->child_count; i++) { if (i == source || healed_sinks[i]) @@ -95,7 +104,9 @@ __afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this, } AFR_ONLIST (wind_subvols, frame, __checksum_cbk, rchecksum, fd, - offset, size, NULL); + offset, size, xdata); + if (xdata) + dict_unref (xdata); if 
(!local->replies[source].valid || local->replies[source].op_ret != 0) return _gf_false; @@ -106,12 +117,26 @@ __afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this, if (local->replies[i].valid) { if (memcmp (local->replies[source].checksum, local->replies[i].checksum, - MD5_DIGEST_LENGTH)) - return _gf_false; + MD5_DIGEST_LENGTH)) { + checksum_match = _gf_false; + break; + } } } - return _gf_true; + if (checksum_match) { + if (HAS_HOLES (poststat)) + return _gf_true; + + /* For non-sparse files, we might be better off writing the + * zeroes to sinks to avoid mismatch of disk-usage in bricks. */ + if (local->replies[source].buf_has_zeroes) + return _gf_false; + else + return _gf_true; + } + + return _gf_false; } @@ -225,7 +250,6 @@ __afr_selfheal_data_read_write (call_frame_t *frame, xlator_t *this, fd_t *fd, return ret; } - static int afr_selfheal_data_block (call_frame_t *frame, xlator_t *this, fd_t *fd, int source, unsigned char *healed_sinks, off_t offset, @@ -249,8 +273,9 @@ afr_selfheal_data_block (call_frame_t *frame, xlator_t *this, fd_t *fd, } if (type == AFR_SELFHEAL_DATA_DIFF && - __afr_selfheal_data_checksums_match (frame, this, fd, source, - healed_sinks, offset, size)) { + __afr_can_skip_data_block_heal (frame, this, fd, source, + healed_sinks, offset, size, + &replies[source].poststat)) { ret = 0; goto unlock; } diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 9ee5ae36df0..c2fd1166d96 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -270,7 +270,9 @@ struct afr_reply { struct iatt preparent; struct iatt preparent2; struct iatt postparent2; + /* For rchecksum */ uint8_t checksum[MD5_DIGEST_LENGTH]; + gf_boolean_t buf_has_zeroes; }; typedef enum { |