summaryrefslogtreecommitdiffstats
path: root/xlators/cluster
diff options
context:
space:
mode:
author: Ravishankar N <ravishankar@redhat.com> 2015-10-21 21:05:46 +0530
committer: Pranith Kumar Karampuri <pkarampu@redhat.com> 2015-10-29 01:40:19 -0700
commit: 50646435b4076cfb30d7ebabf2d688f91c957cec (patch)
tree: 9ee11abc73bdb94492d42b0ed36e0ae11cf37ff2 /xlators/cluster
parent: 0ce29bbd6a1cc459d4f4ffc50a4658988ef52039 (diff)
afr: write zeros to sink for non-sparse files
Backport of http://review.gluster.org/#/c/12371/ Problem: If a file is created with zeroes ('dd', 'fallocate' etc.) when a brick is down, the self-heal does not write the zeroes to the sink after it comes up. Consequently, there is a mismatch in disk-usage amongst the bricks of the replica. Fix: If we definitely know that the file is not sparse, then write the zeroes to the sink even if the checksums match. Change-Id: Ic739b3da5dbf47d99801c0e1743bb13aeb3af864 BUG: 1275921 Signed-off-by: Ravishankar N <ravishankar@redhat.com> Reviewed-on: http://review.gluster.org/12436 Tested-by: NetBSD Build System <jenkins@build.gluster.org> Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
Diffstat (limited to 'xlators/cluster')
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c57
-rw-r--r--xlators/cluster/afr/src/afr.h2
2 files changed, 43 insertions, 16 deletions
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 279f2faaaaf..6a3d6e13b1b 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -33,13 +33,18 @@ __checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
dict_t *xdata)
{
afr_local_t *local = NULL;
+ struct afr_reply *replies = NULL;
int i = (long) cookie;
local = frame->local;
-
- local->replies[i].valid = 1;
- local->replies[i].op_ret = op_ret;
- local->replies[i].op_errno = op_errno;
+ replies = local->replies;
+
+ replies[i].valid = 1;
+ replies[i].op_ret = op_ret;
+ replies[i].op_errno = op_errno;
+ if (xdata)
+ replies[i].buf_has_zeroes = dict_get_str_boolean (xdata,
+ "buf-has-zeroes", _gf_false);
if (strong)
memcpy (local->replies[i].checksum, strong, MD5_DIGEST_LENGTH);
@@ -75,19 +80,23 @@ attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
static gf_boolean_t
-__afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int source,
- unsigned char *healed_sinks,
- off_t offset, size_t size)
+__afr_can_skip_data_block_heal (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *healed_sinks,
+ off_t offset, size_t size,
+ struct iatt *poststat)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
unsigned char *wind_subvols = NULL;
+ gf_boolean_t checksum_match = _gf_true;
+ dict_t *xdata = NULL;
int i = 0;
priv = this->private;
local = frame->local;
-
+ xdata = dict_new();
+ if (xdata)
+ i = dict_set_int32 (xdata, "check-zero-filled", 1);
wind_subvols = alloca0 (priv->child_count);
for (i = 0; i < priv->child_count; i++) {
if (i == source || healed_sinks[i])
@@ -95,7 +104,9 @@ __afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this,
}
AFR_ONLIST (wind_subvols, frame, __checksum_cbk, rchecksum, fd,
- offset, size, NULL);
+ offset, size, xdata);
+ if (xdata)
+ dict_unref (xdata);
if (!local->replies[source].valid || local->replies[source].op_ret != 0)
return _gf_false;
@@ -106,12 +117,26 @@ __afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this,
if (local->replies[i].valid) {
if (memcmp (local->replies[source].checksum,
local->replies[i].checksum,
- MD5_DIGEST_LENGTH))
- return _gf_false;
+ MD5_DIGEST_LENGTH)) {
+ checksum_match = _gf_false;
+ break;
+ }
}
}
- return _gf_true;
+ if (checksum_match) {
+ if (HAS_HOLES (poststat))
+ return _gf_true;
+
+ /* For non-sparse files, we might be better off writing the
+ * zeroes to sinks to avoid mismatch of disk-usage in bricks. */
+ if (local->replies[source].buf_has_zeroes)
+ return _gf_false;
+ else
+ return _gf_true;
+ }
+
+ return _gf_false;
}
@@ -225,7 +250,6 @@ __afr_selfheal_data_read_write (call_frame_t *frame, xlator_t *this, fd_t *fd,
return ret;
}
-
static int
afr_selfheal_data_block (call_frame_t *frame, xlator_t *this, fd_t *fd,
int source, unsigned char *healed_sinks, off_t offset,
@@ -249,8 +273,9 @@ afr_selfheal_data_block (call_frame_t *frame, xlator_t *this, fd_t *fd,
}
if (type == AFR_SELFHEAL_DATA_DIFF &&
- __afr_selfheal_data_checksums_match (frame, this, fd, source,
- healed_sinks, offset, size)) {
+ __afr_can_skip_data_block_heal (frame, this, fd, source,
+ healed_sinks, offset, size,
+ &replies[source].poststat)) {
ret = 0;
goto unlock;
}
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index 9ee5ae36df0..c2fd1166d96 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -270,7 +270,9 @@ struct afr_reply {
struct iatt preparent;
struct iatt preparent2;
struct iatt postparent2;
+ /* For rchecksum */
uint8_t checksum[MD5_DIGEST_LENGTH];
+ gf_boolean_t buf_has_zeroes;
};
typedef enum {