diff options
| -rw-r--r-- | tests/basic/afr/sparse-file-self-heal.t | 21 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 57 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 2 | ||||
| -rw-r--r-- | xlators/storage/posix/src/posix.c | 43 | 
4 files changed, 99 insertions, 24 deletions
diff --git a/tests/basic/afr/sparse-file-self-heal.t b/tests/basic/afr/sparse-file-self-heal.t index 026ada7fe5e..d540ef786d1 100644 --- a/tests/basic/afr/sparse-file-self-heal.t +++ b/tests/basic/afr/sparse-file-self-heal.t @@ -2,6 +2,8 @@  #This file checks if self-heal of files with holes is working properly or not  #bigger is 2M, big is 1M, small is anything less +#Also tests if non-sparse files with zeroes in it are healed correctly w.r.t +#disk usage.  . $(dirname $0)/../../include.rc  . $(dirname $0)/../../volume.rc @@ -43,6 +45,9 @@ big2bigger_md5sum=$(md5sum $M0/big2bigger | awk '{print $1}')  TEST dd if=/dev/urandom of=$M0/FILE count=1 bs=131072  TEST truncate -s 1G $M0/FILE +#Create a non-sparse file containing zeroes. +TEST dd if=/dev/zero of=$M0/zeroedfile bs=1024 count=1024 +  $CLI volume start $V0 force  EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0  EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status @@ -79,6 +84,13 @@ EXPECT "1" has_holes $B0/${V0}0/big2bigger  #Check that self-heal has not written 0s to sink and made it non-sparse.  USED_KB=`du -s $B0/${V0}0/FILE|cut -f1`  TEST [ $USED_KB -lt 1000000 ] + +#Check that the non-sparse file consumes the same disk space in both bricks post +#self-heal +USED_KB1=`du -s $B0/${V0}0/zeroedfile|cut -f1` +USED_KB2=`du -s $B0/${V0}1/zeroedfile|cut -f1` +TEST [ $USED_KB1 -eq $USED_KB2 ] +  TEST rm -f $M0/*  #check the same tests with diff self-heal @@ -113,6 +125,9 @@ big2bigger_md5sum=$(md5sum $M0/big2bigger | awk '{print $1}')  TEST dd if=/dev/urandom of=$M0/FILE count=1 bs=131072  TEST truncate -s 1G $M0/FILE +#Create a non-sparse file containing zeroes. +TEST dd if=/dev/zero of=$M0/zeroedfile bs=1024 count=1024 +  $CLI volume start $V0 force  EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0  EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status @@ -145,4 +160,10 @@ EXPECT "0" has_holes $B0/${V0}0/small  USED_KB=`du -s $B0/${V0}0/FILE|cut -f1`  TEST [ $USED_KB -lt 1000000 ] +#Check that the non-sparse file consumes the same disk space in both bricks post +#self-heal. +USED_KB1=`du -s $B0/${V0}0/zeroedfile|cut -f1` +USED_KB2=`du -s $B0/${V0}1/zeroedfile|cut -f1` +TEST [ $USED_KB1 -eq $USED_KB2 ] +  cleanup diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 36d658fa454..4529462c3a9 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -28,13 +28,18 @@ __checksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,  		dict_t *xdata)  {  	afr_local_t *local = NULL; +        struct afr_reply *replies = NULL;  	int i = (long) cookie;  	local = frame->local; - -	local->replies[i].valid = 1; -	local->replies[i].op_ret = op_ret; -	local->replies[i].op_errno = op_errno; +        replies = local->replies; + +	replies[i].valid = 1; +	replies[i].op_ret = op_ret; +	replies[i].op_errno = op_errno; +        if (xdata) +                replies[i].buf_has_zeroes = dict_get_str_boolean (xdata, +                                                   "buf-has-zeroes", _gf_false);  	if (strong)  		memcpy (local->replies[i].checksum, strong, MD5_DIGEST_LENGTH); @@ -70,19 +75,23 @@ attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,  static gf_boolean_t -__afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this, -				     fd_t *fd, int source, -				     unsigned char *healed_sinks, -				     off_t offset, size_t size) +__afr_can_skip_data_block_heal (call_frame_t *frame, xlator_t *this, fd_t *fd, +                                int source, unsigned char *healed_sinks, +				off_t offset, size_t size, +                                struct iatt *poststat)  {  	afr_private_t *priv = NULL;  	afr_local_t *local = NULL;  	unsigned char *wind_subvols = NULL; +        gf_boolean_t checksum_match = _gf_true; +        dict_t *xdata = NULL;  	int i = 0;  	priv = this->private;  	local = frame->local; - +        xdata = dict_new(); +        if (xdata) +                i = dict_set_int32 (xdata, "check-zero-filled", 1);  	wind_subvols = alloca0 (priv->child_count);  	for (i = 0; i < priv->child_count; i++) {  		if (i == source || healed_sinks[i]) @@ -90,7 +99,9 @@ __afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this,  	}  	AFR_ONLIST (wind_subvols, frame, __checksum_cbk, rchecksum, fd, -		    offset, size, NULL); +		    offset, size, xdata); +        if (xdata) +                dict_unref (xdata);  	if (!local->replies[source].valid || local->replies[source].op_ret != 0)  		return _gf_false; @@ -101,12 +112,26 @@ __afr_selfheal_data_checksums_match (call_frame_t *frame, xlator_t *this,                  if (local->replies[i].valid) {                          if (memcmp (local->replies[source].checksum,                                      local->replies[i].checksum, -                                    MD5_DIGEST_LENGTH)) -                                return _gf_false; +                                    MD5_DIGEST_LENGTH)) { +                                checksum_match = _gf_false; +                                break; +                        }                  }  	} -	return _gf_true; +        if (checksum_match) { +                if (HAS_HOLES (poststat)) +                        return _gf_true; + +                /* For non-sparse files, we might be better off writing the +                 * zeroes to sinks to avoid mismatch of disk-usage in bricks. */ +                if (local->replies[source].buf_has_zeroes) +                        return _gf_false; +                else +                        return _gf_true; +        } + +        return _gf_false;  } @@ -220,7 +245,6 @@ __afr_selfheal_data_read_write (call_frame_t *frame, xlator_t *this, fd_t *fd,  	return ret;  } -  static int  afr_selfheal_data_block (call_frame_t *frame, xlator_t *this, fd_t *fd,  			 int source, unsigned char *healed_sinks, off_t offset, @@ -244,8 +268,9 @@ afr_selfheal_data_block (call_frame_t *frame, xlator_t *this, fd_t *fd,  		}  		if (type == AFR_SELFHEAL_DATA_DIFF && -		    __afr_selfheal_data_checksums_match (frame, this, fd, source, -							 healed_sinks, offset, size)) { +		    __afr_can_skip_data_block_heal (frame, this, fd, source, +					            healed_sinks, offset, size, +                                                   &replies[source].poststat)) {  			ret = 0;  			goto unlock;  		} diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index bd4ff655546..28147e00579 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -265,7 +265,9 @@ struct afr_reply {  	struct iatt preparent;  	struct iatt preparent2;  	struct iatt postparent2; +        /* For rchecksum */  	uint8_t checksum[MD5_DIGEST_LENGTH]; +        gf_boolean_t buf_has_zeroes;  };  typedef enum { diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index bc4e4904b74..b544e3739c6 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -5895,9 +5895,13 @@ posix_rchecksum (call_frame_t *frame, xlator_t *this,          int                     op_ret          = -1;          int                     op_errno        = 0;          int                     ret             = 0; +        ssize_t                 bytes_read      = 0;          int32_t                 weak_checksum   = 0; +        int32_t                 zerofillcheck   = 0;          unsigned char           strong_checksum[MD5_DIGEST_LENGTH] = {0};          struct posix_private    *priv           = NULL; +        dict_t                  *rsp_xdata      = NULL; +        gf_boolean_t            buf_has_zeroes  = _gf_false;          VALIDATE_OR_GOTO (frame, out);          VALIDATE_OR_GOTO (this, out); @@ -5912,6 +5916,12 @@ posix_rchecksum (call_frame_t *frame, xlator_t *this,                  goto out;          } +        rsp_xdata = dict_new(); +        if (!rsp_xdata) { +                op_errno = ENOMEM; +                goto out; +        } +          ret = posix_fd_ctx_get (fd, this, &pfd);          if (ret < 0) {                  gf_msg (this->name, GF_LOG_WARNING, -ret, P_MSG_PFD_NULL, @@ -5927,12 +5937,12 @@ posix_rchecksum (call_frame_t *frame, xlator_t *this,                  if (priv->aio_capable && priv->aio_init_done)                          __posix_fd_set_odirect (fd, pfd, 0, offset, len); -                ret = pread (_fd, buf, len, offset); -                if (ret < 0) { +                bytes_read = pread (_fd, buf, len, offset); +                if (bytes_read < 0) {                          gf_msg (this->name, GF_LOG_WARNING, errno,                                  P_MSG_PREAD_FAILED, -                                "pread of %d bytes returned %d ", -                                len, ret); +                                "pread of %d bytes returned %ld ", +                                len, bytes_read);                          op_errno = errno;                  } @@ -5940,17 +5950,34 @@ posix_rchecksum (call_frame_t *frame, xlator_t *this,          }          UNLOCK (&fd->lock); -        if (ret < 0) +        if (bytes_read < 0)                  goto out; +        if (xdata && dict_get_int32 (xdata, "check-zero-filled", +                                     &zerofillcheck) == 0) { +                buf_has_zeroes = (mem_0filled (buf, bytes_read)) ? _gf_false : +                                 _gf_true; +                ret = dict_set_uint32 (rsp_xdata, "buf-has-zeroes", +                                       buf_has_zeroes); +                if (ret) { +                        gf_msg (this->name, GF_LOG_WARNING, -ret, +                                P_MSG_DICT_SET_FAILED, "%s: Failed to set " +                                "dictionary value for key: %s", +                                uuid_utoa (fd->inode->gfid), "buf-has-zeroes"); +                        op_errno = -ret; +                        goto out; +                } +        }          weak_checksum = gf_rsync_weak_checksum ((unsigned char *) buf, (size_t) ret); -        gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) ret, (unsigned char *) strong_checksum); +        gf_rsync_strong_checksum ((unsigned char *) buf, (size_t) bytes_read, +                                  (unsigned char *) strong_checksum);          op_ret = 0;  out:          STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, -                             weak_checksum, strong_checksum, NULL); - +                             weak_checksum, strong_checksum, rsp_xdata); +        if (rsp_xdata) +                dict_unref (rsp_xdata);          GF_FREE (alloc_buf);          return 0;  | 
