| author | M. Mohan Kumar <mohan@in.ibm.com> | 2013-11-09 14:51:53 +0530 |
|---|---|---|
| committer | Vijay Bellur <vbellur@redhat.com> | 2013-11-10 21:25:49 -0800 |
| commit | c8fef37c5d566c906728b5f6f27baaa9a8d2a20d (patch) | |
| tree | 03c833446bc73bfa3da6c621315b590c0d65c748 /rpc | |
| parent | d5335f9e40f6e9533f7812d153b9727bcc04aa4e (diff) | |
glusterfs: zerofill support
Add support for a new ZEROFILL fop. Zerofill writes zeroes to a file in
the specified range. This fop is useful when a whole file needs to be
initialized with zeroes, for example when provisioning zero-filled VM
disk images or scrubbing VM disk images.
A client/application can issue this FOP to zero out a region; the
Gluster server then zeroes out the required range of bytes, i.e.
server-offloaded zeroing. In the absence of this fop, the
client/application has to issue repeated write (zero) fops to the
server, which is very inefficient because of the overhead involved in
RPC calls and acknowledgements.
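To make that overhead concrete, here is a minimal sketch (not from this patch; the buffer and chunk sizes are arbitrary) of the fallback a client is otherwise forced into: zeroing a range with repeated writes, each of which turns into a separate WRITE RPC on a network filesystem.

```c
/* Sketch: the slow path ZEROFILL replaces - zero 'len' bytes at
 * 'offset' by repeatedly writing a zero-filled buffer. */
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

static int
zerofill_by_writes (int fd, off_t offset, off_t len)
{
        char buf[4096];

        memset (buf, 0, sizeof (buf));
        while (len > 0) {
                size_t  chunk = len > (off_t) sizeof (buf)
                                ? sizeof (buf) : (size_t) len;
                ssize_t ret   = pwrite (fd, buf, chunk, offset);

                if (ret < 0)
                        return -1;  /* every chunk is one RPC round trip */
                offset += ret;
                len    -= ret;
        }
        return 0;
}
```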
WRITESAME is a SCSI T10 command that takes a block of data as input and
writes the same data to other blocks; this write is handled completely
within the storage and is hence known as an offload. Linux now has
support for the SCSI WRITESAME command, which is exposed to the user in
the form of the BLKZEROOUT ioctl. The BD xlator can exploit the
BLKZEROOUT ioctl to implement this fop, so zeroing-out operations can
be completely offloaded to the storage device, making them highly
efficient.
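For reference, a minimal sketch of that kernel interface (the device path is hypothetical): BLKZEROOUT takes a {start, length} pair in bytes, and the block layer zeroes that range inside the device where the hardware supports it.

```c
/* Sketch: offload zeroing of a 1 MiB range to the block layer via
 * the Linux-specific BLKZEROOUT ioctl. */
#include <fcntl.h>
#include <stdint.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

int main (void)
{
        int fd = open ("/dev/sdX", O_WRONLY);   /* hypothetical device */
        if (fd < 0)
                return 1;

        uint64_t range[2] = { 0, 1 << 20 };     /* start, length in bytes */
        if (ioctl (fd, BLKZEROOUT, &range) < 0) {
                close (fd);
                return 1;               /* zeroing happens in-device */
        }
        close (fd);
        return 0;
}
```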
The fop takes two arguments, offset and size: it zeroes out 'size'
bytes in an opened file starting from the 'offset' position.
This patch adds zerofill support to the following areas:
- libglusterfs
- io-stats
- performance/md-cache,open-behind
- quota
- cluster/afr,dht,stripe
- rpc/xdr
- protocol/client,server
- io-threads
- marker
- storage/posix
- libgfapi
Client applications can exploit this fop by using glfs_zerofill(),
introduced in libgfapi. FUSE support for this fop has not been added,
as there is no system call for it.
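A minimal sketch of calling the new libgfapi entry point (the volume name, server host, and file path here are hypothetical, not from the patch):

```c
/* Sketch: zero out the first 1 GiB of a file on a Gluster volume
 * with a single server-offloaded ZEROFILL fop. */
#include <fcntl.h>
#include <glusterfs/api/glfs.h>

int main (void)
{
        glfs_t *fs = glfs_new ("testvol");      /* hypothetical volume */
        if (!fs)
                return 1;
        glfs_set_volfile_server (fs, "tcp", "server1", 24007);
        if (glfs_init (fs))
                return 1;

        glfs_fd_t *fd = glfs_open (fs, "/vm.img", O_RDWR);
        if (!fd)
                return 1;

        /* One RPC round trip; the brick zeroes the range itself. */
        int ret = glfs_zerofill (fd, 0, 1LL << 30);

        glfs_close (fd);
        glfs_fini (fs);
        return ret ? 1 : 0;
}
```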
Changes from previous version 3:
* Removed redundant memory failure log messages
Changes from previous version 2:
* Rebased and fixed build error
Changes from previous version 1:
* Rebased for latest master
TODO:
* Add zerofill support to trace xlator
* Expose zerofill capability as part of gluster volume info
Here is a performance comparison of server-offloaded zerofill vs
zeroing out using repeated writes.
[root@llmvm02 remote]# time ./offloaded aakash-test log 20
real 3m34.155s
user 0m0.018s
sys 0m0.040s
[root@llmvm02 remote]# time ./manually aakash-test log 20
real 4m23.043s
user 0m2.197s
sys 0m14.457s
[root@llmvm02 remote]# time ./offloaded aakash-test log 25;
real 4m28.363s
user 0m0.021s
sys 0m0.025s
[root@llmvm02 remote]# time ./manually aakash-test log 25
real 5m34.278s
user 0m2.957s
sys 0m18.808s
The second argument, log, is a file used for logging, and the third
argument is the size in GB.
As we can see, this fop gives a performance improvement of around 20%.
Change-Id: I081159f5f7edde0ddb78169fb4c21c776ec91a18
BUG: 1028673
Signed-off-by: Aakash Lal Das <aakash@linux.vnet.ibm.com>
Signed-off-by: M. Mohan Kumar <mohan@in.ibm.com>
Reviewed-on: http://review.gluster.org/5327
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'rpc')
-rw-r--r-- | rpc/rpc-lib/src/protocol-common.h | 1
-rw-r--r-- | rpc/xdr/src/glusterfs3-xdr.c | 41
-rw-r--r-- | rpc/xdr/src/glusterfs3-xdr.h | 29
-rw-r--r-- | rpc/xdr/src/glusterfs3-xdr.x | 17
4 files changed, 88 insertions, 0 deletions
```diff
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index cbd79bd9dbc..adec7b63849 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -58,6 +58,7 @@ enum gf_fop_procnum {
         GFS3_OP_FREMOVEXATTR,
         GFS3_OP_FALLOCATE,
         GFS3_OP_DISCARD,
+        GFS3_OP_ZEROFILL,
         GFS3_OP_MAXVALUE,
 } ;
diff --git a/rpc/xdr/src/glusterfs3-xdr.c b/rpc/xdr/src/glusterfs3-xdr.c
index 4e9791b2077..3205c551e5b 100644
--- a/rpc/xdr/src/glusterfs3-xdr.c
+++ b/rpc/xdr/src/glusterfs3-xdr.c
@@ -1585,6 +1585,47 @@ xdr_gfs3_discard_rsp (XDR *xdrs, gfs3_discard_rsp *objp)
 }
 
 bool_t
+xdr_gfs3_zerofill_req (XDR *xdrs, gfs3_zerofill_req *objp)
+{
+        register int32_t *buf;
+        buf = NULL;
+
+        if (!xdr_opaque (xdrs, objp->gfid, 16))
+                return FALSE;
+        if (!xdr_quad_t (xdrs, &objp->fd))
+                return FALSE;
+        if (!xdr_u_quad_t (xdrs, &objp->offset))
+                return FALSE;
+        if (!xdr_u_quad_t (xdrs, &objp->size))
+                return FALSE;
+        if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val,
+                        (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+bool_t
+xdr_gfs3_zerofill_rsp (XDR *xdrs, gfs3_zerofill_rsp *objp)
+{
+        register int32_t *buf;
+        buf = NULL;
+
+        if (!xdr_int (xdrs, &objp->op_ret))
+                return FALSE;
+        if (!xdr_int (xdrs, &objp->op_errno))
+                return FALSE;
+        if (!xdr_gf_iatt (xdrs, &objp->statpre))
+                return FALSE;
+        if (!xdr_gf_iatt (xdrs, &objp->statpost))
+                return FALSE;
+        if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val,
+                        (u_int *) &objp->xdata.xdata_len, ~0))
+                return FALSE;
+        return TRUE;
+}
+
+
+bool_t
 xdr_gfs3_rchecksum_req (XDR *xdrs, gfs3_rchecksum_req *objp)
 {
         register int32_t *buf;
diff --git a/rpc/xdr/src/glusterfs3-xdr.h b/rpc/xdr/src/glusterfs3-xdr.h
index 9e5d2e67bcb..13566e69447 100644
--- a/rpc/xdr/src/glusterfs3-xdr.h
+++ b/rpc/xdr/src/glusterfs3-xdr.h
@@ -936,6 +936,31 @@ struct gfs3_discard_rsp {
 };
 typedef struct gfs3_discard_rsp gfs3_discard_rsp;
 
+struct gfs3_zerofill_req {
+        char gfid[16];
+        quad_t fd;
+        u_quad_t offset;
+        u_quad_t size;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_zerofill_req gfs3_zerofill_req;
+
+struct gfs3_zerofill_rsp {
+        int op_ret;
+        int op_errno;
+        struct gf_iatt statpre;
+        struct gf_iatt statpost;
+        struct {
+                u_int xdata_len;
+                char *xdata_val;
+        } xdata;
+};
+typedef struct gfs3_zerofill_rsp gfs3_zerofill_rsp;
+
+
 struct gfs3_rchecksum_req {
         quad_t fd;
         u_quad_t offset;
@@ -1235,6 +1260,8 @@ extern bool_t xdr_gfs3_fallocate_req (XDR *, gfs3_fallocate_req*);
 extern bool_t xdr_gfs3_fallocate_rsp (XDR *, gfs3_fallocate_rsp*);
 extern bool_t xdr_gfs3_discard_req (XDR *, gfs3_discard_req*);
 extern bool_t xdr_gfs3_discard_rsp (XDR *, gfs3_discard_rsp*);
+extern bool_t xdr_gfs3_zerofill_req (XDR *, gfs3_zerofill_req*);
+extern bool_t xdr_gfs3_zerofill_rsp (XDR *, gfs3_zerofill_rsp*);
 extern bool_t xdr_gfs3_rchecksum_req (XDR *, gfs3_rchecksum_req*);
 extern bool_t xdr_gfs3_rchecksum_rsp (XDR *, gfs3_rchecksum_rsp*);
 extern bool_t xdr_gf_setvolume_req (XDR *, gf_setvolume_req*);
@@ -1333,6 +1360,8 @@ extern bool_t xdr_gfs3_fallocate_req ();
 extern bool_t xdr_gfs3_fallocate_rsp ();
 extern bool_t xdr_gfs3_discard_req ();
 extern bool_t xdr_gfs3_discard_rsp ();
+extern bool_t xdr_gfs3_zerofill_req ();
+extern bool_t xdr_gfs3_zerofill_rsp ();
 extern bool_t xdr_gfs3_rchecksum_req ();
 extern bool_t xdr_gfs3_rchecksum_rsp ();
 extern bool_t xdr_gf_setvolume_req ();
diff --git a/rpc/xdr/src/glusterfs3-xdr.x b/rpc/xdr/src/glusterfs3-xdr.x
index e2b086b1d1a..1edbda3ada9 100644
--- a/rpc/xdr/src/glusterfs3-xdr.x
+++ b/rpc/xdr/src/glusterfs3-xdr.x
@@ -599,6 +599,23 @@ struct gfs3_fstat_req {
         opaque xdata<>; /* Extra data */
 } ;
 
+ struct gfs3_zerofill_req {
+        opaque gfid[16];
+        hyper fd;
+        unsigned hyper offset;
+        unsigned hyper size;
+        opaque xdata<>;
+} ;
+
+ struct gfs3_zerofill_rsp {
+        int op_ret;
+        int op_errno;
+        struct gf_iatt statpre;
+        struct gf_iatt statpost;
+        opaque xdata<>;
+} ;
+
+
 struct gfs3_rchecksum_req {
         hyper fd;
         unsigned hyper offset;
```
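As a rough illustration of how the generated codec is driven, here is a hedged sketch of an encode/decode round trip over a memory stream (assumes linking against the code rpcgen produces from glusterfs3-xdr.x; the buffer size is arbitrary):

```c
/* Sketch: round-trip a gfs3_zerofill_req through the generated XDR
 * routine using a memory stream. */
#include <string.h>
#include <rpc/xdr.h>
#include "glusterfs3-xdr.h"

int main (void)
{
        char              buf[1024];
        XDR               xdrs;
        gfs3_zerofill_req req;
        gfs3_zerofill_req out;

        memset (&req, 0, sizeof (req));
        req.fd     = 3;                 /* remote fd as seen by the server */
        req.offset = 0;
        req.size   = 1 << 20;           /* zero out 1 MiB */

        /* Serialize the request into the buffer. */
        xdrmem_create (&xdrs, buf, sizeof (buf), XDR_ENCODE);
        if (!xdr_gfs3_zerofill_req (&xdrs, &req))
                return 1;

        /* Deserialize it back out again. */
        memset (&out, 0, sizeof (out));
        xdrmem_create (&xdrs, buf, sizeof (buf), XDR_DECODE);
        if (!xdr_gfs3_zerofill_req (&xdrs, &out))
                return 1;

        return (out.offset == req.offset && out.size == req.size) ? 0 : 1;
}
```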