From d1ccc4e400728d90f2ef7904661f53deb7199123 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Wed, 8 May 2013 08:54:11 -0400 Subject: gluster: add fallocate fop support Implement support for the fallocate file operation. fallocate allocates blocks for a particular inode such that future writes to the associated region of the file are guaranteed not to fail with ENOSPC. This patch adds fallocate support to the following areas: - libglusterfs - mount/fuse - io-stats - performance/md-cache,open-behind - quota - cluster/afr,dht,stripe - rpc/xdr - protocol/client,server - io-threads - marker - storage/posix - libgfapi BUG: 949242 Change-Id: Ice8e61351f9d6115c5df68768bc844abbf0ce8bd Signed-off-by: Brian Foster Reviewed-on: http://review.gluster.org/4969 Tested-by: Gluster Build System Reviewed-by: Anand Avati --- api/examples/gfapi.py | 14 ++ api/src/glfs-fops.c | 34 +++ api/src/glfs.h | 2 + configure.ac | 13 ++ contrib/fuse-include/fuse_kernel.h | 9 + libglusterfs/src/call-stub.c | 65 ++++++ libglusterfs/src/call-stub.h | 16 ++ libglusterfs/src/compat.h | 6 + libglusterfs/src/defaults.c | 29 +++ libglusterfs/src/defaults.h | 18 ++ libglusterfs/src/globals.c | 1 + libglusterfs/src/glusterfs.h | 1 + libglusterfs/src/syncop.c | 31 +++ libglusterfs/src/syncop.h | 2 + libglusterfs/src/syscall.c | 23 ++ libglusterfs/src/syscall.h | 2 + libglusterfs/src/xlator.c | 1 + libglusterfs/src/xlator.h | 17 ++ rpc/rpc-lib/src/protocol-common.h | 1 + rpc/xdr/src/glusterfs3-xdr.c | 40 ++++ rpc/xdr/src/glusterfs3-xdr.h | 29 +++ rpc/xdr/src/glusterfs3-xdr.x | 17 ++ tests/bugs/bug-949242.t | 54 +++++ tests/fallocate.rc | 19 ++ xlators/cluster/afr/src/afr-common.c | 244 ++++++++++++++++++++++ xlators/cluster/afr/src/afr.c | 1 + xlators/cluster/afr/src/afr.h | 8 + xlators/cluster/dht/src/dht-common.h | 2 + xlators/cluster/dht/src/dht-inode-write.c | 140 +++++++++++++ xlators/cluster/dht/src/dht.c | 1 + xlators/cluster/stripe/src/stripe.c | 168 +++++++++++++++ xlators/debug/io-stats/src/io-stats.c | 26 +++ xlators/features/marker/src/marker.c | 69 ++++++ xlators/features/quota/src/quota.c | 172 +++++++++++++++ xlators/mount/fuse/src/fuse-bridge.c | 47 ++++- xlators/performance/io-threads/src/io-threads.c | 52 +++++ xlators/performance/md-cache/src/md-cache.c | 41 +++- xlators/performance/open-behind/src/open-behind.c | 19 ++ xlators/protocol/client/src/client-rpc-fops.c | 102 +++++++++ xlators/protocol/client/src/client.c | 35 ++++ xlators/protocol/server/src/server-rpc-fops.c | 119 +++++++++++ xlators/storage/posix/src/posix.c | 67 ++++++ 42 files changed, 1755 insertions(+), 2 deletions(-) create mode 100644 tests/bugs/bug-949242.t create mode 100644 tests/fallocate.rc diff --git a/api/examples/gfapi.py b/api/examples/gfapi.py index 7d7a5bab..8dfe2791 100755 --- a/api/examples/gfapi.py +++ b/api/examples/gfapi.py @@ -98,6 +98,9 @@ class File(object): def write (self, data, flags=0): return api.glfs_write(self.fd,data,len(data),flags) + def fallocate (self, mode, offset, len): + return api.glfs_fallocate(self.fd, mode, offset, len) + class Dir(object): def __init__ (self, fd): @@ -349,6 +352,16 @@ if __name__ == "__main__": return False, "wrong listxattr value %s" % repr(xattrs) return True, "listxattr worked" + def test_fallocate (vol, path, data): + mypath = path + ".io" + fd = vol.creat(mypath,os.O_WRONLY|os.O_EXCL,0644) + if not fd: + return False, "creat error" + rc = fd.fallocate(0, 0, 1024) + if rc != 0: + return False, "fallocate error" + return True, "fallocate worked" + test_list = ( test_create_write, test_open_read, @@ -363,6 +376,7 @@ if __name__ == "__main__": test_setxattr, test_getxattr, test_listxattr, + test_fallocate, ) ok_to_fail = ( diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c index 66e7d69f..4bc0bb1b 100644 --- a/api/src/glfs-fops.c +++ b/api/src/glfs-fops.c @@ -2631,6 +2631,40 @@ out: } +int +glfs_fallocate (struct glfs_fd *glfd, int keep_size, off_t offset, size_t len) +{ + int ret = -1; + xlator_t *subvol = NULL; + fd_t *fd = NULL; + + __glfs_entry_fd (glfd); + + subvol = glfs_active_subvol (glfd->fs); + if (!subvol) { + ret = -1; + errno = EIO; + goto out; + } + + fd = glfs_resolve_fd (glfd->fs, subvol, glfd); + if (!fd) { + ret = -1; + errno = EBADFD; + goto out; + } + + ret = syncop_fallocate (subvol, fd, keep_size, offset, len); +out: + if (fd) + fd_unref(fd); + + glfs_subvol_done (glfd->fs, subvol); + + return ret; +} + + int glfs_chdir (struct glfs *fs, const char *path) { diff --git a/api/src/glfs.h b/api/src/glfs.h index f472ca4e..3888a103 100644 --- a/api/src/glfs.h +++ b/api/src/glfs.h @@ -448,6 +448,8 @@ int glfs_lremovexattr (glfs_t *fs, const char *path, const char *name); int glfs_fremovexattr (glfs_fd_t *fd, const char *name); +int glfs_fallocate(glfs_fd_t *fd, int keep_size, off_t offset, size_t len); + char *glfs_getcwd (glfs_t *fs, char *buf, size_t size); int glfs_chdir (glfs_t *fs, const char *path); diff --git a/configure.ac b/configure.ac index 2c54c3be..2a47d17a 100644 --- a/configure.ac +++ b/configure.ac @@ -233,6 +233,8 @@ AC_CHECK_HEADERS([sys/extattr.h]) AC_CHECK_HEADERS([openssl/md5.h]) +AC_CHECK_HEADERS([linux/falloc.h]) + case $host_os in darwin*) if ! test "`/usr/bin/sw_vers | grep ProductVersion: | cut -f 2 | cut -d. -f2`" -ge 5; then @@ -518,6 +520,17 @@ if test "x${have_fdatasync}" = "xyes"; then AC_DEFINE(HAVE_FDATASYNC, 1, [define if fdatasync exists]) fi +AC_CHECK_FUNC([fallocate], [have_fallocate=yes]) +if test "x${have_fallocate}" = "xyes"; then + AC_DEFINE(HAVE_FALLOCATE, 1, [define if fallocate exists]) +fi + +AC_CHECK_FUNC([posix_fallocate], [have_posix_fallocate=yes]) +if test "x${have_posix_fallocate}" = "xyes"; then + AC_DEFINE(HAVE_POSIX_FALLOCATE, 1, [define if posix_fallocate exists]) +fi + + # Check the distribution where you are compiling glusterfs on GF_DISTRIBUTION= diff --git a/contrib/fuse-include/fuse_kernel.h b/contrib/fuse-include/fuse_kernel.h index 0c3a11d5..2f4e26ff 100644 --- a/contrib/fuse-include/fuse_kernel.h +++ b/contrib/fuse-include/fuse_kernel.h @@ -262,6 +262,7 @@ enum fuse_opcode { FUSE_IOCTL = 39, FUSE_POLL = 40, + FUSE_FALLOCATE = 43, FUSE_READDIRPLUS = 44, /* CUSE specific operations */ CUSE_INIT = 4096, @@ -537,6 +538,14 @@ struct fuse_notify_poll_wakeup_out { __u64 kh; }; +struct fuse_fallocate_in { + __u64 fh; + __u64 offset; + __u64 length; + __u32 mode; + __u32 padding; +}; + struct fuse_in_header { __u32 len; __u32 opcode; diff --git a/libglusterfs/src/call-stub.c b/libglusterfs/src/call-stub.c index bd81c4ed..2bb99341 100644 --- a/libglusterfs/src/call-stub.c +++ b/libglusterfs/src/call-stub.c @@ -2130,6 +2130,61 @@ out: return stub; } +call_stub_t * +fop_fallocate_cbk_stub(call_frame_t *frame, fop_fallocate_cbk_t fn, + int32_t op_ret, int32_t op_errno, + struct iatt *statpre, struct iatt *statpost, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + GF_VALIDATE_OR_GOTO ("call-stub", frame, out); + + stub = stub_new (frame, 0, GF_FOP_FALLOCATE); + GF_VALIDATE_OR_GOTO ("call-stub", stub, out); + + stub->fn_cbk.fallocate = fn; + + stub->args_cbk.op_ret = op_ret; + stub->args_cbk.op_errno = op_errno; + + if (statpre) + stub->args_cbk.prestat = *statpre; + if (statpost) + stub->args_cbk.poststat = *statpost; + if (xdata) + stub->args_cbk.xdata = dict_ref (xdata); +out: + return stub; +} + +call_stub_t * +fop_fallocate_stub(call_frame_t *frame, fop_fallocate_t fn, fd_t *fd, + int32_t mode, off_t offset, size_t len, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + GF_VALIDATE_OR_GOTO ("call-stub", frame, out); + GF_VALIDATE_OR_GOTO ("call-stub", fn, out); + + stub = stub_new (frame, 1, GF_FOP_FALLOCATE); + GF_VALIDATE_OR_GOTO ("call-stub", stub, out); + + stub->fn.fallocate = fn; + + if (fd) + stub->args.fd = fd_ref (fd); + + stub->args.flags = mode; + stub->args.offset = offset; + stub->args.size = len; + + if (xdata) + stub->args.xdata = dict_ref (xdata); +out: + return stub; + +} static void call_resume_wind (call_stub_t *stub) @@ -2347,6 +2402,12 @@ call_resume_wind (call_stub_t *stub) stub->args.fd, &stub->args.stat, stub->args.valid, stub->args.xdata); break; + case GF_FOP_FALLOCATE: + stub->fn.fallocate(stub->frame, stub->frame->this, + stub->args.fd, stub->args.flags, + stub->args.offset, stub->args.size, + stub->args.xdata); + break; default: gf_log_callingfn ("call-stub", GF_LOG_ERROR, "Invalid value of FOP (%d)", @@ -2541,6 +2602,10 @@ call_resume_unwind (call_stub_t *stub) STUB_UNWIND (stub, fsetattr, &stub->args_cbk.prestat, &stub->args_cbk.poststat, stub->args_cbk.xdata); break; + case GF_FOP_FALLOCATE: + STUB_UNWIND(stub, fallocate, &stub->args_cbk.prestat, + &stub->args_cbk.poststat, stub->args_cbk.xdata); + break; default: gf_log_callingfn ("call-stub", GF_LOG_ERROR, "Invalid value of FOP (%d)", diff --git a/libglusterfs/src/call-stub.h b/libglusterfs/src/call-stub.h index 33511183..d940fe6f 100644 --- a/libglusterfs/src/call-stub.h +++ b/libglusterfs/src/call-stub.h @@ -69,6 +69,7 @@ typedef struct { fop_fxattrop_t fxattrop; fop_setattr_t setattr; fop_fsetattr_t fsetattr; + fop_fallocate_t fallocate; } fn; union { @@ -113,6 +114,7 @@ typedef struct { fop_fxattrop_cbk_t fxattrop; fop_setattr_cbk_t setattr; fop_fsetattr_cbk_t fsetattr; + fop_fallocate_cbk_t fallocate; } fn_cbk; struct { @@ -713,6 +715,20 @@ fop_fsetattr_cbk_stub (call_frame_t *frame, struct iatt *statpre, struct iatt *statpost, dict_t *xdata); +call_stub_t * +fop_fallocate_stub(call_frame_t *frame, + fop_fallocate_t fn, + fd_t *fd, + int32_t mode, off_t offset, + size_t len, dict_t *xdata); + +call_stub_t * +fop_fallocate_cbk_stub(call_frame_t *frame, + fop_fallocate_cbk_t fn, + int32_t op_ret, int32_t op_errno, + struct iatt *statpre, struct iatt *statpost, + dict_t *xdata); + void call_resume (call_stub_t *stub); void call_stub_destroy (call_stub_t *stub); void call_unwind_error (call_stub_t *stub, int op_ret, int op_errno); diff --git a/libglusterfs/src/compat.h b/libglusterfs/src/compat.h index 3d0cee1a..2bd98254 100644 --- a/libglusterfs/src/compat.h +++ b/libglusterfs/src/compat.h @@ -32,6 +32,12 @@ #include #include #include +#ifdef HAVE_FALLOC_H +#include +#else +#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */ +#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */ +#endif #ifndef HAVE_LLISTXATTR diff --git a/libglusterfs/src/defaults.c b/libglusterfs/src/defaults.c index 5bac845c..32454a1e 100644 --- a/libglusterfs/src/defaults.c +++ b/libglusterfs/src/defaults.c @@ -454,6 +454,15 @@ default_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, return 0; } +int32_t +default_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, pre, post, xdata); + return 0; +} + int32_t default_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, char *spec_data) @@ -862,6 +871,17 @@ default_fsetattr_resume (call_frame_t *frame, xlator_t *this, fd_t *fd, return 0; } +int32_t +default_fallocate_resume(call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t keep_size, off_t offset, size_t len, dict_t *xdata) +{ + STACK_WIND(frame, default_fallocate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, keep_size, offset, len, + xdata); + return 0; +} + + /* FOPS */ int32_t @@ -1266,6 +1286,15 @@ default_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, return 0; } +int32_t +default_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t keep_size, off_t offset, size_t len, dict_t *xdata) +{ + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, keep_size, offset, + len, xdata); + return 0; +} int32_t default_forget (xlator_t *this, inode_t *inode) diff --git a/libglusterfs/src/defaults.h b/libglusterfs/src/defaults.h index 8a9de789..bc2f6429 100644 --- a/libglusterfs/src/defaults.h +++ b/libglusterfs/src/defaults.h @@ -243,6 +243,12 @@ int32_t default_fsetattr (call_frame_t *frame, struct iatt *stbuf, int32_t valid, dict_t *xdata); +int32_t default_fallocate(call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t keep_size, off_t offset, + size_t len, dict_t *xdata); + /* Resume */ int32_t default_getspec_resume (call_frame_t *frame, xlator_t *this, @@ -453,6 +459,13 @@ int32_t default_fsetattr_resume (call_frame_t *frame, struct iatt *stbuf, int32_t valid, dict_t *xdata); +int32_t default_fallocate_resume(call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t keep_size, off_t offset, + size_t len, dict_t *xdata); + + /* _cbk */ int32_t @@ -663,6 +676,11 @@ default_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *statpre, struct iatt *statpost, dict_t *xdata); +int32_t default_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata); + + int32_t default_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, char *spec_data); diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c index 05ff52c2..ba5075b2 100644 --- a/libglusterfs/src/globals.c +++ b/libglusterfs/src/globals.c @@ -67,6 +67,7 @@ const char *gf_fop_list[GF_FOP_MAXVALUE] = { [GF_FOP_RELEASE] = "RELEASE", [GF_FOP_RELEASEDIR] = "RELEASEDIR", [GF_FOP_FREMOVEXATTR]= "FREMOVEXATTR", + [GF_FOP_FALLOCATE] = "FALLOCATE", }; /* THIS */ diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 8ee55c70..013cdfff 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -197,6 +197,7 @@ typedef enum { GF_FOP_RELEASEDIR, GF_FOP_GETSPEC, GF_FOP_FREMOVEXATTR, + GF_FOP_FALLOCATE, GF_FOP_MAXVALUE, } glusterfs_fop_t; diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c index 8e5db41f..f57e8da5 100644 --- a/libglusterfs/src/syncop.c +++ b/libglusterfs/src/syncop.c @@ -1916,6 +1916,37 @@ syncop_access (xlator_t *subvol, loc_t *loc, int32_t mask) } +int +syncop_fallocate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + struct syncargs *args = NULL; + + args = cookie; + + args->op_ret = op_ret; + args->op_errno = op_errno; + + __wake (args); + + return 0; +} + +int +syncop_fallocate(xlator_t *subvol, fd_t *fd, int32_t keep_size, off_t offset, + size_t len) +{ + struct syncargs args = {0, }; + + SYNCOP (subvol, (&args), syncop_fallocate_cbk, subvol->fops->fallocate, + fd, keep_size, offset, len, NULL); + + errno = args.op_errno; + return args.op_ret; +} + + int syncop_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct gf_flock *flock, diff --git a/libglusterfs/src/syncop.h b/libglusterfs/src/syncop.h index 98e88ff3..59e62ad1 100644 --- a/libglusterfs/src/syncop.h +++ b/libglusterfs/src/syncop.h @@ -333,6 +333,8 @@ int syncop_mkdir (xlator_t *subvol, loc_t *loc, mode_t mode, dict_t *dict, int syncop_link (xlator_t *subvol, loc_t *oldloc, loc_t *newloc); int syncop_fsyncdir (xlator_t *subvol, fd_t *fd, int datasync); int syncop_access (xlator_t *subvol, loc_t *loc, int32_t mask); +int syncop_fallocate(xlator_t *subvol, fd_t *fd, int32_t keep_size, off_t offset, + size_t len); int syncop_rename (xlator_t *subvol, loc_t *oldloc, loc_t *newloc); diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c index bb834dbf..e8954cc2 100644 --- a/libglusterfs/src/syscall.c +++ b/libglusterfs/src/syscall.c @@ -458,3 +458,26 @@ sys_access (const char *pathname, int mode) { return access (pathname, mode); } + + +int +sys_fallocate(int fd, int mode, off_t offset, off_t len) +{ +#ifdef HAVE_FALLOCATE + return fallocate(fd, mode, offset, len); +#endif + +#ifdef HAVE_POSIX_FALLOCATE + if (mode) { + /* keep size not supported */ + errno = EOPNOTSUPP; + return -1; + } + + return posix_fallocate(fd, offset, len); +#endif + + errno = ENOSYS; + return -1; +} + diff --git a/libglusterfs/src/syscall.h b/libglusterfs/src/syscall.h index d5c6ce5f..f1c9f58c 100644 --- a/libglusterfs/src/syscall.h +++ b/libglusterfs/src/syscall.h @@ -139,4 +139,6 @@ sys_access (const char *pathname, int mode); int sys_ftruncate (int fd, off_t length); +int sys_fallocate(int fd, int mode, off_t offset, off_t len); + #endif /* __SYSCALL_H__ */ diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c index f9e5db67..1926240e 100644 --- a/libglusterfs/src/xlator.c +++ b/libglusterfs/src/xlator.c @@ -79,6 +79,7 @@ fill_defaults (xlator_t *xl) SET_DEFAULT_FOP (fxattrop); SET_DEFAULT_FOP (setattr); SET_DEFAULT_FOP (fsetattr); + SET_DEFAULT_FOP (fallocate); SET_DEFAULT_FOP (getspec); diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h index 43fd0638..ace73f2e 100644 --- a/libglusterfs/src/xlator.h +++ b/libglusterfs/src/xlator.h @@ -417,6 +417,14 @@ typedef int32_t (*fop_fsetattr_cbk_t) (call_frame_t *frame, struct iatt *preop_stbuf, struct iatt *postop_stbuf, dict_t *xdata); +typedef int32_t (*fop_fallocate_cbk_t) (call_frame_t *frame, + void *cookie, + xlator_t *this, + int32_t op_ret, + int32_t op_errno, + struct iatt *preop_stbuf, + struct iatt *postop_stbuf, dict_t *xdata); + typedef int32_t (*fop_lookup_t) (call_frame_t *frame, xlator_t *this, loc_t *loc, @@ -634,6 +642,13 @@ typedef int32_t (*fop_fsetattr_t) (call_frame_t *frame, struct iatt *stbuf, int32_t valid, dict_t *xdata); +typedef int32_t (*fop_fallocate_t) (call_frame_t *frame, + xlator_t *this, + fd_t *fd, + int32_t keep_size, + off_t offset, + size_t len, + dict_t *xdata); struct xlator_fops { fop_lookup_t lookup; @@ -678,6 +693,7 @@ struct xlator_fops { fop_setattr_t setattr; fop_fsetattr_t fsetattr; fop_getspec_t getspec; + fop_fallocate_t fallocate; /* these entries are used for a typechecking hack in STACK_WIND _only_ */ fop_lookup_cbk_t lookup_cbk; @@ -722,6 +738,7 @@ struct xlator_fops { fop_setattr_cbk_t setattr_cbk; fop_fsetattr_cbk_t fsetattr_cbk; fop_getspec_cbk_t getspec_cbk; + fop_fallocate_cbk_t fallocate_cbk; }; typedef int32_t (*cbk_forget_t) (xlator_t *this, diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index c645351e..214e8400 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -53,6 +53,7 @@ enum gf_fop_procnum { GFS3_OP_SETATTR, GFS3_OP_FSETATTR, GFS3_OP_READDIRP, + GFS3_OP_FALLOCATE, GFS3_OP_RELEASE, GFS3_OP_RELEASEDIR, GFS3_OP_FREMOVEXATTR, diff --git a/rpc/xdr/src/glusterfs3-xdr.c b/rpc/xdr/src/glusterfs3-xdr.c index a502b2ea..2ce0b84e 100644 --- a/rpc/xdr/src/glusterfs3-xdr.c +++ b/rpc/xdr/src/glusterfs3-xdr.c @@ -1506,6 +1506,46 @@ xdr_gfs3_fsetattr_rsp (XDR *xdrs, gfs3_fsetattr_rsp *objp) return TRUE; } +bool_t +xdr_gfs3_fallocate_req (XDR *xdrs, gfs3_fallocate_req *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_opaque (xdrs, objp->gfid, 16)) + return FALSE; + if (!xdr_quad_t (xdrs, &objp->fd)) + return FALSE; + if (!xdr_u_int (xdrs, &objp->flags)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->offset)) + return FALSE; + if (!xdr_u_quad_t (xdrs, &objp->size)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + +bool_t +xdr_gfs3_fallocate_rsp (XDR *xdrs, gfs3_fallocate_rsp *objp) +{ + register int32_t *buf; + buf = NULL; + + if (!xdr_int (xdrs, &objp->op_ret)) + return FALSE; + if (!xdr_int (xdrs, &objp->op_errno)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->statpre)) + return FALSE; + if (!xdr_gf_iatt (xdrs, &objp->statpost)) + return FALSE; + if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0)) + return FALSE; + return TRUE; +} + bool_t xdr_gfs3_rchecksum_req (XDR *xdrs, gfs3_rchecksum_req *objp) { diff --git a/rpc/xdr/src/glusterfs3-xdr.h b/rpc/xdr/src/glusterfs3-xdr.h index 0268d1a1..2f9ba4b4 100644 --- a/rpc/xdr/src/glusterfs3-xdr.h +++ b/rpc/xdr/src/glusterfs3-xdr.h @@ -887,6 +887,31 @@ struct gfs3_fsetattr_rsp { }; typedef struct gfs3_fsetattr_rsp gfs3_fsetattr_rsp; +struct gfs3_fallocate_req { + char gfid[16]; + quad_t fd; + u_int flags; + u_quad_t offset; + u_quad_t size; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_fallocate_req gfs3_fallocate_req; + +struct gfs3_fallocate_rsp { + int op_ret; + int op_errno; + struct gf_iatt statpre; + struct gf_iatt statpost; + struct { + u_int xdata_len; + char *xdata_val; + } xdata; +}; +typedef struct gfs3_fallocate_rsp gfs3_fallocate_rsp; + struct gfs3_rchecksum_req { quad_t fd; u_quad_t offset; @@ -1182,6 +1207,8 @@ extern bool_t xdr_gfs3_setattr_req (XDR *, gfs3_setattr_req*); extern bool_t xdr_gfs3_setattr_rsp (XDR *, gfs3_setattr_rsp*); extern bool_t xdr_gfs3_fsetattr_req (XDR *, gfs3_fsetattr_req*); extern bool_t xdr_gfs3_fsetattr_rsp (XDR *, gfs3_fsetattr_rsp*); +extern bool_t xdr_gfs3_fallocate_req (XDR *, gfs3_fallocate_req*); +extern bool_t xdr_gfs3_fallocate_rsp (XDR *, gfs3_fallocate_rsp*); extern bool_t xdr_gfs3_rchecksum_req (XDR *, gfs3_rchecksum_req*); extern bool_t xdr_gfs3_rchecksum_rsp (XDR *, gfs3_rchecksum_rsp*); extern bool_t xdr_gf_setvolume_req (XDR *, gf_setvolume_req*); @@ -1276,6 +1303,8 @@ extern bool_t xdr_gfs3_setattr_req (); extern bool_t xdr_gfs3_setattr_rsp (); extern bool_t xdr_gfs3_fsetattr_req (); extern bool_t xdr_gfs3_fsetattr_rsp (); +extern bool_t xdr_gfs3_fallocate_req (); +extern bool_t xdr_gfs3_fallocate_rsp (); extern bool_t xdr_gfs3_rchecksum_req (); extern bool_t xdr_gfs3_rchecksum_rsp (); extern bool_t xdr_gf_setvolume_req (); diff --git a/rpc/xdr/src/glusterfs3-xdr.x b/rpc/xdr/src/glusterfs3-xdr.x index 063f302d..7fad5833 100644 --- a/rpc/xdr/src/glusterfs3-xdr.x +++ b/rpc/xdr/src/glusterfs3-xdr.x @@ -566,6 +566,23 @@ struct gfs3_fstat_req { opaque xdata<>; /* Extra data */ } ; + struct gfs3_fallocate_req { + opaque gfid[16]; + hyper fd; + unsigned int flags; + unsigned hyper offset; + unsigned hyper size; + opaque xdata<>; /* Extra data */ +} ; + + struct gfs3_fallocate_rsp { + int op_ret; + int op_errno; + struct gf_iatt statpre; + struct gf_iatt statpost; + opaque xdata<>; /* Extra data */ +} ; + struct gfs3_rchecksum_req { hyper fd; unsigned hyper offset; diff --git a/tests/bugs/bug-949242.t b/tests/bugs/bug-949242.t new file mode 100644 index 00000000..71708467 --- /dev/null +++ b/tests/bugs/bug-949242.t @@ -0,0 +1,54 @@ +#!/bin/bash +# +# Bug 949242 - Test basic fallocate functionality. +# +# Run several commands to verify basic fallocate functionality. We verify that +# fallocate creates and allocates blocks to a file. We also verify that the keep +# size option does not modify the file size. +### + +. $(dirname $0)/../include.rc +. $(dirname $0)/../fallocate.rc + +cleanup; + +TEST glusterd + +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4} +TEST $CLI volume start $V0 + +TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 + +# check for fallocate support before continuing the test +require_fallocate -l 1m -n $M0/file && rm -f $M0/file + +# fallocate a file and verify blocks are allocated +TEST fallocate -l 1m $M0/file +blksz=`stat --printf=%b $M0/file` +nblks=`stat --printf=%B $M0/file` +TEST [ $(($blksz * $nblks)) -eq 1048576 ] + +TEST unlink $M0/file + +# truncate a file to a fixed size, fallocate and verify that the size does not +# change +TEST truncate --size=1m $M0/file +TEST fallocate -l 2m -n $M0/file +blksz=`stat --printf=%b $M0/file` +nblks=`stat --printf=%B $M0/file` +sz=`stat --printf=%s $M0/file` +TEST [ $sz -eq 1048576 ] +# Note that gluster currently incorporates a hack to limit the number of blocks +# reported as allocated to the file by the file size. We have allocated beyond the +# file size here. Just check for non-zero allocation to avoid setting a land mine +# for if/when that behavior might change. +TEST [ ! $(($blksz * $nblks)) -eq 0 ] + +TEST unlink $M0/file + +TEST umount $M0 + +TEST $CLI volume stop $V0 +TEST $CLI volume delete $V0 + +cleanup; diff --git a/tests/fallocate.rc b/tests/fallocate.rc new file mode 100644 index 00000000..3756bb94 --- /dev/null +++ b/tests/fallocate.rc @@ -0,0 +1,19 @@ +#!/bin/bash + +# Helper to verify a given fallocate command is supported and skip a test +# otherwise. Older versions of the fallocate utility might not support all modes +# (i.e., discard) and older versions of fuse might not support the associated +# fallocate requests. + +function require_fallocate() +{ + output=`fallocate $* 2>&1` + ret=$? + if [ ! $ret -eq 0 ] && ([[ $output == *unsupported* ]] || + [[ $output == *invalid* ]] || + [[ $output == *"not supported"* ]]) + then + SKIP_TESTS + exit + fi +} diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index a9acb409..830ecd99 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -2821,6 +2821,250 @@ out: /* }}} */ +/* {{{ fallocate */ + +static int +afr_fallocate_unwind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t * local = NULL; + call_frame_t *main_frame = NULL; + + local = frame->local; + + LOCK (&frame->lock); + { + if (local->transaction.main_frame) + main_frame = local->transaction.main_frame; + local->transaction.main_frame = NULL; + } + UNLOCK (&frame->lock); + + if (main_frame) { + AFR_STACK_UNWIND (fallocate, main_frame, local->op_ret, + local->op_errno, + &local->cont.fallocate.prebuf, + &local->cont.fallocate.postbuf, + NULL); + } + return 0; +} + +static int +afr_fallocate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + afr_local_t * local = NULL; + afr_private_t * priv = NULL; + int child_index = (long) cookie; + int call_count = -1; + int need_unwind = 0; + int read_child = 0; + + local = frame->local; + priv = this->private; + + read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL); + + LOCK (&frame->lock); + { + if (child_index == read_child) { + local->read_child_returned = _gf_true; + } + + if (afr_fop_failed (op_ret, op_errno)) + afr_transaction_fop_failed (frame, this, child_index); + + if (op_ret != -1) { + if (local->success_count == 0) { + local->op_ret = op_ret; + local->cont.fallocate.prebuf = *prebuf; + local->cont.fallocate.postbuf = *postbuf; + } + + if (child_index == read_child) { + local->cont.fallocate.prebuf = *prebuf; + local->cont.fallocate.postbuf = *postbuf; + } + + local->success_count++; + + if ((local->success_count >= priv->wait_count) + && local->read_child_returned) { + need_unwind = 1; + } + } + local->op_errno = op_errno; + } + UNLOCK (&frame->lock); + + if (need_unwind) + local->transaction.unwind (frame, this); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + local->transaction.resume (frame, this); + } + + return 0; +} + +static int +afr_fallocate_wind (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int call_count = -1; + int i = 0; + + local = frame->local; + priv = this->private; + + call_count = afr_pre_op_done_children_count (local->transaction.pre_op, + priv->child_count); + + if (call_count == 0) { + local->transaction.resume (frame, this); + return 0; + } + + local->call_count = call_count; + + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.pre_op[i]) { + STACK_WIND_COOKIE (frame, afr_fallocate_wind_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->fallocate, + local->fd, + local->cont.fallocate.mode, + local->cont.fallocate.offset, + local->cont.fallocate.len, + NULL); + + if (!--call_count) + break; + } + } + + return 0; +} + +static int +afr_fallocate_done (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; + + local = frame->local; + + local->transaction.unwind (frame, this); + + AFR_STACK_DESTROY (frame); + + return 0; +} + +static int +afr_do_fallocate(call_frame_t *frame, xlator_t *this) +{ + call_frame_t * transaction_frame = NULL; + afr_local_t * local = NULL; + int op_ret = -1; + int op_errno = 0; + + local = frame->local; + + transaction_frame = copy_frame (frame); + if (!transaction_frame) { + goto out; + } + + transaction_frame->local = local; + frame->local = NULL; + + local->op = GF_FOP_FALLOCATE; + + local->transaction.fop = afr_fallocate_wind; + local->transaction.done = afr_fallocate_done; + local->transaction.unwind = afr_fallocate_unwind; + + local->transaction.main_frame = frame; + + local->transaction.start = local->cont.fallocate.offset; + local->transaction.len = 0; + + /* fallocate can modify the file size */ + op_ret = afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION); + if (op_ret < 0) { + op_errno = -op_ret; + goto out; + } + + op_ret = 0; +out: + if (op_ret < 0) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (fallocate, frame, op_ret, op_errno, NULL, + NULL, NULL); + } + + return 0; +} + +int +afr_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + afr_private_t * priv = NULL; + afr_local_t * local = NULL; + call_frame_t *transaction_frame = NULL; + int ret = -1; + int op_errno = 0; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (this->private, out); + + priv = this->private; + + if (afr_is_split_brain (this, fd->inode)) { + op_errno = EIO; + goto out; + } + QUORUM_CHECK(fallocate,out); + + AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out); + local = frame->local; + + ret = afr_local_init (local, priv, &op_errno); + if (ret < 0) + goto out; + + local->cont.fallocate.mode = mode; + local->cont.fallocate.offset = offset; + local->cont.fallocate.len = len; + + local->fd = fd_ref (fd); + + afr_open_fd_fix (fd, this); + + afr_do_fallocate (frame, this); + + ret = 0; +out: + if (ret < 0) { + if (transaction_frame) + AFR_STACK_DESTROY (transaction_frame); + AFR_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL); + } + + return 0; +} + +/* }}} */ + /* {{{ fsync */ int32_t diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index bee10fd0..4d6c7148 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -483,6 +483,7 @@ struct xlator_fops fops = { .finodelk = afr_finodelk, .entrylk = afr_entrylk, .fentrylk = afr_fentrylk, + .fallocate = afr_fallocate, /* inode read */ .access = afr_access, diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 387ed12e..185f4f71 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -652,6 +652,14 @@ typedef struct _afr_local { dict_t *params; char *linkpath; } symlink; + + struct { + int32_t mode; + off_t offset; + size_t len; + struct iatt prebuf; + struct iatt postbuf; + } fallocate; } cont; struct { diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index 9de86136..de35e843 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -691,6 +691,8 @@ int32_t dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata); int32_t dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata); +int32_t dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t mode, off_t offset, size_t len, dict_t *xdata); int32_t dht_init (xlator_t *this); void dht_fini (xlator_t *this); diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c index b87d2f73..56ed36ef 100644 --- a/xlators/cluster/dht/src/dht-inode-write.c +++ b/xlators/cluster/dht/src/dht-inode-write.c @@ -19,6 +19,7 @@ int dht_writev2 (xlator_t *this, call_frame_t *frame, int ret); int dht_truncate2 (xlator_t *this, call_frame_t *frame, int ret); int dht_setattr2 (xlator_t *this, call_frame_t *frame, int ret); +int dht_fallocate2(xlator_t *this, call_frame_t *frame, int op_ret); int dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -348,6 +349,145 @@ err: return 0; } + +int +dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + int ret = -1; + + GF_VALIDATE_OR_GOTO ("dht", frame, err); + GF_VALIDATE_OR_GOTO ("dht", this, out); + GF_VALIDATE_OR_GOTO ("dht", frame->local, out); + GF_VALIDATE_OR_GOTO ("dht", cookie, out); + + local = frame->local; + prev = cookie; + + if ((op_ret == -1) && (op_errno != ENOENT)) { + local->op_errno = op_errno; + local->op_ret = -1; + gf_log (this->name, GF_LOG_DEBUG, + "subvolume %s returned -1 (%s)", + prev->this->name, strerror (op_errno)); + + goto out; + } + + if (local->call_cnt != 1) { + if (local->stbuf.ia_blocks) { + dht_iatt_merge (this, postbuf, &local->stbuf, NULL); + dht_iatt_merge (this, prebuf, &local->prebuf, NULL); + } + goto out; + } + local->rebalance.target_op_fn = dht_fallocate2; + + /* Phase 2 of migration */ + if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) { + ret = dht_rebalance_complete_check (this, frame); + if (!ret) + return 0; + } + + /* Check if the rebalance phase1 is true */ + if (IS_DHT_MIGRATION_PHASE1 (postbuf)) { + dht_iatt_merge (this, &local->stbuf, postbuf, NULL); + dht_iatt_merge (this, &local->prebuf, prebuf, NULL); + ret = fd_ctx_get (local->fd, this, NULL); + if (!ret) { + dht_fallocate2 (this, frame, 0); + return 0; + } + ret = dht_rebalance_in_progress_check (this, frame); + if (!ret) + return 0; + } + +out: + DHT_STRIP_PHASE1_FLAGS (postbuf); + DHT_STRIP_PHASE1_FLAGS (prebuf); + DHT_STACK_UNWIND (fallocate, frame, op_ret, op_errno, + prebuf, postbuf, xdata); +err: + return 0; +} + +int +dht_fallocate2(xlator_t *this, call_frame_t *frame, int op_ret) +{ + dht_local_t *local = NULL; + xlator_t *subvol = NULL; + uint64_t tmp_subvol = 0; + int ret = -1; + + local = frame->local; + + if (local->fd) + ret = fd_ctx_get (local->fd, this, &tmp_subvol); + if (!ret) + subvol = (xlator_t *)(long)tmp_subvol; + + if (!subvol) + subvol = local->cached_subvol; + + local->call_cnt = 2; /* This is the second attempt */ + + STACK_WIND(frame, dht_fallocate_cbk, subvol, subvol->fops->fallocate, + local->fd, local->rebalance.flags, local->rebalance.offset, + local->rebalance.size, NULL); + + return 0; +} + +int +dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + xlator_t *subvol = NULL; + int op_errno = -1; + dht_local_t *local = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + local = dht_local_init (frame, NULL, fd, GF_FOP_FALLOCATE); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + local->rebalance.flags = mode; + local->rebalance.offset = offset; + local->rebalance.size = len; + + local->call_cnt = 1; + subvol = local->cached_subvol; + if (!subvol) { + gf_log (this->name, GF_LOG_DEBUG, + "no cached subvolume for fd=%p", fd); + op_errno = EINVAL; + goto err; + } + + STACK_WIND (frame, dht_fallocate_cbk, + subvol, subvol->fops->fallocate, + fd, mode, offset, len, xdata); + + return 0; + +err: + op_errno = (op_errno == -1) ? errno : op_errno; + DHT_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL); + + return 0; +} + + /* handle cases of migration here for 'setattr()' calls */ int dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c index 814f0a8e..d3031e20 100644 --- a/xlators/cluster/dht/src/dht.c +++ b/xlators/cluster/dht/src/dht.c @@ -70,6 +70,7 @@ struct xlator_fops fops = { .fxattrop = dht_fxattrop, .setattr = dht_setattr, .fsetattr = dht_fsetattr, + .fallocate = dht_fallocate, }; struct xlator_dumpops dumpops = { diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c index dadd3fec..3c56464b 100644 --- a/xlators/cluster/stripe/src/stripe.c +++ b/xlators/cluster/stripe/src/stripe.c @@ -3773,6 +3773,173 @@ err: } +int32_t +stripe_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + int32_t callcnt = 0; + stripe_local_t *local = NULL; + stripe_local_t *mlocal = NULL; + call_frame_t *prev = NULL; + call_frame_t *mframe = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + local = frame->local; + mframe = local->orig_frame; + mlocal = mframe->local; + + LOCK(&frame->lock); + { + callcnt = ++mlocal->call_count; + + if (op_ret == 0) { + mlocal->post_buf = *postbuf; + mlocal->pre_buf = *prebuf; + + mlocal->prebuf_blocks += prebuf->ia_blocks; + mlocal->postbuf_blocks += postbuf->ia_blocks; + + correct_file_size(prebuf, mlocal->fctx, prev); + correct_file_size(postbuf, mlocal->fctx, prev); + + if (mlocal->prebuf_size < prebuf->ia_size) + mlocal->prebuf_size = prebuf->ia_size; + if (mlocal->postbuf_size < postbuf->ia_size) + mlocal->postbuf_size = postbuf->ia_size; + } + + /* return the first failure */ + if (mlocal->op_ret == 0) { + mlocal->op_ret = op_ret; + mlocal->op_errno = op_errno; + } + } + UNLOCK (&frame->lock); + + if ((callcnt == mlocal->wind_count) && mlocal->unwind) { + mlocal->pre_buf.ia_size = mlocal->prebuf_size; + mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; + mlocal->post_buf.ia_size = mlocal->postbuf_size; + mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; + + STRIPE_STACK_UNWIND (fallocate, mframe, mlocal->op_ret, + mlocal->op_errno, &mlocal->pre_buf, + &mlocal->post_buf, NULL); + } +out: + STRIPE_STACK_DESTROY(frame); + return 0; +} + +int32_t +stripe_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + stripe_local_t *local = NULL; + stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = 1; + int32_t idx = 0; + int32_t offset_offset = 0; + int32_t remaining_size = 0; + off_t fill_size = 0; + uint64_t stripe_size = 0; + uint64_t tmp_fctx = 0; + off_t dest_offset = 0; + call_frame_t *fframe = NULL; + stripe_local_t *flocal = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + inode_ctx_get (fd->inode, this, &tmp_fctx); + if (!tmp_fctx) { + op_errno = EINVAL; + goto err; + } + fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; + stripe_size = fctx->stripe_size; + + STRIPE_VALIDATE_FCTX (fctx, err); + + remaining_size = len; + + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + frame->local = local; + local->stripe_size = stripe_size; + local->fctx = fctx; + + if (!stripe_size) { + gf_log (this->name, GF_LOG_DEBUG, + "Wrong stripe size for the file"); + op_errno = EINVAL; + goto err; + } + + while (1) { + fframe = copy_frame(frame); + flocal = mem_get0(this->local_pool); + if (!flocal) { + op_errno = ENOMEM; + goto err; + } + flocal->orig_frame = frame; + fframe->local = flocal; + + /* send fallocate request to the associated child node */ + idx = (((offset + offset_offset) / + local->stripe_size) % fctx->stripe_count); + + fill_size = (local->stripe_size - + ((offset + offset_offset) % local->stripe_size)); + if (fill_size > remaining_size) + fill_size = remaining_size; + + remaining_size -= fill_size; + + local->wind_count++; + if (remaining_size == 0) + local->unwind = 1; + + dest_offset = offset + offset_offset; + if (fctx->stripe_coalesce) + dest_offset = coalesced_offset(dest_offset, + local->stripe_size, fctx->stripe_count); + + /* + * TODO: Create a separate handler for coalesce mode that sends a + * single fallocate per-child (since the ranges are linear). + */ + STACK_WIND(fframe, stripe_fallocate_cbk, fctx->xl_array[idx], + fctx->xl_array[idx]->fops->fallocate, fd, mode, + dest_offset, fill_size, xdata); + + offset_offset += fill_size; + if (remaining_size == 0) + break; + } + + return 0; +err: + if (fframe) + STRIPE_STACK_DESTROY(fframe); + + STRIPE_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + + int32_t stripe_release (xlator_t *this, fd_t *fd) { @@ -5221,6 +5388,7 @@ struct xlator_fops fops = { .removexattr = stripe_removexattr, .fremovexattr = stripe_fremovexattr, .readdirp = stripe_readdirp, + .fallocate = stripe_fallocate, }; struct xlator_cbks cbks = { diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c index 63bb8fa9..5bb0e9d6 100644 --- a/xlators/debug/io-stats/src/io-stats.c +++ b/xlators/debug/io-stats/src/io-stats.c @@ -1723,6 +1723,18 @@ io_stats_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } +int +io_stats_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + UPDATE_PROFILE_STATS(frame, FALLOCATE); + STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; +} + + int io_stats_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata) @@ -2391,6 +2403,19 @@ io_stats_fstat (call_frame_t *frame, xlator_t *this, } +int +io_stats_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + START_FOP_LATENCY(frame); + + STACK_WIND(frame, io_stats_fallocate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, + xdata); + + return 0; +} + int io_stats_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata) @@ -2817,6 +2842,7 @@ struct xlator_fops fops = { .fxattrop = io_stats_fxattrop, .setattr = io_stats_setattr, .fsetattr = io_stats_fsetattr, + .fallocate = io_stats_fallocate, }; struct xlator_cbks cbks = { diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c index 82d9066d..2dc03f93 100644 --- a/xlators/features/marker/src/marker.c +++ b/xlators/features/marker/src/marker.c @@ -1808,6 +1808,74 @@ err: } +int32_t +marker_fallocate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_TRACE, "%s occurred while " + "fallocating a file ", strerror (op_errno)); + } + + local = (marker_local_t *) frame->local; + + frame->local = NULL; + + STACK_UNWIND_STRICT (fallocate, frame, op_ret, op_errno, prebuf, + postbuf, xdata); + + if (op_ret == -1 || local == NULL) + goto out; + + priv = this->private; + + if (priv->feature_enabled & GF_QUOTA) + mq_initiate_quota_txn (this, &local->loc); + + if (priv->feature_enabled & GF_XTIME) + marker_xtime_update_marks (this, local); +out: + marker_local_unref (local); + + return 0; +} + +int32_t +marker_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + int32_t ret = 0; + marker_local_t *local = NULL; + marker_conf_t *priv = NULL; + + priv = this->private; + + if (priv->feature_enabled == 0) + goto wind; + + local = mem_get0 (this->local_pool); + + MARKER_INIT_LOCAL (frame, local); + + ret = marker_inode_loc_fill (fd->inode, &local->loc); + + if (ret == -1) + goto err; +wind: + STACK_WIND (frame, marker_fallocate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, + xdata); + return 0; +err: + STACK_UNWIND_STRICT (fallocate, frame, -1, ENOMEM, NULL, NULL, NULL); + + return 0; +} + /* when a call from the special client is received on * key trusted.glusterfs.volume-mark with value "RESET" * or if the value is 0length, update the change the @@ -2617,6 +2685,7 @@ struct xlator_fops fops = { .removexattr = marker_removexattr, .getxattr = marker_getxattr, .readdirp = marker_readdirp, + .fallocate = marker_fallocate, }; struct xlator_cbks cbks = { diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c index 4ea54cca..c527e7ca 100644 --- a/xlators/features/quota/src/quota.c +++ b/xlators/features/quota/src/quota.c @@ -2969,6 +2969,177 @@ err: return 0; } +int32_t +quota_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + int32_t ret = 0; + uint64_t ctx_int = 0; + quota_inode_ctx_t *ctx = NULL; + quota_local_t *local = NULL; + quota_dentry_t *dentry = NULL; + int64_t delta = 0; + + local = frame->local; + + if ((op_ret < 0) || (local == NULL)) { + goto out; + } + + ret = inode_ctx_get (local->loc.inode, this, &ctx_int); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to get the context", local->loc.path); + goto out; + } + + ctx = (quota_inode_ctx_t *)(unsigned long) ctx_int; + + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "quota context not set in %s (gfid:%s)", + local->loc.path, uuid_utoa (local->loc.inode->gfid)); + goto out; + } + + LOCK (&ctx->lock); + { + ctx->buf = *postbuf; + } + UNLOCK (&ctx->lock); + + list_for_each_entry (dentry, &ctx->parents, next) { + delta = (postbuf->ia_blocks - prebuf->ia_blocks) * 512; + quota_update_size (this, local->loc.inode, + dentry->name, dentry->par, delta); + } + +out: + QUOTA_STACK_UNWIND (fallocate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; +} + +int32_t +quota_fallocate_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t mode, off_t offset, size_t len, dict_t *xdata) +{ + quota_local_t *local = NULL; + int32_t op_errno = EINVAL; + + local = frame->local; + if (local == NULL) { + gf_log (this->name, GF_LOG_WARNING, "local is NULL"); + goto unwind; + } + + if (local->op_ret == -1) { + op_errno = local->op_errno; + goto unwind; + } + + STACK_WIND (frame, quota_fallocate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, + xdata); + return 0; + +unwind: + QUOTA_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + +int32_t +quota_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + int32_t ret = -1, op_errno = EINVAL; + int32_t parents = 0; + quota_local_t *local = NULL; + quota_inode_ctx_t *ctx = NULL; + quota_priv_t *priv = NULL; + call_stub_t *stub = NULL; + quota_dentry_t *dentry = NULL; + + GF_ASSERT (frame); + GF_VALIDATE_OR_GOTO ("quota", this, unwind); + GF_VALIDATE_OR_GOTO (this->name, fd, unwind); + + local = quota_local_new (); + if (local == NULL) { + goto unwind; + } + + frame->local = local; + local->loc.inode = inode_ref (fd->inode); + + ret = quota_inode_ctx_get (fd->inode, -1, this, NULL, NULL, &ctx, 0); + if (ctx == NULL) { + gf_log (this->name, GF_LOG_WARNING, + "quota context not set in inode (gfid:%s)", + uuid_utoa (fd->inode->gfid)); + goto unwind; + } + + stub = fop_fallocate_stub(frame, quota_fallocate_helper, fd, mode, offset, len, + xdata); + if (stub == NULL) { + op_errno = ENOMEM; + goto unwind; + } + + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, unwind); + + LOCK (&ctx->lock); + { + list_for_each_entry (dentry, &ctx->parents, next) { + parents++; + } + } + UNLOCK (&ctx->lock); + + /* + * Note that by using len as the delta we're assuming the range from + * offset to offset+len has not already been allocated. This can result + * in ENOSPC errors attempting to allocate an already allocated range. + */ + local->delta = len; + local->stub = stub; + local->link_count = parents; + + list_for_each_entry (dentry, &ctx->parents, next) { + ret = quota_check_limit (frame, fd->inode, this, dentry->name, + dentry->par); + if (ret == -1) { + break; + } + } + + stub = NULL; + + LOCK (&local->lock); + { + local->link_count = 0; + if (local->validate_count == 0) { + stub = local->stub; + local->stub = NULL; + } + } + UNLOCK (&local->lock); + + if (stub != NULL) { + call_resume (stub); + } + + return 0; + +unwind: + QUOTA_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + int32_t mem_acct_init (xlator_t *this) @@ -3304,6 +3475,7 @@ struct xlator_fops fops = { .removexattr = quota_removexattr, .fremovexattr = quota_fremovexattr, .readdirp = quota_readdirp, + .fallocate = quota_fallocate, }; struct xlator_cbks cbks = { diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 43c98a23..59472b91 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -2653,6 +2653,51 @@ fuse_readdirp (xlator_t *this, fuse_in_header_t *finh, void *msg) fuse_resolve_and_resume (state, fuse_readdirp_resume); } +static int +fuse_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + return fuse_err_cbk(frame, cookie, this, op_ret, op_errno, xdata); +} + +static void +fuse_fallocate_resume(fuse_state_t *state) +{ + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%"PRIu64": FALLOCATE (%p, flags=%d, size=%zu, offset=%"PRId64")", + state->finh->unique, state->fd, state->flags, state->size, + state->off); + + /* we only support KEEP_SIZE */ + FUSE_FOP(state, fuse_fallocate_cbk, GF_FOP_FALLOCATE, fallocate, state->fd, + (state->flags & FALLOC_FL_KEEP_SIZE), state->off, state->size, + state->xdata); +} + +static void +fuse_fallocate(xlator_t *this, fuse_in_header_t *finh, void *msg) +{ + struct fuse_fallocate_in *ffi = msg; + fuse_state_t *state = NULL; + + GET_STATE(this, finh, state); + state->off = ffi->offset; + state->size = ffi->length; + state->flags = ffi->mode; + state->fd = FH_TO_FD(ffi->fh); + + /* discard TBD as separate FOP */ + if (state->flags & FALLOC_FL_PUNCH_HOLE) { + send_fuse_err(this, finh, EOPNOTSUPP); + free_fuse_state(state); + return; + } + + fuse_resolve_fd_init(state, &state->resolve, state->fd); + fuse_resolve_and_resume(state, fuse_fallocate_resume); +} + static void fuse_releasedir (xlator_t *this, fuse_in_header_t *finh, void *msg) @@ -4861,7 +4906,7 @@ static fuse_handler_t *fuse_std_ops[FUSE_OP_HIGH] = { /* [FUSE_POLL] */ /* [FUSE_NOTIFY_REPLY] */ /* [FUSE_BATCH_FORGET] */ - /* [FUSE_FALLOCATE] */ + [FUSE_FALLOCATE] = fuse_fallocate, [FUSE_READDIRPLUS] = fuse_readdirp, }; diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c index 226c091f..33642bff 100644 --- a/xlators/performance/io-threads/src/io-threads.c +++ b/xlators/performance/io-threads/src/io-threads.c @@ -307,6 +307,7 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub) case GF_FOP_XATTROP: case GF_FOP_FXATTROP: case GF_FOP_RCHECKSUM: + case GF_FOP_FALLOCATE: pri = IOT_PRI_LO; break; @@ -2406,6 +2407,56 @@ out: return 0; } +int +iot_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preop, struct iatt *postop, dict_t *xdata) +{ + STACK_UNWIND_STRICT (fallocate, frame, op_ret, op_errno, preop, postop, + xdata); + return 0; +} + + +int +iot_fallocate_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + STACK_WIND (frame, iot_fallocate_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->fallocate, fd, mode, offset, len, + xdata); + return 0; +} + + +int +iot_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + call_stub_t *stub = NULL; + int ret = -1; + + stub = fop_fallocate_stub(frame, iot_fallocate_wrapper, fd, mode, offset, + len, xdata); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "cannot create fallocate stub" + "(out of memory)"); + ret = -ENOMEM; + goto out; + } + + ret = iot_schedule (frame, this, stub); + +out: + if (ret < 0) { + STACK_UNWIND_STRICT (fallocate, frame, -1, -ret, NULL, NULL, + NULL); + if (stub != NULL) { + call_stub_destroy (stub); + } + } + return 0; +} int __iot_workers_scale (iot_conf_t *conf) @@ -2736,6 +2787,7 @@ struct xlator_fops fops = { .xattrop = iot_xattrop, .fxattrop = iot_fxattrop, .rchecksum = iot_rchecksum, + .fallocate = iot_fallocate, }; struct xlator_cbks cbks; diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c index 24924200..0b2c9431 100644 --- a/xlators/performance/md-cache/src/md-cache.c +++ b/xlators/performance/md-cache/src/md-cache.c @@ -1849,6 +1849,44 @@ mdc_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, return 0; } +int +mdc_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); + +out: + MDC_STACK_UNWIND (fallocate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; +} + +int mdc_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + mdc_local_t *local; + + local = mdc_local_get(frame); + local->fd = fd_ref(fd); + + STACK_WIND(frame, mdc_fallocate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, + xdata); + + return 0; +} int mdc_forget (xlator_t *this, inode_t *inode) @@ -1977,7 +2015,8 @@ struct xlator_fops fops = { .getxattr = mdc_getxattr, .fgetxattr = mdc_fgetxattr, .readdirp = mdc_readdirp, - .readdir = mdc_readdir + .readdir = mdc_readdir, + .fallocate = mdc_fallocate, }; diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c index b405b94c..5ac81313 100644 --- a/xlators/performance/open-behind/src/open-behind.c +++ b/xlators/performance/open-behind/src/open-behind.c @@ -681,6 +681,24 @@ err: return 0; } +int +ob_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + call_stub_t *stub; + + stub = fop_fallocate_stub(frame, default_fallocate_resume, fd, mode, + offset, len, xdata); + if (!stub) + goto err; + + open_and_resume(this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; +} int ob_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, @@ -903,6 +921,7 @@ struct xlator_fops fops = { .fentrylk = ob_fentrylk, .fxattrop = ob_fxattrop, .fsetattr = ob_fsetattr, + .fallocate = ob_fallocate, .unlink = ob_unlink, .rename = ob_rename, .lk = ob_lk, diff --git a/xlators/protocol/client/src/client-rpc-fops.c b/xlators/protocol/client/src/client-rpc-fops.c index 71ca3fbd..53cf3968 100644 --- a/xlators/protocol/client/src/client-rpc-fops.c +++ b/xlators/protocol/client/src/client-rpc-fops.c @@ -1936,6 +1936,61 @@ out: return 0; } +int +client3_3_fallocate_cbk (struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + call_frame_t *frame = NULL; + gfs3_fallocate_rsp rsp = {0,}; + struct iatt prestat = {0,}; + struct iatt poststat = {0,}; + int ret = 0; + xlator_t *this = NULL; + dict_t *xdata = NULL; + + + this = THIS; + + frame = myframe; + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = ENOTCONN; + goto out; + } + ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gfs3_fallocate_rsp); + if (ret < 0) { + gf_log (this->name, GF_LOG_ERROR, "XDR decoding failed"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + if (-1 != rsp.op_ret) { + gf_stat_to_iatt (&rsp.statpre, &prestat); + gf_stat_to_iatt (&rsp.statpost, &poststat); + } + + GF_PROTOCOL_DICT_UNSERIALIZE (this, xdata, (rsp.xdata.xdata_val), + (rsp.xdata.xdata_len), ret, + rsp.op_errno, out); + +out: + if (rsp.op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, "remote operation failed: %s", + strerror (gf_error_to_errno (rsp.op_errno))); + } + CLIENT_STACK_UNWIND (fallocate, frame, rsp.op_ret, + gf_error_to_errno (rsp.op_errno), &prestat, + &poststat, xdata); + + free (rsp.xdata.xdata_val); + + if (xdata) + dict_unref (xdata); + + return 0; +} int client3_3_setattr_cbk (struct rpc_req *req, struct iovec *iov, int count, @@ -5786,7 +5841,52 @@ unwind: return 0; } +int32_t +client3_3_fallocate(call_frame_t *frame, xlator_t *this, void *data) +{ + clnt_args_t *args = NULL; + int64_t remote_fd = -1; + clnt_conf_t *conf = NULL; + gfs3_fallocate_req req = {{0},}; + int op_errno = ESTALE; + int ret = 0; + + if (!frame || !this || !data) + goto unwind; + args = data; + conf = this->private; + + CLIENT_GET_REMOTE_FD (this, args->fd, DEFAULT_REMOTE_FD, + remote_fd, op_errno, unwind); + + req.fd = remote_fd; + req.flags = args->flags; + req.offset = args->offset; + req.size = args->size; + memcpy(req.gfid, args->fd->inode->gfid, 16); + + GF_PROTOCOL_DICT_SERIALIZE (this, args->xdata, (&req.xdata.xdata_val), + req.xdata.xdata_len, op_errno, unwind); + + ret = client_submit_request (this, &req, frame, conf->fops, + GFS3_OP_FALLOCATE, + client3_3_fallocate_cbk, NULL, + NULL, 0, NULL, 0, + NULL, (xdrproc_t)xdr_gfs3_fallocate_req); + if (ret) { + gf_log (this->name, GF_LOG_WARNING, "failed to send the fop"); + } + + GF_FREE (req.xdata.xdata_val); + + return 0; +unwind: + CLIENT_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL); + GF_FREE (req.xdata.xdata_val); + + return 0; +} /* Table Specific to FOPS */ @@ -5833,6 +5933,7 @@ rpc_clnt_procedure_t clnt3_3_fop_actors[GF_FOP_MAXVALUE] = { [GF_FOP_SETATTR] = { "SETATTR", client3_3_setattr }, [GF_FOP_FSETATTR] = { "FSETATTR", client3_3_fsetattr }, [GF_FOP_READDIRP] = { "READDIRP", client3_3_readdirp }, + [GF_FOP_FALLOCATE] = { "FALLOCATE", client3_3_fallocate }, [GF_FOP_RELEASE] = { "RELEASE", client3_3_release }, [GF_FOP_RELEASEDIR] = { "RELEASEDIR", client3_3_releasedir }, [GF_FOP_GETSPEC] = { "GETSPEC", client3_getspec }, @@ -5882,6 +5983,7 @@ char *clnt3_3_fop_names[GFS3_OP_MAXVALUE] = { [GFS3_OP_SETATTR] = "SETATTR", [GFS3_OP_FSETATTR] = "FSETATTR", [GFS3_OP_READDIRP] = "READDIRP", + [GFS3_OP_FALLOCATE] = "FALLOCATE", [GFS3_OP_RELEASE] = "RELEASE", [GFS3_OP_RELEASEDIR] = "RELEASEDIR", [GFS3_OP_FREMOVEXATTR] = "FREMOVEXATTR", diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index e24282e2..377263af 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -1961,6 +1961,40 @@ out: return 0; } +int32_t +client_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + int ret = -1; + clnt_conf_t *conf = NULL; + rpc_clnt_procedure_t *proc = NULL; + clnt_args_t args = {0,}; + + conf = this->private; + if (!conf || !conf->fops) + goto out; + + args.fd = fd; + args.flags = mode; + args.offset = offset; + args.size = len; + args.xdata = xdata; + + proc = &conf->fops->proctable[GF_FOP_FALLOCATE]; + if (!proc) { + gf_log (this->name, GF_LOG_ERROR, + "rpc procedure not found for %s", + gf_fop_list[GF_FOP_FALLOCATE]); + goto out; + } + if (proc->fn) + ret = proc->fn (frame, this, &args); +out: + if (ret) + STACK_UNWIND_STRICT (fallocate, frame, -1, ENOTCONN, NULL, NULL, NULL); + + return 0; +} int32_t client_getspec (call_frame_t *frame, xlator_t *this, const char *key, @@ -2679,6 +2713,7 @@ struct xlator_fops fops = { .fxattrop = client_fxattrop, .setattr = client_setattr, .fsetattr = client_fsetattr, + .fallocate = client_fallocate, .getspec = client_getspec, }; diff --git a/xlators/protocol/server/src/server-rpc-fops.c b/xlators/protocol/server/src/server-rpc-fops.c index a9d2ee5b..4b847eab 100644 --- a/xlators/protocol/server/src/server-rpc-fops.c +++ b/xlators/protocol/server/src/server-rpc-fops.c @@ -1969,6 +1969,45 @@ out: return 0; } +int +server_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *statpre, struct iatt *statpost, dict_t *xdata) +{ + gfs3_fallocate_rsp rsp = {0,}; + server_state_t *state = NULL; + rpcsvc_request_t *req = NULL; + + req = frame->local; + state = CALL_STATE (frame); + + GF_PROTOCOL_DICT_SERIALIZE (this, xdata, (&rsp.xdata.xdata_val), + rsp.xdata.xdata_len, op_errno, out); + + if (op_ret) { + gf_log (this->name, GF_LOG_INFO, + "%"PRId64": FALLOCATE %"PRId64" (%s) ==> (%s)", + frame->root->unique, state->resolve.fd_no, + uuid_utoa (state->resolve.gfid), + strerror (op_errno)); + goto out; + } + + gf_stat_from_iatt (&rsp.statpre, statpre); + gf_stat_from_iatt (&rsp.statpost, statpost); + +out: + rsp.op_ret = op_ret; + rsp.op_errno = gf_errno_to_error (op_errno); + + server_submit_reply(frame, req, &rsp, NULL, 0, NULL, + (xdrproc_t) xdr_gfs3_fallocate_rsp); + + GF_FREE (rsp.xdata.xdata_val); + + return 0; +} + /* Resume function section */ int @@ -2914,6 +2953,27 @@ err: return 0; } +int +server_fallocate_resume (call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE (frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND (frame, server_fallocate_cbk, + bound_xl, bound_xl->fops->fallocate, + state->fd, state->flags, state->offset, state->size, + state->xdata); + return 0; +err: + server_fallocate_cbk(frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL, NULL, NULL); + + return 0; +} @@ -3094,7 +3154,65 @@ out: return ret; } +int +server3_3_fallocate(rpcsvc_request_t *req) +{ + server_state_t *state = NULL; + call_frame_t *frame = NULL; + gfs3_fallocate_req args = {{0},}; + int ret = -1; + int op_errno = 0; + + if (!req) + return ret; + + ret = xdr_to_generic (req->msg[0], &args, + (xdrproc_t)xdr_gfs3_fallocate_req); + if (ret < 0) { + //failed to decode msg; + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + frame = get_frame_from_request (req); + if (!frame) { + // something wrong, mostly insufficient memory + req->rpc_err = GARBAGE_ARGS; /* TODO */ + goto out; + } + frame->root->op = GF_FOP_FALLOCATE; + + state = CALL_STATE (frame); + if (!state->conn->bound_xl) { + /* auth failure, request on subvolume without setvolume */ + req->rpc_err = GARBAGE_ARGS; + goto out; + } + + state->resolve.type = RESOLVE_MUST; + state->resolve.fd_no = args.fd; + + state->flags = args.flags; + state->offset = args.offset; + state->size = args.size; + memcpy(state->resolve.gfid, args.gfid, 16); + + GF_PROTOCOL_DICT_UNSERIALIZE (state->conn->bound_xl, state->xdata, + (args.xdata.xdata_val), + (args.xdata.xdata_len), ret, + op_errno, out); + + ret = 0; + resolve_and_resume (frame, server_fallocate_resume); + +out: + free (args.xdata.xdata_val); + + if (op_errno) + req->rpc_err = GARBAGE_ARGS; + return ret; +} int server3_3_readlink (rpcsvc_request_t *req) { @@ -5748,6 +5866,7 @@ rpcsvc_actor_t glusterfs3_3_fop_actors[] = { [GFS3_OP_SETATTR] = { "SETATTR", GFS3_OP_SETATTR, server3_3_setattr, NULL, 0}, [GFS3_OP_FSETATTR] = { "FSETATTR", GFS3_OP_FSETATTR, server3_3_fsetattr, NULL, 0}, [GFS3_OP_READDIRP] = { "READDIRP", GFS3_OP_READDIRP, server3_3_readdirp, NULL, 0}, + [GFS3_OP_FALLOCATE] = { "FALLOCATE", GFS3_OP_FALLOCATE, server3_3_fallocate, NULL, 0}, [GFS3_OP_RELEASE] = { "RELEASE", GFS3_OP_RELEASE, server3_3_release, NULL, 0}, [GFS3_OP_RELEASEDIR] = { "RELEASEDIR", GFS3_OP_RELEASEDIR, server3_3_releasedir, NULL, 0}, [GFS3_OP_FREMOVEXATTR] = { "FREMOVEXATTR", GFS3_OP_FREMOVEXATTR, server3_3_fremovexattr, NULL, 0}, diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index 93577cf5..36883403 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -561,6 +561,72 @@ out: return 0; } +static int32_t +_posix_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size, + off_t offset, size_t len, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + struct iatt statpre = {0,}; + struct iatt statpost = {0,}; + struct posix_fd *pfd = NULL; + int32_t ret = -1; + int32_t flags = 0; + + DECLARE_OLD_FS_ID_VAR; + + SET_FS_ID (frame->root->uid, frame->root->gid); + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (this, out); + VALIDATE_OR_GOTO (fd, out); + + ret = posix_fd_ctx_get (fd, this, &pfd); + if (ret < 0) { + op_errno = -ret; + gf_log (this->name, GF_LOG_DEBUG, + "pfd is NULL from fd=%p", fd); + goto out; + } + + op_ret = posix_fdstat (this, pfd->fd, &statpre); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "fallocate (fstat) failed on fd=%p: %s", fd, + strerror (op_errno)); + goto out; + } + + if (keep_size) + flags = FALLOC_FL_KEEP_SIZE; + + op_ret = sys_fallocate(pfd->fd, flags, offset, len); + if (op_ret == -1) { + op_errno = errno; + goto out; + } + + op_ret = posix_fdstat (this, pfd->fd, &statpost); + if (op_ret == -1) { + op_errno = errno; + gf_log (this->name, GF_LOG_ERROR, + "fallocate (fstat) failed on fd=%p: %s", fd, + strerror (op_errno)); + goto out; + } + + op_ret = 0; + +out: + SET_TO_OLD_FS_ID (); + + STACK_UNWIND_STRICT (fallocate, frame, op_ret, op_errno, + &statpre, &statpost, NULL); + + return 0; +} + int32_t posix_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata) @@ -4659,6 +4725,7 @@ struct xlator_fops fops = { .fxattrop = posix_fxattrop, .setattr = posix_setattr, .fsetattr = posix_fsetattr, + .fallocate = _posix_fallocate, }; struct xlator_cbks cbks = { -- cgit