diff options
author | Raghavendra Bhat <raghavendra@redhat.com> | 2018-11-06 15:27:31 -0500 |
---|---|---|
committer | Amar Tumballi <amarts@redhat.com> | 2018-12-12 15:56:55 +0000 |
commit | 7dadea15c58eb92e5f5727190bf9446dd6fe7a3c (patch) | |
tree | 4ced04de0219407604f30b1663b586f16b54dd06 | |
parent | 5c723ade196600030ee84621384cceb10fff64d8 (diff) |
copy_file_range support in GlusterFS
* libglusterfs changes to add new fop
* Fuse changes:
- Changes in fuse bridge xlator to receive and send responses
* posix changes to perform the op on the backend filesystem
* protocol and rpc changes for sending and receiving the fop
* gfapi changes for performing the fop
* tools: glfs-copy-file-range tool for testing copy_file_range fop
- Although copy_file_range support has been added to the upstream
fuse kernel module, no kernel release containing that support has
been made yet. It is expected to arrive in the upcoming
linux-4.20 release.
So, as of now, executing the copy_file_range fop on a FUSE-based
filesystem results in the fuse kernel module sending a read on the
source fd and a write on the destination fd.
Therefore, a small gfapi-based tool has been written to be able to
test the copy_file_range fop. This tool is similar (in functionality)
to the example program given in the copy_file_range man page.
So, running regular copy_file_range on a fuse mount point and
running the gfapi-based glfs-copy-file-range tool gives some idea of
how fast copy_file_range (or reflink) can be.
On the local machine this was the result obtained.
mount -t glusterfs workstation:new /mnt/glusterfs
[root@workstation ~]# cd /mnt/glusterfs/
[root@workstation glusterfs]# ls
file
[root@workstation glusterfs]# cd
[root@workstation ~]# time /tmp/a.out /mnt/glusterfs/file /mnt/glusterfs/new
real 0m6.495s
user 0m0.000s
sys 0m1.439s
[root@workstation ~]# time glfs-copy-file-range $(hostname) new /tmp/glfs.log /file /rrr
OPEN_SRC: opening /file is success
OPEN_DST: opening /rrr is success
FSTAT_SRC: fstat on /rrr is success
copy_file_range successful
real 0m0.309s
user 0m0.039s
sys 0m0.017s
This tool needs the following arguments:
1) hostname
2) volume name
3) log file path
4) source file path (relative to the gluster volume root)
5) destination file path (relative to the gluster volume root)
"glfs-copy-file-range <hostname> <volume> <log file path> <source> <destination>"
- Added a testcase as well to run glfs-copy-file-range tool
* io-stats changes to capture the fop for profiling
* NOTE:
- Added conditional check to see whether the copy_file_range syscall
is available or not. If not, then return ENOSYS.
- Added a conditional check on the kernel minor version in fuse_kernel.h
and fuse-bridge where copy_file_range is referenced. The kernel
minor version is kept as it is, i.e. 24. Increment it in the future
when there is a kernel release which contains support for the
copy_file_range fop in the fuse kernel module.
* The document which contains a writeup on this enhancement can be found at
https://docs.google.com/document/d/1BSILbXr_knynNwxSyyu503JoTz5QFM_4suNIh2WwrSc/edit
Change-Id: I280069c814dd21ce6ec3be00a884fc24ab692367
updates: #536
Signed-off-by: Raghavendra Bhat <raghavendra@redhat.com>
57 files changed, 1911 insertions, 19 deletions
diff --git a/api/src/Makefile.am b/api/src/Makefile.am index 6ed30bc99f6..7f9a7d17b35 100644 --- a/api/src/Makefile.am +++ b/api/src/Makefile.am @@ -19,7 +19,7 @@ libgfapi_la_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(top_srcdir)/rpc/xdr/src \ -I$(top_builddir)/rpc/xdr/src \ -DDATADIR=\"$(localstatedir)\" \ - -D__USE_FILE_OFFSET64 + -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases index a71422c8577..0e52c38d346 100644 --- a/api/src/gfapi.aliases +++ b/api/src/gfapi.aliases @@ -188,3 +188,4 @@ _pub_glfs_ftruncate _glfs_ftruncate$GFAPI_future _pub_glfs_ftruncate_async _glfs_ftruncate_async$GFAPI_future _pub_glfs_discard_async _glfs_discard_async$GFAPI_future _pub_glfs_zerofill_async _glfs_zerofill_async$GFAPI_future +_pub_glfs_copy_file_range _glfs_copy_file_range$GFAPI_future
\ No newline at end of file diff --git a/api/src/gfapi.map b/api/src/gfapi.map index c47323781fb..1be2953ce9a 100644 --- a/api/src/gfapi.map +++ b/api/src/gfapi.map @@ -255,5 +255,6 @@ GFAPI_future { glfs_ftruncate_async; glfs_discard_async; glfs_zerofill_async; + glfs_copy_file_range; } GFAPI_4.1.6; diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c index 2a1cc73ccee..f59990aed1f 100644 --- a/api/src/glfs-fops.c +++ b/api/src/glfs-fops.c @@ -1333,6 +1333,161 @@ invalid_fs: } ssize_t +pub_glfs_copy_file_range(struct glfs_fd *glfd_in, off64_t *off_in, + struct glfs_fd *glfd_out, off64_t *off_out, size_t len, + unsigned int flags, struct stat *statbuf, + struct stat *prestat, struct stat *poststat) +{ + xlator_t *subvol = NULL; + int ret = -1; + fd_t *fd_in = NULL; + fd_t *fd_out = NULL; + struct iatt preiatt = + { + 0, + }, + iattbuf = + { + 0, + }, + postiatt = { + 0, + }; + dict_t *fop_attr = NULL; + off64_t pos_in; + off64_t pos_out; + + DECLARE_OLD_THIS; + __GLFS_ENTRY_VALIDATE_FD(glfd_in, invalid_fs); + __GLFS_ENTRY_VALIDATE_FD(glfd_out, invalid_fs); + + GF_REF_GET(glfd_in); + GF_REF_GET(glfd_out); + + if (glfd_in->fs != glfd_out->fs) { + ret = -1; + errno = EXDEV; + goto out; + } + + subvol = glfs_active_subvol(glfd_in->fs); + if (!subvol) { + ret = -1; + errno = EIO; + goto out; + } + + fd_in = glfs_resolve_fd(glfd_in->fs, subvol, glfd_in); + if (!fd_in) { + ret = -1; + errno = EBADFD; + goto out; + } + + fd_out = glfs_resolve_fd(glfd_out->fs, subvol, glfd_out); + if (!fd_out) { + ret = -1; + errno = EBADFD; + goto out; + } + + /* + * This is based on how the vfs layer in the kernel handles + * copy_file_range call. Upon receiving it follows the + * below method to consider the offset. + * if (off_in != NULL) + * use the value off_in to perform the op + * else if off_in == NULL + * use the current file offset position to perform the op + * + * For gfapi, glfd->offset is used. For a freshly opened + * fd, the offset is set to 0. 
+ */ + if (off_in) + pos_in = *off_in; + else + pos_in = glfd_in->offset; + + if (off_out) + pos_out = *off_out; + else + pos_out = glfd_out->offset; + + ret = get_fop_attr_thrd_key(&fop_attr); + if (ret) + gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed"); + + ret = syncop_copy_file_range(subvol, fd_in, pos_in, fd_out, pos_out, len, + flags, &iattbuf, &preiatt, &postiatt, fop_attr, + NULL); + DECODE_SYNCOP_ERR(ret); + + if (ret >= 0) { + pos_in += ret; + pos_out += ret; + + if (off_in) + *off_in = pos_in; + if (off_out) + *off_out = pos_out; + + if (statbuf) + glfs_iatt_to_stat(glfd_in->fs, &iattbuf, statbuf); + if (prestat) + glfs_iatt_to_stat(glfd_in->fs, &preiatt, prestat); + if (poststat) + glfs_iatt_to_stat(glfd_in->fs, &postiatt, poststat); + } + + if (ret <= 0) + goto out; + + /* + * If *off_in is NULL, then there is no offset info that can + * obtained from the input argument. Hence follow below method. + * If *off_in is NULL, then + * glfd->offset = offset + ret; + * else + * do nothing. + * + * According to the man page of copy_file_range, if off_in is + * NULL, then the offset of the source file is advanced by + * the return value of the fop. The same applies to off_out as + * well. Otherwise, if *off_in is not NULL, then the offset + * is not advanced by the filesystem. The entity which sends + * the copy_file_range call is supposed to advance the offset + * value in its buffer (pointed to by *off_in or *off_out) + * by the return value of copy_file_range. 
+ */ + if (!off_in) + glfd_in->offset += ret; + + if (!off_out) + glfd_out->offset += ret; + +out: + if (fd_in) + fd_unref(fd_in); + if (fd_out) + fd_unref(fd_out); + if (glfd_in) + GF_REF_PUT(glfd_in); + if (glfd_out) + GF_REF_PUT(glfd_out); + if (fop_attr) + dict_unref(fop_attr); + + glfs_subvol_done(glfd_in->fs, subvol); + + __GLFS_EXIT_FS; + +invalid_fs: + return ret; +} + +GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_copy_file_range, future); + +ssize_t pub_glfs_pwritev(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, off_t offset, int flags) { diff --git a/api/src/glfs.h b/api/src/glfs.h index cd642a5ea20..160a784222f 100644 --- a/api/src/glfs.h +++ b/api/src/glfs.h @@ -42,6 +42,38 @@ #include <sys/statvfs.h> #include <inttypes.h> +/* + * For off64_t to be defined, we need both + * __USE_LARGEFILE64 to be true and __off64_t_defnined to be + * false. But, making __USE_LARGEFILE64 true causes other issues + * such as redinition of stat and fstat to stat64 and fstat64 + * respectively which again causes compilation issues. + * Without off64_t being defined, this will not compile as + * copy_file_range uses off64_t. Hence define it here. First + * check whether __off64_t_defined is true or not. <unistd.h> + * sets that flag when it defines off64_t. If __off64_t_defined + * is false and __USE_FILE_OFFSET64 is true, then go on to define + * off64_t using __off64_t. + */ +#ifndef GF_BSD_HOST_OS +#if defined(__USE_FILE_OFFSET64) && !defined(__off64_t_defined) +typedef __off64_t off64_t; +#endif /* defined(__USE_FILE_OFFSET64) && !defined(__off64_t_defined) */ +#else +#include <stdio.h> +#ifndef _OFF64_T_DECLARED +/* + * Including <stdio.h> (done above) should actually define + * _OFF64_T_DECLARED with off64_t data type being available + * for consumption. But, off64_t data type is not recognizable + * for FreeBSD versions less than 11. Hence, int64_t is typedefed + * to off64_t. 
+ */ +#define _OFF64_T_DECLARED +typedef int64_t off64_t; +#endif /* _OFF64_T_DECLARED */ +#endif /* GF_BSD_HOST_OS */ + #if defined(HAVE_SYS_ACL_H) || (defined(USE_POSIX_ACLS) && USE_POSIX_ACLS) #include <sys/acl.h> #else @@ -594,6 +626,13 @@ off_t glfs_lseek(glfs_fd_t *fd, off_t offset, int whence) __THROW GFAPI_PUBLIC(glfs_lseek, 3.4.0); +ssize_t +glfs_copy_file_range(struct glfs_fd *glfd_in, off64_t *off_in, + struct glfs_fd *glfd_out, off64_t *off_out, size_t len, + unsigned int flags, struct stat *statbuf, + struct stat *prestat, struct stat *poststat) __THROW + GFAPI_PUBLIC(glfs_copy_file_range, future); + int glfs_truncate(glfs_t *fs, const char *path, off_t length) __THROW GFAPI_PUBLIC(glfs_truncate, 3.7.15); diff --git a/configure.ac b/configure.ac index 3ddb6f073a5..d3c8f8b9514 100644 --- a/configure.ac +++ b/configure.ac @@ -1018,6 +1018,25 @@ if test "x${have_posix_fallocate}" = "xyes"; then AC_DEFINE(HAVE_POSIX_FALLOCATE, 1, [define if posix_fallocate exists]) fi +# On fedora-29, copy_file_range syscall and the libc API both are present. +# Whereas, on some machines such as centos-7, RHEL-7, the API is not there. +# Only the system call is present. So, this change is to determine whether +# the API is present or not. If not, then check whether the system call is +# present or not. Accordingly sys_copy_file_range function will first call +# the API if it is there. Otherwise it will call syscall(SYS_copy_file_range). 
+AC_CHECK_FUNC([copy_file_range], [have_copy_file_range=yes]) +if test "x${have_copy_file_range}" = "xyes"; then + AC_DEFINE(HAVE_COPY_FILE_RANGE, 1, [define if copy_file_range exists]) +else + OLD_CFLAGS=${CFLAGS} + CFLAGS="-D_GNU_SOURCE" + AC_CHECK_DECL([SYS_copy_file_range], , , [#include <sys/syscall.h>]) + if test "x${ac_cv_have_decl_SYS_copy_file_range}" = "xyes"; then + AC_DEFINE(HAVE_COPY_FILE_RANGE_SYS, 1, [define if SYS_copy_file_range is available]) + fi + CFLAGS=${OLD_CFLAGS} +fi + BUILD_NANOSECOND_TIMESTAMPS=no AC_CHECK_FUNC([utimensat], [have_utimensat=yes]) if test "x${have_utimensat}" = "xyes"; then diff --git a/glusterfs-api.pc.in b/glusterfs-api.pc.in index 6af4e108f7f..4a2edb7bf07 100644 --- a/glusterfs-api.pc.in +++ b/glusterfs-api.pc.in @@ -9,4 +9,4 @@ Description: GlusterFS API Version: @GFAPI_VERSION@ Requires: @PKGCONFIG_UUID@ Libs: -L${libdir} @GFAPI_LIBS@ -lgfapi -lglusterfs -lgfrpc -lgfxdr -Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -DUSE_POSIX_ACLS=@USE_POSIX_ACLS@ +Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 -DUSE_POSIX_ACLS=@USE_POSIX_ACLS@ diff --git a/libgfchangelog.pc.in b/libgfchangelog.pc.in index e2ff1fb6214..79eac2ad2d3 100644 --- a/libgfchangelog.pc.in +++ b/libgfchangelog.pc.in @@ -9,4 +9,4 @@ Description: GlusterFS Changelog Consumer Library Version: @LIBGFCHANGELOG_VERSION@ Requires: @PKGCONFIG_UUID@ Libs: -L${libdir} -lgfchangelog -lglusterfs -Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 +Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am index 1d06f1586a9..970f4b74978 100644 --- a/libglusterfs/src/Makefile.am +++ b/libglusterfs/src/Makefile.am @@ -6,7 +6,7 @@ libglusterfs_la_CFLAGS = $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \ libglusterfs_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \ 
-DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \ -DXLATORPARENTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)\" \ - -DXXH_NAMESPACE=GF_ \ + -DXXH_NAMESPACE=GF_ -D__USE_LARGEFILE64 \ -I$(top_srcdir)/rpc/xdr/src/ -I$(top_builddir)/rpc/xdr/src/ \ -I$(top_srcdir)/rpc/rpc-lib/src/ -I$(CONTRIBDIR)/rbtree \ -I$(CONTRIBDIR)/libexecinfo ${ARGP_STANDALONE_CPPFLAGS} \ diff --git a/libglusterfs/src/call-stub.c b/libglusterfs/src/call-stub.c index 96454dfaeb5..886dfa52ccc 100644 --- a/libglusterfs/src/call-stub.c +++ b/libglusterfs/src/call-stub.c @@ -1818,6 +1818,51 @@ out: } call_stub_t * +fop_copy_file_range_stub(call_frame_t *frame, fop_copy_file_range_t fn, + fd_t *fd_in, off64_t off_in, fd_t *fd_out, + off64_t off_out, size_t len, uint32_t flags, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + GF_VALIDATE_OR_GOTO("call-stub", frame, out); + GF_VALIDATE_OR_GOTO("call-stub", fn, out); + + stub = stub_new(frame, 1, GF_FOP_COPY_FILE_RANGE); + GF_VALIDATE_OR_GOTO("call-stub", stub, out); + + stub->fn.copy_file_range = fn; + + args_copy_file_range_store(&stub->args, fd_in, off_in, fd_out, off_out, len, + flags, xdata); + +out: + return stub; +} + +call_stub_t * +fop_copy_file_range_cbk_stub(call_frame_t *frame, fop_copy_file_range_cbk_t fn, + int32_t op_ret, int32_t op_errno, + struct iatt *stbuf, struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + GF_VALIDATE_OR_GOTO("call-stub", frame, out); + GF_VALIDATE_OR_GOTO("call-stub", fn, out); + + stub = stub_new(frame, 0, GF_FOP_COPY_FILE_RANGE); + GF_VALIDATE_OR_GOTO("call-stub", stub, out); + + stub->fn_cbk.copy_file_range = fn; + args_copy_file_range_cbk_store(&stub->args_cbk, op_ret, op_errno, stbuf, + prebuf_dst, postbuf_dst, xdata); + +out: + return stub; +} + +call_stub_t * fop_put_stub(call_frame_t *frame, fop_put_t fn, loc_t *loc, mode_t mode, mode_t umask, uint32_t flags, struct iovec *vector, int32_t count, off_t offset, struct iobref *iobref, dict_t 
*xattr, dict_t *xdata) @@ -2213,6 +2258,13 @@ call_resume_wind(call_stub_t *stub) stub->args.iobref, stub->args.xattr, stub->args.xdata); break; + case GF_FOP_COPY_FILE_RANGE: + stub->fn.copy_file_range( + stub->frame, stub->frame->this, stub->args.fd, + stub->args.off_in, stub->args.fd_dst, stub->args.off_out, + stub->args.size, stub->args.flags, stub->args.xdata); + break; + default: gf_msg_callingfn("call-stub", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ENTRY, @@ -2439,6 +2491,12 @@ call_resume_unwind(call_stub_t *stub) stub->args_cbk.xdata); break; + case GF_FOP_COPY_FILE_RANGE: + STUB_UNWIND(stub, copy_file_range, &stub->args_cbk.stat, + &stub->args_cbk.prestat, &stub->args_cbk.poststat, + stub->args_cbk.xdata); + break; + default: gf_msg_callingfn("call-stub", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ENTRY, diff --git a/libglusterfs/src/default-args.c b/libglusterfs/src/default-args.c index 479974e1637..cfceabd1f46 100644 --- a/libglusterfs/src/default-args.c +++ b/libglusterfs/src/default-args.c @@ -1541,6 +1541,48 @@ args_namelink_store(default_args_t *args, loc_t *loc, dict_t *xdata) return 0; } +int +args_copy_file_range_store(default_args_t *args, fd_t *fd_in, off64_t off_in, + fd_t *fd_out, off64_t off_out, size_t len, + uint32_t flags, dict_t *xdata) +{ + if (fd_in) + args->fd = fd_ref(fd_in); + if (fd_out) + args->fd_dst = fd_ref(fd_out); + args->size = len; + args->off_in = off_in; + args->off_out = off_out; + args->flags = flags; + + if (xdata) + args->xdata = dict_ref(xdata); + + return 0; +} + +int +args_copy_file_range_cbk_store(default_args_cbk_t *args, int32_t op_ret, + int32_t op_errno, struct iatt *stbuf, + struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata) +{ + args->op_ret = op_ret; + args->op_errno = op_errno; + if (op_ret >= 0) { + if (postbuf_dst) + args->poststat = *postbuf_dst; + if (prebuf_dst) + args->prestat = *prebuf_dst; + if (stbuf) + args->stat = *stbuf; + } + if (xdata) + args->xdata = dict_ref(xdata); + + return 0; +} 
+ void args_cbk_wipe(default_args_cbk_t *args_cbk) { diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c index 97de8193dcb..5bf64e8c6c6 100644 --- a/libglusterfs/src/defaults-tmpl.c +++ b/libglusterfs/src/defaults-tmpl.c @@ -84,6 +84,7 @@ struct xlator_fops _default_fops = { .put = default_put, .icreate = default_icreate, .namelink = default_namelink, + .copy_file_range = default_copy_file_range, }; struct xlator_fops *default_fops = &_default_fops; diff --git a/libglusterfs/src/generator.py b/libglusterfs/src/generator.py index c17d450502d..5b7aa4764a0 100755 --- a/libglusterfs/src/generator.py +++ b/libglusterfs/src/generator.py @@ -599,6 +599,19 @@ ops['namelink'] = ( ('cbk-arg', 'xdata', 'dict_t *'), ) +ops['copy_file_range'] = ( + ('fop-arg', 'fd_in', 'fd_t *'), + ('fop-arg', 'off_in', 'off64_t '), + ('fop-arg', 'fd_out', 'fd_t *'), + ('fop-arg', 'off_out', 'off64_t '), + ('fop-arg', 'len', 'size_t'), + ('fop-arg', 'flags', 'uint32_t'), + ('fop-arg', 'xdata', 'dict_t *'), + ('cbk-arg', 'stbuf', 'struct iatt *'), + ('cbk-arg', 'prebuf_dst', 'struct iatt *'), + ('cbk-arg', 'postbuf_dst', 'struct iatt *'), + ('cbk-arg', 'xdata', 'dict_t *'), +) ##################################################################### xlator_cbks['forget'] = ( ('fn-arg', 'this', 'xlator_t *'), diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c index 35482545ab3..4fec0638926 100644 --- a/libglusterfs/src/globals.c +++ b/libglusterfs/src/globals.c @@ -77,6 +77,7 @@ const char *gf_fop_list[GF_FOP_MAXVALUE] = { [GF_FOP_PUT] = "PUT", [GF_FOP_ICREATE] = "ICREATE", [GF_FOP_NAMELINK] = "NAMELINK", + [GF_FOP_COPY_FILE_RANGE] = "COPY_FILE_RANGE", }; const char *gf_upcall_list[GF_UPCALL_FLAGS_MAXVALUE] = { diff --git a/libglusterfs/src/glusterfs/call-stub.h b/libglusterfs/src/glusterfs/call-stub.h index bfed0fbc14a..c01c935e73d 100644 --- a/libglusterfs/src/glusterfs/call-stub.h +++ b/libglusterfs/src/glusterfs/call-stub.h @@ -81,6 +81,7 @@ typedef 
struct _call_stub { fop_put_t put; fop_icreate_t icreate; fop_namelink_t namelink; + fop_copy_file_range_t copy_file_range; } fn; union { @@ -136,6 +137,7 @@ typedef struct _call_stub { fop_put_cbk_t put; fop_icreate_cbk_t icreate; fop_namelink_cbk_t namelink; + fop_copy_file_range_cbk_t copy_file_range; } fn_cbk; default_args_t args; @@ -589,6 +591,18 @@ fop_namelink_cbk_stub(call_frame_t *frame, fop_namelink_cbk_t fn, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata); +call_stub_t * +fop_copy_file_range_stub(call_frame_t *frame, fop_copy_file_range_t fn, + fd_t *fd_in, off64_t off_in, fd_t *fd_out, + off64_t off_out, size_t len, uint32_t flags, + dict_t *xdata); + +call_stub_t * +fop_copy_file_range_cbk_stub(call_frame_t *frame, fop_copy_file_range_cbk_t fn, + int32_t op_ret, int32_t op_errno, + struct iatt *stbuf, struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata); + void call_resume(call_stub_t *stub); void diff --git a/libglusterfs/src/glusterfs/compat.h b/libglusterfs/src/glusterfs/compat.h index 38c07b5ae7c..9374b79f9af 100644 --- a/libglusterfs/src/glusterfs/compat.h +++ b/libglusterfs/src/glusterfs/compat.h @@ -116,6 +116,25 @@ #include <limits.h> #include <libgen.h> +/* + * This is where things like off64_t are defined. + * So include it before declaring _OFF64_T_DECLARED. + * If the freebsd version has support for off64_t + * including stdio.h should be sufficient. + */ +#include <stdio.h> + +#ifndef _OFF64_T_DECLARED +/* + * Including <stdio.h> (done above) should actually define + * _OFF64_T_DECLARED with off64_t data type being available + * for consumption. But, off64_t data type is not recognizable + * for FreeBSD versions less than 11. Hence, int64_t is typedefed + * to off64_t. 
+ */ +#define _OFF64_T_DECLARED +typedef int64_t off64_t; +#endif /* _OFF64_T_DECLARED */ #ifndef XATTR_CREATE enum { diff --git a/libglusterfs/src/glusterfs/default-args.h b/libglusterfs/src/glusterfs/default-args.h index f15f558202b..ca7526fcab6 100644 --- a/libglusterfs/src/glusterfs/default-args.h +++ b/libglusterfs/src/glusterfs/default-args.h @@ -234,6 +234,12 @@ void args_lease_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno, struct gf_lease *lease, dict_t *xdata); +int +args_copy_file_range_cbk_store(default_args_cbk_t *args, int32_t op_ret, + int32_t op_errno, struct iatt *stbuf, + struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata); + void args_cbk_wipe(default_args_cbk_t *args_cbk); @@ -439,6 +445,11 @@ args_icreate_store(default_args_t *args, loc_t *loc, mode_t mode, int args_namelink_store(default_args_t *args, loc_t *loc, dict_t *xdata); +int +args_copy_file_range_store(default_args_t *args, fd_t *fd_in, off64_t off_in, + fd_t *fd_out, off_t off64_out, size_t len, + uint32_t flags, dict_t *xdata); + void args_cbk_init(default_args_cbk_t *args_cbk); #endif /* _DEFAULT_ARGS_H */ diff --git a/libglusterfs/src/glusterfs/defaults.h b/libglusterfs/src/glusterfs/defaults.h index 5d6b8e28a51..5a818eeb91a 100644 --- a/libglusterfs/src/glusterfs/defaults.h +++ b/libglusterfs/src/glusterfs/defaults.h @@ -48,10 +48,20 @@ typedef struct { } default_args_cbk_t; typedef struct { - loc_t loc; /* @old in rename(), link() */ - loc_t loc2; /* @new in rename(), link() */ - fd_t *fd; + loc_t loc; /* @old in rename(), link() */ + loc_t loc2; /* @new in rename(), link() */ + fd_t *fd; /* for all the fd based ops */ + fd_t *fd_dst; /* Only for copy_file_range destination */ off_t offset; + /* + * According to the man page of copy_file_range, + * the offsets for source and destination file + * are of type loff_t. But the type loff_t is + * linux specific and is actual a typedef of + * off64_t. 
+ */ + off64_t off_in; /* For copy_file_range source fd */ + off64_t off_out; /* For copy_file_range destination fd only */ int mask; size_t size; mode_t mode; @@ -323,6 +333,11 @@ int32_t default_namelink(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); +int32_t +default_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, + off64_t off_in, fd_t *fd_out, off64_t off_out, + size_t len, uint32_t flags, dict_t *xdata); + /* Resume */ int32_t default_getspec_resume(call_frame_t *frame, xlator_t *this, const char *key, @@ -542,6 +557,11 @@ default_put_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, int32_t count, off_t off, struct iobref *iobref, dict_t *xattr, dict_t *xdata); +int32_t +default_copy_file_range_resume(call_frame_t *frame, xlator_t *this, fd_t *fd_in, + off_t off64_in, fd_t *fd_out, off64_t off_out, + size_t len, uint32_t flags, dict_t *xdata); + /* _cbk_resume */ int32_t @@ -813,6 +833,13 @@ int32_t default_namelink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); +int32_t +default_copy_file_range_cbk_resume(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *stbuf, + struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata); + /* _CBK */ int32_t default_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, @@ -1072,6 +1099,12 @@ default_namelink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *postbuf, dict_t *xdata); int32_t +default_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *stbuf, struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata); + +int32_t default_lookup_failure_cbk(call_frame_t *frame, int32_t op_errno); int32_t @@ -1231,6 +1264,9 @@ int32_t default_namelink_failure_cbk(call_frame_t *frame, int32_t op_errno); int32_t +default_copy_file_range_failure_cbk(call_frame_t *frame, int32_t op_errno); + 
+int32_t default_mem_acct_init(xlator_t *this); void diff --git a/libglusterfs/src/glusterfs/syncop.h b/libglusterfs/src/glusterfs/syncop.h index 203abe92b57..7a6167b0488 100644 --- a/libglusterfs/src/glusterfs/syncop.h +++ b/libglusterfs/src/glusterfs/syncop.h @@ -138,8 +138,19 @@ typedef struct syncbarrier syncbarrier_t; struct syncargs { int op_ret; int op_errno; + + /* + * The below 3 iatt structures are used in the fops + * whose callbacks get struct iatt as one of the + * a return arguments. Currently, the maximum number + * of iatt structures returned is 3 for some fops + * such as mknod, copy_file_range, mkdir etc. So + * all the following 3 iatt structures would be used + * for those fops. + */ struct iatt iatt1; struct iatt iatt2; + struct iatt iatt3; dict_t *xattr; struct statvfs statvfs_buf; struct iovec *vector; @@ -634,4 +645,17 @@ syncop_entrylk(xlator_t *subvol, const char *volume, loc_t *loc, const char *basename, entrylk_cmd cmd, entrylk_type type, dict_t *xdata_in, dict_t **xdata_out); +int +syncop_copy_file_range(xlator_t *subvol, fd_t *fd_in, off64_t off_in, + fd_t *fd_out, off64_t off_out, size_t len, + uint32_t flags, struct iatt *stbuf, + struct iatt *preiatt_dst, struct iatt *postiatt_dst, + dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *stbuf, + struct iatt *prebuf_dst, struct iatt *postbuf_dst, + dict_t *xdata); + #endif /* _SYNCOP_H */ diff --git a/libglusterfs/src/glusterfs/syscall.h b/libglusterfs/src/glusterfs/syscall.h index faaf694b22c..6b33c141a5e 100644 --- a/libglusterfs/src/glusterfs/syscall.h +++ b/libglusterfs/src/glusterfs/syscall.h @@ -17,6 +17,7 @@ #include <sys/stat.h> #include <sys/time.h> #include <sys/socket.h> +#include <stdio.h> /* GF follows the Linux XATTR definition, which differs in Darwin. 
*/ #define GF_XATTR_CREATE 0x1 /* set value, fail if attr already exists */ @@ -228,4 +229,32 @@ sys_socket(int domain, int type, int protocol); int sys_accept(int sock, struct sockaddr *sockaddr, socklen_t *socklen, int flags); +#ifdef GF_BSD_HOST_OS +#ifndef _OFF64_T_DECLARED +/* + * Including <stdio.h> (done above) should actually define + * _OFF64_T_DECLARED with off64_t data type being available + * for consumption. But, off64_t data type is not recognizable + * for FreeBSD versions less than 11. Hence, int64_t is typedefed + * to off64_t. + */ +#define _OFF64_T_DECLARED +typedef int64_t off64_t; +#endif /* _OFF64_T_DECLARED */ +#endif /* GF_BSD_HOST_OS */ + +/* + * According to the man page of copy_file_range, both off_in and off_out are + * pointers to the data type loff_t (i.e. loff_t *). But, freebsd does not + * have (and recognize) loff_t. Since loff_t is 64 bits, use off64_t + * instead. Since it's a pointer type it should be okay. It just needs + * to be a pointer-to-64-bit pointer for both 32- and 64-bit platforms. + * off64_t is recognized by freebsd. + * TODO: In future, when freebsd can recognize loff_t, probably revisit this + * and change the off_in and off_out to (loff_t *). 
+ */ +ssize_t +sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out, + size_t len, unsigned int flags); + #endif /* __SYSCALL_H__ */ diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h index 4137d12eb27..12d507bc021 100644 --- a/libglusterfs/src/glusterfs/xlator.h +++ b/libglusterfs/src/glusterfs/xlator.h @@ -23,6 +23,7 @@ #include "glusterfs/list.h" #include "glusterfs/latency.h" #include "glusterfs/compat-uuid.h" +#include "glusterfs/syscall.h" #define FIRST_CHILD(xl) (xl->children->xlator) #define SECOND_CHILD(xl) (xl->children->next->xlator) @@ -354,6 +355,11 @@ typedef int32_t (*fop_namelink_cbk_t)(call_frame_t *frame, void *cookie, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata); +typedef int32_t (*fop_copy_file_range_cbk_t)( + call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *stbuf, struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata); + typedef int32_t (*fop_lookup_t)(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); @@ -544,6 +550,11 @@ typedef int32_t (*fop_icreate_t)(call_frame_t *frame, xlator_t *this, typedef int32_t (*fop_namelink_t)(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); +typedef int32_t (*fop_copy_file_range_t)(call_frame_t *frame, xlator_t *this, + fd_t *fd_in, off64_t off_in, + fd_t *fd_out, off64_t off_out, + size_t len, uint32_t flags, + dict_t *xdata); /* WARNING: make sure the list is in order with FOP definition in `rpc/xdr/src/glusterfs-fops.x`. 
@@ -609,6 +620,7 @@ struct xlator_fops { fop_put_t put; fop_icreate_t icreate; fop_namelink_t namelink; + fop_copy_file_range_t copy_file_range; /* these entries are used for a typechecking hack in STACK_WIND _only_ */ /* make sure to add _cbk variables only after defining regular fops as @@ -673,6 +685,7 @@ struct xlator_fops { fop_put_cbk_t put_cbk; fop_icreate_cbk_t icreate_cbk; fop_namelink_cbk_t namelink_cbk; + fop_copy_file_range_cbk_t copy_file_range_cbk; }; typedef int32_t (*cbk_forget_t)(xlator_t *this, inode_t *inode); diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym index baf44de64ad..6ca6a639456 100644 --- a/libglusterfs/src/libglusterfs.sym +++ b/libglusterfs/src/libglusterfs.sym @@ -92,6 +92,8 @@ args_xattrop_cbk_store args_xattrop_store args_zerofill_cbk_store args_zerofill_store +args_copy_file_range_cbk_store +args_copy_file_range_store bin_to_data call_resume call_resume_keep_stub @@ -351,6 +353,10 @@ default_put default_put_cbk default_put_failure_cbk default_put_resume +default_copy_file_range +default_copy_file_range_cbk +default_copy_file_range_failure_cbk +default_copy_file_range_resume __dentry_grep dht_is_linkfile dict_add @@ -471,6 +477,8 @@ fd_unref _fini fop_access_stub fop_create_stub +fop_copy_file_range_stub +fop_copy_file_range_cbk_stub fop_discard_stub fop_entrylk_stub fop_enum_to_pri_string @@ -933,6 +941,7 @@ synclock_unlock syncop_access syncop_close syncop_create +syncop_copy_file_range syncopctx_getctx syncopctx_setfsgid syncopctx_setfsgroups @@ -1006,6 +1015,7 @@ sys_chmod sys_chown sys_close sys_closedir +sys_copy_file_range sys_creat sys_fallocate sys_fchmod diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c index b70953725ce..bf70daf95c3 100644 --- a/libglusterfs/src/syncop.c +++ b/libglusterfs/src/syncop.c @@ -3397,4 +3397,65 @@ syncop_namelink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, __wake(args); return 0; -}
\ No newline at end of file +} + +int +syncop_copy_file_range(xlator_t *subvol, fd_t *fd_in, off64_t off_in, + fd_t *fd_out, off64_t off_out, size_t len, + uint32_t flags, struct iatt *stbuf, + struct iatt *preiatt_dst, struct iatt *postiatt_dst, + dict_t *xdata_in, dict_t **xdata_out) +{ + struct syncargs args = { + 0, + }; + + SYNCOP(subvol, (&args), syncop_copy_file_range_cbk, + subvol->fops->copy_file_range, fd_in, off_in, fd_out, off_out, len, + flags, xdata_in); + + if (stbuf) { + *stbuf = args.iatt1; + } + if (preiatt_dst) { + *preiatt_dst = args.iatt2; + } + if (postiatt_dst) { + *postiatt_dst = args.iatt3; + } + + if (xdata_out) { + *xdata_out = args.xdata; + } else if (args.xdata) { + dict_unref(args.xdata); + } + + errno = args.op_errno; + return args.op_ret; +} + +int +syncop_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *stbuf, + struct iatt *prebuf_dst, struct iatt *postbuf_dst, + dict_t *xdata) +{ + struct syncargs *args = NULL; + + args = cookie; + + args->op_ret = op_ret; + args->op_errno = op_errno; + if (xdata) + args->xdata = dict_ref(xdata); + + if (op_ret >= 0) { + args->iatt1 = *stbuf; + args->iatt2 = *prebuf_dst; + args->iatt3 = *postbuf_dst; + } + + __wake(args); + + return 0; +} diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c index c72a8e16b34..1d88c8adac1 100644 --- a/libglusterfs/src/syscall.c +++ b/libglusterfs/src/syscall.c @@ -8,8 +8,8 @@ cases as published by the Free Software Foundation. 
*/ -#include "glusterfs/syscall.h" #include "glusterfs/compat.h" +#include "glusterfs/syscall.h" #include "glusterfs/mem-pool.h" #include "glusterfs/libglusterfs-messages.h" @@ -19,6 +19,9 @@ #include <fcntl.h> #include <unistd.h> #include <stdarg.h> +#ifdef HAVE_COPY_FILE_RANGE_SYS +#include <sys/syscall.h> +#endif #define FS_ERROR_LOG(result) \ do { \ @@ -802,3 +805,30 @@ err: #endif return newsock; } + +ssize_t +sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out, + size_t len, unsigned int flags) +{ + /* + * TODO: Add check for other platforms like freebsd etc if this syscall is + * not generic. + * This is what the function does. + * 1) Check whether copy_file_range API is present. If so call it. + * 2) If copy_file_range API is not present, then check whether + * the system call is there. If so, then use syscall to invoke + * SYS_copy_file_range system call. + * 3) If neither of the above is present, then return ENOSYS. + */ +#ifdef HAVE_COPY_FILE_RANGE + return FS_RET_CHECK( + copy_file_range(fd_in, off_in, fd_out, off_out, len, flags), errno); +#else +#ifdef HAVE_COPY_FILE_RANGE_SYS + return syscall(SYS_copy_file_range, fd_in, off_in, fd_out, off_out, len, + flags); +#else + return -ENOSYS; +#endif /* HAVE_COPY_FILE_RANGE_SYS */ +#endif /* HAVE_COPY_FILE_RANGE */ +} diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c index 1b7c9d46f88..b50848b3476 100644 --- a/libglusterfs/src/xlator.c +++ b/libglusterfs/src/xlator.c @@ -143,6 +143,7 @@ fill_defaults(xlator_t *xl) SET_DEFAULT_FOP(getspec); SET_DEFAULT_FOP(icreate); SET_DEFAULT_FOP(namelink); + SET_DEFAULT_FOP(copy_file_range); if (!xl->cbks) xl->cbks = &default_cbks; diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index 4950857ae9e..779878f52be 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -68,6 +68,7 @@ enum gf_fop_procnum { GFS3_OP_ICREATE, GFS3_OP_NAMELINK, GFS3_OP_PUT, + 
GFS3_OP_COPY_FILE_RANGE, GFS3_OP_MAXVALUE, }; diff --git a/rpc/xdr/src/glusterfs-fops.x b/rpc/xdr/src/glusterfs-fops.x index ffb71d6418a..bacf07735f4 100644 --- a/rpc/xdr/src/glusterfs-fops.x +++ b/rpc/xdr/src/glusterfs-fops.x @@ -77,6 +77,7 @@ enum glusterfs_fop_t { GF_FOP_PUT, GF_FOP_ICREATE, GF_FOP_NAMELINK, + GF_FOP_COPY_FILE_RANGE, GF_FOP_MAXVALUE }; diff --git a/rpc/xdr/src/glusterfs4-xdr.x b/rpc/xdr/src/glusterfs4-xdr.x index c183dbcd704..dcea17fac68 100644 --- a/rpc/xdr/src/glusterfs4-xdr.x +++ b/rpc/xdr/src/glusterfs4-xdr.x @@ -628,6 +628,19 @@ struct gfx_seek_rsp { struct gfx_setvolume_req { gfx_dict dict; } ; + + struct gfx_copy_file_range_req { + opaque gfid1[16]; + opaque gfid2[16]; + quad_t fd_in; + quad_t fd_out; + u_quad_t off_in; + u_quad_t off_out; + unsigned int size; + unsigned int flag; + gfx_dict xdata; /* Extra data */ +}; + struct gfx_setvolume_rsp { int op_ret; int op_errno; diff --git a/rpc/xdr/src/libgfxdr.sym b/rpc/xdr/src/libgfxdr.sym index bd9131be7c6..22cdf30bfda 100644 --- a/rpc/xdr/src/libgfxdr.sym +++ b/rpc/xdr/src/libgfxdr.sym @@ -344,3 +344,4 @@ xdr_compound_req_v2 xdr_gfx_compound_req xdr_compound_rsp_v2 xdr_gfx_compound_rsp +xdr_gfx_copy_file_range_req
\ No newline at end of file diff --git a/tests/basic/gfapi/gfapi-copy-file-range.t b/tests/basic/gfapi/gfapi-copy-file-range.t new file mode 100644 index 00000000000..c24c1433edf --- /dev/null +++ b/tests/basic/gfapi/gfapi-copy-file-range.t @@ -0,0 +1,80 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; + +TEST glusterd + +# for now, an xfs filesystem with reflink support is created. +# In future, better to make changes in MKFS_LOOP so that, +# one can create an xfs filesystem with reflink enabled in a +# generic and simple way, instead of doing below steps each +# time. +TEST truncate -s 2G $B0/xfs_image +mkfs.xfs 2>&1 | grep reflink +if [ $? -eq 0 ]; then + mkfs.xfs -f -i size=512 -m reflink=1 $B0/xfs_image; +else + mkfs.xfs -f -i size=512 $B0/xfs_image; +fi + +TEST mkdir $B0/bricks +TEST mount -t xfs -o loop $B0/xfs_image $B0/bricks + +# Just a single brick volume. More test cases need to be +# added in future for distribute, replicate, +# distributed replicate and distributed replicated sharded +# volumes. +TEST $CLI volume create $V0 $H0:$B0/bricks/brick1; +EXPECT 'Created' volinfo_field $V0 'Status'; + +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 + +TEST dd if=/dev/urandom of=$M0/file bs=1M count=555; + +# check for the existence of the created file +TEST stat $M0/file; + +# grab the size of the file +SRC_SIZE=$(stat -c %s $M0/file); + +logdir=`gluster --print-logdir` + +# TODO: +# For now, do not call copy-file-range utility. This is because, +# the regression machines are centos-7 based which does not have +# copy_file_range API available. So, instead of this testcase +# causing regression failures, for now, this is just a dummy test +# case. Uncomment the below tests (until volume stop) when there +# is support for copy_file_range in the regression machines. 
+# + +TEST build_tester $(dirname $0)/glfs-copy-file-range.c -lgfapi + +TEST ./$(dirname $0)/glfs-copy-file-range $H0 $V0 $logdir/gfapi-copy-file-range.log /file /new + +# check whether the destination file is created or not +TEST stat $M0/new + +# check the size of the destination file +DST_SIZE=$(stat -c %s $M0/new); + +# The sizes of the source and destination should be the same. +# At least it ensures that the copy_file_range API is working +# as expected. Whether the actual cloning happened via reflink +# or a read/write happened is a different matter. +TEST [ $SRC_SIZE == $DST_SIZE ]; + +cleanup_tester $(dirname $0)/glfs-copy-file-range + +TEST $CLI volume stop $V0 +TEST $CLI volume delete $V0 + +UMOUNT_LOOP $B0/bricks; + +cleanup; diff --git a/tests/basic/gfapi/glfs-copy-file-range.c b/tests/basic/gfapi/glfs-copy-file-range.c new file mode 100644 index 00000000000..756c38d21ec --- /dev/null +++ b/tests/basic/gfapi/glfs-copy-file-range.c @@ -0,0 +1,177 @@ +/* + Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <glusterfs/api/glfs.h> +#include <glusterfs/api/glfs-handles.h> +#include <string.h> +#include <time.h> +#include <libgen.h> + +static void +cleanup(glfs_t *fs) +{ + if (!fs) + return; +#if 0 + /* glfs fini path is still racy and crashing the program. Since + * this program any way has to die, we are not going to call fini + * in the released versions. i.e. final builds. For all + * internal testing lets enable this so that glfs_fini code + * path becomes stable. 
*/ + glfs_fini (fs); +#endif +} + +int +main(int argc, char **argv) +{ + glfs_t *fs = NULL; + int ret = -1; + char *volname = NULL; + char *logfilepath = NULL; + char *path_src = NULL; + char *path_dst = NULL; + glfs_fd_t *glfd_in = NULL; + glfs_fd_t *glfd_out = NULL; + char *volfile_server = NULL; + + struct stat stbuf = { + 0, + }; + struct stat prestat_dst = { + 0, + }; + struct stat poststat_dst = { + 0, + }; + size_t len; + + if (argc < 6) { + printf("%s <volume> <log file path> <source> <destination>", argv[0]); + ret = -1; + goto out; + } + + volfile_server = argv[1]; + volname = argv[2]; + logfilepath = argv[3]; + path_src = argv[4]; + path_dst = argv[5]; + + if (path_src[0] != '/') { + fprintf(stderr, "source path %s is not absolute", path_src); + errno = EINVAL; + goto out; + } + + if (path_dst[0] != '/') { + fprintf(stderr, "destination path %s is not absolute", path_dst); + errno = EINVAL; + goto out; + } + + fs = glfs_new(volname); + if (!fs) { + ret = -errno; + fprintf(stderr, "Not able to initialize volume '%s'", volname); + goto out; + } + + ret = glfs_set_volfile_server(fs, "tcp", volfile_server, 24007); + if (ret < 0) { + ret = -errno; + fprintf(stderr, + "Failed to set the volfile server, " + "%s", + strerror(errno)); + goto out; + } + + ret = glfs_set_logging(fs, logfilepath, 7); + if (ret < 0) { + ret = -errno; + fprintf(stderr, + "Failed to set the log file path, " + "%s", + strerror(errno)); + goto out; + } + + ret = glfs_init(fs); + if (ret < 0) { + ret = -errno; + if (errno == ENOENT) { + fprintf(stderr, "Volume %s does not exist", volname); + } else { + fprintf(stderr, + "%s: Not able to fetch " + "volfile from glusterd", + volname); + } + goto out; + } + + glfd_in = glfs_open(fs, path_src, O_RDONLY | O_NONBLOCK); + if (!glfd_in) { + ret = -errno; + goto out; + } else { + printf("OPEN_SRC: opening %s is success\n", path_src); + } + + glfd_out = glfs_creat(fs, path_dst, O_RDWR, 0644); + if (!glfd_out) { + fprintf(stderr, + "FAILED_DST_OPEN: 
failed to " + "open (create) %s (%s)\n", + path_dst, strerror(errno)); + ret = -errno; + goto out; + } else { + printf("OPEN_DST: opening %s is success\n", path_dst); + } + + ret = glfs_fstat(glfd_in, &stbuf); + if (ret < 0) { + ret = -errno; + goto out; + } else { + printf("FSTAT_SRC: fstat on %s is success\n", path_dst); + } + + len = stbuf.st_size; + + do { + ret = glfs_copy_file_range(glfd_in, NULL, glfd_out, NULL, len, 0, + &stbuf, &prestat_dst, &poststat_dst); + if (ret == -1) { + fprintf(stderr, "copy_file_range failed with %s\n", + strerror(errno)); + ret = -errno; + break; + } else { + printf("copy_file_range successful\n"); + len -= ret; + } + } while (len > 0); + +out: + if (glfd_in) + glfs_close(glfd_in); + if (glfd_out) + glfs_close(glfd_out); + + cleanup(fs); + + return ret; +} diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c index 7bf0d8a8f00..f40b00bba2d 100644 --- a/xlators/debug/io-stats/src/io-stats.c +++ b/xlators/debug/io-stats/src/io-stats.c @@ -2119,6 +2119,19 @@ io_stats_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, } int +io_stats_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *stbuf, struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata) +{ + UPDATE_PROFILE_STATS(frame, COPY_FILE_RANGE); + + STACK_UNWIND_STRICT(copy_file_range, frame, op_ret, op_errno, stbuf, + prebuf_dst, postbuf_dst, xdata); + return 0; +} + +int io_stats_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata) @@ -2873,6 +2886,19 @@ io_stats_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, } int +io_stats_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, + off_t off_in, fd_t *fd_out, off_t off_out, size_t len, + uint32_t flags, dict_t *xdata) +{ + START_FOP_LATENCY(frame); + + STACK_WIND(frame, io_stats_copy_file_range_cbk, 
FIRST_CHILD(this), + FIRST_CHILD(this)->fops->copy_file_range, fd_in, off_in, fd_out, + off_out, len, flags, xdata); + return 0; +} + +int io_stats_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { START_FOP_LATENCY(frame); @@ -4189,6 +4215,7 @@ struct xlator_fops fops = { .getactivelk = io_stats_getactivelk, .setactivelk = io_stats_setactivelk, .compound = io_stats_compound, + .copy_file_range = io_stats_copy_file_range, }; struct xlator_cbks cbks = { diff --git a/xlators/features/changelog/lib/src/Makefile.am b/xlators/features/changelog/lib/src/Makefile.am index c4b9a3df692..c933ec53ed2 100644 --- a/xlators/features/changelog/lib/src/Makefile.am +++ b/xlators/features/changelog/lib/src/Makefile.am @@ -1,7 +1,7 @@ libgfchangelog_la_CFLAGS = -Wall $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \ -DDATADIR=\"$(localstatedir)\" -libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -fpic \ +libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 -fpic \ -I../../../src/ -I$(top_srcdir)/libglusterfs/src \ -I$(top_srcdir)/xlators/features/changelog/src \ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ diff --git a/xlators/features/utime/src/utime-gen-fops-c.py b/xlators/features/utime/src/utime-gen-fops-c.py index ab56dc9a4b3..8730a51d13e 100755 --- a/xlators/features/utime/src/utime-gen-fops-c.py +++ b/xlators/features/utime/src/utime-gen-fops-c.py @@ -62,6 +62,20 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, } """ +FOPS_COPY_FILE_RANGE_TEMPLATE = """ +int32_t +gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + gl_timespec_get(&frame->root->ctime); + + (void) utime_update_attribute_flags(frame, this, GF_FOP_COPY_FILE_RANGE); + STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); + return 0; +} +""" + FOPS_SETATTR_TEMPLATE = """ int32_t gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, @@ -94,6 +108,7 
@@ utime_ops = ['fallocate', 'zerofill', 'opendir', 'mknod', 'mkdir', utime_read_op = ['readv'] utime_write_op = ['writev'] utime_setattr_ops = ['setattr', 'fsetattr'] +utime_copy_file_range_ops = ['copy_file_range'] def gen_defaults(): for name in ops: @@ -109,6 +124,9 @@ def gen_defaults(): if name in utime_setattr_ops: print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs)) print(generate(FOPS_SETATTR_TEMPLATE, name, fop_subs)) + if name in utime_copy_file_range_ops: + print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs)) + print(generate(FOPS_COPY_FILE_RANGE_TEMPLATE, name, fop_subs)) for l in open(sys.argv[1], 'r').readlines(): if l.find('#pragma generate') != -1: diff --git a/xlators/features/utime/src/utime-gen-fops-h.py b/xlators/features/utime/src/utime-gen-fops-h.py index 3686f2e3c1e..e96274c229a 100755 --- a/xlators/features/utime/src/utime-gen-fops-h.py +++ b/xlators/features/utime/src/utime-gen-fops-h.py @@ -18,7 +18,7 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, utime_ops = ['fallocate', 'zerofill', 'opendir', 'mknod', 'mkdir', 'unlink', 'rmdir', 'symlink', 'rename', 'link', 'truncate', 'ftruncate', 'create', 'open', 'removexattr', 'fremovexattr', - 'readv', 'writev', 'setattr', 'fsetattr'] + 'readv', 'writev', 'setattr', 'fsetattr', 'copy_file_range'] def gen_defaults(): for name, value in ops.items(): diff --git a/xlators/features/utime/src/utime-helpers.c b/xlators/features/utime/src/utime-helpers.c index c79e12badfa..79cc0145f50 100644 --- a/xlators/features/utime/src/utime-helpers.c +++ b/xlators/features/utime/src/utime-helpers.c @@ -93,6 +93,15 @@ utime_update_attribute_flags(call_frame_t *frame, xlator_t *this, frame->root->flags |= MDATA_CTIME; break; + case GF_FOP_COPY_FILE_RANGE: + /* Below 2 are for destination fd */ + frame->root->flags |= MDATA_CTIME; + frame->root->flags |= MDATA_MTIME; + /* Below flag is for the source fd */ + if (!utime_priv->noatime) { + frame->root->flags |= MDATA_ATIME; + } + break; default: 
frame->root->flags = 0; } diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 3b2622b431f..3f4e19c211e 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -2993,6 +2993,116 @@ fuse_write(xlator_t *this, fuse_in_header_t *finh, void *msg, return; } +#if FUSE_KERNEL_MINOR_VERSION >= 28 +static int +fuse_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *stbuf, + struct iatt *prebuf_dst, struct iatt *postbuf_dst, + dict_t *xdata) +{ + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; + /* + * Fuse kernel module uses fuse_write_out itself as the + * output collector. In fact, fuse_kernel.h in the upstream + * kernel just defines the input structure fuse_copy_file_range_in + * for the fop. So, just use the fuse_write_out to send the + * response back to the kernel. + */ + struct fuse_write_out fcfro = { + 0, + }; + + char src_gfid[GF_UUID_BUF_SIZE] = {0}; + char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + + state = frame->root->state; + finh = state->finh; + + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); + + if (op_ret >= 0) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": WRITE => %d/%" GF_PRI_SIZET ",%" PRIu64 + " , %" PRIu64 " ,%" PRIu64 ",%" PRIu64, + frame->root->unique, op_ret, state->size, state->off_in, + state->off_out, stbuf->ia_size, postbuf_dst->ia_size); + + fcfro.size = op_ret; + send_fuse_obj(this, finh, &fcfro); + } else { + if (state->fd && state->fd->inode) + uuid_utoa_r(state->fd->inode->gfid, src_gfid); + else + snprintf(src_gfid, sizeof(src_gfid), "nil"); + + if (state->fd_dst && state->fd_dst->inode) + uuid_utoa_r(state->fd_dst->inode->gfid, dst_gfid); + else + snprintf(dst_gfid, sizeof(dst_gfid), "nil"); + + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 + ": COPY_FILE_RANGE => -1 gfid_in=%s fd_in=%p " + "gfid_out=%s fd_out=%p (%s)", + frame->root->unique, 
src_gfid, state->fd, dst_gfid, + state->fd_dst, strerror(op_errno)); + + send_fuse_err(this, finh, op_errno); + } + + free_fuse_state(state); + STACK_DESTROY(frame->root); + + return 0; +} + +void +fuse_copy_file_range_resume(fuse_state_t *state) +{ + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 + ": COPY_FILE_RANGE " + "(input fd: %p (gfid: %s), " + "output fd: %p (gfid: %s) size=%zu, " + "offset_in=%" PRIu64 ", offset_out=%" PRIu64 ")", + state->finh->unique, state->fd, uuid_utoa(state->fd->inode->gfid), + state->fd_dst, uuid_utoa(state->fd_dst->inode->gfid), state->size, + state->off_in, state->off_out); + + FUSE_FOP(state, fuse_copy_file_range_cbk, GF_FOP_COPY_FILE_RANGE, + copy_file_range, state->fd, state->off_in, state->fd_dst, + state->off_out, state->size, state->io_flags, state->xdata); +} + +static void +fuse_copy_file_range(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +{ + struct fuse_copy_file_range_in *fcfri = msg; + fuse_state_t *state = NULL; + fd_t *fd_in = NULL; + fd_t *fd_out = NULL; + + GET_STATE(this, finh, state); + + fd_in = FH_TO_FD(fcfri->fh_in); + fd_out = FH_TO_FD(fcfri->fh_out); + state->fd = fd_in; + state->fd_dst = fd_out; + + fuse_resolve_fd_init(state, &state->resolve, fd_in); + fuse_resolve_fd_init(state, &state->resolve2, fd_out); + + state->size = fcfri->len; + state->off_in = fcfri->off_in; + state->off_out = fcfri->off_out; + state->io_flags = fcfri->flags; + + fuse_resolve_and_resume(state, fuse_copy_file_range_resume); +} +#endif /* FUSE_KERNEL_MINOR_VERSION >= 28 */ + #if FUSE_KERNEL_MINOR_VERSION >= 24 && HAVE_SEEK_HOLE static int fuse_lseek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, @@ -6087,6 +6197,10 @@ static fuse_handler_t *fuse_std_ops[FUSE_OP_HIGH] = { #if FUSE_KERNEL_MINOR_VERSION >= 24 && HAVE_SEEK_HOLE [FUSE_LSEEK] = fuse_lseek, #endif + +#if FUSE_KERNEL_MINOR_VERSION >= 28 + [FUSE_COPY_FILE_RANGE] = fuse_copy_file_range, +#endif }; static fuse_handler_t 
*fuse_dump_ops[FUSE_OP_HIGH]; diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h index 57380786f17..60702ab1da5 100644 --- a/xlators/mount/fuse/src/fuse-bridge.h +++ b/xlators/mount/fuse/src/fuse-bridge.h @@ -41,8 +41,32 @@ #include <glusterfs/gidcache.h> #if defined(GF_LINUX_HOST_OS) || defined(__FreeBSD__) || defined(__NetBSD__) + +/* + * TODO: + * So, with the addition of copy_file_range support, it might + * require a bump up of fuse kernel minor version (like it was + * done when support for lseek fop was added. But, as of now, + * the copy_file_range support has just landed in upstream + * kernel fuse module. So, until, there is a release of that + * fuse as part of a kernel, the FUSE_KERNEL_MINOR_VERSION + * from fuse_kernel.h in the contrib might not be changed. + * If so, then the highest op available should be based on + * the current minor version (which is 24). So, selectively + * determine. When, the minor version is changed to 28 in + * fuse_kernel.h from contrib (because in upstream linux + * kernel source tree, the kernel minor version which + * contains support for copy_file_range is 28), then remove + * the reference to FUSE_LSEEK below and just determine + * FUSE_OP_HIGH based on copy_file_range. + */ +#if FUSE_KERNEL_MINOR_VERSION >= 28 +#define FUSE_OP_HIGH (FUSE_COPY_FILE_RANGE + 1) +#else #define FUSE_OP_HIGH (FUSE_LSEEK + 1) #endif + +#endif #ifdef GF_DARWIN_HOST_OS #define FUSE_OP_HIGH (FUSE_DESTROY + 1) #endif @@ -400,10 +424,22 @@ typedef struct { loc_t loc2; fuse_in_header_t *finh; int32_t flags; + off_t off; + /* + * The man page of copy_file_range tells that the offset + * arguments are of type loff_t *. Here in fuse state, the values of + * those offsets are saved instead of pointers as the kernel sends + * the values of the offsets from those pointers instead of pointers. + * But the type loff_t is linux specific and is actually a typedef of + * off64_t. 
Hence using off64_t + */ + off64_t off_in; /* for copy_file_range source fd */ + off64_t off_out; /* for copy_file_range destination fd */ size_t size; unsigned long nlookup; fd_t *fd; + fd_t *fd_dst; /* for copy_file_range destination */ dict_t *xattr; dict_t *xdata; char *name; diff --git a/xlators/protocol/client/src/client-common.c b/xlators/protocol/client/src/client-common.c index 7708c820918..64db98d661b 100644 --- a/xlators/protocol/client/src/client-common.c +++ b/xlators/protocol/client/src/client-common.c @@ -2556,6 +2556,38 @@ out: } int +client_pre_copy_file_range_v2(xlator_t *this, gfx_copy_file_range_req *req, + fd_t *fd_in, off64_t off_in, fd_t *fd_out, + off64_t off_out, size_t size, int32_t flags, + dict_t **xdata) +{ + int64_t remote_fd_in = -1; + int64_t remote_fd_out = -1; + int op_errno = ESTALE; + + CLIENT_GET_REMOTE_FD(this, fd_in, FALLBACK_TO_ANON_FD, remote_fd_in, + op_errno, out); + + CLIENT_GET_REMOTE_FD(this, fd_out, FALLBACK_TO_ANON_FD, remote_fd_out, + op_errno, out); + req->size = size; + req->off_in = off_in; + req->off_out = off_out; + req->fd_in = remote_fd_in; + req->fd_out = remote_fd_out; + req->flag = flags; + + memcpy(req->gfid1, fd_in->inode->gfid, 16); + memcpy(req->gfid2, fd_out->inode->gfid, 16); + + dict_to_xdr(*xdata, &req->xdata); + + return 0; +out: + return -op_errno; +} + +int client_pre_statfs_v2(xlator_t *this, gfx_statfs_req *req, loc_t *loc, dict_t *xdata) { diff --git a/xlators/protocol/client/src/client-common.h b/xlators/protocol/client/src/client-common.h index 5214eae128e..a2043d8742a 100644 --- a/xlators/protocol/client/src/client-common.h +++ b/xlators/protocol/client/src/client-common.h @@ -621,4 +621,10 @@ client_post_rename_v2(xlator_t *this, gfx_rename_rsp *rsp, struct iatt *stbuf, struct iatt *prenewparent, struct iatt *postnewparent, dict_t **xdata); +int +client_pre_copy_file_range_v2(xlator_t *this, gfx_copy_file_range_req *req, + fd_t *fd_in, off64_t off_in, fd_t *fd_out, + off64_t off_out, 
size_t size, int32_t flags, + dict_t **xdata); + #endif /* __CLIENT_COMMON_H__ */ diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c index 849fdfca0bc..55e87b3c370 100644 --- a/xlators/protocol/client/src/client-helpers.c +++ b/xlators/protocol/client/src/client-helpers.c @@ -2459,6 +2459,20 @@ client_handle_fop_requirements_v2( lease, this, &this_req->compound_req_v2_u.compound_lease_req, op_errno, out, &args->loc, &args->lease, args->xdata); break; + case GF_FOP_COPY_FILE_RANGE: + /* + * Not going to handle the copy_file_range fop in compound + * operation. This is because, compound operation is going + * to be removed. In fact, AFR, one of the heavy consumers of + * compound operations, has stopped using them. + * https://github.com/gluster/glusterfs/issues/414 + * Therefore, sending ENOTSUP error for this fop coming as + * a compound request. Though, there was no need of handling + * "case GF_FOP_COPY_FILE_RANGE" technically, this comment + * under the label of GF_FOP_COPY_FILE_RANGE will help in + * understanding that this fop does not handle the compound + * request and why. + */ default: return ENOTSUP; } @@ -2631,6 +2645,14 @@ compound_request_cleanup_v2(gfx_compound_req *req) case GF_FOP_SEEK: CLIENT4_COMPOUND_FOP_CLEANUP(curr_req, seek); break; + case GF_FOP_COPY_FILE_RANGE: + /* + * This fop is not handled in compound operations. + * Check the comment added under this fop's section + * in client_handle_fop_requirements_v2. Therefore + * keeping this label only as a placeholder with + * a message that, this fop is not handled. + */ default: break; } @@ -3004,6 +3026,12 @@ client_process_response_v2(call_frame_t *frame, xlator_t *this, &this_args_cbk->lease, xdata); break; } + case GF_FOP_COPY_FILE_RANGE: + /* + * Not handling this fop. Returning ENOTSUP. Check + * the comment added for this fop in the function + * client_handle_fop_requirements_v2. 
+ */ default: return -ENOTSUP; } diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c index ca180c1db4b..8f3ee41e5c5 100644 --- a/xlators/protocol/client/src/client-rpc-fops_v2.c +++ b/xlators/protocol/client/src/client-rpc-fops_v2.c @@ -2833,6 +2833,72 @@ out: return 0; } +int +client4_0_copy_file_range_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + gfx_common_3iatt_rsp rsp = { + 0, + }; + call_frame_t *frame = NULL; + struct iatt stbuf = { + 0, + }; + struct iatt prestat = { + 0, + }; + struct iatt poststat = { + 0, + }; + int ret = 0; + xlator_t *this = NULL; + dict_t *xdata = NULL; + clnt_local_t *local = NULL; + + this = THIS; + + frame = myframe; + local = frame->local; + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = ENOTCONN; + goto out; + } + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gfx_common_3iatt_rsp); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, PC_MSG_XDR_DECODING_FAILED, + "XDR decoding failed"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + ret = client_post_common_3iatt(this, &rsp, &stbuf, &prestat, &poststat, + &xdata); + if (ret < 0) + goto out; +out: + if (rsp.op_ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, gf_error_to_errno(rsp.op_errno), + PC_MSG_REMOTE_OP_FAILED, "remote operation failed"); + } else if (rsp.op_ret >= 0) { + if (local->attempt_reopen) + client_attempt_reopen(local->fd, this); + if (local->attempt_reopen_out) + client_attempt_reopen(local->fd_out, this); + } + CLIENT_STACK_UNWIND(copy_file_range, frame, rsp.op_ret, + gf_error_to_errno(rsp.op_errno), &stbuf, &prestat, + &poststat, xdata); + + if (xdata) + dict_unref(xdata); + + return 0; +} + int32_t client4_0_releasedir(call_frame_t *frame, xlator_t *this, void *data) { @@ -5846,6 +5912,80 @@ unwind: } int32_t +client4_0_copy_file_range(call_frame_t *frame, xlator_t *this, void *data) +{ + clnt_args_t *args = NULL; + 
clnt_conf_t *conf = NULL; + clnt_local_t *local = NULL; + gfx_copy_file_range_req req = { + { + 0, + }, + }; + int op_errno = ESTALE; + int ret = 0; + + if (!frame || !this || !data) + goto unwind; + + args = data; + conf = this->private; + + ret = client_pre_copy_file_range_v2(this, &req, args->fd, args->off_in, + args->fd_out, args->off_out, args->size, + args->flags, &args->xdata); + + if (ret) { + op_errno = -ret; + goto unwind; + } + + ret = client_fd_fop_prepare_local(frame, args->fd, req.fd_in); + if (ret) { + op_errno = -ret; + goto unwind; + } + + /* + * Since frame->local is allocated in above function call + * itself, better to use it (with the assumption that it + * has been allocated) directly instead of again calling + * client_fd_fop_prepare_local or modifying it, as doing + * so requires changes in other places as well. + */ + + local = frame->local; + local->fd_out = fd_ref(args->fd_out); + local->attempt_reopen_out = client_is_reopen_needed(args->fd_out, this, + req.fd_out); + + ret = client_submit_request( + this, &req, frame, conf->fops, GFS3_OP_COPY_FILE_RANGE, + client4_0_copy_file_range_cbk, NULL, NULL, 0, NULL, 0, NULL, + (xdrproc_t)xdr_gfx_copy_file_range_req); + if (ret) { + /* + * If the lower layers fail to submit a request, they'll also + * do the unwind for us (see rpc_clnt_submit), so don't unwind + * here in such cases. 
+ */ + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); + } + + GF_FREE(req.xdata.pairs.pairs_val); + + return 0; + +unwind: + CLIENT_STACK_UNWIND(copy_file_range, frame, -1, op_errno, NULL, NULL, NULL, + NULL); + GF_FREE(req.xdata.pairs.pairs_val); + + return 0; +} + +int32_t client4_0_fsetattr(call_frame_t *frame, xlator_t *this, void *data) { clnt_args_t *args = NULL; @@ -6257,6 +6397,7 @@ rpc_clnt_procedure_t clnt4_0_fop_actors[GF_FOP_MAXVALUE] = { [GF_FOP_COMPOUND] = {"COMPOUND", client4_0_compound}, [GF_FOP_ICREATE] = {"ICREATE", client4_0_icreate}, [GF_FOP_NAMELINK] = {"NAMELINK", client4_0_namelink}, + [GF_FOP_COPY_FILE_RANGE] = {"COPY-FILE-RANGE", client4_0_copy_file_range}, }; rpc_clnt_prog_t clnt4_0_fop_prog = { diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index 38723b43b45..c8e84f6e1b7 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -1129,6 +1129,41 @@ out: return 0; } +int32_t +client_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, + off_t off_in, fd_t *fd_out, off_t off_out, size_t len, + uint32_t flags, dict_t *xdata) +{ + int ret = -1; + clnt_conf_t *conf = NULL; + rpc_clnt_procedure_t *proc = NULL; + clnt_args_t args = { + 0, + }; + + conf = this->private; + if (!conf || !conf->fops) + goto out; + + args.fd = fd_in; + args.fd_out = fd_out; + args.offset = off_in; + args.off_out = off_out; + args.size = len; + args.flags = flags; + args.xdata = xdata; + + proc = &conf->fops->proctable[GF_FOP_COPY_FILE_RANGE]; + if (proc->fn) + ret = proc->fn(frame, this, &args); +out: + if (ret) + STACK_UNWIND_STRICT(copy_file_range, frame, -1, ENOTCONN, NULL, NULL, + NULL, NULL); + + return 0; +} + static gf_boolean_t is_client_rpc_init_command(dict_t *dict, xlator_t *this, char **value) { @@ -2898,6 +2933,7 @@ struct xlator_fops fops = { .icreate = client_icreate, .namelink = client_namelink, .put = client_put, + 
.copy_file_range = client_copy_file_range, }; struct xlator_dumpops dumpops = { diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h index 5fc75a84628..71f84f3ca89 100644 --- a/xlators/protocol/client/src/client.h +++ b/xlators/protocol/client/src/client.h @@ -269,6 +269,7 @@ typedef struct client_local { loc_t loc; loc_t loc2; fd_t *fd; + fd_t *fd_out; /* used in copy_file_range */ clnt_fd_ctx_t *fdctx; uint32_t flags; struct iobref *iobref; @@ -280,6 +281,11 @@ typedef struct client_local { pthread_mutex_t mutex; char *name; gf_boolean_t attempt_reopen; + /* + * The below boolean variable is used + * only for the copy_file_range fop + */ + gf_boolean_t attempt_reopen_out; /* required for compound fops */ compound_args_t *compound_args; unsigned int length; /* length of a compound fop */ @@ -289,7 +295,13 @@ typedef struct client_local { typedef struct client_args { loc_t *loc; + /* + * This is the source fd for copy_file_range and + * the default fd for any other fd based fop which + * requires only one fd (i.e. operates on one fd) + */ fd_t *fd; + fd_t *fd_out; /* this is the destination fd for copy_file_range */ const char *linkname; struct iobref *iobref; struct iovec *vector; @@ -301,7 +313,17 @@ typedef struct client_args { struct gf_flock *flock; const char *volume; const char *basename; + off_t offset; + /* + * According to the man page of copy_file_range, + * the offsets for source and destination file + * are of type loff_t. But the type loff_t is + * Linux specific and is actually a typedef of + * off64_t.
+ */ + off64_t off_in; /* used in copy_file_range for source fd */ + off64_t off_out; /* used in copy_file_range for dst fd */ int32_t mask; int32_t cmd; size_t size; diff --git a/xlators/protocol/server/src/server-common.c b/xlators/protocol/server/src/server-common.c index 25b36155065..0639ac3feb3 100644 --- a/xlators/protocol/server/src/server-common.c +++ b/xlators/protocol/server/src/server-common.c @@ -541,6 +541,16 @@ server4_post_common_3iatt(server_state_t *state, gfx_common_3iatt_rsp *rsp, } void +server4_post_common_3iatt_noinode(gfx_common_3iatt_rsp *rsp, struct iatt *stbuf, + struct iatt *prebuf_dst, + struct iatt *postbuf_dst) +{ + gfx_stat_from_iattx(&rsp->stat, stbuf); + gfx_stat_from_iattx(&rsp->preparent, prebuf_dst); + gfx_stat_from_iattx(&rsp->postparent, postbuf_dst); +} + +void server4_post_common_2iatt(gfx_common_2iatt_rsp *rsp, struct iatt *prebuf, struct iatt *postbuf) { diff --git a/xlators/protocol/server/src/server-common.h b/xlators/protocol/server/src/server-common.h index 2844ee95756..6200415e304 100644 --- a/xlators/protocol/server/src/server-common.h +++ b/xlators/protocol/server/src/server-common.h @@ -192,3 +192,8 @@ void server4_post_link(server_state_t *state, gfx_common_3iatt_rsp *rsp, inode_t *inode, struct iatt *stbuf, struct iatt *pre, struct iatt *post); + +void +server4_post_common_3iatt_noinode(gfx_common_3iatt_rsp *rsp, struct iatt *stbuf, + struct iatt *prebuf_dst, + struct iatt *postbuf_dst); diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c index c55a422679d..8ad2d8492ed 100644 --- a/xlators/protocol/server/src/server-helpers.c +++ b/xlators/protocol/server/src/server-helpers.c @@ -4948,6 +4948,8 @@ server_populate_compound_response_v2(xlator_t *this, gfx_compound_rsp *rsp, rsp_args->op_errno = gf_errno_to_error(this_args_cbk->op_errno); break; } + case GF_FOP_COPY_FILE_RANGE: + /* Not handling this fop. 
*/ default: return ENOTSUP; } @@ -5380,6 +5382,12 @@ server_get_compound_resolve_v2(server_state_t *state, gfx_compound_req *req) memcpy(state->resolve.gfid, this_req.gfid, 16); break; } + case GF_FOP_COPY_FILE_RANGE: + /* + * Compound operations is not being used anymore and + * planned for subsequent removal. Hence not handling + * this fop here. + */ default: return ENOTSUP; } diff --git a/xlators/protocol/server/src/server-resolve.c b/xlators/protocol/server/src/server-resolve.c index 26260a5ee2c..ec768acba44 100644 --- a/xlators/protocol/server/src/server-resolve.c +++ b/xlators/protocol/server/src/server-resolve.c @@ -545,14 +545,39 @@ server_resolve_fd(call_frame_t *frame) return 0; } - state->fd = gf_fd_fdptr_get(serv_ctx->fdtable, fd_no); - + /* + * With copy_file_range, there will be 2 fds to resolve. + * This same function is called to resolve both the source + * fd and the destination fd. As of now, this function does + * not have any mechanism to distinguish between the 2 fds + * being resolved except for checking the value of state->fd. + * The assumption is that, if source fd the one which is + * being resolved here, then state->fd would be NULL. If it + * is not NULL, then it is the destination fd which is being + * resolved. + * This method (provided the above assumption is true) is + * to achieve the ability to distinguish between 2 fds with + * minimum changes being done to this function. If this way + * is not correct, then more changes might be needed. 
+ */ if (!state->fd) { - gf_msg("", GF_LOG_INFO, EBADF, PS_MSG_FD_NOT_FOUND, - "fd not " - "found in context"); - resolve->op_ret = -1; - resolve->op_errno = EBADF; + state->fd = gf_fd_fdptr_get(serv_ctx->fdtable, fd_no); + if (!state->fd) { + gf_msg("", GF_LOG_INFO, EBADF, PS_MSG_FD_NOT_FOUND, + "fd not " + "found in context"); + resolve->op_ret = -1; + resolve->op_errno = EBADF; + } + } else { + state->fd_out = gf_fd_fdptr_get(serv_ctx->fdtable, fd_no); + if (!state->fd_out) { + gf_msg("", GF_LOG_INFO, EBADF, PS_MSG_FD_NOT_FOUND, + "fd not " + "found in context"); + resolve->op_ret = -1; + resolve->op_errno = EBADF; + } } server_resolve_all(frame); diff --git a/xlators/protocol/server/src/server-rpc-fops_v2.c b/xlators/protocol/server/src/server-rpc-fops_v2.c index c5a8e482621..16570294f6d 100644 --- a/xlators/protocol/server/src/server-rpc-fops_v2.c +++ b/xlators/protocol/server/src/server-rpc-fops_v2.c @@ -2259,6 +2259,64 @@ out: return 0; } +int +server4_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *stbuf, struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata) +{ + gfx_common_3iatt_rsp rsp = { + 0, + }; + server_state_t *state = NULL; + rpcsvc_request_t *req = NULL; + char in_gfid[GF_UUID_BUF_SIZE] = {0}; + char out_gfid[GF_UUID_BUF_SIZE] = {0}; + + dict_to_xdr(xdata, &rsp.xdata); + + if (op_ret < 0) { + state = CALL_STATE(frame); + + uuid_utoa_r(state->resolve.gfid, in_gfid); + uuid_utoa_r(state->resolve2.gfid, out_gfid); + + gf_msg(this->name, fop_log_level(GF_FOP_COPY_FILE_RANGE, op_errno), + op_errno, PS_MSG_WRITE_INFO, + "%" PRId64 ": COPY_FILE_RANGE %" PRId64 " (%s), %" PRId64 + " (%s) client: %s, " + "error-xlator: %s", + frame->root->unique, state->resolve.fd_no, in_gfid, + state->resolve2.fd_no, out_gfid, STACK_CLIENT_NAME(frame->root), + STACK_ERR_XL_NAME(frame->root)); + goto out; + } + + /* + * server4_post_common_3iatt (ex: used by server4_put_cbk and some + * 
other cbks) also performs inode linking along with copying of 3 + * iatt structures to the response. But, for copy_file_range, linking + * of inode is not needed. Therefore a new function is used to + * construct the response using 3 iatt structures. + * @stbuf: iatt or stat of the source file (or fd) + * @prebuf_dst: iatt or stat of destination file (or fd) before the fop + * @postbuf_dst: iatt or stat of destination file (or fd) after the fop + */ + server4_post_common_3iatt_noinode(&rsp, stbuf, prebuf_dst, postbuf_dst); + +out: + rsp.op_ret = op_ret; + rsp.op_errno = gf_errno_to_error(op_errno); + + req = frame->local; + server_submit_reply(frame, req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gfx_common_3iatt_rsp); + + GF_FREE(rsp.xdata.pairs.pairs_val); + + return 0; +} + /* Resume function section */ int @@ -3448,6 +3506,29 @@ err: } int +server4_copy_file_range_resume(call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + int32_t op_ret = 0; + int32_t op_errno = 0; + + state = CALL_STATE(frame); + + /* + * Resume handler run after fd resolution: winds copy_file_range to + * bound_xl, or replies with the resolution error. Two fds are + * resolved for this fop (resolve: source fd, resolve2: destination + * fd), so a failure of either one must stop the fop here instead + * of winding it with an unresolved fd. + * NOTE(review): assumes a destination fd failure is recorded in + * state->resolve2 - confirm against server_resolve_fd(). 
+ */ + if (state->resolve.op_ret != 0) { + op_ret = state->resolve.op_ret; + op_errno = state->resolve.op_errno; + goto err; + } + + if (state->resolve2.op_ret != 0) { + op_ret = state->resolve2.op_ret; + op_errno = state->resolve2.op_errno; + goto err; + } + + STACK_WIND(frame, server4_copy_file_range_cbk, bound_xl, + bound_xl->fops->copy_file_range, state->fd, state->off_in, + state->fd_out, state->off_out, state->size, state->flags, + state->xdata); + + return 0; +err: + server4_copy_file_range_cbk(frame, NULL, frame->this, op_ret, op_errno, + NULL, NULL, NULL, NULL); + return 0; +} + +int server4_0_stat(rpcsvc_request_t *req) { server_state_t *state = NULL; @@ -6104,6 +6185,53 @@ out: return ret; } +int +server4_0_copy_file_range(rpcsvc_request_t *req) +{ + server_state_t *state = NULL; + call_frame_t *frame = NULL; + gfx_copy_file_range_req args = { + { + 0, + }, + }; + ssize_t len = 0; + int ret = -1; + int op_errno = 0; + + if (!req) + return ret; + + ret = rpc_receive_common(req, &frame, &state, &len, &args, + xdr_gfx_copy_file_range_req, + GF_FOP_COPY_FILE_RANGE); + if (ret != 0) { + goto out; + } + + state->resolve.type = RESOLVE_MUST; + state->resolve.fd_no
= args.fd_in; + state->resolve2.type = RESOLVE_MUST; /*making this resolve must */ + state->resolve2.fd_no = args.fd_out; + state->off_in = args.off_in; + state->off_out = args.off_out; + state->size = args.size; + state->flags = args.flag; + memcpy(state->resolve.gfid, args.gfid1, 16); + memcpy(state->resolve2.gfid, args.gfid2, 16); + + xdr_to_dict(&args.xdata, &state->xdata); + + ret = 0; + resolve_and_resume(frame, server4_copy_file_range_resume); +out: + + if (op_errno) + SERVER_REQ_SET_ERROR(req, ret); + + return ret; +} + rpcsvc_actor_t glusterfs4_0_fop_actors[] = { [GFS3_OP_NULL] = {"NULL", GFS3_OP_NULL, server_null, NULL, 0}, [GFS3_OP_STAT] = {"STAT", GFS3_OP_STAT, server4_0_stat, NULL, 0}, @@ -6195,6 +6323,8 @@ rpcsvc_actor_t glusterfs4_0_fop_actors[] = { DRC_NA}, [GFS3_OP_NAMELINK] = {"NAMELINK", GFS3_OP_NAMELINK, server4_0_namelink, NULL, 0, DRC_NA}, + [GFS3_OP_COPY_FILE_RANGE] = {"COPY-FILE-RANGE", GFS3_OP_COPY_FILE_RANGE, + server4_0_copy_file_range, NULL, 0, DRC_NA}, }; struct rpcsvc_program glusterfs4_0_fop_prog = { diff --git a/xlators/protocol/server/src/server.h b/xlators/protocol/server/src/server.h index 2a77aba1f3c..bdf98c96f1c 100644 --- a/xlators/protocol/server/src/server.h +++ b/xlators/protocol/server/src/server.h @@ -180,7 +180,12 @@ struct _server_state { struct iatt stbuf; int valid; + /* + * this fd is used in all the fd based operations PLUS + * as a source fd in copy_file_range + */ fd_t *fd; + fd_t *fd_out; /* destination fd in copy_file_range */ dict_t *params; int32_t flags; int wbflags; @@ -191,6 +196,15 @@ struct _server_state { size_t size; off_t offset; + /* + * According to the man page of copy_file_range, + * the offsets for source and destination file + * are of type loff_t. But the type loff_t is + * linux specific and is actual a typedef of + * off64_t. 
+ */ + off64_t off_in; /* source offset in copy_file_range */ + off64_t off_out; /* destination offset in copy_file_range */ mode_t mode; dev_t dev; size_t nr_count; diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index ed0516d4c4a..54fc1dc1195 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -3314,3 +3314,23 @@ unlock: out: return ret; } + +int +posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno) +{ + int ret = -1; + + if (inode->ia_type == IA_IFBLK || inode->ia_type == IA_IFCHR) { + *op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_INVALID_ARGUMENT, + "%s received on %s file (%s)", fop, + (inode->ia_type == IA_IFBLK) ? "block" : "char", + uuid_utoa(inode->gfid)); + goto out; + } + + ret = 0; + +out: + return ret; +} diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c index 9e2b37f582c..dafd1855ef9 100644 --- a/xlators/storage/posix/src/posix-inode-fd-ops.c +++ b/xlators/storage/posix/src/posix-inode-fd-ops.c @@ -1960,6 +1960,274 @@ out: } int32_t +posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, + off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len, + uint32_t flags, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd_in = -1; + int _fd_out = -1; + struct posix_private *priv = NULL; + struct posix_fd *pfd_in = NULL; + struct posix_fd *pfd_out = NULL; + struct iatt preop_dst = { + 0, + }; + struct iatt postop_dst = { + 0, + }; + struct iatt stbuf = { + 0, + }; + int ret = -1; + dict_t *rsp_xdata = NULL; + int is_append = 0; + gf_boolean_t locked = _gf_false; + gf_boolean_t update_atomic = _gf_false; + posix_inode_ctx_t *ctx = NULL; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd_in, out); + VALIDATE_OR_GOTO(fd_in->inode, out); + VALIDATE_OR_GOTO(fd_out, out); + 
VALIDATE_OR_GOTO(fd_out->inode, out); + VALIDATE_OR_GOTO(this->private, out); + + priv = this->private; + + VALIDATE_OR_GOTO(priv, out); + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + + if (posix_check_dev_file(this, fd_in->inode, "copy_file_range", &op_errno)) + goto out; + + if (posix_check_dev_file(this, fd_out->inode, "copy_file_range", &op_errno)) + goto out; + + ret = posix_fd_ctx_get(fd_in, this, &pfd_in, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd_in); + goto out; + } + + _fd_in = pfd_in->fd; + + ret = posix_fd_ctx_get(fd_out, this, &pfd_out, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd_out); + goto out; + } + + _fd_out = pfd_out->fd; + + /* + * Currently, the internal write is checked via xdata which + * is set by some xlator above. It could be due to several + * reasons such as healing or a snapshot operation happening + * using copy_file_range. As of now (i.e. writing the patch with + * this change) none of the xlators above posix are using the + * internal write with copy_file_range. In future it might + * change. At least as of now the hope is that, when that happens + * this function or fop does not require additional changes for + * handling internal writes. + */ + ret = posix_check_internal_writes(this, fd_out, _fd_out, xdata); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "possible overwrite from internal client, fd=%p", fd_out); + op_ret = -1; + op_errno = EBUSY; + goto out; + } + + if (xdata) { + if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) + update_atomic = _gf_true; + } + + /* + * The update_atomic option is to instruct posix to do prestat, + * write and poststat atomically. This is to prevent any modification to + * ia_size and ia_blocks until poststat and the diff in their values + * between pre and poststat could be of use for some translators.
+ * This is similar to the atomic write operation. Atomic write + * (i.e. prestat + write + poststat) is used by shard as of now. In case + * some xlator needs copy_file_range to be atomic from the prestat and + * poststat perspective (i.e. prestat + copy_file_range + poststat) then + * it has to send "GLUSTERFS_WRITE_UPDATE_ATOMIC" key in xdata. + */ + + op_ret = posix_inode_ctx_get_all(fd_out->inode, this, &ctx); + if (op_ret < 0) { + op_errno = ENOMEM; + goto out; + } + + if (update_atomic) { + ret = pthread_mutex_lock(&ctx->write_atomic_lock); + if (!ret) + locked = _gf_true; + else { + /* + * pthread_mutex_lock() reports failure through its + * return value, not errno. op_ret is still >= 0 from the + * ctx lookup above and op_errno is still 0, so both are + * set here; otherwise the fop would be unwound as a + * success. + */ + op_ret = -1; + op_errno = ret; + gf_msg(this->name, GF_LOG_ERROR, ret, P_MSG_MUTEX_FAILED, + "failed to hold write atomic lock on %s", + uuid_utoa(fd_out->inode->gfid)); + goto out; + } + } + + op_ret = posix_fdstat(this, fd_out->inode, _fd_out, &preop_dst); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd_out); + goto out; + } + + /* + * Since only the destination file (fd_out) is undergoing + * modification, the write related tests are done on that, + * i.e. this is treated as if the destination file were + * undergoing a write fop from the maintenance perspective. + */ + if (xdata) { + op_ret = posix_cs_maintenance(this, fd_out, NULL, &_fd_out, &preop_dst, + NULL, xdata, &rsp_xdata, _gf_false); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd_out); + op_errno = EIO; + goto out; + } + } + + /* + * NOTE: This is just doing a single execution of copy_file_range + * system call. 
If the returned value of this system call is less + * than len, then should we keep doing it in a for loop until the + * copy_file_range of all the len bytes is done? + * Check the example program provided in the man page of + * copy_file_range.
+ * If so, then a separate variables for both off_in and off_out + * should be used which are initialized to off_in and off_out + * that this function call receives, but then advanced by the + * value returned by sys_copy_file_range and then use that as + * off_in and off_out for next instance of copy_file_range execution. + */ + op_ret = sys_copy_file_range(_fd_in, &off_in, _fd_out, &off_out, len, + flags); + + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_COPY_FILE_RANGE_FAILED, + "copy_file_range failed: fd_in: %p (gfid: %s) ," + " fd_out %p (gfid:%s)", + fd_in, uuid_utoa(fd_in->inode->gfid), fd_out, + uuid_utoa(fd_out->inode->gfid)); + goto out; + } + + /* + * Let this be as it is for now. This function collects + * infomration such as open fd count etc. So, even though + * is_append does not apply to copy_file_range, for now, + * allowing it to be recorded in the dict as _gf_false. + */ + rsp_xdata = _fill_writev_xdata(fd_out, xdata, this, is_append); + + /* copy_file_range successful, we also need to get the stat of + * the file we wrote to (i.e. destination file or fd_out). + */ + ret = posix_fdstat(this, fd_out->inode, _fd_out, &postop_dst); + if (ret == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "post-operation fstat failed on fd=%p", fd_out); + goto out; + } + + /* + * Also perform the stat on the source fd (i.e. fd_in). For now, + * allowing it to be done within the locked region if the request + * is for atomic operation (and update) of copy_file_range. + */ + ret = posix_fdstat(this, fd_in->inode, _fd_in, &stbuf); + if (ret == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "post-operation fstat failed on fd=%p", fd_in); + goto out; + } + + /* + * The core logic of what time attributes are to be updated + * on a fop is decided at client side xlator utime. 
+ * All the remaining fops call posix_set_ctime function + * to update the {a,m,c}time. But, for all the other fops, + * the operation is happening on only one file (or inode). + * But here, there are 2 fds (source and destination). Hence + * the new function below to update the appropriate times for + * both the source and the destination file. + * For the source file, if at all anything has to be updated, + * it would be atime (as that file is only read, not updated). + * For the destination file, the attributes that require the + * modification would be mtime and ctime. + * What times have to be changed is actually determined by + * utime xlator. But, all of them would be in frame->root->flags. + * So, currently posix assumes that, the atime flag is for + * the source file and the other 2 flags are for the destination + * file. Since, the assumption is rigid (i.e. atime for source + * and {m,c}time for destination), the below function is called + * posix_set_ctime_cfr (cfr standing for copy_file_range). + * FUTURE TODO: + * In future, some other functionality or fop might operate + * simultaneously on 2 files. Then, depending upon what that new + * fop does or what are its requirements, the below function might + * require changes to become generic for consumption in case of + * simultaneous operations on 2 files. + */ + posix_set_ctime_cfr(frame, this, NULL, pfd_in->fd, fd_in->inode, &stbuf, + NULL, pfd_out->fd, fd_out->inode, &postop_dst); + + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + + /* + * Record copy_file_range in priv->write_value for now. + * If not needed, remove below section of code along with + * this comment (or add comment to explain why it is not + * needed). 
+ */ + LOCK(&priv->lock); + { + priv->write_value += op_ret; + } + UNLOCK(&priv->lock); + +out: + + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + + STACK_UNWIND_STRICT(copy_file_range, frame, op_ret, op_errno, &stbuf, + &preop_dst, &postop_dst, rsp_xdata); + + if (rsp_xdata) + dict_unref(rsp_xdata); + return 0; +} + +int32_t posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { char *real_path = NULL; diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h index 62af32ac8fe..928723db8f9 100644 --- a/xlators/storage/posix/src/posix-messages.h +++ b/xlators/storage/posix/src/posix-messages.h @@ -67,6 +67,7 @@ GLFS_MSGID(POSIX, P_MSG_XATTR_FAILED, P_MSG_NULL_GFID, P_MSG_FCNTL_FAILED, P_MSG_ANCESTORY_FAILED, P_MSG_DISK_SPACE_CHECK_FAILED, P_MSG_FALLOCATE_FAILED, P_MSG_STOREMDATA_FAILED, P_MSG_FETCHMDATA_FAILED, P_MSG_GETMDATA_FAILED, - P_MSG_SETMDATA_FAILED, P_MSG_FRESHFILE); + P_MSG_SETMDATA_FAILED, P_MSG_FRESHFILE, P_MSG_MUTEX_FAILED, + P_MSG_COPY_FILE_RANGE_FAILED); #endif /* !_GLUSTERD_MESSAGES_H_ */ diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c index 26fae2019b5..62669a0b83f 100644 --- a/xlators/storage/posix/src/posix-metadata.c +++ b/xlators/storage/posix/src/posix-metadata.c @@ -663,3 +663,81 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, out: return; } + +void +posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this, + const char *real_path_in, int fd_in, inode_t *inode_in, + struct iatt *stbuf_in, const char *real_path_out, + int fd_out, inode_t *inode_out, struct iatt *stbuf_out) +{ + posix_mdata_flag_t flag = { + 0, + }; + posix_mdata_flag_t flag_dup = { + 0, + }; + int ret = 0; + struct posix_private *priv = NULL; + + priv = this->private; + + if (priv->ctime) { + (void)posix_get_mdata_flag(frame->root->flags, &flag); + if ((flag.ctime == 0) && (flag.mtime == 0) && 
(flag.atime == 0)) { + goto out; + } + + if (frame->root->ctime.tv_sec == 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed, No ctime : in: %s gfid_in:%s " + "out: %s gfid_out:%s", + real_path_in, + inode_in ? uuid_utoa(inode_in->gfid) : "No inode", + real_path_out, + inode_out ? uuid_utoa(inode_out->gfid) : "No inode"); + goto out; + } + + flag_dup = flag; + + /* + * For the destination file, no need to update atime. + * It got modified. Hence the things that need to be + * changed are mtime and ctime (provided the utime + * xlator from the client has set those flags, which + * are just copied to flag_dup). + */ + if (flag.atime) + flag_dup.atime = 0; + + ret = posix_set_mdata_xattr(this, real_path_out, fd_out, inode_out, + &frame->root->ctime, stbuf_out, &flag_dup, + _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path_out, + inode_out ? uuid_utoa(inode_out->gfid) : "No inode"); + } + + /* + * For the source file, no need to change the mtime and ctime. + * For source file, it is only read operation. So, if at all + * anything needs to be updated, it is only the atime. + * This call must be given the source side arguments (fd_in, + * inode_in, stbuf_in); passing the destination ones would + * apply the update to the destination file a second time and + * leave the source file untouched. + */ + if (flag.atime) + flag_dup.atime = flag.atime; + flag_dup.mtime = 0; + flag_dup.ctime = 0; + + ret = posix_set_mdata_xattr(this, real_path_in, fd_in, inode_in, + &frame->root->ctime, stbuf_in, &flag_dup, + _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path_in, + inode_in ?
uuid_utoa(inode_in->gfid) : "No inode"); + } + } +out: + return; +} diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h index e1b549d55a1..3416148ea97 100644 --- a/xlators/storage/posix/src/posix-metadata.h +++ b/xlators/storage/posix/src/posix-metadata.h @@ -48,5 +48,10 @@ void posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, const char *real_path, int fd, inode_t *inode, struct iatt *stbuf); +void +posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this, + const char *real_path_in, int fd_in, inode_t *inode_in, + struct iatt *stbuf_in, const char *read_path_put, + int fd_out, inode_t *inode_out, struct iatt *stbuf_out); #endif /* _POSIX_METADATA_H */ diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index d6a20135f96..42b965434b9 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -76,6 +76,7 @@ struct xlator_fops fops = { .seek = posix_seek, .lease = posix_lease, .put = posix_put, + .copy_file_range = posix_copy_file_range, }; struct xlator_cbks cbks = { diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index 1f1d4fc2774..a1ec996f4b2 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -638,6 +638,11 @@ posix_put(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata); int32_t +posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, + off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len, + uint32_t flags, dict_t *xdata); + +int32_t posix_set_mode_in_dict(dict_t *in_dict, dict_t *out_dict, struct iatt *in_stbuf); @@ -656,5 +661,7 @@ int posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, struct iatt *buf, const char *realpath, dict_t *xattr_req, dict_t **xattr_rsp, gf_boolean_t ignore_failure); +int +posix_check_dev_file(xlator_t *this, inode_t *inode, char 
*fop, int *op_errno); #endif /* _POSIX_H */ |