diff options
57 files changed, 1911 insertions, 19 deletions
diff --git a/api/src/Makefile.am b/api/src/Makefile.am index 6ed30bc99f6..7f9a7d17b35 100644 --- a/api/src/Makefile.am +++ b/api/src/Makefile.am @@ -19,7 +19,7 @@ libgfapi_la_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(top_srcdir)/rpc/xdr/src \ -I$(top_builddir)/rpc/xdr/src \ -DDATADIR=\"$(localstatedir)\" \ - -D__USE_FILE_OFFSET64 + -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases index a71422c8577..0e52c38d346 100644 --- a/api/src/gfapi.aliases +++ b/api/src/gfapi.aliases @@ -188,3 +188,4 @@ _pub_glfs_ftruncate _glfs_ftruncate$GFAPI_future _pub_glfs_ftruncate_async _glfs_ftruncate_async$GFAPI_future _pub_glfs_discard_async _glfs_discard_async$GFAPI_future _pub_glfs_zerofill_async _glfs_zerofill_async$GFAPI_future +_pub_glfs_copy_file_range _glfs_copy_file_range$GFAPI_future
\ No newline at end of file diff --git a/api/src/gfapi.map b/api/src/gfapi.map index c47323781fb..1be2953ce9a 100644 --- a/api/src/gfapi.map +++ b/api/src/gfapi.map @@ -255,5 +255,6 @@ GFAPI_future { glfs_ftruncate_async; glfs_discard_async; glfs_zerofill_async; + glfs_copy_file_range; } GFAPI_4.1.6; diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c index 2a1cc73ccee..f59990aed1f 100644 --- a/api/src/glfs-fops.c +++ b/api/src/glfs-fops.c @@ -1333,6 +1333,161 @@ invalid_fs: } ssize_t +pub_glfs_copy_file_range(struct glfs_fd *glfd_in, off64_t *off_in, + struct glfs_fd *glfd_out, off64_t *off_out, size_t len, + unsigned int flags, struct stat *statbuf, + struct stat *prestat, struct stat *poststat) +{ + xlator_t *subvol = NULL; + int ret = -1; + fd_t *fd_in = NULL; + fd_t *fd_out = NULL; + struct iatt preiatt = + { + 0, + }, + iattbuf = + { + 0, + }, + postiatt = { + 0, + }; + dict_t *fop_attr = NULL; + off64_t pos_in; + off64_t pos_out; + + DECLARE_OLD_THIS; + __GLFS_ENTRY_VALIDATE_FD(glfd_in, invalid_fs); + __GLFS_ENTRY_VALIDATE_FD(glfd_out, invalid_fs); + + GF_REF_GET(glfd_in); + GF_REF_GET(glfd_out); + + if (glfd_in->fs != glfd_out->fs) { + ret = -1; + errno = EXDEV; + goto out; + } + + subvol = glfs_active_subvol(glfd_in->fs); + if (!subvol) { + ret = -1; + errno = EIO; + goto out; + } + + fd_in = glfs_resolve_fd(glfd_in->fs, subvol, glfd_in); + if (!fd_in) { + ret = -1; + errno = EBADFD; + goto out; + } + + fd_out = glfs_resolve_fd(glfd_out->fs, subvol, glfd_out); + if (!fd_out) { + ret = -1; + errno = EBADFD; + goto out; + } + + /* + * This is based on how the vfs layer in the kernel handles + * copy_file_range call. Upon receiving it follows the + * below method to consider the offset. + * if (off_in != NULL) + * use the value off_in to perform the op + * else if off_in == NULL + * use the current file offset position to perform the op + * + * For gfapi, glfd->offset is used. For a freshly opened + * fd, the offset is set to 0. + */ + if (off_in) + pos_in = *off_in; + else + pos_in = glfd_in->offset; + + if (off_out) + pos_out = *off_out; + else + pos_out = glfd_out->offset; + + ret = get_fop_attr_thrd_key(&fop_attr); + if (ret) + gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed"); + + ret = syncop_copy_file_range(subvol, fd_in, pos_in, fd_out, pos_out, len, + flags, &iattbuf, &preiatt, &postiatt, fop_attr, + NULL); + DECODE_SYNCOP_ERR(ret); + + if (ret >= 0) { + pos_in += ret; + pos_out += ret; + + if (off_in) + *off_in = pos_in; + if (off_out) + *off_out = pos_out; + + if (statbuf) + glfs_iatt_to_stat(glfd_in->fs, &iattbuf, statbuf); + if (prestat) + glfs_iatt_to_stat(glfd_in->fs, &preiatt, prestat); + if (poststat) + glfs_iatt_to_stat(glfd_in->fs, &postiatt, poststat); + } + + if (ret <= 0) + goto out; + + /* + * If *off_in is NULL, then there is no offset info that can + * obtained from the input argument. Hence follow below method. + * If *off_in is NULL, then + * glfd->offset = offset + ret; + * else + * do nothing. + * + * According to the man page of copy_file_range, if off_in is + * NULL, then the offset of the source file is advanced by + * the return value of the fop. The same applies to off_out as + * well. Otherwise, if *off_in is not NULL, then the offset + * is not advanced by the filesystem. The entity which sends + * the copy_file_range call is supposed to advance the offset + * value in its buffer (pointed to by *off_in or *off_out) + * by the return value of copy_file_range. + */ + if (!off_in) + glfd_in->offset += ret; + + if (!off_out) + glfd_out->offset += ret; + +out: + if (fd_in) + fd_unref(fd_in); + if (fd_out) + fd_unref(fd_out); + if (glfd_in) + GF_REF_PUT(glfd_in); + if (glfd_out) + GF_REF_PUT(glfd_out); + if (fop_attr) + dict_unref(fop_attr); + + glfs_subvol_done(glfd_in->fs, subvol); + + __GLFS_EXIT_FS; + +invalid_fs: + return ret; +} + +GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_copy_file_range, future); + +ssize_t pub_glfs_pwritev(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt, off_t offset, int flags) { diff --git a/api/src/glfs.h b/api/src/glfs.h index cd642a5ea20..160a784222f 100644 --- a/api/src/glfs.h +++ b/api/src/glfs.h @@ -42,6 +42,38 @@ #include <sys/statvfs.h> #include <inttypes.h> +/* + * For off64_t to be defined, we need both + * __USE_LARGEFILE64 to be true and __off64_t_defnined to be + * false. But, making __USE_LARGEFILE64 true causes other issues + * such as redinition of stat and fstat to stat64 and fstat64 + * respectively which again causes compilation issues. + * Without off64_t being defined, this will not compile as + * copy_file_range uses off64_t. Hence define it here. First + * check whether __off64_t_defined is true or not. <unistd.h> + * sets that flag when it defines off64_t. If __off64_t_defined + * is false and __USE_FILE_OFFSET64 is true, then go on to define + * off64_t using __off64_t. + */ +#ifndef GF_BSD_HOST_OS +#if defined(__USE_FILE_OFFSET64) && !defined(__off64_t_defined) +typedef __off64_t off64_t; +#endif /* defined(__USE_FILE_OFFSET64) && !defined(__off64_t_defined) */ +#else +#include <stdio.h> +#ifndef _OFF64_T_DECLARED +/* + * Including <stdio.h> (done above) should actually define + * _OFF64_T_DECLARED with off64_t data type being available + * for consumption. But, off64_t data type is not recognizable + * for FreeBSD versions less than 11. Hence, int64_t is typedefed + * to off64_t. + */ +#define _OFF64_T_DECLARED +typedef int64_t off64_t; +#endif /* _OFF64_T_DECLARED */ +#endif /* GF_BSD_HOST_OS */ + #if defined(HAVE_SYS_ACL_H) || (defined(USE_POSIX_ACLS) && USE_POSIX_ACLS) #include <sys/acl.h> #else @@ -594,6 +626,13 @@ off_t glfs_lseek(glfs_fd_t *fd, off_t offset, int whence) __THROW GFAPI_PUBLIC(glfs_lseek, 3.4.0); +ssize_t +glfs_copy_file_range(struct glfs_fd *glfd_in, off64_t *off_in, + struct glfs_fd *glfd_out, off64_t *off_out, size_t len, + unsigned int flags, struct stat *statbuf, + struct stat *prestat, struct stat *poststat) __THROW + GFAPI_PUBLIC(glfs_copy_file_range, future); + int glfs_truncate(glfs_t *fs, const char *path, off_t length) __THROW GFAPI_PUBLIC(glfs_truncate, 3.7.15); diff --git a/configure.ac b/configure.ac index 3ddb6f073a5..d3c8f8b9514 100644 --- a/configure.ac +++ b/configure.ac @@ -1018,6 +1018,25 @@ if test "x${have_posix_fallocate}" = "xyes"; then AC_DEFINE(HAVE_POSIX_FALLOCATE, 1, [define if posix_fallocate exists]) fi +# On fedora-29, copy_file_range syscall and the libc API both are present. +# Whereas, on some machines such as centos-7, RHEL-7, the API is not there. +# Only the system call is present. So, this change is to determine whether +# the API is present or not. If not, then check whether the system call is +# present or not. Accordingly sys_copy_file_range function will first call +# the API if it is there. Otherwise it will call syscall(SYS_copy_file_range). +AC_CHECK_FUNC([copy_file_range], [have_copy_file_range=yes]) +if test "x${have_copy_file_range}" = "xyes"; then + AC_DEFINE(HAVE_COPY_FILE_RANGE, 1, [define if copy_file_range exists]) +else + OLD_CFLAGS=${CFLAGS} + CFLAGS="-D_GNU_SOURCE" + AC_CHECK_DECL([SYS_copy_file_range], , , [#include <sys/syscall.h>]) + if test "x${ac_cv_have_decl_SYS_copy_file_range}" = "xyes"; then + AC_DEFINE(HAVE_COPY_FILE_RANGE_SYS, 1, [define if SYS_copy_file_range is available]) + fi + CFLAGS=${OLD_CFLAGS} +fi + BUILD_NANOSECOND_TIMESTAMPS=no AC_CHECK_FUNC([utimensat], [have_utimensat=yes]) if test "x${have_utimensat}" = "xyes"; then diff --git a/glusterfs-api.pc.in b/glusterfs-api.pc.in index 6af4e108f7f..4a2edb7bf07 100644 --- a/glusterfs-api.pc.in +++ b/glusterfs-api.pc.in @@ -9,4 +9,4 @@ Description: GlusterFS API Version: @GFAPI_VERSION@ Requires: @PKGCONFIG_UUID@ Libs: -L${libdir} @GFAPI_LIBS@ -lgfapi -lglusterfs -lgfrpc -lgfxdr -Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -DUSE_POSIX_ACLS=@USE_POSIX_ACLS@ +Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 -DUSE_POSIX_ACLS=@USE_POSIX_ACLS@ diff --git a/libgfchangelog.pc.in b/libgfchangelog.pc.in index e2ff1fb6214..79eac2ad2d3 100644 --- a/libgfchangelog.pc.in +++ b/libgfchangelog.pc.in @@ -9,4 +9,4 @@ Description: GlusterFS Changelog Consumer Library Version: @LIBGFCHANGELOG_VERSION@ Requires: @PKGCONFIG_UUID@ Libs: -L${libdir} -lgfchangelog -lglusterfs -Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 +Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am index 1d06f1586a9..970f4b74978 100644 --- a/libglusterfs/src/Makefile.am +++ b/libglusterfs/src/Makefile.am @@ -6,7 +6,7 @@ libglusterfs_la_CFLAGS = $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \ libglusterfs_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \ -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \ -DXLATORPARENTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)\" \ - -DXXH_NAMESPACE=GF_ \ + -DXXH_NAMESPACE=GF_ -D__USE_LARGEFILE64 \ -I$(top_srcdir)/rpc/xdr/src/ -I$(top_builddir)/rpc/xdr/src/ \ -I$(top_srcdir)/rpc/rpc-lib/src/ -I$(CONTRIBDIR)/rbtree \ -I$(CONTRIBDIR)/libexecinfo ${ARGP_STANDALONE_CPPFLAGS} \ diff --git a/libglusterfs/src/call-stub.c b/libglusterfs/src/call-stub.c index 96454dfaeb5..886dfa52ccc 100644 --- a/libglusterfs/src/call-stub.c +++ b/libglusterfs/src/call-stub.c @@ -1818,6 +1818,51 @@ out: } call_stub_t * +fop_copy_file_range_stub(call_frame_t *frame, fop_copy_file_range_t fn, + fd_t *fd_in, off64_t off_in, fd_t *fd_out, + off64_t off_out, size_t len, uint32_t flags, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + GF_VALIDATE_OR_GOTO("call-stub", frame, out); + GF_VALIDATE_OR_GOTO("call-stub", fn, out); + + stub = stub_new(frame, 1, GF_FOP_COPY_FILE_RANGE); + GF_VALIDATE_OR_GOTO("call-stub", stub, out); + + stub->fn.copy_file_range = fn; + + args_copy_file_range_store(&stub->args, fd_in, off_in, fd_out, off_out, len, + flags, xdata); + +out: + return stub; +} + +call_stub_t * +fop_copy_file_range_cbk_stub(call_frame_t *frame, fop_copy_file_range_cbk_t fn, + int32_t op_ret, int32_t op_errno, + struct iatt *stbuf, struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + GF_VALIDATE_OR_GOTO("call-stub", frame, out); + GF_VALIDATE_OR_GOTO("call-stub", fn, out); + + stub = stub_new(frame, 0, GF_FOP_COPY_FILE_RANGE); + GF_VALIDATE_OR_GOTO("call-stub", stub, out); + + stub->fn_cbk.copy_file_range = fn; + args_copy_file_range_cbk_store(&stub->args_cbk, op_ret, op_errno, stbuf, + prebuf_dst, postbuf_dst, xdata); + +out: + return stub; +} + +call_stub_t * fop_put_stub(call_frame_t *frame, fop_put_t fn, loc_t *loc, mode_t mode, mode_t umask, uint32_t flags, struct iovec *vector, int32_t count, off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata) @@ -2213,6 +2258,13 @@ call_resume_wind(call_stub_t *stub) stub->args.iobref, stub->args.xattr, stub->args.xdata); break; + case GF_FOP_COPY_FILE_RANGE: + stub->fn.copy_file_range( + stub->frame, stub->frame->this, stub->args.fd, + stub->args.off_in, stub->args.fd_dst, stub->args.off_out, + stub->args.size, stub->args.flags, stub->args.xdata); + break; + default: gf_msg_callingfn("call-stub", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ENTRY, @@ -2439,6 +2491,12 @@ call_resume_unwind(call_stub_t *stub) stub->args_cbk.xdata); break; + case GF_FOP_COPY_FILE_RANGE: + STUB_UNWIND(stub, copy_file_range, &stub->args_cbk.stat, + &stub->args_cbk.prestat, &stub->args_cbk.poststat, + stub->args_cbk.xdata); + break; + default: gf_msg_callingfn("call-stub", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ENTRY, diff --git a/libglusterfs/src/default-args.c b/libglusterfs/src/default-args.c index 479974e1637..cfceabd1f46 100644 --- a/libglusterfs/src/default-args.c +++ b/libglusterfs/src/default-args.c @@ -1541,6 +1541,48 @@ args_namelink_store(default_args_t *args, loc_t *loc, dict_t *xdata) return 0; } +int +args_copy_file_range_store(default_args_t *args, fd_t *fd_in, off64_t off_in, + fd_t *fd_out, off64_t off_out, size_t len, + uint32_t flags, dict_t *xdata) +{ + if (fd_in) + args->fd = fd_ref(fd_in); + if (fd_out) + args->fd_dst = fd_ref(fd_out); + args->size = len; + args->off_in = off_in; + args->off_out = off_out; + args->flags = flags; + + if (xdata) + args->xdata = dict_ref(xdata); + + return 0; +} + +int +args_copy_file_range_cbk_store(default_args_cbk_t *args, int32_t op_ret, + int32_t op_errno, struct iatt *stbuf, + struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata) +{ + args->op_ret = op_ret; + args->op_errno = op_errno; + if (op_ret >= 0) { + if (postbuf_dst) + args->poststat = *postbuf_dst; + if (prebuf_dst) + args->prestat = *prebuf_dst; + if (stbuf) + args->stat = *stbuf; + } + if (xdata) + args->xdata = dict_ref(xdata); + + return 0; +} + void args_cbk_wipe(default_args_cbk_t *args_cbk) { diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c index 97de8193dcb..5bf64e8c6c6 100644 --- a/libglusterfs/src/defaults-tmpl.c +++ b/libglusterfs/src/defaults-tmpl.c @@ -84,6 +84,7 @@ struct xlator_fops _default_fops = { .put = default_put, .icreate = default_icreate, .namelink = default_namelink, + .copy_file_range = default_copy_file_range, }; struct xlator_fops *default_fops = &_default_fops; diff --git a/libglusterfs/src/generator.py b/libglusterfs/src/generator.py index c17d450502d..5b7aa4764a0 100755 --- a/libglusterfs/src/generator.py +++ b/libglusterfs/src/generator.py @@ -599,6 +599,19 @@ ops['namelink'] = ( ('cbk-arg', 'xdata', 'dict_t *'), ) +ops['copy_file_range'] = ( + ('fop-arg', 'fd_in', 'fd_t *'), + ('fop-arg', 'off_in', 'off64_t '), + ('fop-arg', 'fd_out', 'fd_t *'), + ('fop-arg', 'off_out', 'off64_t '), + ('fop-arg', 'len', 'size_t'), + ('fop-arg', 'flags', 'uint32_t'), + ('fop-arg', 'xdata', 'dict_t *'), + ('cbk-arg', 'stbuf', 'struct iatt *'), + ('cbk-arg', 'prebuf_dst', 'struct iatt *'), + ('cbk-arg', 'postbuf_dst', 'struct iatt *'), + ('cbk-arg', 'xdata', 'dict_t *'), +) ##################################################################### xlator_cbks['forget'] = ( ('fn-arg', 'this', 'xlator_t *'), diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c index 35482545ab3..4fec0638926 100644 --- a/libglusterfs/src/globals.c +++ b/libglusterfs/src/globals.c @@ -77,6 +77,7 @@ const char *gf_fop_list[GF_FOP_MAXVALUE] = { [GF_FOP_PUT] = "PUT", [GF_FOP_ICREATE] = "ICREATE", [GF_FOP_NAMELINK] = "NAMELINK", + [GF_FOP_COPY_FILE_RANGE] = "COPY_FILE_RANGE", }; const char *gf_upcall_list[GF_UPCALL_FLAGS_MAXVALUE] = { diff --git a/libglusterfs/src/glusterfs/call-stub.h b/libglusterfs/src/glusterfs/call-stub.h index bfed0fbc14a..c01c935e73d 100644 --- a/libglusterfs/src/glusterfs/call-stub.h +++ b/libglusterfs/src/glusterfs/call-stub.h @@ -81,6 +81,7 @@ typedef struct _call_stub { fop_put_t put; fop_icreate_t icreate; fop_namelink_t namelink; + fop_copy_file_range_t copy_file_range; } fn; union { @@ -136,6 +137,7 @@ typedef struct _call_stub { fop_put_cbk_t put; fop_icreate_cbk_t icreate; fop_namelink_cbk_t namelink; + fop_copy_file_range_cbk_t copy_file_range; } fn_cbk; default_args_t args; @@ -589,6 +591,18 @@ fop_namelink_cbk_stub(call_frame_t *frame, fop_namelink_cbk_t fn, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata); +call_stub_t * +fop_copy_file_range_stub(call_frame_t *frame, fop_copy_file_range_t fn, + fd_t *fd_in, off64_t off_in, fd_t *fd_out, + off64_t off_out, size_t len, uint32_t flags, + dict_t *xdata); + +call_stub_t * +fop_copy_file_range_cbk_stub(call_frame_t *frame, fop_copy_file_range_cbk_t fn, + int32_t op_ret, int32_t op_errno, + struct iatt *stbuf, struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata); + void call_resume(call_stub_t *stub); void diff --git a/libglusterfs/src/glusterfs/compat.h b/libglusterfs/src/glusterfs/compat.h index 38c07b5ae7c..9374b79f9af 100644 --- a/libglusterfs/src/glusterfs/compat.h +++ b/libglusterfs/src/glusterfs/compat.h @@ -116,6 +116,25 @@ #include <limits.h> #include <libgen.h> +/* + * This is where things like off64_t are defined. + * So include it before declaring _OFF64_T_DECLARED. + * If the freebsd version has support for off64_t + * including stdio.h should be sufficient. + */ +#include <stdio.h> + +#ifndef _OFF64_T_DECLARED +/* + * Including <stdio.h> (done above) should actually define + * _OFF64_T_DECLARED with off64_t data type being available + * for consumption. But, off64_t data type is not recognizable + * for FreeBSD versions less than 11. Hence, int64_t is typedefed + * to off64_t. + */ +#define _OFF64_T_DECLARED +typedef int64_t off64_t; +#endif /* _OFF64_T_DECLARED */ #ifndef XATTR_CREATE enum { diff --git a/libglusterfs/src/glusterfs/default-args.h b/libglusterfs/src/glusterfs/default-args.h index f15f558202b..ca7526fcab6 100644 --- a/libglusterfs/src/glusterfs/default-args.h +++ b/libglusterfs/src/glusterfs/default-args.h @@ -234,6 +234,12 @@ void args_lease_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno, struct gf_lease *lease, dict_t *xdata); +int +args_copy_file_range_cbk_store(default_args_cbk_t *args, int32_t op_ret, + int32_t op_errno, struct iatt *stbuf, + struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata); + void args_cbk_wipe(default_args_cbk_t *args_cbk); @@ -439,6 +445,11 @@ args_icreate_store(default_args_t *args, loc_t *loc, mode_t mode, int args_namelink_store(default_args_t *args, loc_t *loc, dict_t *xdata); +int +args_copy_file_range_store(default_args_t *args, fd_t *fd_in, off64_t off_in, + fd_t *fd_out, off_t off64_out, size_t len, + uint32_t flags, dict_t *xdata); + void args_cbk_init(default_args_cbk_t *args_cbk); #endif /* _DEFAULT_ARGS_H */ diff --git a/libglusterfs/src/glusterfs/defaults.h b/libglusterfs/src/glusterfs/defaults.h index 5d6b8e28a51..5a818eeb91a 100644 --- a/libglusterfs/src/glusterfs/defaults.h +++ b/libglusterfs/src/glusterfs/defaults.h @@ -48,10 +48,20 @@ typedef struct { } default_args_cbk_t; typedef struct { - loc_t loc; /* @old in rename(), link() */ - loc_t loc2; /* @new in rename(), link() */ - fd_t *fd; + loc_t loc; /* @old in rename(), link() */ + loc_t loc2; /* @new in rename(), link() */ + fd_t *fd; /* for all the fd based ops */ + fd_t *fd_dst; /* Only for copy_file_range destination */ off_t offset; + /* + * According to the man page of copy_file_range, + * the offsets for source and destination file + * are of type loff_t. But the type loff_t is + * linux specific and is actual a typedef of + * off64_t. + */ + off64_t off_in; /* For copy_file_range source fd */ + off64_t off_out; /* For copy_file_range destination fd only */ int mask; size_t size; mode_t mode; @@ -323,6 +333,11 @@ int32_t default_namelink(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); +int32_t +default_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, + off64_t off_in, fd_t *fd_out, off64_t off_out, + size_t len, uint32_t flags, dict_t *xdata); + /* Resume */ int32_t default_getspec_resume(call_frame_t *frame, xlator_t *this, const char *key, @@ -542,6 +557,11 @@ default_put_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, int32_t count, off_t off, struct iobref *iobref, dict_t *xattr, dict_t *xdata); +int32_t +default_copy_file_range_resume(call_frame_t *frame, xlator_t *this, fd_t *fd_in, + off_t off64_in, fd_t *fd_out, off64_t off_out, + size_t len, uint32_t flags, dict_t *xdata); + /* _cbk_resume */ int32_t @@ -813,6 +833,13 @@ int32_t default_namelink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); +int32_t +default_copy_file_range_cbk_resume(call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *stbuf, + struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata); + /* _CBK */ int32_t default_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, @@ -1072,6 +1099,12 @@ default_namelink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *postbuf, dict_t *xdata); int32_t +default_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *stbuf, struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata); + +int32_t default_lookup_failure_cbk(call_frame_t *frame, int32_t op_errno); int32_t @@ -1231,6 +1264,9 @@ int32_t default_namelink_failure_cbk(call_frame_t *frame, int32_t op_errno); int32_t +default_copy_file_range_failure_cbk(call_frame_t *frame, int32_t op_errno); + +int32_t default_mem_acct_init(xlator_t *this); void diff --git a/libglusterfs/src/glusterfs/syncop.h b/libglusterfs/src/glusterfs/syncop.h index 203abe92b57..7a6167b0488 100644 --- a/libglusterfs/src/glusterfs/syncop.h +++ b/libglusterfs/src/glusterfs/syncop.h @@ -138,8 +138,19 @@ typedef struct syncbarrier syncbarrier_t; struct syncargs { int op_ret; int op_errno; + + /* + * The below 3 iatt structures are used in the fops + * whose callbacks get struct iatt as one of the + * a return arguments. Currently, the maximum number + * of iatt structures returned is 3 for some fops + * such as mknod, copy_file_range, mkdir etc. So + * all the following 3 iatt structures would be used + * for those fops. + */ struct iatt iatt1; struct iatt iatt2; + struct iatt iatt3; dict_t *xattr; struct statvfs statvfs_buf; struct iovec *vector; @@ -634,4 +645,17 @@ syncop_entrylk(xlator_t *subvol, const char *volume, loc_t *loc, const char *basename, entrylk_cmd cmd, entrylk_type type, dict_t *xdata_in, dict_t **xdata_out); +int +syncop_copy_file_range(xlator_t *subvol, fd_t *fd_in, off64_t off_in, + fd_t *fd_out, off64_t off_out, size_t len, + uint32_t flags, struct iatt *stbuf, + struct iatt *preiatt_dst, struct iatt *postiatt_dst, + dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *stbuf, + struct iatt *prebuf_dst, struct iatt *postbuf_dst, + dict_t *xdata); + #endif /* _SYNCOP_H */ diff --git a/libglusterfs/src/glusterfs/syscall.h b/libglusterfs/src/glusterfs/syscall.h index faaf694b22c..6b33c141a5e 100644 --- a/libglusterfs/src/glusterfs/syscall.h +++ b/libglusterfs/src/glusterfs/syscall.h @@ -17,6 +17,7 @@ #include <sys/stat.h> #include <sys/time.h> #include <sys/socket.h> +#include <stdio.h> /* GF follows the Linux XATTR definition, which differs in Darwin. */ #define GF_XATTR_CREATE 0x1 /* set value, fail if attr already exists */ @@ -228,4 +229,32 @@ sys_socket(int domain, int type, int protocol); int sys_accept(int sock, struct sockaddr *sockaddr, socklen_t *socklen, int flags); +#ifdef GF_BSD_HOST_OS +#ifndef _OFF64_T_DECLARED +/* + * Including <stdio.h> (done above) should actually define + * _OFF64_T_DECLARED with off64_t data type being available + * for consumption. But, off64_t data type is not recognizable + * for FreeBSD versions less than 11. Hence, int64_t is typedefed + * to off64_t. + */ +#define _OFF64_T_DECLARED +typedef int64_t off64_t; +#endif /* _OFF64_T_DECLARED */ +#endif /* GF_BSD_HOST_OS */ + +/* + * According to the man page of copy_file_range, both off_in and off_out are + * pointers to the data type loff_t (i.e. loff_t *). But, freebsd does not + * have (and recognize) loff_t. Since loff_t is 64 bits, use off64_t + * instead. Since it's a pointer type it should be okay. It just needs + * to be a pointer-to-64-bit pointer for both 32- and 64-bit platforms. + * off64_t is recognized by freebsd. + * TODO: In future, when freebsd can recognize loff_t, probably revisit this + * and change the off_in and off_out to (loff_t *). + */ +ssize_t +sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out, + size_t len, unsigned int flags); + #endif /* __SYSCALL_H__ */ diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h index 4137d12eb27..12d507bc021 100644 --- a/libglusterfs/src/glusterfs/xlator.h +++ b/libglusterfs/src/glusterfs/xlator.h @@ -23,6 +23,7 @@ #include "glusterfs/list.h" #include "glusterfs/latency.h" #include "glusterfs/compat-uuid.h" +#include "glusterfs/syscall.h" #define FIRST_CHILD(xl) (xl->children->xlator) #define SECOND_CHILD(xl) (xl->children->next->xlator) @@ -354,6 +355,11 @@ typedef int32_t (*fop_namelink_cbk_t)(call_frame_t *frame, void *cookie, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata); +typedef int32_t (*fop_copy_file_range_cbk_t)( + call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *stbuf, struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata); + typedef int32_t (*fop_lookup_t)(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); @@ -544,6 +550,11 @@ typedef int32_t (*fop_icreate_t)(call_frame_t *frame, xlator_t *this, typedef int32_t (*fop_namelink_t)(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata); +typedef int32_t (*fop_copy_file_range_t)(call_frame_t *frame, xlator_t *this, + fd_t *fd_in, off64_t off_in, + fd_t *fd_out, off64_t off_out, + size_t len, uint32_t flags, + dict_t *xdata); /* WARNING: make sure the list is in order with FOP definition in `rpc/xdr/src/glusterfs-fops.x`. @@ -609,6 +620,7 @@ struct xlator_fops { fop_put_t put; fop_icreate_t icreate; fop_namelink_t namelink; + fop_copy_file_range_t copy_file_range; /* these entries are used for a typechecking hack in STACK_WIND _only_ */ /* make sure to add _cbk variables only after defining regular fops as @@ -673,6 +685,7 @@ struct xlator_fops { fop_put_cbk_t put_cbk; fop_icreate_cbk_t icreate_cbk; fop_namelink_cbk_t namelink_cbk; + fop_copy_file_range_cbk_t copy_file_range_cbk; }; typedef int32_t (*cbk_forget_t)(xlator_t *this, inode_t *inode); diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym index baf44de64ad..6ca6a639456 100644 --- a/libglusterfs/src/libglusterfs.sym +++ b/libglusterfs/src/libglusterfs.sym @@ -92,6 +92,8 @@ args_xattrop_cbk_store args_xattrop_store args_zerofill_cbk_store args_zerofill_store +args_copy_file_range_cbk_store +args_copy_file_range_store bin_to_data call_resume call_resume_keep_stub @@ -351,6 +353,10 @@ default_put default_put_cbk default_put_failure_cbk default_put_resume +default_copy_file_range +default_copy_file_range_cbk +default_copy_file_range_failure_cbk +default_copy_file_range_resume __dentry_grep dht_is_linkfile dict_add @@ -471,6 +477,8 @@ fd_unref _fini fop_access_stub fop_create_stub +fop_copy_file_range_stub +fop_copy_file_range_cbk_stub fop_discard_stub fop_entrylk_stub fop_enum_to_pri_string @@ -933,6 +941,7 @@ synclock_unlock syncop_access syncop_close syncop_create +syncop_copy_file_range syncopctx_getctx syncopctx_setfsgid syncopctx_setfsgroups @@ -1006,6 +1015,7 @@ sys_chmod sys_chown sys_close sys_closedir +sys_copy_file_range sys_creat sys_fallocate sys_fchmod diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c index b70953725ce..bf70daf95c3 100644 --- a/libglusterfs/src/syncop.c +++ b/libglusterfs/src/syncop.c @@ -3397,4 +3397,65 @@ syncop_namelink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, __wake(args); return 0; -}
\ No newline at end of file +} + +int +syncop_copy_file_range(xlator_t *subvol, fd_t *fd_in, off64_t off_in, + fd_t *fd_out, off64_t off_out, size_t len, + uint32_t flags, struct iatt *stbuf, + struct iatt *preiatt_dst, struct iatt *postiatt_dst, + dict_t *xdata_in, dict_t **xdata_out) +{ + struct syncargs args = { + 0, + }; + + SYNCOP(subvol, (&args), syncop_copy_file_range_cbk, + subvol->fops->copy_file_range, fd_in, off_in, fd_out, off_out, len, + flags, xdata_in); + + if (stbuf) { + *stbuf = args.iatt1; + } + if (preiatt_dst) { + *preiatt_dst = args.iatt2; + } + if (postiatt_dst) { + *postiatt_dst = args.iatt3; + } + + if (xdata_out) { + *xdata_out = args.xdata; + } else if (args.xdata) { + dict_unref(args.xdata); + } + + errno = args.op_errno; + return args.op_ret; +} + +int +syncop_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *stbuf, + struct iatt *prebuf_dst, struct iatt *postbuf_dst, + dict_t *xdata) +{ + struct syncargs *args = NULL; + + args = cookie; + + args->op_ret = op_ret; + args->op_errno = op_errno; + if (xdata) + args->xdata = dict_ref(xdata); + + if (op_ret >= 0) { + args->iatt1 = *stbuf; + args->iatt2 = *prebuf_dst; + args->iatt3 = *postbuf_dst; + } + + __wake(args); + + return 0; +} diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c index c72a8e16b34..1d88c8adac1 100644 --- a/libglusterfs/src/syscall.c +++ b/libglusterfs/src/syscall.c @@ -8,8 +8,8 @@ cases as published by the Free Software Foundation. */ -#include "glusterfs/syscall.h" #include "glusterfs/compat.h" +#include "glusterfs/syscall.h" #include "glusterfs/mem-pool.h" #include "glusterfs/libglusterfs-messages.h" @@ -19,6 +19,9 @@ #include <fcntl.h> #include <unistd.h> #include <stdarg.h> +#ifdef HAVE_COPY_FILE_RANGE_SYS +#include <sys/syscall.h> +#endif #define FS_ERROR_LOG(result) \ do { \ @@ -802,3 +805,30 @@ err: #endif return newsock; } + +ssize_t +sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out, + size_t len, unsigned int flags) +{ + /* + * TODO: Add check for other platofrms like freebsd etc if this syscall is + * not generic. + * This is what the function does. + * 1) Check whether copy_file_range API is present. If so call it. + * 2) If copy_file_range API is not present, then check whether + * the system call is there. If so, then use syscall to invoke + * SYS_copy_file_range system call. + * 3) If neither of the above is present, then return ENOSYS. + */ +#ifdef HAVE_COPY_FILE_RANGE + return FS_RET_CHECK( + copy_file_range(fd_in, off_in, fd_out, off_out, len, flags), errno); +#else +#ifdef HAVE_COPY_FILE_RANGE_SYS + return syscall(SYS_copy_file_range, fd_in, off_in, fd_out, off_out, len, + flags); +#else + return -ENOSYS; +#endif /* HAVE_COPY_FILE_RANGE_SYS */ +#endif /* HAVE_COPY_FILE_RANGE */ +} diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c index 1b7c9d46f88..b50848b3476 100644 --- a/libglusterfs/src/xlator.c +++ b/libglusterfs/src/xlator.c @@ -143,6 +143,7 @@ fill_defaults(xlator_t *xl) SET_DEFAULT_FOP(getspec); SET_DEFAULT_FOP(icreate); SET_DEFAULT_FOP(namelink); + SET_DEFAULT_FOP(copy_file_range); if (!xl->cbks) xl->cbks = &default_cbks; diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index 4950857ae9e..779878f52be 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -68,6 +68,7 @@ enum gf_fop_procnum { GFS3_OP_ICREATE, GFS3_OP_NAMELINK, GFS3_OP_PUT, + GFS3_OP_COPY_FILE_RANGE, GFS3_OP_MAXVALUE, }; diff --git a/rpc/xdr/src/glusterfs-fops.x b/rpc/xdr/src/glusterfs-fops.x index ffb71d6418a..bacf07735f4 100644 --- a/rpc/xdr/src/glusterfs-fops.x +++ b/rpc/xdr/src/glusterfs-fops.x @@ -77,6 +77,7 @@ enum glusterfs_fop_t { GF_FOP_PUT, GF_FOP_ICREATE, GF_FOP_NAMELINK, + GF_FOP_COPY_FILE_RANGE, GF_FOP_MAXVALUE }; diff --git a/rpc/xdr/src/glusterfs4-xdr.x b/rpc/xdr/src/glusterfs4-xdr.x index c183dbcd704..dcea17fac68 100644 --- a/rpc/xdr/src/glusterfs4-xdr.x +++ b/rpc/xdr/src/glusterfs4-xdr.x @@ -628,6 +628,19 @@ struct gfx_seek_rsp { struct gfx_setvolume_req { gfx_dict dict; } ; + + struct gfx_copy_file_range_req { + opaque gfid1[16]; + opaque gfid2[16]; + quad_t fd_in; + quad_t fd_out; + u_quad_t off_in; + u_quad_t off_out; + unsigned int size; + unsigned int flag; + gfx_dict xdata; /* Extra data */ +}; + struct gfx_setvolume_rsp { int op_ret; int op_errno; diff --git a/rpc/xdr/src/libgfxdr.sym b/rpc/xdr/src/libgfxdr.sym index bd9131be7c6..22cdf30bfda 100644 --- a/rpc/xdr/src/libgfxdr.sym +++ b/rpc/xdr/src/libgfxdr.sym @@ -344,3 +344,4 @@ xdr_compound_req_v2 xdr_gfx_compound_req xdr_compound_rsp_v2 xdr_gfx_compound_rsp +xdr_gfx_copy_file_range_req
\ No newline at end of file diff --git a/tests/basic/gfapi/gfapi-copy-file-range.t b/tests/basic/gfapi/gfapi-copy-file-range.t new file mode 100644 index 00000000000..c24c1433edf --- /dev/null +++ b/tests/basic/gfapi/gfapi-copy-file-range.t @@ -0,0 +1,80 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; + +TEST glusterd + +# for now, a xfs filesystem with reflink support is created. +# In future, better to make changes in MKFS_LOOP so that, +# once can create a xfs filesystem with reflink enabled in +# generic and simple way, instead of doing below steps each +# time. +TEST truncate -s 2G $B0/xfs_image +mkfs.xfs 2>&1 | grep reflink +if [ $? -eq 0 ]; then + mkfs.xfs -f -i size=512 -m reflink=1 $B0/xfs_image; +else + mkfs.xfs -f -i size=512 $B0/xfs_image; +fi + +TEST mkdir $B0/bricks +TEST mount -t xfs -o loop $B0/xfs_image $B0/bricks + +# Just a single brick volume. More test cases need to be +# added in future for distribute, replicate, +# distributed replicate and distributed replicated sharded +# volumes. +TEST $CLI volume create $V0 $H0:$B0/bricks/brick1; +EXPECT 'Created' volinfo_field $V0 'Status'; + +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 + +TEST dd if=/dev/urandom of=$M0/file bs=1M count=555; + +# check for the existence of the created file +TEST stat $M0/file; + +# grab the size of the file +SRC_SIZE=$(stat -c %s $M0/file); + +logdir=`gluster --print-logdir` + +# TODO: +# For now, do not call copy-file-range utility. This is because, +# the regression machines are centos-7 based which does not have +# copy_file_range API available. So, instead of this testcase +# causing regression failures, for now, this is just a dummy test +# case. Uncomment the below tests (until volume stop) when there +# is support for copy_file_range in the regression machines. +# + +TEST build_tester $(dirname $0)/glfs-copy-file-range.c -lgfapi + +TEST ./$(dirname $0)/glfs-copy-file-range $H0 $V0 $logdir/gfapi-copy-file-range.log /file /new + +# check whether the destination file is created or not +TEST stat $M0/new + +# check the size of the destination file +DST_SIZE=$(stat -c %s $M0/new); + +# The sizes of the source and destination should be same. +# Atleast it ensures that, copy_file_range API is working +# as expected. Whether the actual cloning happened via reflink +# or a read/write happened is different matter. +TEST [ $SRC_SIZE == $DST_SIZE ]; + +cleanup_tester $(dirname $0)/glfs-copy-file-range + +TEST $CLI volume stop $V0 +TEST $CLI volume delete $V0 + +UMOUNT_LOOP $B0/bricks; + +cleanup; diff --git a/tests/basic/gfapi/glfs-copy-file-range.c b/tests/basic/gfapi/glfs-copy-file-range.c new file mode 100644 index 00000000000..756c38d21ec --- /dev/null +++ b/tests/basic/gfapi/glfs-copy-file-range.c @@ -0,0 +1,177 @@ +/* + Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <glusterfs/api/glfs.h> +#include <glusterfs/api/glfs-handles.h> +#include <string.h> +#include <time.h> +#include <libgen.h> + +static void +cleanup(glfs_t *fs) +{ + if (!fs) + return; +#if 0 + /* glfs fini path is still racy and crashing the program. Since + * this program any way has to die, we are not going to call fini + * in the released versions. i.e. final builds. For all + * internal testing lets enable this so that glfs_fini code + * path becomes stable. */ + glfs_fini (fs); +#endif +} + +int +main(int argc, char **argv) +{ + glfs_t *fs = NULL; + int ret = -1; + char *volname = NULL; + char *logfilepath = NULL; + char *path_src = NULL; + char *path_dst = NULL; + glfs_fd_t *glfd_in = NULL; + glfs_fd_t *glfd_out = NULL; + char *volfile_server = NULL; + + struct stat stbuf = { + 0, + }; + struct stat prestat_dst = { + 0, + }; + struct stat poststat_dst = { + 0, + }; + size_t len; + + if (argc < 6) { + printf("%s <volume> <log file path> <source> <destination>", argv[0]); + ret = -1; + goto out; + } + + volfile_server = argv[1]; + volname = argv[2]; + logfilepath = argv[3]; + path_src = argv[4]; + path_dst = argv[5]; + + if (path_src[0] != '/') { + fprintf(stderr, "source path %s is not absolute", path_src); + errno = EINVAL; + goto out; + } + + if (path_dst[0] != '/') { + fprintf(stderr, "destination path %s is not absolute", path_dst); + errno = EINVAL; + goto out; + } + + fs = glfs_new(volname); + if (!fs) { + ret = -errno; + fprintf(stderr, "Not able to initialize volume '%s'", volname); + goto out; + } + + ret = glfs_set_volfile_server(fs, "tcp", volfile_server, 24007); + if (ret < 0) { + ret = -errno; + fprintf(stderr, + "Failed to set the volfile server, " + "%s", + strerror(errno)); + goto out; + } + + ret = glfs_set_logging(fs, logfilepath, 7); + if (ret < 0) { + ret = -errno; + fprintf(stderr, + "Failed to set the log file path, " + "%s", + strerror(errno)); + goto out; + } + + ret = glfs_init(fs); + if (ret < 0) { + ret = -errno; + if (errno == ENOENT) { + fprintf(stderr, "Volume %s does not exist", volname); + } else { + fprintf(stderr, + "%s: Not able to fetch " + "volfile from glusterd", + volname); + } + goto out; + } + + glfd_in = glfs_open(fs, path_src, O_RDONLY | O_NONBLOCK); + if (!glfd_in) { + ret = -errno; + goto out; + } else { + printf("OPEN_SRC: opening %s is success\n", path_src); + } + + glfd_out = glfs_creat(fs, path_dst, O_RDWR, 0644); + if (!glfd_out) { + fprintf(stderr, + "FAILED_DST_OPEN: failed to " + "open (create) %s (%s)\n", + path_dst, strerror(errno)); + ret = -errno; + goto out; + } else { + printf("OPEN_DST: opening %s is success\n", path_dst); + } + + ret = glfs_fstat(glfd_in, &stbuf); + if (ret < 0) { + ret = -errno; + goto out; + } else { + printf("FSTAT_SRC: fstat on %s is success\n", path_dst); + } + + len = stbuf.st_size; + + do { + ret = glfs_copy_file_range(glfd_in, NULL, glfd_out, NULL, len, 0, + &stbuf, &prestat_dst, &poststat_dst); + if (ret == -1) { + fprintf(stderr, "copy_file_range failed with %s\n", + strerror(errno)); + ret = -errno; + break; + } else { + printf("copy_file_range successful\n"); + len -= ret; + } + } while (len > 0); + +out: + if (glfd_in) + glfs_close(glfd_in); + if (glfd_out) + glfs_close(glfd_out); + + cleanup(fs); + + return ret; +} diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c index 7bf0d8a8f00..f40b00bba2d 100644 --- a/xlators/debug/io-stats/src/io-stats.c +++ b/xlators/debug/io-stats/src/io-stats.c @@ -2119,6 +2119,19 @@ io_stats_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, } int +io_stats_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *stbuf, struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata) +{ + UPDATE_PROFILE_STATS(frame, COPY_FILE_RANGE); + + STACK_UNWIND_STRICT(copy_file_range, frame, op_ret, op_errno, stbuf, + prebuf_dst, postbuf_dst, xdata); + return 0; +} + +int io_stats_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata) @@ -2873,6 +2886,19 @@ io_stats_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, } int +io_stats_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, + off_t off_in, fd_t *fd_out, off_t off_out, size_t len, + uint32_t flags, dict_t *xdata) +{ + START_FOP_LATENCY(frame); + + STACK_WIND(frame, io_stats_copy_file_range_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->copy_file_range, fd_in, off_in, fd_out, + off_out, len, flags, xdata); + return 0; +} + +int io_stats_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { START_FOP_LATENCY(frame); @@ -4189,6 +4215,7 @@ struct xlator_fops fops = { .getactivelk = io_stats_getactivelk, .setactivelk = io_stats_setactivelk, .compound = io_stats_compound, + .copy_file_range = io_stats_copy_file_range, }; struct xlator_cbks cbks = { diff --git a/xlators/features/changelog/lib/src/Makefile.am b/xlators/features/changelog/lib/src/Makefile.am index c4b9a3df692..c933ec53ed2 100644 --- a/xlators/features/changelog/lib/src/Makefile.am +++ b/xlators/features/changelog/lib/src/Makefile.am @@ -1,7 +1,7 @@ libgfchangelog_la_CFLAGS = -Wall $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \ -DDATADIR=\"$(localstatedir)\" -libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -fpic \ +libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 -fpic \ -I../../../src/ -I$(top_srcdir)/libglusterfs/src \ -I$(top_srcdir)/xlators/features/changelog/src \ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ diff --git a/xlators/features/utime/src/utime-gen-fops-c.py b/xlators/features/utime/src/utime-gen-fops-c.py index ab56dc9a4b3..8730a51d13e 100755 --- a/xlators/features/utime/src/utime-gen-fops-c.py +++ b/xlators/features/utime/src/utime-gen-fops-c.py @@ -62,6 +62,20 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, } """ +FOPS_COPY_FILE_RANGE_TEMPLATE = """ +int32_t +gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + gl_timespec_get(&frame->root->ctime); + + (void) utime_update_attribute_flags(frame, this, GF_FOP_COPY_FILE_RANGE); + STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); + return 0; +} +""" + FOPS_SETATTR_TEMPLATE = """ int32_t gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, @@ -94,6 +108,7 @@ utime_ops = ['fallocate', 'zerofill', 'opendir', 'mknod', 'mkdir', utime_read_op = ['readv'] utime_write_op = ['writev'] utime_setattr_ops = ['setattr', 'fsetattr'] +utime_copy_file_range_ops = ['copy_file_range'] def gen_defaults(): for name in ops: @@ -109,6 +124,9 @@ def gen_defaults(): if name in utime_setattr_ops: print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs)) print(generate(FOPS_SETATTR_TEMPLATE, name, fop_subs)) + if name in utime_copy_file_range_ops: + print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs)) + print(generate(FOPS_COPY_FILE_RANGE_TEMPLATE, name, fop_subs)) for l in open(sys.argv[1], 'r').readlines(): if l.find('#pragma generate') != -1: diff --git a/xlators/features/utime/src/utime-gen-fops-h.py b/xlators/features/utime/src/utime-gen-fops-h.py index 3686f2e3c1e..e96274c229a 100755 --- a/xlators/features/utime/src/utime-gen-fops-h.py +++ b/xlators/features/utime/src/utime-gen-fops-h.py @@ -18,7 +18,7 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, utime_ops = ['fallocate', 'zerofill', 'opendir', 'mknod', 'mkdir', 'unlink', 'rmdir', 'symlink', 'rename', 'link', 'truncate', 'ftruncate', 'create', 'open', 'removexattr', 'fremovexattr', - 'readv', 'writev', 'setattr', 'fsetattr'] + 'readv', 'writev', 'setattr', 'fsetattr', 'copy_file_range'] def gen_defaults(): for name, value in ops.items(): diff --git a/xlators/features/utime/src/utime-helpers.c b/xlators/features/utime/src/utime-helpers.c index c79e12badfa..79cc0145f50 100644 --- a/xlators/features/utime/src/utime-helpers.c +++ b/xlators/features/utime/src/utime-helpers.c @@ -93,6 +93,15 @@ utime_update_attribute_flags(call_frame_t *frame, xlator_t *this, frame->root->flags |= MDATA_CTIME; break; + case GF_FOP_COPY_FILE_RANGE: + /* Below 2 are for destination fd */ + frame->root->flags |= MDATA_CTIME; + frame->root->flags |= MDATA_MTIME; + /* Below flag is for the source fd */ + if (!utime_priv->noatime) { + frame->root->flags |= MDATA_ATIME; + } + break; default: frame->root->flags = 0; } diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 3b2622b431f..3f4e19c211e 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -2993,6 +2993,116 @@ fuse_write(xlator_t *this, fuse_in_header_t *finh, void *msg, return; } +#if FUSE_KERNEL_MINOR_VERSION >= 28 +static int +fuse_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *stbuf, + struct iatt *prebuf_dst, struct iatt *postbuf_dst, + dict_t *xdata) +{ + fuse_state_t *state = NULL; + fuse_in_header_t *finh = NULL; + /* + * Fuse kernel module uses fuse_write_out itself as the + * output collector. In fact, fuse_kernel.h in the upstream + * kernel just defines the input structure fuse_copy_file_range_in + * for the fop. So, just use the fuse_write_out to send the + * response back to the kernel. + */ + struct fuse_write_out fcfro = { + 0, + }; + + char src_gfid[GF_UUID_BUF_SIZE] = {0}; + char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + + state = frame->root->state; + finh = state->finh; + + fuse_log_eh_fop(this, state, frame, op_ret, op_errno); + + if (op_ret >= 0) { + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 ": WRITE => %d/%" GF_PRI_SIZET ",%" PRIu64 + " , %" PRIu64 " ,%" PRIu64 ",%" PRIu64, + frame->root->unique, op_ret, state->size, state->off_in, + state->off_out, stbuf->ia_size, postbuf_dst->ia_size); + + fcfro.size = op_ret; + send_fuse_obj(this, finh, &fcfro); + } else { + if (state->fd && state->fd->inode) + uuid_utoa_r(state->fd->inode->gfid, src_gfid); + else + snprintf(src_gfid, sizeof(src_gfid), "nil"); + + if (state->fd_dst && state->fd_dst->inode) + uuid_utoa_r(state->fd_dst->inode->gfid, dst_gfid); + else + snprintf(dst_gfid, sizeof(dst_gfid), "nil"); + + gf_log("glusterfs-fuse", GF_LOG_WARNING, + "%" PRIu64 + ": COPY_FILE_RANGE => -1 gfid_in=%s fd_in=%p " + "gfid_out=%s fd_out=%p (%s)", + frame->root->unique, src_gfid, state->fd, dst_gfid, + state->fd_dst, strerror(op_errno)); + + send_fuse_err(this, finh, op_errno); + } + + free_fuse_state(state); + STACK_DESTROY(frame->root); + + return 0; +} + +void +fuse_copy_file_range_resume(fuse_state_t *state) +{ + gf_log("glusterfs-fuse", GF_LOG_TRACE, + "%" PRIu64 + ": COPY_FILE_RANGE " + "(input fd: %p (gfid: %s), " + "output fd: %p (gfid: %s) size=%zu, " + "offset_in=%" PRIu64 ", offset_out=%" PRIu64 ")", + state->finh->unique, state->fd, uuid_utoa(state->fd->inode->gfid), + state->fd_dst, uuid_utoa(state->fd_dst->inode->gfid), state->size, + state->off_in, state->off_out); + + FUSE_FOP(state, fuse_copy_file_range_cbk, GF_FOP_COPY_FILE_RANGE, + copy_file_range, state->fd, state->off_in, state->fd_dst, + state->off_out, state->size, state->io_flags, state->xdata); +} + +static void +fuse_copy_file_range(xlator_t *this, fuse_in_header_t *finh, void *msg, + struct iobuf *iobuf) +{ + struct fuse_copy_file_range_in *fcfri = msg; + fuse_state_t *state = NULL; + fd_t *fd_in = NULL; + fd_t *fd_out = NULL; + + GET_STATE(this, finh, state); + + fd_in = FH_TO_FD(fcfri->fh_in); + fd_out = FH_TO_FD(fcfri->fh_out); + state->fd = fd_in; + state->fd_dst = fd_out; + + fuse_resolve_fd_init(state, &state->resolve, fd_in); + fuse_resolve_fd_init(state, &state->resolve2, fd_out); + + state->size = fcfri->len; + state->off_in = fcfri->off_in; + state->off_out = fcfri->off_out; + state->io_flags = fcfri->flags; + + fuse_resolve_and_resume(state, fuse_copy_file_range_resume); +} +#endif /* FUSE_KERNEL_MINOR_VERSION >= 28 */ + #if FUSE_KERNEL_MINOR_VERSION >= 24 && HAVE_SEEK_HOLE static int fuse_lseek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, @@ -6087,6 +6197,10 @@ static fuse_handler_t *fuse_std_ops[FUSE_OP_HIGH] = { #if FUSE_KERNEL_MINOR_VERSION >= 24 && HAVE_SEEK_HOLE [FUSE_LSEEK] = fuse_lseek, #endif + +#if FUSE_KERNEL_MINOR_VERSION >= 28 + [FUSE_COPY_FILE_RANGE] = fuse_copy_file_range, +#endif }; static fuse_handler_t *fuse_dump_ops[FUSE_OP_HIGH]; diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h index 57380786f17..60702ab1da5 100644 --- a/xlators/mount/fuse/src/fuse-bridge.h +++ b/xlators/mount/fuse/src/fuse-bridge.h @@ -41,8 +41,32 @@ #include <glusterfs/gidcache.h> #if defined(GF_LINUX_HOST_OS) || defined(__FreeBSD__) || defined(__NetBSD__) + +/* + * TODO: + * So, with the addition of copy_file_range support, it might + * require a bump up of fuse kernel minor version (like it was + * done when support for lseek fop was added. But, as of now, + * the copy_file_range support has just landed in upstream + * kernel fuse module. So, until, there is a release of that + * fuse as part of a kernel, the FUSE_KERNEL_MINOR_VERSION + * from fuse_kernel.h in the contrib might not be changed. + * If so, then the highest op available should be based on + * the current minor version (which is 24). So, selectively + * determine. When, the minor version is changed to 28 in + * fuse_kernel.h from contrib (because in upstream linux + * kernel source tree, the kernel minor version which + * contains support for copy_file_range is 28), then remove + * the reference to FUSE_LSEEK below and just determine + * FUSE_OP_HIGH based on copy_file_range. + */ +#if FUSE_KERNEL_MINOR_VERSION >= 28 +#define FUSE_OP_HIGH (FUSE_COPY_FILE_RANGE + 1) +#else #define FUSE_OP_HIGH (FUSE_LSEEK + 1) #endif + +#endif #ifdef GF_DARWIN_HOST_OS #define FUSE_OP_HIGH (FUSE_DESTROY + 1) #endif @@ -400,10 +424,22 @@ typedef struct { loc_t loc2; fuse_in_header_t *finh; int32_t flags; + off_t off; + /* + * The man page of copy_file_range tells that the offset + * arguments are of type loff_t *. Here in fuse state, the values of + * those offsets are saved instead of pointers as the kernel sends + * the values of the offsets from those pointers instead of pointers. + * But the type loff_t is linux specific and is actually a typedef of + * off64_t. Hence using off64_t + */ + off64_t off_in; /* for copy_file_range source fd */ + off64_t off_out; /* for copy_file_range destination fd */ size_t size; unsigned long nlookup; fd_t *fd; + fd_t *fd_dst; /* for copy_file_range destination */ dict_t *xattr; dict_t *xdata; char *name; diff --git a/xlators/protocol/client/src/client-common.c b/xlators/protocol/client/src/client-common.c index 7708c820918..64db98d661b 100644 --- a/xlators/protocol/client/src/client-common.c +++ b/xlators/protocol/client/src/client-common.c @@ -2556,6 +2556,38 @@ out: } int +client_pre_copy_file_range_v2(xlator_t *this, gfx_copy_file_range_req *req, + fd_t *fd_in, off64_t off_in, fd_t *fd_out, + off64_t off_out, size_t size, int32_t flags, + dict_t **xdata) +{ + int64_t remote_fd_in = -1; + int64_t remote_fd_out = -1; + int op_errno = ESTALE; + + CLIENT_GET_REMOTE_FD(this, fd_in, FALLBACK_TO_ANON_FD, remote_fd_in, + op_errno, out); + + CLIENT_GET_REMOTE_FD(this, fd_out, FALLBACK_TO_ANON_FD, remote_fd_out, + op_errno, out); + req->size = size; + req->off_in = off_in; + req->off_out = off_out; + req->fd_in = remote_fd_in; + req->fd_out = remote_fd_out; + req->flag = flags; + + memcpy(req->gfid1, fd_in->inode->gfid, 16); + memcpy(req->gfid2, fd_out->inode->gfid, 16); + + dict_to_xdr(*xdata, &req->xdata); + + return 0; +out: + return -op_errno; +} + +int client_pre_statfs_v2(xlator_t *this, gfx_statfs_req *req, loc_t *loc, dict_t *xdata) { diff --git a/xlators/protocol/client/src/client-common.h b/xlators/protocol/client/src/client-common.h index 5214eae128e..a2043d8742a 100644 --- a/xlators/protocol/client/src/client-common.h +++ b/xlators/protocol/client/src/client-common.h @@ -621,4 +621,10 @@ client_post_rename_v2(xlator_t *this, gfx_rename_rsp *rsp, struct iatt *stbuf, struct iatt *prenewparent, struct iatt *postnewparent, dict_t **xdata); +int +client_pre_copy_file_range_v2(xlator_t *this, gfx_copy_file_range_req *req, + fd_t *fd_in, off64_t off_in, fd_t *fd_out, + off64_t off_out, size_t size, int32_t flags, + dict_t **xdata); + #endif /* __CLIENT_COMMON_H__ */ diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c index 849fdfca0bc..55e87b3c370 100644 --- a/xlators/protocol/client/src/client-helpers.c +++ b/xlators/protocol/client/src/client-helpers.c @@ -2459,6 +2459,20 @@ client_handle_fop_requirements_v2( lease, this, &this_req->compound_req_v2_u.compound_lease_req, op_errno, out, &args->loc, &args->lease, args->xdata); break; + case GF_FOP_COPY_FILE_RANGE: + /* + * Not going to handle the copy_file_range fop in compound + * operation. This is because, compound operation is going + * to be removed. In fact, AFR one of the heavy consumer of + * compound operations has stopped using that. + * https://github.com/gluster/glusterfs/issues/414 + * Therefore, sending ENOTSUP error for this fop coming as + * comound request. Though, there was no need of handling + * "case GF_FOP_COPY_FILE_RANGE" technically, this comment + * under the label of GF_FOP_COPY_FILE_RANGE will help in + * understanding that this fop does not handle the compund + * request and why. + */ default: return ENOTSUP; } @@ -2631,6 +2645,14 @@ compound_request_cleanup_v2(gfx_compound_req *req) case GF_FOP_SEEK: CLIENT4_COMPOUND_FOP_CLEANUP(curr_req, seek); break; + case GF_FOP_COPY_FILE_RANGE: + /* + * This fop is not handled in compund operations. + * Check the comment added under this fop's section + * in the compound_request_cleanup_v2. Therefore + * keeping this label only as a placeholder with + * a message that, this fop is not handled. + */ default: break; } @@ -3004,6 +3026,12 @@ client_process_response_v2(call_frame_t *frame, xlator_t *this, &this_args_cbk->lease, xdata); break; } + case GF_FOP_COPY_FILE_RANGE: + /* + * Not handling this fop. Returning ENOTSUP. Check + * the comment added for this fop in the function + * client_handle_fop_requirements_v2. + */ default: return -ENOTSUP; } diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c index ca180c1db4b..8f3ee41e5c5 100644 --- a/xlators/protocol/client/src/client-rpc-fops_v2.c +++ b/xlators/protocol/client/src/client-rpc-fops_v2.c @@ -2833,6 +2833,72 @@ out: return 0; } +int +client4_0_copy_file_range_cbk(struct rpc_req *req, struct iovec *iov, int count, + void *myframe) +{ + gfx_common_3iatt_rsp rsp = { + 0, + }; + call_frame_t *frame = NULL; + struct iatt stbuf = { + 0, + }; + struct iatt prestat = { + 0, + }; + struct iatt poststat = { + 0, + }; + int ret = 0; + xlator_t *this = NULL; + dict_t *xdata = NULL; + clnt_local_t *local = NULL; + + this = THIS; + + frame = myframe; + local = frame->local; + + if (-1 == req->rpc_status) { + rsp.op_ret = -1; + rsp.op_errno = ENOTCONN; + goto out; + } + + ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gfx_common_3iatt_rsp); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, EINVAL, PC_MSG_XDR_DECODING_FAILED, + "XDR decoding failed"); + rsp.op_ret = -1; + rsp.op_errno = EINVAL; + goto out; + } + + ret = client_post_common_3iatt(this, &rsp, &stbuf, &prestat, &poststat, + &xdata); + if (ret < 0) + goto out; +out: + if (rsp.op_ret == -1) { + gf_msg(this->name, GF_LOG_WARNING, gf_error_to_errno(rsp.op_errno), + PC_MSG_REMOTE_OP_FAILED, "remote operation failed"); + } else if (rsp.op_ret >= 0) { + if (local->attempt_reopen) + client_attempt_reopen(local->fd, this); + if (local->attempt_reopen_out) + client_attempt_reopen(local->fd_out, this); + } + CLIENT_STACK_UNWIND(copy_file_range, frame, rsp.op_ret, + gf_error_to_errno(rsp.op_errno), &stbuf, &prestat, + &poststat, xdata); + + if (xdata) + dict_unref(xdata); + + return 0; +} + int32_t client4_0_releasedir(call_frame_t *frame, xlator_t *this, void *data) { @@ -5846,6 +5912,80 @@ unwind: } int32_t +client4_0_copy_file_range(call_frame_t *frame, xlator_t *this, void *data) +{ + clnt_args_t *args = NULL; + clnt_conf_t *conf = NULL; + clnt_local_t *local = NULL; + gfx_copy_file_range_req req = { + { + 0, + }, + }; + int op_errno = ESTALE; + int ret = 0; + + if (!frame || !this || !data) + goto unwind; + + args = data; + conf = this->private; + + ret = client_pre_copy_file_range_v2(this, &req, args->fd, args->off_in, + args->fd_out, args->off_out, args->size, + args->flags, &args->xdata); + + if (ret) { + op_errno = -ret; + goto unwind; + } + + ret = client_fd_fop_prepare_local(frame, args->fd, req.fd_in); + if (ret) { + op_errno = -ret; + goto unwind; + } + + /* + * Since frame->local is allocated in above function call + * itself, better to use it (with the assumption that it + * has been allocated) directly instead of again calling + * client_fd_fop_prepare_local or modifying it, as doing + * so requires changes in other places as well. + */ + + local = frame->local; + local->fd_out = fd_ref(args->fd_out); + local->attempt_reopen_out = client_is_reopen_needed(args->fd_out, this, + req.fd_out); + + ret = client_submit_request( + this, &req, frame, conf->fops, GFS3_OP_COPY_FILE_RANGE, + client4_0_copy_file_range_cbk, NULL, NULL, 0, NULL, 0, NULL, + (xdrproc_t)xdr_gfx_copy_file_range_req); + if (ret) { + /* + * If the lower layers fail to submit a request, they'll also + * do the unwind for us (see rpc_clnt_submit), so don't unwind + * here in such cases. + */ + gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED, + "failed to send the fop"); + } + + GF_FREE(req.xdata.pairs.pairs_val); + + return 0; + +unwind: + CLIENT_STACK_UNWIND(copy_file_range, frame, -1, op_errno, NULL, NULL, NULL, + NULL); + GF_FREE(req.xdata.pairs.pairs_val); + + return 0; +} + +int32_t client4_0_fsetattr(call_frame_t *frame, xlator_t *this, void *data) { clnt_args_t *args = NULL; @@ -6257,6 +6397,7 @@ rpc_clnt_procedure_t clnt4_0_fop_actors[GF_FOP_MAXVALUE] = { [GF_FOP_COMPOUND] = {"COMPOUND", client4_0_compound}, [GF_FOP_ICREATE] = {"ICREATE", client4_0_icreate}, [GF_FOP_NAMELINK] = {"NAMELINK", client4_0_namelink}, + [GF_FOP_COPY_FILE_RANGE] = {"COPY-FILE-RANGE", client4_0_copy_file_range}, }; rpc_clnt_prog_t clnt4_0_fop_prog = { diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c index 38723b43b45..c8e84f6e1b7 100644 --- a/xlators/protocol/client/src/client.c +++ b/xlators/protocol/client/src/client.c @@ -1129,6 +1129,41 @@ out: return 0; } +int32_t +client_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, + off_t off_in, fd_t *fd_out, off_t off_out, size_t len, + uint32_t flags, dict_t *xdata) +{ + int ret = -1; + clnt_conf_t *conf = NULL; + rpc_clnt_procedure_t *proc = NULL; + clnt_args_t args = { + 0, + }; + + conf = this->private; + if (!conf || !conf->fops) + goto out; + + args.fd = fd_in; + args.fd_out = fd_out; + args.offset = off_in; + args.off_out = off_out; + args.size = len; + args.flags = flags; + args.xdata = xdata; + + proc = &conf->fops->proctable[GF_FOP_COPY_FILE_RANGE]; + if (proc->fn) + ret = proc->fn(frame, this, &args); +out: + if (ret) + STACK_UNWIND_STRICT(copy_file_range, frame, -1, ENOTCONN, NULL, NULL, + NULL, NULL); + + return 0; +} + static gf_boolean_t is_client_rpc_init_command(dict_t *dict, xlator_t *this, char **value) { @@ -2898,6 +2933,7 @@ struct xlator_fops fops = { .icreate = client_icreate, .namelink = client_namelink, .put = client_put, + .copy_file_range = client_copy_file_range, }; struct xlator_dumpops dumpops = { diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h index 5fc75a84628..71f84f3ca89 100644 --- a/xlators/protocol/client/src/client.h +++ b/xlators/protocol/client/src/client.h @@ -269,6 +269,7 @@ typedef struct client_local { loc_t loc; loc_t loc2; fd_t *fd; + fd_t *fd_out; /* used in copy_file_range */ clnt_fd_ctx_t *fdctx; uint32_t flags; struct iobref *iobref; @@ -280,6 +281,11 @@ typedef struct client_local { pthread_mutex_t mutex; char *name; gf_boolean_t attempt_reopen; + /* + * The below boolean variable is used + * only for copy_file_range fop + */ + gf_boolean_t attempt_reopen_out; /* required for compound fops */ compound_args_t *compound_args; unsigned int length; /* length of a compound fop */ @@ -289,7 +295,13 @@ typedef struct client_local { typedef struct client_args { loc_t *loc; + /* + * This is the source fd for copy_file_range and + * the default fd for any other fd based fop which + * requires only one fd (i.e. opetates on one fd) + */ fd_t *fd; + fd_t *fd_out; /* this is the destination fd for copy_file_range */ const char *linkname; struct iobref *iobref; struct iovec *vector; @@ -301,7 +313,17 @@ typedef struct client_args { struct gf_flock *flock; const char *volume; const char *basename; + off_t offset; + /* + * According to the man page of copy_file_range, + * the offsets for source and destination file + * are of type loff_t. But the type loff_t is + * linux specific and is actual a typedef of + * off64_t. + */ + off64_t off_in; /* used in copy_file_range for source fd */ + off64_t off_out; /* used in copy_file_range for dst fd */ int32_t mask; int32_t cmd; size_t size; diff --git a/xlators/protocol/server/src/server-common.c b/xlators/protocol/server/src/server-common.c index 25b36155065..0639ac3feb3 100644 --- a/xlators/protocol/server/src/server-common.c +++ b/xlators/protocol/server/src/server-common.c @@ -541,6 +541,16 @@ server4_post_common_3iatt(server_state_t *state, gfx_common_3iatt_rsp *rsp, } void +server4_post_common_3iatt_noinode(gfx_common_3iatt_rsp *rsp, struct iatt *stbuf, + struct iatt *prebuf_dst, + struct iatt *postbuf_dst) +{ + gfx_stat_from_iattx(&rsp->stat, stbuf); + gfx_stat_from_iattx(&rsp->preparent, prebuf_dst); + gfx_stat_from_iattx(&rsp->postparent, postbuf_dst); +} + +void server4_post_common_2iatt(gfx_common_2iatt_rsp *rsp, struct iatt *prebuf, struct iatt *postbuf) { diff --git a/xlators/protocol/server/src/server-common.h b/xlators/protocol/server/src/server-common.h index 2844ee95756..6200415e304 100644 --- a/xlators/protocol/server/src/server-common.h +++ b/xlators/protocol/server/src/server-common.h @@ -192,3 +192,8 @@ void server4_post_link(server_state_t *state, gfx_common_3iatt_rsp *rsp, inode_t *inode, struct iatt *stbuf, struct iatt *pre, struct iatt *post); + +void +server4_post_common_3iatt_noinode(gfx_common_3iatt_rsp *rsp, struct iatt *stbuf, + struct iatt *prebuf_dst, + struct iatt *postbuf_dst); diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c index c55a422679d..8ad2d8492ed 100644 --- a/xlators/protocol/server/src/server-helpers.c +++ b/xlators/protocol/server/src/server-helpers.c @@ -4948,6 +4948,8 @@ server_populate_compound_response_v2(xlator_t *this, gfx_compound_rsp *rsp, rsp_args->op_errno = gf_errno_to_error(this_args_cbk->op_errno); break; } + case GF_FOP_COPY_FILE_RANGE: + /* Not handling this fop. */ default: return ENOTSUP; } @@ -5380,6 +5382,12 @@ server_get_compound_resolve_v2(server_state_t *state, gfx_compound_req *req) memcpy(state->resolve.gfid, this_req.gfid, 16); break; } + case GF_FOP_COPY_FILE_RANGE: + /* + * Compound operations is not being used anymore and + * planned for subsequent removal. Hence not handling + * this fop here. + */ default: return ENOTSUP; } diff --git a/xlators/protocol/server/src/server-resolve.c b/xlators/protocol/server/src/server-resolve.c index 26260a5ee2c..ec768acba44 100644 --- a/xlators/protocol/server/src/server-resolve.c +++ b/xlators/protocol/server/src/server-resolve.c @@ -545,14 +545,39 @@ server_resolve_fd(call_frame_t *frame) return 0; } - state->fd = gf_fd_fdptr_get(serv_ctx->fdtable, fd_no); - + /* + * With copy_file_range, there will be 2 fds to resolve. + * This same function is called to resolve both the source + * fd and the destination fd. As of now, this function does + * not have any mechanism to distinguish between the 2 fds + * being resolved except for checking the value of state->fd. + * The assumption is that, if source fd the one which is + * being resolved here, then state->fd would be NULL. If it + * is not NULL, then it is the destination fd which is being + * resolved. + * This method (provided the above assumption is true) is + * to achieve the ability to distinguish between 2 fds with + * minimum changes being done to this function. If this way + * is not correct, then more changes might be needed. + */ if (!state->fd) { - gf_msg("", GF_LOG_INFO, EBADF, PS_MSG_FD_NOT_FOUND, - "fd not " - "found in context"); - resolve->op_ret = -1; - resolve->op_errno = EBADF; + state->fd = gf_fd_fdptr_get(serv_ctx->fdtable, fd_no); + if (!state->fd) { + gf_msg("", GF_LOG_INFO, EBADF, PS_MSG_FD_NOT_FOUND, + "fd not " + "found in context"); + resolve->op_ret = -1; + resolve->op_errno = EBADF; + } + } else { + state->fd_out = gf_fd_fdptr_get(serv_ctx->fdtable, fd_no); + if (!state->fd_out) { + gf_msg("", GF_LOG_INFO, EBADF, PS_MSG_FD_NOT_FOUND, + "fd not " + "found in context"); + resolve->op_ret = -1; + resolve->op_errno = EBADF; + } } server_resolve_all(frame); diff --git a/xlators/protocol/server/src/server-rpc-fops_v2.c b/xlators/protocol/server/src/server-rpc-fops_v2.c index c5a8e482621..16570294f6d 100644 --- a/xlators/protocol/server/src/server-rpc-fops_v2.c +++ b/xlators/protocol/server/src/server-rpc-fops_v2.c @@ -2259,6 +2259,64 @@ out: return 0; } +int +server4_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *stbuf, struct iatt *prebuf_dst, + struct iatt *postbuf_dst, dict_t *xdata) +{ + gfx_common_3iatt_rsp rsp = { + 0, + }; + server_state_t *state = NULL; + rpcsvc_request_t *req = NULL; + char in_gfid[GF_UUID_BUF_SIZE] = {0}; + char out_gfid[GF_UUID_BUF_SIZE] = {0}; + + dict_to_xdr(xdata, &rsp.xdata); + + if (op_ret < 0) { + state = CALL_STATE(frame); + + uuid_utoa_r(state->resolve.gfid, in_gfid); + uuid_utoa_r(state->resolve2.gfid, out_gfid); + + gf_msg(this->name, fop_log_level(GF_FOP_COPY_FILE_RANGE, op_errno), + op_errno, PS_MSG_WRITE_INFO, + "%" PRId64 ": COPY_FILE_RANGE %" PRId64 " (%s), %" PRId64 + " (%s) client: %s, " + "error-xlator: %s", + frame->root->unique, state->resolve.fd_no, in_gfid, + state->resolve2.fd_no, out_gfid, STACK_CLIENT_NAME(frame->root), + STACK_ERR_XL_NAME(frame->root)); + goto out; + } + + /* + * server4_post_common_3iatt (ex: used by server4_put_cbk and some + * other cbks) also performs inode linking along with copying of 3 + * iatt structures to the response. But, for copy_file_range, linking + * of inode is not needed. Therefore a new function is used to + * construct the response using 3 iatt structures. + * @stbuf: iatt or stat of the source file (or fd) + * @prebuf_dst: iatt or stat of destination file (or fd) before the fop + * @postbuf_dst: iatt or stat of destination file (or fd) after the fop + */ + server4_post_common_3iatt_noinode(&rsp, stbuf, prebuf_dst, postbuf_dst); + +out: + rsp.op_ret = op_ret; + rsp.op_errno = gf_errno_to_error(op_errno); + + req = frame->local; + server_submit_reply(frame, req, &rsp, NULL, 0, NULL, + (xdrproc_t)xdr_gfx_common_3iatt_rsp); + + GF_FREE(rsp.xdata.pairs.pairs_val); + + return 0; +} + /* Resume function section */ int @@ -3448,6 +3506,29 @@ err: } int +server4_copy_file_range_resume(call_frame_t *frame, xlator_t *bound_xl) +{ + server_state_t *state = NULL; + + state = CALL_STATE(frame); + + if (state->resolve.op_ret != 0) + goto err; + + STACK_WIND(frame, server4_copy_file_range_cbk, bound_xl, + bound_xl->fops->copy_file_range, state->fd, state->off_in, + state->fd_out, state->off_out, state->size, state->flags, + state->xdata); + + return 0; +err: + server4_copy_file_range_cbk(frame, NULL, frame->this, state->resolve.op_ret, + state->resolve.op_errno, NULL, NULL, NULL, + NULL); + return 0; +} + +int server4_0_stat(rpcsvc_request_t *req) { server_state_t *state = NULL; @@ -6104,6 +6185,53 @@ out: return ret; } +int +server4_0_copy_file_range(rpcsvc_request_t *req) +{ + server_state_t *state = NULL; + call_frame_t *frame = NULL; + gfx_copy_file_range_req args = { + { + 0, + }, + }; + ssize_t len = 0; + int ret = -1; + int op_errno = 0; + + if (!req) + return ret; + + ret = rpc_receive_common(req, &frame, &state, &len, &args, + xdr_gfx_copy_file_range_req, + GF_FOP_COPY_FILE_RANGE); + if (ret != 0) { + goto out; + } + + state->resolve.type = RESOLVE_MUST; + state->resolve.fd_no = args.fd_in; + state->resolve2.type = RESOLVE_MUST; /*making this resolve must */ + state->resolve2.fd_no = args.fd_out; + state->off_in = args.off_in; + state->off_out = args.off_out; + state->size = args.size; + state->flags = args.flag; + memcpy(state->resolve.gfid, args.gfid1, 16); + memcpy(state->resolve2.gfid, args.gfid2, 16); + + xdr_to_dict(&args.xdata, &state->xdata); + + ret = 0; + resolve_and_resume(frame, server4_copy_file_range_resume); +out: + + if (op_errno) + SERVER_REQ_SET_ERROR(req, ret); + + return ret; +} + rpcsvc_actor_t glusterfs4_0_fop_actors[] = { [GFS3_OP_NULL] = {"NULL", GFS3_OP_NULL, server_null, NULL, 0}, [GFS3_OP_STAT] = {"STAT", GFS3_OP_STAT, server4_0_stat, NULL, 0}, @@ -6195,6 +6323,8 @@ rpcsvc_actor_t glusterfs4_0_fop_actors[] = { DRC_NA}, [GFS3_OP_NAMELINK] = {"NAMELINK", GFS3_OP_NAMELINK, server4_0_namelink, NULL, 0, DRC_NA}, + [GFS3_OP_COPY_FILE_RANGE] = {"COPY-FILE-RANGE", GFS3_OP_COPY_FILE_RANGE, + server4_0_copy_file_range, NULL, 0, DRC_NA}, }; struct rpcsvc_program glusterfs4_0_fop_prog = { diff --git a/xlators/protocol/server/src/server.h b/xlators/protocol/server/src/server.h index 2a77aba1f3c..bdf98c96f1c 100644 --- a/xlators/protocol/server/src/server.h +++ b/xlators/protocol/server/src/server.h @@ -180,7 +180,12 @@ struct _server_state { struct iatt stbuf; int valid; + /* + * this fd is used in all the fd based operations PLUS + * as a source fd in copy_file_range + */ fd_t *fd; + fd_t *fd_out; /* destination fd in copy_file_range */ dict_t *params; int32_t flags; int wbflags; @@ -191,6 +196,15 @@ struct _server_state { size_t size; off_t offset; + /* + * According to the man page of copy_file_range, + * the offsets for source and destination file + * are of type loff_t. But the type loff_t is + * linux specific and is actual a typedef of + * off64_t. + */ + off64_t off_in; /* source offset in copy_file_range */ + off64_t off_out; /* destination offset in copy_file_range */ mode_t mode; dev_t dev; size_t nr_count; diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c index ed0516d4c4a..54fc1dc1195 100644 --- a/xlators/storage/posix/src/posix-helpers.c +++ b/xlators/storage/posix/src/posix-helpers.c @@ -3314,3 +3314,23 @@ unlock: out: return ret; } + +int +posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno) +{ + int ret = -1; + + if (inode->ia_type == IA_IFBLK || inode->ia_type == IA_IFCHR) { + *op_errno = EINVAL; + gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_INVALID_ARGUMENT, + "%s received on %s file (%s)", fop, + (inode->ia_type == IA_IFBLK) ? "block" : "char", + uuid_utoa(inode->gfid)); + goto out; + } + + ret = 0; + +out: + return ret; +} diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c index 9e2b37f582c..dafd1855ef9 100644 --- a/xlators/storage/posix/src/posix-inode-fd-ops.c +++ b/xlators/storage/posix/src/posix-inode-fd-ops.c @@ -1960,6 +1960,274 @@ out: } int32_t +posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, + off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len, + uint32_t flags, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = 0; + int _fd_in = -1; + int _fd_out = -1; + struct posix_private *priv = NULL; + struct posix_fd *pfd_in = NULL; + struct posix_fd *pfd_out = NULL; + struct iatt preop_dst = { + 0, + }; + struct iatt postop_dst = { + 0, + }; + struct iatt stbuf = { + 0, + }; + int ret = -1; + dict_t *rsp_xdata = NULL; + int is_append = 0; + gf_boolean_t locked = _gf_false; + gf_boolean_t update_atomic = _gf_false; + posix_inode_ctx_t *ctx = NULL; + + VALIDATE_OR_GOTO(frame, out); + VALIDATE_OR_GOTO(this, out); + VALIDATE_OR_GOTO(fd_in, out); + VALIDATE_OR_GOTO(fd_in->inode, out); + VALIDATE_OR_GOTO(fd_out, out); + VALIDATE_OR_GOTO(fd_out->inode, out); + VALIDATE_OR_GOTO(this->private, out); + + priv = this->private; + + VALIDATE_OR_GOTO(priv, out); + DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out); + + if (posix_check_dev_file(this, fd_in->inode, "copy_file_range", &op_errno)) + goto out; + + if (posix_check_dev_file(this, fd_out->inode, "copy_file_range", &op_errno)) + goto out; + + ret = posix_fd_ctx_get(fd_in, this, &pfd_in, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd_in); + goto out; + } + + _fd_in = pfd_in->fd; + + ret = posix_fd_ctx_get(fd_out, this, &pfd_out, &op_errno); + if (ret < 0) { + gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL, + "pfd is NULL from fd=%p", fd_out); + goto out; + } + + _fd_out = pfd_out->fd; + + /* + * Currently, the internal write is checked via xdata which + * is set by some xlator above. It could be due to several of + * the reasons such as healing or a snapshot operation happening + * using copy_file_range. As of now (i.e. writing the patch with + * this change) none of the xlators above posix are using the + * internal write with copy_file_range. In future it might + * change. Atleast as of now the hope is that, when that happens + * this functon or fop does not require additional changes for + * handling internal writes. + */ + ret = posix_check_internal_writes(this, fd_out, _fd_out, xdata); + if (ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "possible overwrite from internal client, fd=%p", fd_out); + op_ret = -1; + op_errno = EBUSY; + goto out; + } + + if (xdata) { + if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC)) + update_atomic = _gf_true; + } + + /* + * The update_atomic option is to instruct posix to do prestat, + * write and poststat atomically. This is to prevent any modification to + * ia_size and ia_blocks until poststat and the diff in their values + * between pre and poststat could be of use for some translators. + * This is similar to the atomic write operation. atmoic write is + * (i.e. prestat + write + poststat) used by shard as of now. In case, + * some xlator needs copy_file_range to be atomic from prestat and postat + * prespective (i.e. prestat + copy_file_range + poststat) then it has + * to send "GLUSTERFS_WRITE_UPDATE_ATOMIC" key in xdata. + */ + + op_ret = posix_inode_ctx_get_all(fd_out->inode, this, &ctx); + if (op_ret < 0) { + op_errno = ENOMEM; + goto out; + } + + if (update_atomic) { + ret = pthread_mutex_lock(&ctx->write_atomic_lock); + if (!ret) + locked = _gf_true; + else { + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_MUTEX_FAILED, + "failed to hold write atomic lock on %s", + uuid_utoa(fd_out->inode->gfid)); + goto out; + } + } + + op_ret = posix_fdstat(this, fd_out->inode, _fd_out, &preop_dst); + if (op_ret == -1) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "pre-operation fstat failed on fd=%p", fd_out); + goto out; + } + + /* + * Since, only the destination file (fd_out) is undergoing + * modification, the write related tests are done on that. + * i.e. this is treater similar to as if the destination file + * undergoing write fop from maintenance perspective. + */ + if (xdata) { + op_ret = posix_cs_maintenance(this, fd_out, NULL, &_fd_out, &preop_dst, + NULL, xdata, &rsp_xdata, _gf_false); + if (op_ret < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, 0, + "file state check failed, fd %p", fd_out); + op_errno = EIO; + goto out; + } + } + + /* + * NOTE: This is just doing a single execution of copy_file_range + * system call. If the returned value of this system call is less + * than len, then should we keep doing it in a for loop until the + * copy_file_range of all the len bytes is done? + * Check the example program provided in the man page of + * copy_file_range. + * If so, then a separate variables for both off_in and off_out + * should be used which are initialized to off_in and off_out + * that this function call receives, but then advanced by the + * value returned by sys_copy_file_range and then use that as + * off_in and off_out for next instance of copy_file_range execution. + */ + op_ret = sys_copy_file_range(_fd_in, &off_in, _fd_out, &off_out, len, + flags); + + if (op_ret < 0) { + op_errno = -op_ret; + op_ret = -1; + gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_COPY_FILE_RANGE_FAILED, + "copy_file_range failed: fd_in: %p (gfid: %s) ," + " fd_out %p (gfid:%s)", + fd_in, uuid_utoa(fd_in->inode->gfid), fd_out, + uuid_utoa(fd_out->inode->gfid)); + goto out; + } + + /* + * Let this be as it is for now. This function collects + * infomration such as open fd count etc. So, even though + * is_append does not apply to copy_file_range, for now, + * allowing it to be recorded in the dict as _gf_false. + */ + rsp_xdata = _fill_writev_xdata(fd_out, xdata, this, is_append); + + /* copy_file_range successful, we also need to get the stat of + * the file we wrote to (i.e. destination file or fd_out). + */ + ret = posix_fdstat(this, fd_out->inode, _fd_out, &postop_dst); + if (ret == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "post-operation fstat failed on fd=%p", fd_out); + goto out; + } + + /* + * Also perform the stat on the source fd (i.e. fd_in). For now, + * allowing it to be done within the locked region if the request + * is for atomic operation (and update) of copy_file_range. + */ + ret = posix_fdstat(this, fd_in->inode, _fd_in, &stbuf); + if (ret == -1) { + op_ret = -1; + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED, + "post-operation fstat failed on fd=%p", fd_in); + goto out; + } + + /* + * The core logic of what time attributes are to be updated + * on a fop is decided at client side xlator utime. + * All the remaining fops call posix_set_ctime function + * to update the {a,m,c}time. But, for all the other fops, + * the operation is happening on only one file (or inode). + * But here, there are 2 fds (source and destination). Hence + * the new function below to update the appropriate times for + * both the source and the destination file. + * For the source file, if at all anything has to be updated, + * it would be atime (as that file is only read, not updated). + * For the destination file, the attributes that require the + * modification would be mtime and ctime. + * What times have to be changed is actually determined by + * utime xlator. But, all of them would be in frame->root->flags. + * So, currently posix assumes that, the atime flag is for + * the source file and the other 2 flags are for the destination + * file. Since, the assumption is rigid (i.e. atime for source + * and {m,c}time for destination), the below function is called + * posix_set_ctime_cfr (cfr standing for copy_file_range). + * FUTURE TODO: + * In future, some other functionality or fop might operate + * simultaneously on 2 files. Then, depending upon what that new + * fop does or what are its requirements, the below function might + * require changes to become generic for consumption in case of + * simultaneous operations on 2 files. + */ + posix_set_ctime_cfr(frame, this, NULL, pfd_in->fd, fd_in->inode, &stbuf, + NULL, pfd_out->fd, fd_out->inode, &postop_dst); + + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + + /* + * Record copy_file_range in priv->write_value for now. + * If not needed, remove below section of code along with + * this comment (or add comment to explain why it is not + * needed). + */ + LOCK(&priv->lock); + { + priv->write_value += op_ret; + } + UNLOCK(&priv->lock); + +out: + + if (locked) { + pthread_mutex_unlock(&ctx->write_atomic_lock); + locked = _gf_false; + } + + STACK_UNWIND_STRICT(copy_file_range, frame, op_ret, op_errno, &stbuf, + &preop_dst, &postop_dst, rsp_xdata); + + if (rsp_xdata) + dict_unref(rsp_xdata); + return 0; +} + +int32_t posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { char *real_path = NULL; diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h index 62af32ac8fe..928723db8f9 100644 --- a/xlators/storage/posix/src/posix-messages.h +++ b/xlators/storage/posix/src/posix-messages.h @@ -67,6 +67,7 @@ GLFS_MSGID(POSIX, P_MSG_XATTR_FAILED, P_MSG_NULL_GFID, P_MSG_FCNTL_FAILED, P_MSG_ANCESTORY_FAILED, P_MSG_DISK_SPACE_CHECK_FAILED, P_MSG_FALLOCATE_FAILED, P_MSG_STOREMDATA_FAILED, P_MSG_FETCHMDATA_FAILED, P_MSG_GETMDATA_FAILED, - P_MSG_SETMDATA_FAILED, P_MSG_FRESHFILE); + P_MSG_SETMDATA_FAILED, P_MSG_FRESHFILE, P_MSG_MUTEX_FAILED, + P_MSG_COPY_FILE_RANGE_FAILED); #endif /* !_GLUSTERD_MESSAGES_H_ */ diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c index 26fae2019b5..62669a0b83f 100644 --- a/xlators/storage/posix/src/posix-metadata.c +++ b/xlators/storage/posix/src/posix-metadata.c @@ -663,3 +663,81 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, out: return; } + +void +posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this, + const char *real_path_in, int fd_in, inode_t *inode_in, + struct iatt *stbuf_in, const char *real_path_out, + int fd_out, inode_t *inode_out, struct iatt *stbuf_out) +{ + posix_mdata_flag_t flag = { + 0, + }; + posix_mdata_flag_t flag_dup = { + 0, + }; + int ret = 0; + struct posix_private *priv = NULL; + + priv = this->private; + + if (priv->ctime) { + (void)posix_get_mdata_flag(frame->root->flags, &flag); + if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) { + goto out; + } + + if (frame->root->ctime.tv_sec == 0) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed, No ctime : in: %s gfid_in:%s " + "out: %s gfid_out:%s", + real_path_in, + inode_in ? uuid_utoa(inode_in->gfid) : "No inode", + real_path_out, + inode_out ? uuid_utoa(inode_out->gfid) : "No inode"); + goto out; + } + + flag_dup = flag; + + /* + * For the destination file, no need to update atime. + * It got modified. Hence the things that need to be + * changed are mtime and ctime (provided the utime + * xlator from the client has set those flags, which + * are just copied to flag_dup). + */ + if (flag.atime) + flag_dup.atime = 0; + + ret = posix_set_mdata_xattr(this, real_path_out, fd_out, inode_out, + &frame->root->ctime, stbuf_out, &flag_dup, + _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path_out, + inode_out ? uuid_utoa(inode_out->gfid) : "No inode"); + } + + /* + * For the source file, no need to change the mtime and ctime. + * For source file, it is only read operation. So, if at all + * anything needs to be updated, it is only the atime. + */ + if (flag.atime) + flag_dup.atime = flag.atime; + flag_dup.mtime = 0; + flag_dup.ctime = 0; + + ret = posix_set_mdata_xattr(this, real_path_in, fd_out, inode_out, + &frame->root->ctime, stbuf_out, &flag_dup, + _gf_false); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED, + "posix set mdata failed on file: %s gfid:%s", real_path_in, + inode_in ? uuid_utoa(inode_in->gfid) : "No inode"); + } + } +out: + return; +} diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h index e1b549d55a1..3416148ea97 100644 --- a/xlators/storage/posix/src/posix-metadata.h +++ b/xlators/storage/posix/src/posix-metadata.h @@ -48,5 +48,10 @@ void posix_set_parent_ctime(call_frame_t *frame, xlator_t *this, const char *real_path, int fd, inode_t *inode, struct iatt *stbuf); +void +posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this, + const char *real_path_in, int fd_in, inode_t *inode_in, + struct iatt *stbuf_in, const char *read_path_put, + int fd_out, inode_t *inode_out, struct iatt *stbuf_out); #endif /* _POSIX_METADATA_H */ diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index d6a20135f96..42b965434b9 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -76,6 +76,7 @@ struct xlator_fops fops = { .seek = posix_seek, .lease = posix_lease, .put = posix_put, + .copy_file_range = posix_copy_file_range, }; struct xlator_cbks cbks = { diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h index 1f1d4fc2774..a1ec996f4b2 100644 --- a/xlators/storage/posix/src/posix.h +++ b/xlators/storage/posix/src/posix.h @@ -638,6 +638,11 @@ posix_put(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata); int32_t +posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, + off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len, + uint32_t flags, dict_t *xdata); + +int32_t posix_set_mode_in_dict(dict_t *in_dict, dict_t *out_dict, struct iatt *in_stbuf); @@ -656,5 +661,7 @@ int posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd, struct iatt *buf, const char *realpath, dict_t *xattr_req, dict_t **xattr_rsp, gf_boolean_t ignore_failure); +int +posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno); #endif /* _POSIX_H */ |