summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- api/src/Makefile.am 2
-rw-r--r-- api/src/gfapi.aliases 1
-rw-r--r-- api/src/gfapi.map 1
-rw-r--r-- api/src/glfs-fops.c 155
-rw-r--r-- api/src/glfs.h 39
-rw-r--r-- configure.ac 19
-rw-r--r-- glusterfs-api.pc.in 2
-rw-r--r-- libgfchangelog.pc.in 2
-rw-r--r-- libglusterfs/src/Makefile.am 2
-rw-r--r-- libglusterfs/src/call-stub.c 58
-rw-r--r-- libglusterfs/src/default-args.c 42
-rw-r--r-- libglusterfs/src/defaults-tmpl.c 1
-rwxr-xr-x libglusterfs/src/generator.py 13
-rw-r--r-- libglusterfs/src/globals.c 1
-rw-r--r-- libglusterfs/src/glusterfs/call-stub.h 14
-rw-r--r-- libglusterfs/src/glusterfs/compat.h 19
-rw-r--r-- libglusterfs/src/glusterfs/default-args.h 11
-rw-r--r-- libglusterfs/src/glusterfs/defaults.h 42
-rw-r--r-- libglusterfs/src/glusterfs/syncop.h 24
-rw-r--r-- libglusterfs/src/glusterfs/syscall.h 29
-rw-r--r-- libglusterfs/src/glusterfs/xlator.h 13
-rw-r--r-- libglusterfs/src/libglusterfs.sym 10
-rw-r--r-- libglusterfs/src/syncop.c 63
-rw-r--r-- libglusterfs/src/syscall.c 32
-rw-r--r-- libglusterfs/src/xlator.c 1
-rw-r--r-- rpc/rpc-lib/src/protocol-common.h 1
-rw-r--r-- rpc/xdr/src/glusterfs-fops.x 1
-rw-r--r-- rpc/xdr/src/glusterfs4-xdr.x 13
-rw-r--r-- rpc/xdr/src/libgfxdr.sym 1
-rw-r--r-- tests/basic/gfapi/gfapi-copy-file-range.t 80
-rw-r--r-- tests/basic/gfapi/glfs-copy-file-range.c 177
-rw-r--r-- xlators/debug/io-stats/src/io-stats.c 27
-rw-r--r-- xlators/features/changelog/lib/src/Makefile.am 2
-rwxr-xr-x xlators/features/utime/src/utime-gen-fops-c.py 18
-rwxr-xr-x xlators/features/utime/src/utime-gen-fops-h.py 2
-rw-r--r-- xlators/features/utime/src/utime-helpers.c 9
-rw-r--r-- xlators/mount/fuse/src/fuse-bridge.c 114
-rw-r--r-- xlators/mount/fuse/src/fuse-bridge.h 36
-rw-r--r-- xlators/protocol/client/src/client-common.c 32
-rw-r--r-- xlators/protocol/client/src/client-common.h 6
-rw-r--r-- xlators/protocol/client/src/client-helpers.c 28
-rw-r--r-- xlators/protocol/client/src/client-rpc-fops_v2.c 141
-rw-r--r-- xlators/protocol/client/src/client.c 36
-rw-r--r-- xlators/protocol/client/src/client.h 22
-rw-r--r-- xlators/protocol/server/src/server-common.c 10
-rw-r--r-- xlators/protocol/server/src/server-common.h 5
-rw-r--r-- xlators/protocol/server/src/server-helpers.c 8
-rw-r--r-- xlators/protocol/server/src/server-resolve.c 39
-rw-r--r-- xlators/protocol/server/src/server-rpc-fops_v2.c 130
-rw-r--r-- xlators/protocol/server/src/server.h 14
-rw-r--r-- xlators/storage/posix/src/posix-helpers.c 20
-rw-r--r-- xlators/storage/posix/src/posix-inode-fd-ops.c 268
-rw-r--r-- xlators/storage/posix/src/posix-messages.h 3
-rw-r--r-- xlators/storage/posix/src/posix-metadata.c 78
-rw-r--r-- xlators/storage/posix/src/posix-metadata.h 5
-rw-r--r-- xlators/storage/posix/src/posix.c 1
-rw-r--r-- xlators/storage/posix/src/posix.h 7
57 files changed, 1911 insertions, 19 deletions
diff --git a/api/src/Makefile.am b/api/src/Makefile.am
index 6ed30bc99f6..7f9a7d17b35 100644
--- a/api/src/Makefile.am
+++ b/api/src/Makefile.am
@@ -19,7 +19,7 @@ libgfapi_la_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/rpc/xdr/src \
-I$(top_builddir)/rpc/xdr/src \
-DDATADIR=\"$(localstatedir)\" \
- -D__USE_FILE_OFFSET64
+ -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64
AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases
index a71422c8577..0e52c38d346 100644
--- a/api/src/gfapi.aliases
+++ b/api/src/gfapi.aliases
@@ -188,3 +188,4 @@ _pub_glfs_ftruncate _glfs_ftruncate$GFAPI_future
_pub_glfs_ftruncate_async _glfs_ftruncate_async$GFAPI_future
_pub_glfs_discard_async _glfs_discard_async$GFAPI_future
_pub_glfs_zerofill_async _glfs_zerofill_async$GFAPI_future
+_pub_glfs_copy_file_range _glfs_copy_file_range$GFAPI_future
\ No newline at end of file
diff --git a/api/src/gfapi.map b/api/src/gfapi.map
index c47323781fb..1be2953ce9a 100644
--- a/api/src/gfapi.map
+++ b/api/src/gfapi.map
@@ -255,5 +255,6 @@ GFAPI_future {
glfs_ftruncate_async;
glfs_discard_async;
glfs_zerofill_async;
+ glfs_copy_file_range;
} GFAPI_4.1.6;
diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
index 2a1cc73ccee..f59990aed1f 100644
--- a/api/src/glfs-fops.c
+++ b/api/src/glfs-fops.c
@@ -1333,6 +1333,161 @@ invalid_fs:
}
+/*
+ * Copy up to @len bytes from @glfd_in to @glfd_out through the
+ * copy_file_range fop (syncop_copy_file_range()).
+ *
+ * @off_in / @off_out: when non-NULL, the value pointed to is used as the
+ *     offset for that fd and, if the call does not fail, is advanced by the
+ *     returned byte count; glfd->offset of that fd is left untouched. When
+ *     NULL, glfd->offset of that fd is used and advanced instead.
+ * @statbuf / @prestat / @poststat: optional; filled from the three iatts
+ *     handed back by syncop_copy_file_range() when the call does not fail.
+ *
+ * Returns the result of syncop_copy_file_range() after DECODE_SYNCOP_ERR.
+ * For the failures detected in this function it returns -1 with errno set
+ * to EXDEV (the two fds belong to different glfs instances), EIO (no active
+ * subvolume) or EBADFD (an fd could not be resolved on the subvolume).
+ */
ssize_t
+pub_glfs_copy_file_range(struct glfs_fd *glfd_in, off64_t *off_in,
+                         struct glfs_fd *glfd_out, off64_t *off_out, size_t len,
+                         unsigned int flags, struct stat *statbuf,
+                         struct stat *prestat, struct stat *poststat)
+{
+    xlator_t *subvol = NULL;
+    int ret = -1;
+    fd_t *fd_in = NULL;
+    fd_t *fd_out = NULL;
+    struct iatt preiatt = {
+        0,
+    };
+    struct iatt iattbuf = {
+        0,
+    };
+    struct iatt postiatt = {
+        0,
+    };
+    dict_t *fop_attr = NULL;
+    off64_t pos_in;
+    off64_t pos_out;
+
+    DECLARE_OLD_THIS;
+    __GLFS_ENTRY_VALIDATE_FD(glfd_in, invalid_fs);
+    __GLFS_ENTRY_VALIDATE_FD(glfd_out, invalid_fs);
+
+    GF_REF_GET(glfd_in);
+    GF_REF_GET(glfd_out);
+
+    /* Both fds must come from the same glfs instance. */
+    if (glfd_in->fs != glfd_out->fs) {
+        ret = -1;
+        errno = EXDEV;
+        goto out;
+    }
+
+    subvol = glfs_active_subvol(glfd_in->fs);
+    if (!subvol) {
+        ret = -1;
+        errno = EIO;
+        goto out;
+    }
+
+    fd_in = glfs_resolve_fd(glfd_in->fs, subvol, glfd_in);
+    if (!fd_in) {
+        ret = -1;
+        errno = EBADFD;
+        goto out;
+    }
+
+    fd_out = glfs_resolve_fd(glfd_out->fs, subvol, glfd_out);
+    if (!fd_out) {
+        ret = -1;
+        errno = EBADFD;
+        goto out;
+    }
+
+    /*
+     * This is based on how the vfs layer in the kernel handles the
+     * copy_file_range call:
+     *   if (off_in != NULL)
+     *       use the value *off_in to perform the op
+     *   else
+     *       use the current file offset position to perform the op
+     *
+     * For gfapi, glfd->offset is the current position. For a freshly
+     * opened fd, the offset is set to 0.
+     */
+    if (off_in)
+        pos_in = *off_in;
+    else
+        pos_in = glfd_in->offset;
+
+    if (off_out)
+        pos_out = *off_out;
+    else
+        pos_out = glfd_out->offset;
+
+    /* A failure here is only logged; the fop is still attempted. */
+    ret = get_fop_attr_thrd_key(&fop_attr);
+    if (ret)
+        gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+    ret = syncop_copy_file_range(subvol, fd_in, pos_in, fd_out, pos_out, len,
+                                 flags, &iattbuf, &preiatt, &postiatt, fop_attr,
+                                 NULL);
+    DECODE_SYNCOP_ERR(ret);
+
+    if (ret >= 0) {
+        pos_in += ret;
+        pos_out += ret;
+
+        /* Caller-supplied offsets are advanced in the caller's buffers. */
+        if (off_in)
+            *off_in = pos_in;
+        if (off_out)
+            *off_out = pos_out;
+
+        if (statbuf)
+            glfs_iatt_to_stat(glfd_in->fs, &iattbuf, statbuf);
+        if (prestat)
+            glfs_iatt_to_stat(glfd_in->fs, &preiatt, prestat);
+        if (poststat)
+            glfs_iatt_to_stat(glfd_in->fs, &postiatt, poststat);
+    }
+
+    if (ret <= 0)
+        goto out;
+
+    /*
+     * If off_in is NULL, no offset was supplied by the caller, so the
+     * position kept in the fd itself is the one that moves:
+     *   if (off_in == NULL)
+     *       glfd->offset += ret;
+     *   else
+     *       do nothing (already advanced *off_in above).
+     *
+     * According to the man page of copy_file_range, if off_in is NULL
+     * the offset of the source file is advanced by the return value of
+     * the call; otherwise the file offset is not changed and the value
+     * pointed to by off_in is adjusted instead. The same applies to
+     * off_out.
+     */
+    if (!off_in)
+        glfd_in->offset += ret;
+
+    if (!off_out)
+        glfd_out->offset += ret;
+
+out:
+    if (fd_in)
+        fd_unref(fd_in);
+    if (fd_out)
+        fd_unref(fd_out);
+    if (fop_attr)
+        dict_unref(fop_attr);
+
+    /*
+     * glfd_in->fs is read here, so this must happen while the reference
+     * taken with GF_REF_GET() above is still held.
+     */
+    glfs_subvol_done(glfd_in->fs, subvol);
+
+    /* Both pointers were validated and dereferenced above: never NULL. */
+    GF_REF_PUT(glfd_in);
+    GF_REF_PUT(glfd_out);
+
+    __GLFS_EXIT_FS;
+
+invalid_fs:
+    return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_copy_file_range, future);
+
+ssize_t
pub_glfs_pwritev(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
off_t offset, int flags)
{
diff --git a/api/src/glfs.h b/api/src/glfs.h
index cd642a5ea20..160a784222f 100644
--- a/api/src/glfs.h
+++ b/api/src/glfs.h
@@ -42,6 +42,38 @@
#include <sys/statvfs.h>
#include <inttypes.h>
+/*
+ * For off64_t to be defined, we need both
+ * __USE_LARGEFILE64 to be true and __off64_t_defined to be
+ * false. But, making __USE_LARGEFILE64 true causes other issues
+ * such as redefinition of stat and fstat to stat64 and fstat64
+ * respectively which again causes compilation issues.
+ * Without off64_t being defined, this will not compile as
+ * copy_file_range uses off64_t. Hence define it here. First
+ * check whether __off64_t_defined is true or not. <unistd.h>
+ * sets that flag when it defines off64_t. If __off64_t_defined
+ * is false and __USE_FILE_OFFSET64 is true, then go on to define
+ * off64_t using __off64_t.
+ */
+#ifndef GF_BSD_HOST_OS
+#if defined(__USE_FILE_OFFSET64) && !defined(__off64_t_defined)
+typedef __off64_t off64_t;
+#endif /* defined(__USE_FILE_OFFSET64) && !defined(__off64_t_defined) */
+#else
+#include <stdio.h>
+#ifndef _OFF64_T_DECLARED
+/*
+ * Including <stdio.h> (done above) should actually define
+ * _OFF64_T_DECLARED with off64_t data type being available
+ * for consumption. But, off64_t data type is not recognizable
+ * for FreeBSD versions less than 11. Hence, int64_t is typedefed
+ * to off64_t.
+ */
+#define _OFF64_T_DECLARED
+typedef int64_t off64_t;
+#endif /* _OFF64_T_DECLARED */
+#endif /* GF_BSD_HOST_OS */
+
#if defined(HAVE_SYS_ACL_H) || (defined(USE_POSIX_ACLS) && USE_POSIX_ACLS)
#include <sys/acl.h>
#else
@@ -594,6 +626,13 @@ off_t
glfs_lseek(glfs_fd_t *fd, off_t offset, int whence) __THROW
GFAPI_PUBLIC(glfs_lseek, 3.4.0);
+ssize_t
+glfs_copy_file_range(struct glfs_fd *glfd_in, off64_t *off_in,
+ struct glfs_fd *glfd_out, off64_t *off_out, size_t len,
+ unsigned int flags, struct stat *statbuf,
+ struct stat *prestat, struct stat *poststat) __THROW
+ GFAPI_PUBLIC(glfs_copy_file_range, future);
+
int
glfs_truncate(glfs_t *fs, const char *path, off_t length) __THROW
GFAPI_PUBLIC(glfs_truncate, 3.7.15);
diff --git a/configure.ac b/configure.ac
index 3ddb6f073a5..d3c8f8b9514 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1018,6 +1018,25 @@ if test "x${have_posix_fallocate}" = "xyes"; then
AC_DEFINE(HAVE_POSIX_FALLOCATE, 1, [define if posix_fallocate exists])
fi
+# On fedora-29, copy_file_range syscall and the libc API both are present.
+# Whereas, on some machines such as centos-7, RHEL-7, the API is not there.
+# Only the system call is present. So, this change is to determine whether
+# the API is present or not. If not, then check whether the system call is
+# present or not. Accordingly sys_copy_file_range function will first call
+# the API if it is there. Otherwise it will call syscall(SYS_copy_file_range).
+AC_CHECK_FUNC([copy_file_range], [have_copy_file_range=yes])
+if test "x${have_copy_file_range}" = "xyes"; then
+ AC_DEFINE(HAVE_COPY_FILE_RANGE, 1, [define if copy_file_range exists])
+else
+ OLD_CFLAGS=${CFLAGS}
+ CFLAGS="-D_GNU_SOURCE"
+ AC_CHECK_DECL([SYS_copy_file_range], , , [#include <sys/syscall.h>])
+ if test "x${ac_cv_have_decl_SYS_copy_file_range}" = "xyes"; then
+ AC_DEFINE(HAVE_COPY_FILE_RANGE_SYS, 1, [define if SYS_copy_file_range is available])
+ fi
+ CFLAGS=${OLD_CFLAGS}
+fi
+
BUILD_NANOSECOND_TIMESTAMPS=no
AC_CHECK_FUNC([utimensat], [have_utimensat=yes])
if test "x${have_utimensat}" = "xyes"; then
diff --git a/glusterfs-api.pc.in b/glusterfs-api.pc.in
index 6af4e108f7f..4a2edb7bf07 100644
--- a/glusterfs-api.pc.in
+++ b/glusterfs-api.pc.in
@@ -9,4 +9,4 @@ Description: GlusterFS API
Version: @GFAPI_VERSION@
Requires: @PKGCONFIG_UUID@
Libs: -L${libdir} @GFAPI_LIBS@ -lgfapi -lglusterfs -lgfrpc -lgfxdr
-Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -DUSE_POSIX_ACLS=@USE_POSIX_ACLS@
+Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 -DUSE_POSIX_ACLS=@USE_POSIX_ACLS@
diff --git a/libgfchangelog.pc.in b/libgfchangelog.pc.in
index e2ff1fb6214..79eac2ad2d3 100644
--- a/libgfchangelog.pc.in
+++ b/libgfchangelog.pc.in
@@ -9,4 +9,4 @@ Description: GlusterFS Changelog Consumer Library
Version: @LIBGFCHANGELOG_VERSION@
Requires: @PKGCONFIG_UUID@
Libs: -L${libdir} -lgfchangelog -lglusterfs
-Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64
+Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64
diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am
index 1d06f1586a9..970f4b74978 100644
--- a/libglusterfs/src/Makefile.am
+++ b/libglusterfs/src/Makefile.am
@@ -6,7 +6,7 @@ libglusterfs_la_CFLAGS = $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \
libglusterfs_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \
-DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \
-DXLATORPARENTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)\" \
- -DXXH_NAMESPACE=GF_ \
+ -DXXH_NAMESPACE=GF_ -D__USE_LARGEFILE64 \
-I$(top_srcdir)/rpc/xdr/src/ -I$(top_builddir)/rpc/xdr/src/ \
-I$(top_srcdir)/rpc/rpc-lib/src/ -I$(CONTRIBDIR)/rbtree \
-I$(CONTRIBDIR)/libexecinfo ${ARGP_STANDALONE_CPPFLAGS} \
diff --git a/libglusterfs/src/call-stub.c b/libglusterfs/src/call-stub.c
index 96454dfaeb5..886dfa52ccc 100644
--- a/libglusterfs/src/call-stub.c
+++ b/libglusterfs/src/call-stub.c
@@ -1818,6 +1818,51 @@ out:
}
+/*
+ * Create a call stub for a copy_file_range fop.
+ *
+ * @frame and @fn must be non-NULL; otherwise (or if stub_new() fails) the
+ * function jumps to "out" and returns whatever "stub" holds at that point.
+ * On success the stub remembers @fn and, through
+ * args_copy_file_range_store(), the two fds, both offsets, @len, @flags and
+ * @xdata. The stored values are replayed by the GF_FOP_COPY_FILE_RANGE case
+ * of call_resume_wind().
+ */
call_stub_t *
+fop_copy_file_range_stub(call_frame_t *frame, fop_copy_file_range_t fn,
+ fd_t *fd_in, off64_t off_in, fd_t *fd_out,
+ off64_t off_out, size_t len, uint32_t flags,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
+
+ /* NOTE(review): the literal 1 presumably marks a "wind" stub - confirm against stub_new() */
+ stub = stub_new(frame, 1, GF_FOP_COPY_FILE_RANGE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+
+ stub->fn.copy_file_range = fn;
+
+ args_copy_file_range_store(&stub->args, fd_in, off_in, fd_out, off_out, len,
+ flags, xdata);
+
+out:
+ return stub;
+}
+
+/*
+ * Create a call stub for the callback (unwind) side of copy_file_range.
+ *
+ * @frame and @fn must be non-NULL; otherwise (or if stub_new() fails) the
+ * function jumps to "out" and returns whatever "stub" holds at that point.
+ * On success the stub remembers @fn and, through
+ * args_copy_file_range_cbk_store(), the result (@op_ret / @op_errno), the
+ * three iatts and @xdata. The stored values are replayed by the
+ * GF_FOP_COPY_FILE_RANGE case of call_resume_unwind().
+ */
+call_stub_t *
+fop_copy_file_range_cbk_stub(call_frame_t *frame, fop_copy_file_range_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *stbuf, struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
+
+ /* NOTE(review): the literal 0 presumably marks an "unwind" stub - confirm against stub_new() */
+ stub = stub_new(frame, 0, GF_FOP_COPY_FILE_RANGE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+
+ stub->fn_cbk.copy_file_range = fn;
+ args_copy_file_range_cbk_store(&stub->args_cbk, op_ret, op_errno, stbuf,
+ prebuf_dst, postbuf_dst, xdata);
+
+out:
+ return stub;
+}
+
+call_stub_t *
fop_put_stub(call_frame_t *frame, fop_put_t fn, loc_t *loc, mode_t mode,
mode_t umask, uint32_t flags, struct iovec *vector, int32_t count,
off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata)
@@ -2213,6 +2258,13 @@ call_resume_wind(call_stub_t *stub)
stub->args.iobref, stub->args.xattr, stub->args.xdata);
break;
+ case GF_FOP_COPY_FILE_RANGE:
+ stub->fn.copy_file_range(
+ stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.off_in, stub->args.fd_dst, stub->args.off_out,
+ stub->args.size, stub->args.flags, stub->args.xdata);
+ break;
+
default:
gf_msg_callingfn("call-stub", GF_LOG_ERROR, EINVAL,
LG_MSG_INVALID_ENTRY,
@@ -2439,6 +2491,12 @@ call_resume_unwind(call_stub_t *stub)
stub->args_cbk.xdata);
break;
+ case GF_FOP_COPY_FILE_RANGE:
+ STUB_UNWIND(stub, copy_file_range, &stub->args_cbk.stat,
+ &stub->args_cbk.prestat, &stub->args_cbk.poststat,
+ stub->args_cbk.xdata);
+ break;
+
default:
gf_msg_callingfn("call-stub", GF_LOG_ERROR, EINVAL,
LG_MSG_INVALID_ENTRY,
diff --git a/libglusterfs/src/default-args.c b/libglusterfs/src/default-args.c
index 479974e1637..cfceabd1f46 100644
--- a/libglusterfs/src/default-args.c
+++ b/libglusterfs/src/default-args.c
@@ -1541,6 +1541,48 @@ args_namelink_store(default_args_t *args, loc_t *loc, dict_t *xdata)
return 0;
}
+/*
+ * Save the arguments of a copy_file_range fop into @args.
+ *
+ * Offsets, length and flags are copied by value. @fd_in, @fd_out and @xdata
+ * are each optional; for those that are supplied, the value returned by
+ * fd_ref() / dict_ref() is what gets stored. Always returns 0.
+ */
+int
+args_copy_file_range_store(default_args_t *args, fd_t *fd_in, off64_t off_in,
+                           fd_t *fd_out, off64_t off_out, size_t len,
+                           uint32_t flags, dict_t *xdata)
+{
+    /* Plain values. */
+    args->off_in = off_in;
+    args->off_out = off_out;
+    args->size = len;
+    args->flags = flags;
+
+    /* Source fd goes into ->fd, destination fd into ->fd_dst. */
+    if (fd_in) {
+        args->fd = fd_ref(fd_in);
+    }
+    if (fd_out) {
+        args->fd_dst = fd_ref(fd_out);
+    }
+
+    if (xdata) {
+        args->xdata = dict_ref(xdata);
+    }
+
+    return 0;
+}
+
+/*
+ * Save the callback arguments of a copy_file_range fop into @args.
+ *
+ * @op_ret and @op_errno are always recorded, and @xdata is stored via
+ * dict_ref() when supplied. The three iatts are copied only when
+ * @op_ret >= 0, and only those whose pointer is non-NULL. Always returns 0.
+ */
+int
+args_copy_file_range_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+                               int32_t op_errno, struct iatt *stbuf,
+                               struct iatt *prebuf_dst,
+                               struct iatt *postbuf_dst, dict_t *xdata)
+{
+    args->op_ret = op_ret;
+    args->op_errno = op_errno;
+
+    if (xdata) {
+        args->xdata = dict_ref(xdata);
+    }
+
+    /* Nothing more to keep for a failed fop. */
+    if (op_ret < 0) {
+        return 0;
+    }
+
+    if (stbuf) {
+        args->stat = *stbuf;
+    }
+    if (prebuf_dst) {
+        args->prestat = *prebuf_dst;
+    }
+    if (postbuf_dst) {
+        args->poststat = *postbuf_dst;
+    }
+
+    return 0;
+}
+
void
args_cbk_wipe(default_args_cbk_t *args_cbk)
{
diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c
index 97de8193dcb..5bf64e8c6c6 100644
--- a/libglusterfs/src/defaults-tmpl.c
+++ b/libglusterfs/src/defaults-tmpl.c
@@ -84,6 +84,7 @@ struct xlator_fops _default_fops = {
.put = default_put,
.icreate = default_icreate,
.namelink = default_namelink,
+ .copy_file_range = default_copy_file_range,
};
struct xlator_fops *default_fops = &_default_fops;
diff --git a/libglusterfs/src/generator.py b/libglusterfs/src/generator.py
index c17d450502d..5b7aa4764a0 100755
--- a/libglusterfs/src/generator.py
+++ b/libglusterfs/src/generator.py
@@ -599,6 +599,19 @@ ops['namelink'] = (
('cbk-arg', 'xdata', 'dict_t *'),
)
+# copy_file_range: seven fop arguments (source fd and offset, destination fd
+# and offset, length, flags, xdata) followed by four callback arguments (three
+# iatts and xdata). Each entry is (kind, argument name, C type).
+ops['copy_file_range'] = (
+ ('fop-arg', 'fd_in', 'fd_t *'),
+ ('fop-arg', 'off_in', 'off64_t '),
+ ('fop-arg', 'fd_out', 'fd_t *'),
+ ('fop-arg', 'off_out', 'off64_t '),
+ ('fop-arg', 'len', 'size_t'),
+ ('fop-arg', 'flags', 'uint32_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'stbuf', 'struct iatt *'),
+ ('cbk-arg', 'prebuf_dst', 'struct iatt *'),
+ ('cbk-arg', 'postbuf_dst', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
#####################################################################
xlator_cbks['forget'] = (
('fn-arg', 'this', 'xlator_t *'),
diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c
index 35482545ab3..4fec0638926 100644
--- a/libglusterfs/src/globals.c
+++ b/libglusterfs/src/globals.c
@@ -77,6 +77,7 @@ const char *gf_fop_list[GF_FOP_MAXVALUE] = {
[GF_FOP_PUT] = "PUT",
[GF_FOP_ICREATE] = "ICREATE",
[GF_FOP_NAMELINK] = "NAMELINK",
+ [GF_FOP_COPY_FILE_RANGE] = "COPY_FILE_RANGE",
};
const char *gf_upcall_list[GF_UPCALL_FLAGS_MAXVALUE] = {
diff --git a/libglusterfs/src/glusterfs/call-stub.h b/libglusterfs/src/glusterfs/call-stub.h
index bfed0fbc14a..c01c935e73d 100644
--- a/libglusterfs/src/glusterfs/call-stub.h
+++ b/libglusterfs/src/glusterfs/call-stub.h
@@ -81,6 +81,7 @@ typedef struct _call_stub {
fop_put_t put;
fop_icreate_t icreate;
fop_namelink_t namelink;
+ fop_copy_file_range_t copy_file_range;
} fn;
union {
@@ -136,6 +137,7 @@ typedef struct _call_stub {
fop_put_cbk_t put;
fop_icreate_cbk_t icreate;
fop_namelink_cbk_t namelink;
+ fop_copy_file_range_cbk_t copy_file_range;
} fn_cbk;
default_args_t args;
@@ -589,6 +591,18 @@ fop_namelink_cbk_stub(call_frame_t *frame, fop_namelink_cbk_t fn,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata);
+call_stub_t *
+fop_copy_file_range_stub(call_frame_t *frame, fop_copy_file_range_t fn,
+ fd_t *fd_in, off64_t off_in, fd_t *fd_out,
+ off64_t off_out, size_t len, uint32_t flags,
+ dict_t *xdata);
+
+call_stub_t *
+fop_copy_file_range_cbk_stub(call_frame_t *frame, fop_copy_file_range_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *stbuf, struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
void
call_resume(call_stub_t *stub);
void
diff --git a/libglusterfs/src/glusterfs/compat.h b/libglusterfs/src/glusterfs/compat.h
index 38c07b5ae7c..9374b79f9af 100644
--- a/libglusterfs/src/glusterfs/compat.h
+++ b/libglusterfs/src/glusterfs/compat.h
@@ -116,6 +116,25 @@
#include <limits.h>
#include <libgen.h>
+/*
+ * This is where things like off64_t are defined.
+ * So include it before declaring _OFF64_T_DECLARED.
+ * If the freebsd version has support for off64_t
+ * including stdio.h should be sufficient.
+ */
+#include <stdio.h>
+
+#ifndef _OFF64_T_DECLARED
+/*
+ * Including <stdio.h> (done above) should actually define
+ * _OFF64_T_DECLARED with off64_t data type being available
+ * for consumption. But, off64_t data type is not recognizable
+ * for FreeBSD versions less than 11. Hence, int64_t is typedefed
+ * to off64_t.
+ */
+#define _OFF64_T_DECLARED
+typedef int64_t off64_t;
+#endif /* _OFF64_T_DECLARED */
#ifndef XATTR_CREATE
enum {
diff --git a/libglusterfs/src/glusterfs/default-args.h b/libglusterfs/src/glusterfs/default-args.h
index f15f558202b..ca7526fcab6 100644
--- a/libglusterfs/src/glusterfs/default-args.h
+++ b/libglusterfs/src/glusterfs/default-args.h
@@ -234,6 +234,12 @@ void
args_lease_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
struct gf_lease *lease, dict_t *xdata);
+int
+args_copy_file_range_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *stbuf,
+ struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
void
args_cbk_wipe(default_args_cbk_t *args_cbk);
@@ -439,6 +445,11 @@ args_icreate_store(default_args_t *args, loc_t *loc, mode_t mode,
int
args_namelink_store(default_args_t *args, loc_t *loc, dict_t *xdata);
+/*
+ * Save the arguments of a copy_file_range fop (both fds, both 64-bit
+ * offsets, length, flags and xdata) into @args. The parameter list mirrors
+ * the definition in default-args.c: both offsets are off64_t.
+ */
+int
+args_copy_file_range_store(default_args_t *args, fd_t *fd_in, off64_t off_in,
+                           fd_t *fd_out, off64_t off_out, size_t len,
+                           uint32_t flags, dict_t *xdata);
+
void
args_cbk_init(default_args_cbk_t *args_cbk);
#endif /* _DEFAULT_ARGS_H */
diff --git a/libglusterfs/src/glusterfs/defaults.h b/libglusterfs/src/glusterfs/defaults.h
index 5d6b8e28a51..5a818eeb91a 100644
--- a/libglusterfs/src/glusterfs/defaults.h
+++ b/libglusterfs/src/glusterfs/defaults.h
@@ -48,10 +48,20 @@ typedef struct {
} default_args_cbk_t;
typedef struct {
- loc_t loc; /* @old in rename(), link() */
- loc_t loc2; /* @new in rename(), link() */
- fd_t *fd;
+ loc_t loc; /* @old in rename(), link() */
+ loc_t loc2; /* @new in rename(), link() */
+ fd_t *fd; /* for all the fd based ops */
+ fd_t *fd_dst; /* Only for copy_file_range destination */
off_t offset;
+ /*
+ * According to the man page of copy_file_range,
+ * the offsets for source and destination file
+ * are of type loff_t. But the type loff_t is
+ * Linux specific and is actually a typedef of
+ * off64_t.
+ */
+ off64_t off_in; /* For copy_file_range source fd */
+ off64_t off_out; /* For copy_file_range destination fd only */
int mask;
size_t size;
mode_t mode;
@@ -323,6 +333,11 @@ int32_t
default_namelink(call_frame_t *frame, xlator_t *this, loc_t *loc,
dict_t *xdata);
+int32_t
+default_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+ off64_t off_in, fd_t *fd_out, off64_t off_out,
+ size_t len, uint32_t flags, dict_t *xdata);
+
/* Resume */
int32_t
default_getspec_resume(call_frame_t *frame, xlator_t *this, const char *key,
@@ -542,6 +557,11 @@ default_put_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
int32_t count, off_t off, struct iobref *iobref,
dict_t *xattr, dict_t *xdata);
+/*
+ * Resume variant of default_copy_file_range(); it takes the same fop
+ * arguments, with both offsets being off64_t.
+ */
+int32_t
+default_copy_file_range_resume(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+                               off64_t off_in, fd_t *fd_out, off64_t off_out,
+                               size_t len, uint32_t flags, dict_t *xdata);
+
/* _cbk_resume */
int32_t
@@ -813,6 +833,13 @@ int32_t
default_namelink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
dict_t *xdata);
+int32_t
+default_copy_file_range_cbk_resume(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *stbuf,
+ struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
/* _CBK */
int32_t
default_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
@@ -1072,6 +1099,12 @@ default_namelink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *postbuf, dict_t *xdata);
int32_t
+default_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *stbuf, struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
+int32_t
default_lookup_failure_cbk(call_frame_t *frame, int32_t op_errno);
int32_t
@@ -1231,6 +1264,9 @@ int32_t
default_namelink_failure_cbk(call_frame_t *frame, int32_t op_errno);
int32_t
+default_copy_file_range_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
default_mem_acct_init(xlator_t *this);
void
diff --git a/libglusterfs/src/glusterfs/syncop.h b/libglusterfs/src/glusterfs/syncop.h
index 203abe92b57..7a6167b0488 100644
--- a/libglusterfs/src/glusterfs/syncop.h
+++ b/libglusterfs/src/glusterfs/syncop.h
@@ -138,8 +138,19 @@ typedef struct syncbarrier syncbarrier_t;
struct syncargs {
int op_ret;
int op_errno;
+
+ /*
+ * The below 3 iatt structures are used in the fops
+ * whose callbacks get struct iatt as one of the
+ * return arguments. Currently, the maximum number
+ * of iatt structures returned is 3 for some fops
+ * such as mknod, copy_file_range, mkdir etc. So
+ * all the following 3 iatt structures would be used
+ * for those fops.
+ */
struct iatt iatt1;
struct iatt iatt2;
+ struct iatt iatt3;
dict_t *xattr;
struct statvfs statvfs_buf;
struct iovec *vector;
@@ -634,4 +645,17 @@ syncop_entrylk(xlator_t *subvol, const char *volume, loc_t *loc,
const char *basename, entrylk_cmd cmd, entrylk_type type,
dict_t *xdata_in, dict_t **xdata_out);
+int
+syncop_copy_file_range(xlator_t *subvol, fd_t *fd_in, off64_t off_in,
+ fd_t *fd_out, off64_t off_out, size_t len,
+ uint32_t flags, struct iatt *stbuf,
+ struct iatt *preiatt_dst, struct iatt *postiatt_dst,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *stbuf,
+ struct iatt *prebuf_dst, struct iatt *postbuf_dst,
+ dict_t *xdata);
+
#endif /* _SYNCOP_H */
diff --git a/libglusterfs/src/glusterfs/syscall.h b/libglusterfs/src/glusterfs/syscall.h
index faaf694b22c..6b33c141a5e 100644
--- a/libglusterfs/src/glusterfs/syscall.h
+++ b/libglusterfs/src/glusterfs/syscall.h
@@ -17,6 +17,7 @@
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/socket.h>
+#include <stdio.h>
/* GF follows the Linux XATTR definition, which differs in Darwin. */
#define GF_XATTR_CREATE 0x1 /* set value, fail if attr already exists */
@@ -228,4 +229,32 @@ sys_socket(int domain, int type, int protocol);
int
sys_accept(int sock, struct sockaddr *sockaddr, socklen_t *socklen, int flags);
+#ifdef GF_BSD_HOST_OS
+#ifndef _OFF64_T_DECLARED
+/*
+ * Including <stdio.h> (done above) should actually define
+ * _OFF64_T_DECLARED with off64_t data type being available
+ * for consumption. But, off64_t data type is not recognizable
+ * for FreeBSD versions less than 11. Hence, int64_t is typedefed
+ * to off64_t.
+ */
+#define _OFF64_T_DECLARED
+typedef int64_t off64_t;
+#endif /* _OFF64_T_DECLARED */
+#endif /* GF_BSD_HOST_OS */
+
+/*
+ * According to the man page of copy_file_range, both off_in and off_out are
+ * pointers to the data type loff_t (i.e. loff_t *). But, freebsd does not
+ * have (and recognize) loff_t. Since loff_t is 64 bits, use off64_t
+ * instead. Since only a pointer is passed, this should be okay: it just needs
+ * to point to a 64-bit integer on both 32- and 64-bit platforms.
+ * off64_t is recognized by freebsd.
+ * TODO: In future, when freebsd can recognize loff_t, probably revisit this
+ * and change the off_in and off_out to (loff_t *).
+ */
+ssize_t
+sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out,
+ size_t len, unsigned int flags);
+
#endif /* __SYSCALL_H__ */
diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
index 4137d12eb27..12d507bc021 100644
--- a/libglusterfs/src/glusterfs/xlator.h
+++ b/libglusterfs/src/glusterfs/xlator.h
@@ -23,6 +23,7 @@
#include "glusterfs/list.h"
#include "glusterfs/latency.h"
#include "glusterfs/compat-uuid.h"
+#include "glusterfs/syscall.h"
#define FIRST_CHILD(xl) (xl->children->xlator)
#define SECOND_CHILD(xl) (xl->children->next->xlator)
@@ -354,6 +355,11 @@ typedef int32_t (*fop_namelink_cbk_t)(call_frame_t *frame, void *cookie,
int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata);
+typedef int32_t (*fop_copy_file_range_cbk_t)(
+ call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *stbuf, struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
typedef int32_t (*fop_lookup_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
dict_t *xdata);
@@ -544,6 +550,11 @@ typedef int32_t (*fop_icreate_t)(call_frame_t *frame, xlator_t *this,
typedef int32_t (*fop_namelink_t)(call_frame_t *frame, xlator_t *this,
loc_t *loc, dict_t *xdata);
+typedef int32_t (*fop_copy_file_range_t)(call_frame_t *frame, xlator_t *this,
+ fd_t *fd_in, off64_t off_in,
+ fd_t *fd_out, off64_t off_out,
+ size_t len, uint32_t flags,
+ dict_t *xdata);
/* WARNING: make sure the list is in order with FOP definition in
`rpc/xdr/src/glusterfs-fops.x`.
@@ -609,6 +620,7 @@ struct xlator_fops {
fop_put_t put;
fop_icreate_t icreate;
fop_namelink_t namelink;
+ fop_copy_file_range_t copy_file_range;
/* these entries are used for a typechecking hack in STACK_WIND _only_ */
/* make sure to add _cbk variables only after defining regular fops as
@@ -673,6 +685,7 @@ struct xlator_fops {
fop_put_cbk_t put_cbk;
fop_icreate_cbk_t icreate_cbk;
fop_namelink_cbk_t namelink_cbk;
+ fop_copy_file_range_cbk_t copy_file_range_cbk;
};
typedef int32_t (*cbk_forget_t)(xlator_t *this, inode_t *inode);
diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
index baf44de64ad..6ca6a639456 100644
--- a/libglusterfs/src/libglusterfs.sym
+++ b/libglusterfs/src/libglusterfs.sym
@@ -92,6 +92,8 @@ args_xattrop_cbk_store
args_xattrop_store
args_zerofill_cbk_store
args_zerofill_store
+args_copy_file_range_cbk_store
+args_copy_file_range_store
bin_to_data
call_resume
call_resume_keep_stub
@@ -351,6 +353,10 @@ default_put
default_put_cbk
default_put_failure_cbk
default_put_resume
+default_copy_file_range
+default_copy_file_range_cbk
+default_copy_file_range_failure_cbk
+default_copy_file_range_resume
__dentry_grep
dht_is_linkfile
dict_add
@@ -471,6 +477,8 @@ fd_unref
_fini
fop_access_stub
fop_create_stub
+fop_copy_file_range_stub
+fop_copy_file_range_cbk_stub
fop_discard_stub
fop_entrylk_stub
fop_enum_to_pri_string
@@ -933,6 +941,7 @@ synclock_unlock
syncop_access
syncop_close
syncop_create
+syncop_copy_file_range
syncopctx_getctx
syncopctx_setfsgid
syncopctx_setfsgroups
@@ -1006,6 +1015,7 @@ sys_chmod
sys_chown
sys_close
sys_closedir
+sys_copy_file_range
sys_creat
sys_fallocate
sys_fchmod
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
index b70953725ce..bf70daf95c3 100644
--- a/libglusterfs/src/syncop.c
+++ b/libglusterfs/src/syncop.c
@@ -3397,4 +3397,65 @@ syncop_namelink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
__wake(args);
return 0;
-}
\ No newline at end of file
+}
+
+/*
+ * Synchronous wrapper for the copy_file_range fop on @subvol.
+ *
+ * Issues the fop through SYNCOP() with syncop_copy_file_range_cbk() as the
+ * callback, then hands the collected results back to the caller:
+ *   @stbuf, @preiatt_dst, @postiatt_dst - optional; receive iatt1, iatt2 and
+ *       iatt3 of the syncargs respectively.
+ *   @xdata_out - optional; receives the xdata stored by the callback. When
+ *       NULL, that xdata (if any) is unref'd here instead.
+ *
+ * Sets errno to the op_errno of the call and returns its op_ret.
+ */
+int
+syncop_copy_file_range(xlator_t *subvol, fd_t *fd_in, off64_t off_in,
+                       fd_t *fd_out, off64_t off_out, size_t len,
+                       uint32_t flags, struct iatt *stbuf,
+                       struct iatt *preiatt_dst, struct iatt *postiatt_dst,
+                       dict_t *xdata_in, dict_t **xdata_out)
+{
+    struct syncargs args = {
+        0,
+    };
+
+    SYNCOP(subvol, (&args), syncop_copy_file_range_cbk,
+           subvol->fops->copy_file_range, fd_in, off_in, fd_out, off_out, len,
+           flags, xdata_in);
+
+    if (stbuf)
+        *stbuf = args.iatt1;
+    if (preiatt_dst)
+        *preiatt_dst = args.iatt2;
+    if (postiatt_dst)
+        *postiatt_dst = args.iatt3;
+
+    if (!xdata_out) {
+        /* Nobody wants the xdata: drop the ref taken in the callback. */
+        if (args.xdata)
+            dict_unref(args.xdata);
+    } else {
+        *xdata_out = args.xdata;
+    }
+
+    /* Set errno last so the cleanup above cannot disturb it. */
+    errno = args.op_errno;
+    return args.op_ret;
+}
+
+/*
+ * Callback for syncop_copy_file_range().
+ *
+ * @cookie is the struct syncargs of the waiting caller. The result
+ * (@op_ret / @op_errno) is always recorded and @xdata is stored via
+ * dict_ref() when supplied. When @op_ret >= 0 the iatts are copied into
+ * iatt1 (@stbuf), iatt2 (@prebuf_dst) and iatt3 (@postbuf_dst); each pointer
+ * is checked first, as args_copy_file_range_cbk_store() does, so a missing
+ * iatt leaves the corresponding field as it was instead of dereferencing
+ * NULL. Finally the waiter is woken with __wake(). Always returns 0.
+ */
+int
+syncop_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                           int op_ret, int op_errno, struct iatt *stbuf,
+                           struct iatt *prebuf_dst, struct iatt *postbuf_dst,
+                           dict_t *xdata)
+{
+    struct syncargs *args = NULL;
+
+    args = cookie;
+
+    args->op_ret = op_ret;
+    args->op_errno = op_errno;
+    if (xdata)
+        args->xdata = dict_ref(xdata);
+
+    if (op_ret >= 0) {
+        if (stbuf)
+            args->iatt1 = *stbuf;
+        if (prebuf_dst)
+            args->iatt2 = *prebuf_dst;
+        if (postbuf_dst)
+            args->iatt3 = *postbuf_dst;
+    }
+
+    __wake(args);
+
+    return 0;
+}
diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c
index c72a8e16b34..1d88c8adac1 100644
--- a/libglusterfs/src/syscall.c
+++ b/libglusterfs/src/syscall.c
@@ -8,8 +8,8 @@
cases as published by the Free Software Foundation.
*/
-#include "glusterfs/syscall.h"
#include "glusterfs/compat.h"
+#include "glusterfs/syscall.h"
#include "glusterfs/mem-pool.h"
#include "glusterfs/libglusterfs-messages.h"
@@ -19,6 +19,9 @@
#include <fcntl.h>
#include <unistd.h>
#include <stdarg.h>
+#ifdef HAVE_COPY_FILE_RANGE_SYS
+#include <sys/syscall.h>
+#endif
#define FS_ERROR_LOG(result) \
do { \
@@ -802,3 +805,30 @@ err:
#endif
return newsock;
}
+
+/*
+ * Wrapper around copy_file_range().
+ *
+ * Returns the number of bytes copied, or -1 with errno set on failure,
+ * in every build configuration.
+ */
+ssize_t
+sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out,
+                    size_t len, unsigned int flags)
+{
+    /*
+     * TODO: Add check for other platforms like freebsd etc if this syscall is
+     *       not generic.
+     * This is what the function does.
+     *       1) Check whether copy_file_range API is present. If so call it.
+     *       2) If copy_file_range API is not present, then check whether
+     *          the system call is there. If so, then use syscall to invoke
+     *          SYS_copy_file_range system call.
+     *       3) If neither of the above is present, then fail with ENOSYS.
+     */
+#ifdef HAVE_COPY_FILE_RANGE
+    return FS_RET_CHECK(
+        copy_file_range(fd_in, off_in, fd_out, off_out, len, flags), errno);
+#else
+#ifdef HAVE_COPY_FILE_RANGE_SYS
+    return syscall(SYS_copy_file_range, fd_in, off_in, fd_out, off_out, len,
+                   flags);
+#else
+    /*
+     * Follow the same convention as the two branches above (-1 and errno)
+     * instead of returning a negative errno value, which callers checking
+     * for -1 and reading errno would misinterpret.
+     */
+    errno = ENOSYS;
+    return -1;
+#endif /* HAVE_COPY_FILE_RANGE_SYS */
+#endif /* HAVE_COPY_FILE_RANGE */
+}
diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
index 1b7c9d46f88..b50848b3476 100644
--- a/libglusterfs/src/xlator.c
+++ b/libglusterfs/src/xlator.c
@@ -143,6 +143,7 @@ fill_defaults(xlator_t *xl)
SET_DEFAULT_FOP(getspec);
SET_DEFAULT_FOP(icreate);
SET_DEFAULT_FOP(namelink);
+ SET_DEFAULT_FOP(copy_file_range);
if (!xl->cbks)
xl->cbks = &default_cbks;
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index 4950857ae9e..779878f52be 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -68,6 +68,7 @@ enum gf_fop_procnum {
GFS3_OP_ICREATE,
GFS3_OP_NAMELINK,
GFS3_OP_PUT,
+ GFS3_OP_COPY_FILE_RANGE,
GFS3_OP_MAXVALUE,
};
diff --git a/rpc/xdr/src/glusterfs-fops.x b/rpc/xdr/src/glusterfs-fops.x
index ffb71d6418a..bacf07735f4 100644
--- a/rpc/xdr/src/glusterfs-fops.x
+++ b/rpc/xdr/src/glusterfs-fops.x
@@ -77,6 +77,7 @@ enum glusterfs_fop_t {
GF_FOP_PUT,
GF_FOP_ICREATE,
GF_FOP_NAMELINK,
+ GF_FOP_COPY_FILE_RANGE,
GF_FOP_MAXVALUE
};
diff --git a/rpc/xdr/src/glusterfs4-xdr.x b/rpc/xdr/src/glusterfs4-xdr.x
index c183dbcd704..dcea17fac68 100644
--- a/rpc/xdr/src/glusterfs4-xdr.x
+++ b/rpc/xdr/src/glusterfs4-xdr.x
@@ -628,6 +628,19 @@ struct gfx_seek_rsp {
struct gfx_setvolume_req {
gfx_dict dict;
} ;
+
+/* Request sent by the client for the copy_file_range fop. */
+ struct gfx_copy_file_range_req {
+ opaque gfid1[16]; /* gfid of the source fd's inode */
+ opaque gfid2[16]; /* gfid of the destination fd's inode */
+ quad_t fd_in; /* remote fd of the source */
+ quad_t fd_out; /* remote fd of the destination */
+ u_quad_t off_in; /* offset in the source file */
+ u_quad_t off_out; /* offset in the destination file */
+ unsigned int size; /* length requested by the caller */
+ unsigned int flag; /* flags passed to the fop */
+ gfx_dict xdata; /* Extra data */
+};
+
struct gfx_setvolume_rsp {
int op_ret;
int op_errno;
diff --git a/rpc/xdr/src/libgfxdr.sym b/rpc/xdr/src/libgfxdr.sym
index bd9131be7c6..22cdf30bfda 100644
--- a/rpc/xdr/src/libgfxdr.sym
+++ b/rpc/xdr/src/libgfxdr.sym
@@ -344,3 +344,4 @@ xdr_compound_req_v2
xdr_gfx_compound_req
xdr_compound_rsp_v2
xdr_gfx_compound_rsp
+xdr_gfx_copy_file_range_req \ No newline at end of file
diff --git a/tests/basic/gfapi/gfapi-copy-file-range.t b/tests/basic/gfapi/gfapi-copy-file-range.t
new file mode 100644
index 00000000000..c24c1433edf
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-copy-file-range.t
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+# For now, an xfs filesystem with reflink support is created here.
+# In future, it is better to make changes in MKFS_LOOP so that
+# one can create an xfs filesystem with reflink enabled in a
+# generic and simple way, instead of doing the below steps each
+# time.
+TEST truncate -s 2G $B0/xfs_image
+mkfs.xfs 2>&1 | grep reflink
+if [ $? -eq 0 ]; then
+ mkfs.xfs -f -i size=512 -m reflink=1 $B0/xfs_image;
+else
+ mkfs.xfs -f -i size=512 $B0/xfs_image;
+fi
+
+TEST mkdir $B0/bricks
+TEST mount -t xfs -o loop $B0/xfs_image $B0/bricks
+
+# Just a single brick volume. More test cases need to be
+# added in future for distribute, replicate,
+# distributed replicate and distributed replicated sharded
+# volumes.
+TEST $CLI volume create $V0 $H0:$B0/bricks/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST dd if=/dev/urandom of=$M0/file bs=1M count=555;
+
+# check for the existence of the created file
+TEST stat $M0/file;
+
+# grab the size of the file
+SRC_SIZE=$(stat -c %s $M0/file);
+
+logdir=`gluster --print-logdir`
+
+# TODO:
+# The regression machines are centos-7 based and may not have the
+# copy_file_range API available, in which case the tests below
+# (until volume stop) can fail. NOTE: this comment originally said
+# the copy-file-range utility is not called and that this is a
+# dummy test case, but the tests below are enabled. Comment them
+# out if they cause regression failures on such machines.
+#
+
+TEST build_tester $(dirname $0)/glfs-copy-file-range.c -lgfapi
+
+TEST ./$(dirname $0)/glfs-copy-file-range $H0 $V0 $logdir/gfapi-copy-file-range.log /file /new
+
+# check whether the destination file is created or not
+TEST stat $M0/new
+
+# check the size of the destination file
+DST_SIZE=$(stat -c %s $M0/new);
+
+# The sizes of the source and destination should be the same.
+# At least this ensures that the copy_file_range API is working
+# as expected. Whether the actual cloning happened via reflink
+# or via a read/write is a different matter.
+TEST [ $SRC_SIZE == $DST_SIZE ];
+
+cleanup_tester $(dirname $0)/glfs-copy-file-range
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+UMOUNT_LOOP $B0/bricks;
+
+cleanup;
diff --git a/tests/basic/gfapi/glfs-copy-file-range.c b/tests/basic/gfapi/glfs-copy-file-range.c
new file mode 100644
index 00000000000..756c38d21ec
--- /dev/null
+++ b/tests/basic/gfapi/glfs-copy-file-range.c
@@ -0,0 +1,177 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <string.h>
+#include <time.h>
+#include <libgen.h>
+
+/*
+ * Tear-down hook for the test program. The glfs_fini() call is compiled
+ * out (see the comment inside), so this is currently a no-op for any @fs.
+ */
+static void
+cleanup(glfs_t *fs)
+{
+    if (fs != NULL) {
+#if 0
+        /* glfs fini path is still racy and crashing the program. Since
+         * this program any way has to die, we are not going to call fini
+         * in the released versions. i.e. final builds. For all
+         * internal testing lets enable this so that glfs_fini code
+         * path becomes stable. */
+        glfs_fini (fs);
+#endif
+    }
+}
+
+/*
+ * Test driver for glfs_copy_file_range().
+ *
+ * Usage: <prog> <volfile server> <volume> <log file path> <source>
+ *        <destination>
+ *
+ * Opens <source>, creates <destination> and copies the whole source file
+ * (size taken from glfs_fstat) with glfs_copy_file_range(), repeating the
+ * call until all bytes are copied.
+ *
+ * Returns 0 on success, -1 on bad usage or paths, and a negative errno
+ * value when a gfapi call fails.
+ */
+int
+main(int argc, char **argv)
+{
+    glfs_t *fs = NULL;
+    int ret = -1;
+    int saved_errno = 0;
+    ssize_t copied = 0;
+    char *volname = NULL;
+    char *logfilepath = NULL;
+    char *path_src = NULL;
+    char *path_dst = NULL;
+    glfs_fd_t *glfd_in = NULL;
+    glfs_fd_t *glfd_out = NULL;
+    char *volfile_server = NULL;
+
+    struct stat stbuf = {
+        0,
+    };
+    struct stat prestat_dst = {
+        0,
+    };
+    struct stat poststat_dst = {
+        0,
+    };
+    size_t len;
+
+    if (argc < 6) {
+        /* Five arguments are required; list all of them. */
+        printf(
+            "%s <volfile server> <volume> <log file path> <source> "
+            "<destination>\n",
+            argv[0]);
+        ret = -1;
+        goto out;
+    }
+
+    volfile_server = argv[1];
+    volname = argv[2];
+    logfilepath = argv[3];
+    path_src = argv[4];
+    path_dst = argv[5];
+
+    if (path_src[0] != '/') {
+        fprintf(stderr, "source path %s is not absolute", path_src);
+        errno = EINVAL;
+        goto out;
+    }
+
+    if (path_dst[0] != '/') {
+        fprintf(stderr, "destination path %s is not absolute", path_dst);
+        errno = EINVAL;
+        goto out;
+    }
+
+    fs = glfs_new(volname);
+    if (!fs) {
+        ret = -errno;
+        fprintf(stderr, "Not able to initialize volume '%s'", volname);
+        goto out;
+    }
+
+    ret = glfs_set_volfile_server(fs, "tcp", volfile_server, 24007);
+    if (ret < 0) {
+        ret = -errno;
+        fprintf(stderr,
+                "Failed to set the volfile server, "
+                "%s",
+                strerror(errno));
+        goto out;
+    }
+
+    ret = glfs_set_logging(fs, logfilepath, 7);
+    if (ret < 0) {
+        ret = -errno;
+        fprintf(stderr,
+                "Failed to set the log file path, "
+                "%s",
+                strerror(errno));
+        goto out;
+    }
+
+    ret = glfs_init(fs);
+    if (ret < 0) {
+        ret = -errno;
+        if (errno == ENOENT) {
+            fprintf(stderr, "Volume %s does not exist", volname);
+        } else {
+            fprintf(stderr,
+                    "%s: Not able to fetch "
+                    "volfile from glusterd",
+                    volname);
+        }
+        goto out;
+    }
+
+    glfd_in = glfs_open(fs, path_src, O_RDONLY | O_NONBLOCK);
+    if (!glfd_in) {
+        ret = -errno;
+        goto out;
+    } else {
+        printf("OPEN_SRC: opening %s is success\n", path_src);
+    }
+
+    glfd_out = glfs_creat(fs, path_dst, O_RDWR, 0644);
+    if (!glfd_out) {
+        fprintf(stderr,
+                "FAILED_DST_OPEN: failed to "
+                "open (create) %s (%s)\n",
+                path_dst, strerror(errno));
+        ret = -errno;
+        goto out;
+    } else {
+        printf("OPEN_DST: opening %s is success\n", path_dst);
+    }
+
+    ret = glfs_fstat(glfd_in, &stbuf);
+    if (ret < 0) {
+        ret = -errno;
+        goto out;
+    } else {
+        /* The fstat was done on the source file, so report that path. */
+        printf("FSTAT_SRC: fstat on %s is success\n", path_src);
+    }
+
+    len = stbuf.st_size;
+
+    do {
+        copied = glfs_copy_file_range(glfd_in, NULL, glfd_out, NULL, len, 0,
+                                      &stbuf, &prestat_dst, &poststat_dst);
+        if (copied < 0) {
+            /* Save errno before fprintf() gets a chance to change it. */
+            saved_errno = errno;
+            fprintf(stderr, "copy_file_range failed with %s\n",
+                    strerror(saved_errno));
+            ret = -saved_errno;
+            break;
+        }
+
+        printf("copy_file_range successful\n");
+        /* Report success as 0 rather than leaking a byte count as status. */
+        ret = 0;
+
+        /* Nothing was copied: stop instead of looping forever. */
+        if (copied == 0)
+            break;
+
+        len -= (size_t)copied;
+    } while (len > 0);
+
+out:
+    if (glfd_in)
+        glfs_close(glfd_in);
+    if (glfd_out)
+        glfs_close(glfd_out);
+
+    cleanup(fs);
+
+    return ret;
+}
diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
index 7bf0d8a8f00..f40b00bba2d 100644
--- a/xlators/debug/io-stats/src/io-stats.c
+++ b/xlators/debug/io-stats/src/io-stats.c
@@ -2119,6 +2119,19 @@ io_stats_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
}
int
+io_stats_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                             int32_t op_ret, int32_t op_errno,
+                             struct iatt *stbuf, struct iatt *prebuf_dst,
+                             struct iatt *postbuf_dst, dict_t *xdata)
+{
+    /* Account the finished fop under COPY_FILE_RANGE before unwinding. */
+    UPDATE_PROFILE_STATS(frame, COPY_FILE_RANGE);
+
+    /* Pass the reply through to the parent unchanged. */
+    STACK_UNWIND_STRICT(copy_file_range, frame, op_ret, op_errno, stbuf,
+                        prebuf_dst, postbuf_dst, xdata);
+
+    return 0;
+}
+
+int
io_stats_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,
dict_t *xdata)
@@ -2873,6 +2886,19 @@ io_stats_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
}
int
+io_stats_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+                         off_t off_in, fd_t *fd_out, off_t off_out, size_t len,
+                         uint32_t flags, dict_t *xdata)
+{
+    /* Start latency measurement for this fop. */
+    START_FOP_LATENCY(frame);
+
+    /* Forward the request unchanged to the first child. */
+    STACK_WIND(frame, io_stats_copy_file_range_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->copy_file_range, fd_in, off_in, fd_out,
+               off_out, len, flags, xdata);
+
+    return 0;
+}
+
+int
io_stats_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
START_FOP_LATENCY(frame);
@@ -4189,6 +4215,7 @@ struct xlator_fops fops = {
.getactivelk = io_stats_getactivelk,
.setactivelk = io_stats_setactivelk,
.compound = io_stats_compound,
+ .copy_file_range = io_stats_copy_file_range,
};
struct xlator_cbks cbks = {
diff --git a/xlators/features/changelog/lib/src/Makefile.am b/xlators/features/changelog/lib/src/Makefile.am
index c4b9a3df692..c933ec53ed2 100644
--- a/xlators/features/changelog/lib/src/Makefile.am
+++ b/xlators/features/changelog/lib/src/Makefile.am
@@ -1,7 +1,7 @@
libgfchangelog_la_CFLAGS = -Wall $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \
-DDATADIR=\"$(localstatedir)\"
-libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -fpic \
+libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 -fpic \
-I../../../src/ -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/xlators/features/changelog/src \
-I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
diff --git a/xlators/features/utime/src/utime-gen-fops-c.py b/xlators/features/utime/src/utime-gen-fops-c.py
index ab56dc9a4b3..8730a51d13e 100755
--- a/xlators/features/utime/src/utime-gen-fops-c.py
+++ b/xlators/features/utime/src/utime-gen-fops-c.py
@@ -62,6 +62,20 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
}
"""
+# C template for the generated copy_file_range fop of the utime xlator:
+# it stores the current time in frame->root->ctime, updates the attribute
+# flags for GF_FOP_COPY_FILE_RANGE and winds the fop to the first child.
+FOPS_COPY_FILE_RANGE_TEMPLATE = """
+int32_t
+gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ gl_timespec_get(&frame->root->ctime);
+
+ (void) utime_update_attribute_flags(frame, this, GF_FOP_COPY_FILE_RANGE);
+ STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
+ return 0;
+}
+"""
+
FOPS_SETATTR_TEMPLATE = """
int32_t
gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
@@ -94,6 +108,7 @@ utime_ops = ['fallocate', 'zerofill', 'opendir', 'mknod', 'mkdir',
utime_read_op = ['readv']
utime_write_op = ['writev']
utime_setattr_ops = ['setattr', 'fsetattr']
+utime_copy_file_range_ops = ['copy_file_range']
def gen_defaults():
for name in ops:
@@ -109,6 +124,9 @@ def gen_defaults():
if name in utime_setattr_ops:
print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs))
print(generate(FOPS_SETATTR_TEMPLATE, name, fop_subs))
+ if name in utime_copy_file_range_ops:
+ print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs))
+ print(generate(FOPS_COPY_FILE_RANGE_TEMPLATE, name, fop_subs))
for l in open(sys.argv[1], 'r').readlines():
if l.find('#pragma generate') != -1:
diff --git a/xlators/features/utime/src/utime-gen-fops-h.py b/xlators/features/utime/src/utime-gen-fops-h.py
index 3686f2e3c1e..e96274c229a 100755
--- a/xlators/features/utime/src/utime-gen-fops-h.py
+++ b/xlators/features/utime/src/utime-gen-fops-h.py
@@ -18,7 +18,7 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
utime_ops = ['fallocate', 'zerofill', 'opendir', 'mknod', 'mkdir',
'unlink', 'rmdir', 'symlink', 'rename', 'link', 'truncate',
'ftruncate', 'create', 'open', 'removexattr', 'fremovexattr',
- 'readv', 'writev', 'setattr', 'fsetattr']
+ 'readv', 'writev', 'setattr', 'fsetattr', 'copy_file_range']
def gen_defaults():
for name, value in ops.items():
diff --git a/xlators/features/utime/src/utime-helpers.c b/xlators/features/utime/src/utime-helpers.c
index c79e12badfa..79cc0145f50 100644
--- a/xlators/features/utime/src/utime-helpers.c
+++ b/xlators/features/utime/src/utime-helpers.c
@@ -93,6 +93,15 @@ utime_update_attribute_flags(call_frame_t *frame, xlator_t *this,
frame->root->flags |= MDATA_CTIME;
break;
+ case GF_FOP_COPY_FILE_RANGE:
+ /* Below 2 are for destination fd */
+ frame->root->flags |= MDATA_CTIME;
+ frame->root->flags |= MDATA_MTIME;
+ /* Below flag is for the source fd */
+ if (!utime_priv->noatime) {
+ frame->root->flags |= MDATA_ATIME;
+ }
+ break;
default:
frame->root->flags = 0;
}
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 3b2622b431f..3f4e19c211e 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -2993,6 +2993,116 @@ fuse_write(xlator_t *this, fuse_in_header_t *finh, void *msg,
return;
}
+#if FUSE_KERNEL_MINOR_VERSION >= 28
+/*
+ * Callback of the copy_file_range fop wound from the fuse bridge.
+ *
+ * On success the number of bytes copied (op_ret) is sent to the kernel in
+ * a fuse_write_out; on failure op_errno is sent as the error. In both
+ * cases the fuse state and the call stack are released afterwards.
+ */
+static int
+fuse_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                         int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+                         struct iatt *prebuf_dst, struct iatt *postbuf_dst,
+                         dict_t *xdata)
+{
+    fuse_state_t *state = NULL;
+    fuse_in_header_t *finh = NULL;
+    /*
+     * Fuse kernel module uses fuse_write_out itself as the
+     * output collector. In fact, fuse_kernel.h in the upstream
+     * kernel just defines the input structure fuse_copy_file_range_in
+     * for the fop. So, just use the fuse_write_out to send the
+     * response back to the kernel.
+     */
+    struct fuse_write_out fcfro = {
+        0,
+    };
+
+    char src_gfid[GF_UUID_BUF_SIZE] = {0};
+    char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+
+    state = frame->root->state;
+    finh = state->finh;
+
+    fuse_log_eh_fop(this, state, frame, op_ret, op_errno);
+
+    if (op_ret >= 0) {
+        /* Label the trace with this fop's name, not WRITE. */
+        gf_log("glusterfs-fuse", GF_LOG_TRACE,
+               "%" PRIu64 ": COPY_FILE_RANGE => %d/%" GF_PRI_SIZET ",%" PRIu64
+               " , %" PRIu64 " ,%" PRIu64 ",%" PRIu64,
+               frame->root->unique, op_ret, state->size, state->off_in,
+               state->off_out, stbuf->ia_size, postbuf_dst->ia_size);
+
+        fcfro.size = op_ret;
+        send_fuse_obj(this, finh, &fcfro);
+    } else {
+        /* Either fd (or its inode) may be missing; log "nil" then. */
+        if (state->fd && state->fd->inode)
+            uuid_utoa_r(state->fd->inode->gfid, src_gfid);
+        else
+            snprintf(src_gfid, sizeof(src_gfid), "nil");
+
+        if (state->fd_dst && state->fd_dst->inode)
+            uuid_utoa_r(state->fd_dst->inode->gfid, dst_gfid);
+        else
+            snprintf(dst_gfid, sizeof(dst_gfid), "nil");
+
+        gf_log("glusterfs-fuse", GF_LOG_WARNING,
+               "%" PRIu64
+               ": COPY_FILE_RANGE => -1 gfid_in=%s fd_in=%p "
+               "gfid_out=%s fd_out=%p (%s)",
+               frame->root->unique, src_gfid, state->fd, dst_gfid,
+               state->fd_dst, strerror(op_errno));
+
+        send_fuse_err(this, finh, op_errno);
+    }
+
+    free_fuse_state(state);
+    STACK_DESTROY(frame->root);
+
+    return 0;
+}
+
+/*
+ * Resume handler run after both fds of a copy_file_range request have
+ * been resolved: traces the request and winds the copy_file_range fop.
+ */
+void
+fuse_copy_file_range_resume(fuse_state_t *state)
+{
+    char src_gfid[GF_UUID_BUF_SIZE] = {0};
+    char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+
+    /*
+     * Format each gfid into its own buffer. Calling uuid_utoa() twice in
+     * one argument list would return the same per-thread buffer for both
+     * and so log the same gfid for source and destination.
+     */
+    uuid_utoa_r(state->fd->inode->gfid, src_gfid);
+    uuid_utoa_r(state->fd_dst->inode->gfid, dst_gfid);
+
+    gf_log("glusterfs-fuse", GF_LOG_TRACE,
+           "%" PRIu64
+           ": COPY_FILE_RANGE "
+           "(input fd: %p (gfid: %s), "
+           "output fd: %p (gfid: %s) size=%zu, "
+           "offset_in=%" PRIu64 ", offset_out=%" PRIu64 ")",
+           state->finh->unique, state->fd, src_gfid, state->fd_dst, dst_gfid,
+           state->size, state->off_in, state->off_out);
+
+    FUSE_FOP(state, fuse_copy_file_range_cbk, GF_FOP_COPY_FILE_RANGE,
+             copy_file_range, state->fd, state->off_in, state->fd_dst,
+             state->off_out, state->size, state->io_flags, state->xdata);
+}
+
+/*
+ * Handler for FUSE_COPY_FILE_RANGE requests.
+ *
+ * Translates the kernel request (@msg is a fuse_copy_file_range_in) into
+ * fuse state: both file handles are converted to fds and registered for
+ * resolution, length/offsets/flags are recorded, and the request is
+ * continued in fuse_copy_file_range_resume() once resolution is done.
+ */
+static void
+fuse_copy_file_range(xlator_t *this, fuse_in_header_t *finh, void *msg,
+                     struct iobuf *iobuf)
+{
+    struct fuse_copy_file_range_in *request = msg;
+    fuse_state_t *state = NULL;
+    fd_t *src_fd = NULL;
+    fd_t *dst_fd = NULL;
+
+    GET_STATE(this, finh, state);
+
+    /* Source side. */
+    src_fd = FH_TO_FD(request->fh_in);
+    /* Destination side. */
+    dst_fd = FH_TO_FD(request->fh_out);
+
+    state->fd = src_fd;
+    state->fd_dst = dst_fd;
+
+    fuse_resolve_fd_init(state, &state->resolve, src_fd);
+    fuse_resolve_fd_init(state, &state->resolve2, dst_fd);
+
+    /* Remaining request parameters, taken verbatim from the kernel. */
+    state->size = request->len;
+    state->off_in = request->off_in;
+    state->off_out = request->off_out;
+    state->io_flags = request->flags;
+
+    fuse_resolve_and_resume(state, fuse_copy_file_range_resume);
+}
+#endif /* FUSE_KERNEL_MINOR_VERSION >= 28 */
+
#if FUSE_KERNEL_MINOR_VERSION >= 24 && HAVE_SEEK_HOLE
static int
fuse_lseek_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
@@ -6087,6 +6197,10 @@ static fuse_handler_t *fuse_std_ops[FUSE_OP_HIGH] = {
#if FUSE_KERNEL_MINOR_VERSION >= 24 && HAVE_SEEK_HOLE
[FUSE_LSEEK] = fuse_lseek,
#endif
+
+#if FUSE_KERNEL_MINOR_VERSION >= 28
+ [FUSE_COPY_FILE_RANGE] = fuse_copy_file_range,
+#endif
};
static fuse_handler_t *fuse_dump_ops[FUSE_OP_HIGH];
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
index 57380786f17..60702ab1da5 100644
--- a/xlators/mount/fuse/src/fuse-bridge.h
+++ b/xlators/mount/fuse/src/fuse-bridge.h
@@ -41,8 +41,32 @@
#include <glusterfs/gidcache.h>
#if defined(GF_LINUX_HOST_OS) || defined(__FreeBSD__) || defined(__NetBSD__)
+
+/*
+ * TODO:
+ * So, with the addition of copy_file_range support, it might
+ * require a bump up of fuse kernel minor version (like it was
+ * done when support for lseek fop was added). But, as of now,
+ * the copy_file_range support has just landed in upstream
+ * kernel fuse module. So, until, there is a release of that
+ * fuse as part of a kernel, the FUSE_KERNEL_MINOR_VERSION
+ * from fuse_kernel.h in the contrib might not be changed.
+ * If so, then the highest op available should be based on
+ * the current minor version (which is 24). So, selectively
+ * determine. When, the minor version is changed to 28 in
+ * fuse_kernel.h from contrib (because in upstream linux
+ * kernel source tree, the kernel minor version which
+ * contains support for copy_file_range is 28), then remove
+ * the reference to FUSE_LSEEK below and just determine
+ * FUSE_OP_HIGH based on copy_file_range.
+ */
+#if FUSE_KERNEL_MINOR_VERSION >= 28
+#define FUSE_OP_HIGH (FUSE_COPY_FILE_RANGE + 1)
+#else
#define FUSE_OP_HIGH (FUSE_LSEEK + 1)
#endif
+
+#endif
#ifdef GF_DARWIN_HOST_OS
#define FUSE_OP_HIGH (FUSE_DESTROY + 1)
#endif
@@ -400,10 +424,22 @@ typedef struct {
loc_t loc2;
fuse_in_header_t *finh;
int32_t flags;
+
off_t off;
+ /*
+ * The man page of copy_file_range tells that the offset
+ * arguments are of type loff_t *. Here in fuse state, the values of
+ * those offsets are saved instead of pointers as the kernel sends
+ * the values of the offsets from those pointers instead of pointers.
+ * But the type loff_t is linux specific and is actually a typedef of
+ * off64_t. Hence using off64_t
+ */
+ off64_t off_in; /* for copy_file_range source fd */
+ off64_t off_out; /* for copy_file_range destination fd */
size_t size;
unsigned long nlookup;
fd_t *fd;
+ fd_t *fd_dst; /* for copy_file_range destination */
dict_t *xattr;
dict_t *xdata;
char *name;
diff --git a/xlators/protocol/client/src/client-common.c b/xlators/protocol/client/src/client-common.c
index 7708c820918..64db98d661b 100644
--- a/xlators/protocol/client/src/client-common.c
+++ b/xlators/protocol/client/src/client-common.c
@@ -2556,6 +2556,38 @@ out:
}
int
+client_pre_copy_file_range_v2(xlator_t *this, gfx_copy_file_range_req *req,
+                              fd_t *fd_in, off64_t off_in, fd_t *fd_out,
+                              off64_t off_out, size_t size, int32_t flags,
+                              dict_t **xdata)
+{
+    /*
+     * Fills @req for a copy_file_range call: remote fds and gfids of both
+     * files, offsets, size, flags and the serialized xdata.
+     * Returns 0 on success or a negative errno value when a remote fd
+     * could not be obtained.
+     */
+    int64_t src_remote_fd = -1;
+    int64_t dst_remote_fd = -1;
+    int op_errno = ESTALE;
+
+    CLIENT_GET_REMOTE_FD(this, fd_in, FALLBACK_TO_ANON_FD, src_remote_fd,
+                         op_errno, out);
+
+    CLIENT_GET_REMOTE_FD(this, fd_out, FALLBACK_TO_ANON_FD, dst_remote_fd,
+                         op_errno, out);
+
+    /* Source file. */
+    req->fd_in = src_remote_fd;
+    req->off_in = off_in;
+    memcpy(req->gfid1, fd_in->inode->gfid, 16);
+
+    /* Destination file. */
+    req->fd_out = dst_remote_fd;
+    req->off_out = off_out;
+    memcpy(req->gfid2, fd_out->inode->gfid, 16);
+
+    req->size = size;
+    req->flag = flags;
+
+    dict_to_xdr(*xdata, &req->xdata);
+
+    return 0;
+out:
+    return -op_errno;
+}
+
+int
client_pre_statfs_v2(xlator_t *this, gfx_statfs_req *req, loc_t *loc,
dict_t *xdata)
{
diff --git a/xlators/protocol/client/src/client-common.h b/xlators/protocol/client/src/client-common.h
index 5214eae128e..a2043d8742a 100644
--- a/xlators/protocol/client/src/client-common.h
+++ b/xlators/protocol/client/src/client-common.h
@@ -621,4 +621,10 @@ client_post_rename_v2(xlator_t *this, gfx_rename_rsp *rsp, struct iatt *stbuf,
struct iatt *prenewparent, struct iatt *postnewparent,
dict_t **xdata);
+int
+client_pre_copy_file_range_v2(xlator_t *this, gfx_copy_file_range_req *req,
+ fd_t *fd_in, off64_t off_in, fd_t *fd_out,
+ off64_t off_out, size_t size, int32_t flags,
+ dict_t **xdata);
+
#endif /* __CLIENT_COMMON_H__ */
diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c
index 849fdfca0bc..55e87b3c370 100644
--- a/xlators/protocol/client/src/client-helpers.c
+++ b/xlators/protocol/client/src/client-helpers.c
@@ -2459,6 +2459,20 @@ client_handle_fop_requirements_v2(
lease, this, &this_req->compound_req_v2_u.compound_lease_req,
op_errno, out, &args->loc, &args->lease, args->xdata);
break;
+ case GF_FOP_COPY_FILE_RANGE:
+ /*
+ * Not going to handle the copy_file_range fop in compound
+ * operations. This is because compound operations are going
+ * to be removed. In fact, AFR, one of the heavy consumers of
+ * compound operations, has stopped using them.
+ * https://github.com/gluster/glusterfs/issues/414
+ * Therefore, sending ENOTSUP error for this fop coming as a
+ * compound request. Though there was technically no need to
+ * handle "case GF_FOP_COPY_FILE_RANGE", this comment
+ * under the label of GF_FOP_COPY_FILE_RANGE will help in
+ * understanding that this fop does not handle the compound
+ * request and why.
+ */
default:
return ENOTSUP;
}
@@ -2631,6 +2645,14 @@ compound_request_cleanup_v2(gfx_compound_req *req)
case GF_FOP_SEEK:
CLIENT4_COMPOUND_FOP_CLEANUP(curr_req, seek);
break;
+ case GF_FOP_COPY_FILE_RANGE:
+ /*
+ * This fop is not handled in compound operations.
+ * Check the comment added under this fop's section
+ * in client_handle_fop_requirements_v2. Therefore
+ * keeping this label only as a placeholder with
+ * a message that this fop is not handled.
+ */
default:
break;
}
@@ -3004,6 +3026,12 @@ client_process_response_v2(call_frame_t *frame, xlator_t *this,
&this_args_cbk->lease, xdata);
break;
}
+ case GF_FOP_COPY_FILE_RANGE:
+ /*
+ * Not handling this fop. Returning ENOTSUP. Check
+ * the comment added for this fop in the function
+ * client_handle_fop_requirements_v2.
+ */
default:
return -ENOTSUP;
}
diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c
index ca180c1db4b..8f3ee41e5c5 100644
--- a/xlators/protocol/client/src/client-rpc-fops_v2.c
+++ b/xlators/protocol/client/src/client-rpc-fops_v2.c
@@ -2833,6 +2833,72 @@ out:
return 0;
}
+/*
+ * RPC reply handler for the COPY_FILE_RANGE request.
+ *
+ * Decodes the reply as a gfx_common_3iatt_rsp, converts it into three
+ * iatt buffers plus xdata and unwinds the copy_file_range fop. Always
+ * returns 0; the outcome of the fop is reported through the unwind.
+ */
+int
+client4_0_copy_file_range_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gfx_common_3iatt_rsp rsp = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ struct iatt prestat = {
+ 0,
+ };
+ struct iatt poststat = {
+ 0,
+ };
+ int ret = 0;
+ xlator_t *this = NULL;
+ dict_t *xdata = NULL;
+ clnt_local_t *local = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+ local = frame->local;
+
+ /* The request never got a reply: fail the fop with ENOTCONN. */
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gfx_common_3iatt_rsp);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, PC_MSG_XDR_DECODING_FAILED,
+ "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = client_post_common_3iatt(this, &rsp, &stbuf, &prestat, &poststat,
+ &xdata);
+ if (ret < 0)
+ goto out;
+out:
+ if (rsp.op_ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, gf_error_to_errno(rsp.op_errno),
+ PC_MSG_REMOTE_OP_FAILED, "remote operation failed");
+ } else if (rsp.op_ret >= 0) {
+ /* On success, reopen whichever of the two fds was flagged for it. */
+ if (local->attempt_reopen)
+ client_attempt_reopen(local->fd, this);
+ if (local->attempt_reopen_out)
+ client_attempt_reopen(local->fd_out, this);
+ }
+ /* NOTE(review): local is only read before this unwind - keep it so. */
+ CLIENT_STACK_UNWIND(copy_file_range, frame, rsp.op_ret,
+ gf_error_to_errno(rsp.op_errno), &stbuf, &prestat,
+ &poststat, xdata);
+
+ if (xdata)
+ dict_unref(xdata);
+
+ return 0;
+}
+
int32_t
client4_0_releasedir(call_frame_t *frame, xlator_t *this, void *data)
{
@@ -5846,6 +5912,80 @@ unwind:
}
int32_t
+client4_0_copy_file_range(call_frame_t *frame, xlator_t *this, void *data)
+{
+ /*
+ * Builds a gfx_copy_file_range_req from the clnt_args_t in @data and
+ * submits it as GFS3_OP_COPY_FILE_RANGE. On a preparation failure the
+ * fop is unwound here with -1 and the corresponding errno. Always
+ * returns 0.
+ */
+ clnt_args_t *args = NULL;
+ clnt_conf_t *conf = NULL;
+ clnt_local_t *local = NULL;
+ gfx_copy_file_range_req req = {
+ {
+ 0,
+ },
+ };
+ int op_errno = ESTALE;
+ int ret = 0;
+
+ if (!frame || !this || !data)
+ goto unwind;
+
+ args = data;
+ conf = this->private;
+
+ /* Fill the request: remote fds, gfids, offsets, size, flags, xdata. */
+ ret = client_pre_copy_file_range_v2(this, &req, args->fd, args->off_in,
+ args->fd_out, args->off_out, args->size,
+ args->flags, &args->xdata);
+
+ if (ret) {
+ op_errno = -ret;
+ goto unwind;
+ }
+
+ ret = client_fd_fop_prepare_local(frame, args->fd, req.fd_in);
+ if (ret) {
+ op_errno = -ret;
+ goto unwind;
+ }
+
+ /*
+ * Since frame->local is allocated in above function call
+ * itself, better to use it (with the assumption that it
+ * has been allocated) directly instead of again calling
+ * client_fd_fop_prepare_local or modifying it, as doing
+ * so requires changes in other places as well.
+ */
+
+ local = frame->local;
+ /* Keep a reference on the destination fd for the reply callback. */
+ local->fd_out = fd_ref(args->fd_out);
+ local->attempt_reopen_out = client_is_reopen_needed(args->fd_out, this,
+ req.fd_out);
+
+ ret = client_submit_request(
+ this, &req, frame, conf->fops, GFS3_OP_COPY_FILE_RANGE,
+ client4_0_copy_file_range_cbk, NULL, NULL, 0, NULL, 0, NULL,
+ (xdrproc_t)xdr_gfx_copy_file_range_req);
+ if (ret) {
+ /*
+ * If the lower layers fail to submit a request, they'll also
+ * do the unwind for us (see rpc_clnt_submit), so don't unwind
+ * here in such cases.
+ */
+ gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+ "failed to send the fop");
+ }
+
+ /* The serialized xdata pairs are freed on both exit paths. */
+ GF_FREE(req.xdata.pairs.pairs_val);
+
+ return 0;
+
+unwind:
+ CLIENT_STACK_UNWIND(copy_file_range, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+ GF_FREE(req.xdata.pairs.pairs_val);
+
+ return 0;
+}
+
+int32_t
client4_0_fsetattr(call_frame_t *frame, xlator_t *this, void *data)
{
clnt_args_t *args = NULL;
@@ -6257,6 +6397,7 @@ rpc_clnt_procedure_t clnt4_0_fop_actors[GF_FOP_MAXVALUE] = {
[GF_FOP_COMPOUND] = {"COMPOUND", client4_0_compound},
[GF_FOP_ICREATE] = {"ICREATE", client4_0_icreate},
[GF_FOP_NAMELINK] = {"NAMELINK", client4_0_namelink},
+ [GF_FOP_COPY_FILE_RANGE] = {"COPY-FILE-RANGE", client4_0_copy_file_range},
};
rpc_clnt_prog_t clnt4_0_fop_prog = {
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index 38723b43b45..c8e84f6e1b7 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -1129,6 +1129,41 @@ out:
return 0;
}
+/*
+ * copy_file_range fop of the protocol/client xlator.
+ *
+ * Packs the fop arguments into a clnt_args_t and dispatches them to the
+ * GF_FOP_COPY_FILE_RANGE entry of the program's procedure table. If the
+ * client has no usable fop program, or the procedure is missing or
+ * returns non-zero, the fop is unwound with -1/ENOTCONN. Always returns 0.
+ */
+int32_t
+client_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+                       off_t off_in, fd_t *fd_out, off_t off_out, size_t len,
+                       uint32_t flags, dict_t *xdata)
+{
+    int ret = -1;
+    clnt_conf_t *conf = NULL;
+    rpc_clnt_procedure_t *proc = NULL;
+    clnt_args_t args = {
+        0,
+    };
+
+    conf = this->private;
+    if (!conf || !conf->fops)
+        goto out;
+
+    args.fd = fd_in;
+    args.fd_out = fd_out;
+    /*
+     * The v2 procedure reads the source offset from args.off_in, so it
+     * must be set here; otherwise every request would be sent with a
+     * source offset of 0. args.offset is kept in sync as well.
+     */
+    args.off_in = off_in;
+    args.offset = off_in;
+    args.off_out = off_out;
+    args.size = len;
+    args.flags = flags;
+    args.xdata = xdata;
+
+    proc = &conf->fops->proctable[GF_FOP_COPY_FILE_RANGE];
+    if (proc->fn)
+        ret = proc->fn(frame, this, &args);
+out:
+    if (ret)
+        STACK_UNWIND_STRICT(copy_file_range, frame, -1, ENOTCONN, NULL, NULL,
+                            NULL, NULL);
+
+    return 0;
+}
+
static gf_boolean_t
is_client_rpc_init_command(dict_t *dict, xlator_t *this, char **value)
{
@@ -2898,6 +2933,7 @@ struct xlator_fops fops = {
.icreate = client_icreate,
.namelink = client_namelink,
.put = client_put,
+ .copy_file_range = client_copy_file_range,
};
struct xlator_dumpops dumpops = {
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
index 5fc75a84628..71f84f3ca89 100644
--- a/xlators/protocol/client/src/client.h
+++ b/xlators/protocol/client/src/client.h
@@ -269,6 +269,7 @@ typedef struct client_local {
loc_t loc;
loc_t loc2;
fd_t *fd;
+ fd_t *fd_out; /* used in copy_file_range */
clnt_fd_ctx_t *fdctx;
uint32_t flags;
struct iobref *iobref;
@@ -280,6 +281,11 @@ typedef struct client_local {
pthread_mutex_t mutex;
char *name;
gf_boolean_t attempt_reopen;
+ /*
+ * The below boolean variable is used
+ * only for copy_file_range fop
+ */
+ gf_boolean_t attempt_reopen_out;
/* required for compound fops */
compound_args_t *compound_args;
unsigned int length; /* length of a compound fop */
@@ -289,7 +295,13 @@ typedef struct client_local {
typedef struct client_args {
loc_t *loc;
+ /*
+ * This is the source fd for copy_file_range and
+ * the default fd for any other fd based fop which
+ * requires only one fd (i.e. operates on one fd)
+ */
fd_t *fd;
+ fd_t *fd_out; /* this is the destination fd for copy_file_range */
const char *linkname;
struct iobref *iobref;
struct iovec *vector;
@@ -301,7 +313,17 @@ typedef struct client_args {
struct gf_flock *flock;
const char *volume;
const char *basename;
+
off_t offset;
+ /*
+ * According to the man page of copy_file_range,
+ * the offsets for source and destination file
+ * are of type loff_t. But the type loff_t is
+ * linux specific and is actually a typedef of
+ * off64_t.
+ */
+ off64_t off_in; /* used in copy_file_range for source fd */
+ off64_t off_out; /* used in copy_file_range for dst fd */
int32_t mask;
int32_t cmd;
size_t size;
diff --git a/xlators/protocol/server/src/server-common.c b/xlators/protocol/server/src/server-common.c
index 25b36155065..0639ac3feb3 100644
--- a/xlators/protocol/server/src/server-common.c
+++ b/xlators/protocol/server/src/server-common.c
@@ -541,6 +541,16 @@ server4_post_common_3iatt(server_state_t *state, gfx_common_3iatt_rsp *rsp,
}
+/*
+ * Fill a gfx_common_3iatt_rsp from three iatt structures. Only the stat
+ * conversion is done here; no state or inode is taken as an argument.
+ *
+ * @rsp:         response being built
+ * @stbuf:       converted into rsp->stat
+ * @prebuf_dst:  converted into rsp->preparent
+ * @postbuf_dst: converted into rsp->postparent
+ */
void
+server4_post_common_3iatt_noinode(gfx_common_3iatt_rsp *rsp, struct iatt *stbuf,
+                                  struct iatt *prebuf_dst,
+                                  struct iatt *postbuf_dst)
+{
+    /* Each conversion targets a different member of @rsp. */
+    gfx_stat_from_iattx(&rsp->postparent, postbuf_dst);
+    gfx_stat_from_iattx(&rsp->preparent, prebuf_dst);
+    gfx_stat_from_iattx(&rsp->stat, stbuf);
+}
+
+void
server4_post_common_2iatt(gfx_common_2iatt_rsp *rsp, struct iatt *prebuf,
struct iatt *postbuf)
{
diff --git a/xlators/protocol/server/src/server-common.h b/xlators/protocol/server/src/server-common.h
index 2844ee95756..6200415e304 100644
--- a/xlators/protocol/server/src/server-common.h
+++ b/xlators/protocol/server/src/server-common.h
@@ -192,3 +192,8 @@ void
server4_post_link(server_state_t *state, gfx_common_3iatt_rsp *rsp,
inode_t *inode, struct iatt *stbuf, struct iatt *pre,
struct iatt *post);
+
+/*
+ * Build a 3-iatt response from @stbuf, @prebuf_dst and @postbuf_dst.
+ * Unlike server4_post_link() above, it takes neither a server state nor
+ * an inode.
+ */
+void
+server4_post_common_3iatt_noinode(gfx_common_3iatt_rsp *rsp, struct iatt *stbuf,
+                                  struct iatt *prebuf_dst,
+                                  struct iatt *postbuf_dst);
diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c
index c55a422679d..8ad2d8492ed 100644
--- a/xlators/protocol/server/src/server-helpers.c
+++ b/xlators/protocol/server/src/server-helpers.c
@@ -4948,6 +4948,8 @@ server_populate_compound_response_v2(xlator_t *this, gfx_compound_rsp *rsp,
rsp_args->op_errno = gf_errno_to_error(this_args_cbk->op_errno);
break;
}
+ case GF_FOP_COPY_FILE_RANGE:
+ /* Not handling this fop. */
default:
return ENOTSUP;
}
@@ -5380,6 +5382,12 @@ server_get_compound_resolve_v2(server_state_t *state, gfx_compound_req *req)
memcpy(state->resolve.gfid, this_req.gfid, 16);
break;
}
+ case GF_FOP_COPY_FILE_RANGE:
+ /*
+ * Compound operations are no longer used and are
+ * planned for subsequent removal. Hence this fop is
+ * not handled here.
+ */
default:
return ENOTSUP;
}
diff --git a/xlators/protocol/server/src/server-resolve.c b/xlators/protocol/server/src/server-resolve.c
index 26260a5ee2c..ec768acba44 100644
--- a/xlators/protocol/server/src/server-resolve.c
+++ b/xlators/protocol/server/src/server-resolve.c
@@ -545,14 +545,39 @@ server_resolve_fd(call_frame_t *frame)
return 0;
}
- state->fd = gf_fd_fdptr_get(serv_ctx->fdtable, fd_no);
-
+ /*
+ * With copy_file_range, there will be 2 fds to resolve.
+ * This same function is called to resolve both the source
+ * fd and the destination fd. As of now, this function does
+ * not have any mechanism to distinguish between the 2 fds
+ * being resolved except for checking the value of state->fd.
+ * The assumption is that, if the source fd is the one
+ * being resolved here, then state->fd would be NULL. If it
+ * is not NULL, then it is the destination fd which is being
+ * resolved.
+ * This method (provided the above assumption is true) is
+ * to achieve the ability to distinguish between 2 fds with
+ * minimum changes being done to this function. If this way
+ * is not correct, then more changes might be needed.
+ */
if (!state->fd) {
- gf_msg("", GF_LOG_INFO, EBADF, PS_MSG_FD_NOT_FOUND,
- "fd not "
- "found in context");
- resolve->op_ret = -1;
- resolve->op_errno = EBADF;
+ state->fd = gf_fd_fdptr_get(serv_ctx->fdtable, fd_no);
+ if (!state->fd) {
+ gf_msg("", GF_LOG_INFO, EBADF, PS_MSG_FD_NOT_FOUND,
+ "fd not "
+ "found in context");
+ resolve->op_ret = -1;
+ resolve->op_errno = EBADF;
+ }
+ } else {
+ state->fd_out = gf_fd_fdptr_get(serv_ctx->fdtable, fd_no);
+ if (!state->fd_out) {
+ gf_msg("", GF_LOG_INFO, EBADF, PS_MSG_FD_NOT_FOUND,
+ "fd not "
+ "found in context");
+ resolve->op_ret = -1;
+ resolve->op_errno = EBADF;
+ }
}
server_resolve_all(frame);
diff --git a/xlators/protocol/server/src/server-rpc-fops_v2.c b/xlators/protocol/server/src/server-rpc-fops_v2.c
index c5a8e482621..16570294f6d 100644
--- a/xlators/protocol/server/src/server-rpc-fops_v2.c
+++ b/xlators/protocol/server/src/server-rpc-fops_v2.c
@@ -2259,6 +2259,64 @@ out:
return 0;
}
+/*
+ * Server-side callback of the copy_file_range fop: builds a
+ * gfx_common_3iatt_rsp and submits it as the reply for the request
+ * stored in frame->local.
+ *
+ * @stbuf:       iatt of the source file (or fd)
+ * @prebuf_dst:  iatt of the destination file (or fd) before the fop
+ * @postbuf_dst: iatt of the destination file (or fd) after the fop
+ * @xdata:       serialized into the response unconditionally
+ *
+ * On failure (op_ret < 0) the iatts are not touched; the failure is
+ * logged together with both fd numbers and gfids. Always returns 0.
+ */
+int
+server4_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                            int32_t op_ret, int32_t op_errno,
+                            struct iatt *stbuf, struct iatt *prebuf_dst,
+                            struct iatt *postbuf_dst, dict_t *xdata)
+{
+    gfx_common_3iatt_rsp reply = {
+        0,
+    };
+    char src_gfid[GF_UUID_BUF_SIZE] = {0};
+    char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+    rpcsvc_request_t *request = NULL;
+    server_state_t *st = NULL;
+
+    dict_to_xdr(xdata, &reply.xdata);
+
+    if (op_ret >= 0) {
+        /*
+         * server4_post_common_3iatt (used e.g. by server4_put_cbk) also
+         * links the inode while copying the 3 iatt structures. No inode
+         * linking is needed for copy_file_range, hence the _noinode
+         * variant.
+         */
+        server4_post_common_3iatt_noinode(&reply, stbuf, prebuf_dst,
+                                          postbuf_dst);
+    } else {
+        st = CALL_STATE(frame);
+
+        uuid_utoa_r(st->resolve.gfid, src_gfid);
+        uuid_utoa_r(st->resolve2.gfid, dst_gfid);
+
+        gf_msg(this->name, fop_log_level(GF_FOP_COPY_FILE_RANGE, op_errno),
+               op_errno, PS_MSG_WRITE_INFO,
+               "%" PRId64 ": COPY_FILE_RANGE %" PRId64 " (%s), %" PRId64
+               " (%s) client: %s, "
+               "error-xlator: %s",
+               frame->root->unique, st->resolve.fd_no, src_gfid,
+               st->resolve2.fd_no, dst_gfid, STACK_CLIENT_NAME(frame->root),
+               STACK_ERR_XL_NAME(frame->root));
+    }
+
+    reply.op_ret = op_ret;
+    reply.op_errno = gf_errno_to_error(op_errno);
+
+    request = frame->local;
+    server_submit_reply(frame, request, &reply, NULL, 0, NULL,
+                        (xdrproc_t)xdr_gfx_common_3iatt_rsp);
+
+    GF_FREE(reply.xdata.pairs.pairs_val);
+
+    return 0;
+}
+
+
/* Resume function section */
int
@@ -3448,6 +3506,29 @@ err:
}
+/*
+ * Resume function of copy_file_range, run once both fds have been
+ * through resolution. state->resolve carries the result for the source
+ * fd and state->resolve2 the result for the destination fd.
+ *
+ * If either resolution failed, the callback is invoked directly with
+ * that resolution's op_ret/op_errno and the fop is not wound; otherwise
+ * the fop is wound to @bound_xl. Always returns 0.
+ */
int
+server4_copy_file_range_resume(call_frame_t *frame, xlator_t *bound_xl)
+{
+    server_state_t *state = NULL;
+    int op_ret = 0;
+    int op_errno = 0;
+
+    state = CALL_STATE(frame);
+
+    if (state->resolve.op_ret != 0) {
+        op_ret = state->resolve.op_ret;
+        op_errno = state->resolve.op_errno;
+        goto err;
+    }
+
+    /*
+     * The destination fd is resolved through resolve2. Without this
+     * check a failed destination resolve would wind the fop with
+     * state->fd_out == NULL.
+     */
+    if (state->resolve2.op_ret != 0) {
+        op_ret = state->resolve2.op_ret;
+        op_errno = state->resolve2.op_errno;
+        goto err;
+    }
+
+    STACK_WIND(frame, server4_copy_file_range_cbk, bound_xl,
+               bound_xl->fops->copy_file_range, state->fd, state->off_in,
+               state->fd_out, state->off_out, state->size, state->flags,
+               state->xdata);
+
+    return 0;
+err:
+    server4_copy_file_range_cbk(frame, NULL, frame->this, op_ret, op_errno,
+                                NULL, NULL, NULL, NULL);
+    return 0;
+}
+
+
+int
server4_0_stat(rpcsvc_request_t *req)
{
server_state_t *state = NULL;
@@ -6104,6 +6185,53 @@ out:
return ret;
}
+/*
+ * RPC actor for the copy_file_range request.
+ *
+ * Decodes the request with rpc_receive_common(), copies the two fd
+ * numbers, the two offsets, the size, the flags and both gfids from the
+ * decoded arguments into the server state, converts the request xdata
+ * into a dict and then hands over to resolve_and_resume() with
+ * server4_copy_file_range_resume as the resume function.
+ *
+ * Returns -1 if @req is NULL, the non-zero value of rpc_receive_common()
+ * if decoding fails, and 0 otherwise.
+ */
+int
+server4_0_copy_file_range(rpcsvc_request_t *req)
+{
+    server_state_t *state = NULL;
+    call_frame_t *frame = NULL;
+    gfx_copy_file_range_req args = {
+        {
+            0,
+        },
+    };
+    ssize_t len = 0;
+    int ret = -1;
+    int op_errno = 0;
+
+    if (!req)
+        return ret;
+
+    ret = rpc_receive_common(req, &frame, &state, &len, &args,
+                             xdr_gfx_copy_file_range_req,
+                             GF_FOP_COPY_FILE_RANGE);
+    if (ret != 0) {
+        goto out;
+    }
+
+    /* resolve describes the source fd, resolve2 the destination fd */
+    state->resolve.type = RESOLVE_MUST;
+    state->resolve.fd_no = args.fd_in;
+    state->resolve2.type = RESOLVE_MUST; /* the destination must resolve too */
+    state->resolve2.fd_no = args.fd_out;
+    state->off_in = args.off_in;
+    state->off_out = args.off_out;
+    state->size = args.size;
+    state->flags = args.flag;
+    memcpy(state->resolve.gfid, args.gfid1, 16);
+    memcpy(state->resolve2.gfid, args.gfid2, 16);
+
+    /* NOTE(review): the return value of xdr_to_dict() is not checked */
+    xdr_to_dict(&args.xdata, &state->xdata);
+
+    ret = 0;
+    resolve_and_resume(frame, server4_copy_file_range_resume);
+out:
+
+    /*
+     * NOTE(review): op_errno is never assigned after its initialization
+     * to 0 in this function, so as written this branch is not taken.
+     */
+    if (op_errno)
+        SERVER_REQ_SET_ERROR(req, ret);
+
+    return ret;
+}
+
+
rpcsvc_actor_t glusterfs4_0_fop_actors[] = {
[GFS3_OP_NULL] = {"NULL", GFS3_OP_NULL, server_null, NULL, 0},
[GFS3_OP_STAT] = {"STAT", GFS3_OP_STAT, server4_0_stat, NULL, 0},
@@ -6195,6 +6323,8 @@ rpcsvc_actor_t glusterfs4_0_fop_actors[] = {
DRC_NA},
[GFS3_OP_NAMELINK] = {"NAMELINK", GFS3_OP_NAMELINK, server4_0_namelink,
NULL, 0, DRC_NA},
+ [GFS3_OP_COPY_FILE_RANGE] = {"COPY-FILE-RANGE", GFS3_OP_COPY_FILE_RANGE,
+ server4_0_copy_file_range, NULL, 0, DRC_NA},
};
struct rpcsvc_program glusterfs4_0_fop_prog = {
diff --git a/xlators/protocol/server/src/server.h b/xlators/protocol/server/src/server.h
index 2a77aba1f3c..bdf98c96f1c 100644
--- a/xlators/protocol/server/src/server.h
+++ b/xlators/protocol/server/src/server.h
@@ -180,7 +180,12 @@ struct _server_state {
struct iatt stbuf;
int valid;
+ /*
+ * this fd is used in all the fd based operations PLUS
+ * as a source fd in copy_file_range
+ */
fd_t *fd;
+ fd_t *fd_out; /* destination fd in copy_file_range */
dict_t *params;
int32_t flags;
int wbflags;
@@ -191,6 +196,15 @@ struct _server_state {
size_t size;
off_t offset;
+ /*
+ * According to the man page of copy_file_range,
+ * the offsets for source and destination file
+ * are of type loff_t. But the type loff_t is
+ * Linux specific and is actually a typedef of
+ * off64_t.
+ */
+ off64_t off_in; /* source offset in copy_file_range */
+ off64_t off_out; /* destination offset in copy_file_range */
mode_t mode;
dev_t dev;
size_t nr_count;
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index ed0516d4c4a..54fc1dc1195 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -3314,3 +3314,23 @@ unlock:
out:
return ret;
}
+
+/*
+ * Refuse a fop on a block or character device inode.
+ *
+ * @this:     xlator whose name is used for logging
+ * @inode:    inode whose ia_type is examined
+ * @fop:      name of the fop, used only in the log message
+ * @op_errno: set to EINVAL when the inode is a device file
+ *
+ * Returns -1 (after logging an error) for IA_IFBLK and IA_IFCHR inodes,
+ * 0 for every other inode type.
+ */
+int
+posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno)
+{
+    const char *dev_kind = NULL;
+
+    switch (inode->ia_type) {
+        case IA_IFBLK:
+            dev_kind = "block";
+            break;
+        case IA_IFCHR:
+            dev_kind = "char";
+            break;
+        default:
+            /* not a device file: nothing to reject */
+            return 0;
+    }
+
+    *op_errno = EINVAL;
+    gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_INVALID_ARGUMENT,
+           "%s received on %s file (%s)", fop, dev_kind,
+           uuid_utoa(inode->gfid));
+
+    return -1;
+}
diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
index 9e2b37f582c..dafd1855ef9 100644
--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
+++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
@@ -1960,6 +1960,274 @@ out:
}
+/*
+ * posix implementation of the copy_file_range fop.
+ *
+ * @fd_in / @off_in:   source fd and offset
+ * @fd_out / @off_out: destination fd and offset
+ * @len:               number of bytes requested to be copied
+ * @flags:             passed through to sys_copy_file_range()
+ * @xdata:             request dict; GLUSTERFS_WRITE_UPDATE_ATOMIC in it asks
+ *                     for prestat + copy + poststat to be done while holding
+ *                     the destination inode's write_atomic_lock
+ *
+ * Block and character device inodes are rejected with EINVAL. The
+ * destination is stat-ed before and after the copy and the source is
+ * stat-ed after it; the fop is unwound with the source iatt, the
+ * destination pre-op iatt and the destination post-op iatt. On success
+ * op_ret is the value returned by sys_copy_file_range(). Always
+ * returns 0; the result is reported through STACK_UNWIND_STRICT.
+ */
int32_t
+posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+                      off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len,
+                      uint32_t flags, dict_t *xdata)
+{
+    int32_t op_ret = -1;
+    int32_t op_errno = 0;
+    int _fd_in = -1;
+    int _fd_out = -1;
+    struct posix_private *priv = NULL;
+    struct posix_fd *pfd_in = NULL;
+    struct posix_fd *pfd_out = NULL;
+    struct iatt preop_dst = {
+        0,
+    };
+    struct iatt postop_dst = {
+        0,
+    };
+    struct iatt stbuf = {
+        0,
+    };
+    int ret = -1;
+    dict_t *rsp_xdata = NULL;
+    int is_append = 0;
+    gf_boolean_t locked = _gf_false;
+    gf_boolean_t update_atomic = _gf_false;
+    posix_inode_ctx_t *ctx = NULL;
+
+    VALIDATE_OR_GOTO(frame, out);
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(fd_in, out);
+    VALIDATE_OR_GOTO(fd_in->inode, out);
+    VALIDATE_OR_GOTO(fd_out, out);
+    VALIDATE_OR_GOTO(fd_out->inode, out);
+    VALIDATE_OR_GOTO(this->private, out);
+
+    priv = this->private;
+
+    VALIDATE_OR_GOTO(priv, out);
+    DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+    if (posix_check_dev_file(this, fd_in->inode, "copy_file_range", &op_errno))
+        goto out;
+
+    if (posix_check_dev_file(this, fd_out->inode, "copy_file_range", &op_errno))
+        goto out;
+
+    ret = posix_fd_ctx_get(fd_in, this, &pfd_in, &op_errno);
+    if (ret < 0) {
+        gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
+               "pfd is NULL from fd=%p", fd_in);
+        goto out;
+    }
+
+    _fd_in = pfd_in->fd;
+
+    ret = posix_fd_ctx_get(fd_out, this, &pfd_out, &op_errno);
+    if (ret < 0) {
+        gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
+               "pfd is NULL from fd=%p", fd_out);
+        goto out;
+    }
+
+    _fd_out = pfd_out->fd;
+
+    /*
+     * Currently, the internal write is checked via xdata which
+     * is set by some xlator above. It could be due to several
+     * reasons such as healing or a snapshot operation happening
+     * using copy_file_range. As of now (i.e. at the time of writing
+     * this change) none of the xlators above posix use the
+     * internal write with copy_file_range. In future it might
+     * change. At least as of now the hope is that, when that happens,
+     * this function or fop does not require additional changes for
+     * handling internal writes.
+     */
+    ret = posix_check_internal_writes(this, fd_out, _fd_out, xdata);
+    if (ret < 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+               "possible overwrite from internal client, fd=%p", fd_out);
+        op_ret = -1;
+        op_errno = EBUSY;
+        goto out;
+    }
+
+    if (xdata) {
+        if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC))
+            update_atomic = _gf_true;
+    }
+
+    /*
+     * The update_atomic option instructs posix to do prestat,
+     * write and poststat atomically. This is to prevent any modification
+     * to ia_size and ia_blocks until poststat; the diff in their values
+     * between pre and poststat could be of use for some translators.
+     * This is similar to the atomic write operation. Atomic write
+     * (i.e. prestat + write + poststat) is used by shard as of now. In
+     * case some xlator needs copy_file_range to be atomic from the prestat
+     * and poststat perspective (i.e. prestat + copy_file_range + poststat)
+     * then it has to send the "GLUSTERFS_WRITE_UPDATE_ATOMIC" key in xdata.
+     */
+
+    op_ret = posix_inode_ctx_get_all(fd_out->inode, this, &ctx);
+    if (op_ret < 0) {
+        op_errno = ENOMEM;
+        goto out;
+    }
+
+    if (update_atomic) {
+        /*
+         * pthread_mutex_lock() reports failure through its return
+         * value, not through errno. op_ret is non-negative at this
+         * point (posix_inode_ctx_get_all() succeeded), so it has to
+         * be reset explicitly; otherwise the fop would be unwound as
+         * a success with op_errno 0.
+         */
+        ret = pthread_mutex_lock(&ctx->write_atomic_lock);
+        if (ret) {
+            op_ret = -1;
+            op_errno = ret;
+            gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_MUTEX_FAILED,
+                   "failed to hold write atomic lock on %s",
+                   uuid_utoa(fd_out->inode->gfid));
+            goto out;
+        }
+        locked = _gf_true;
+    }
+
+    op_ret = posix_fdstat(this, fd_out->inode, _fd_out, &preop_dst);
+    if (op_ret == -1) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+               "pre-operation fstat failed on fd=%p", fd_out);
+        goto out;
+    }
+
+    /*
+     * Since only the destination file (fd_out) is undergoing
+     * modification, the write related tests are done on that.
+     * i.e. this is treated as if the destination file were
+     * undergoing a write fop from the maintenance perspective.
+     */
+    if (xdata) {
+        op_ret = posix_cs_maintenance(this, fd_out, NULL, &_fd_out, &preop_dst,
+                                      NULL, xdata, &rsp_xdata, _gf_false);
+        if (op_ret < 0) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+                   "file state check failed, fd %p", fd_out);
+            op_errno = EIO;
+            goto out;
+        }
+    }
+
+    /*
+     * NOTE: This is just doing a single execution of the copy_file_range
+     * system call. If the returned value of this system call is less
+     * than len, then should we keep doing it in a loop until the
+     * copy_file_range of all the len bytes is done?
+     * Check the example program provided in the man page of
+     * copy_file_range.
+     * If so, then separate variables for both off_in and off_out
+     * should be used which are initialized to the off_in and off_out
+     * that this function receives, but then advanced by the
+     * value returned by sys_copy_file_range and used as
+     * off_in and off_out for the next execution of copy_file_range.
+     */
+    op_ret = sys_copy_file_range(_fd_in, &off_in, _fd_out, &off_out, len,
+                                 flags);
+
+    if (op_ret < 0) {
+        /*
+         * NOTE(review): this assumes sys_copy_file_range() returns a
+         * negated errno value. If it follows the "-1 and set errno"
+         * convention instead, op_errno should be taken from errno.
+         * Confirm against the sys_copy_file_range() implementation.
+         */
+        op_errno = -op_ret;
+        op_ret = -1;
+        /*
+         * NOTE(review): uuid_utoa() is called twice in one argument
+         * list; if it returns a shared buffer both gfids would print
+         * the same value. Verify.
+         */
+        gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_COPY_FILE_RANGE_FAILED,
+               "copy_file_range failed: fd_in: %p (gfid: %s) ,"
+               " fd_out %p (gfid:%s)",
+               fd_in, uuid_utoa(fd_in->inode->gfid), fd_out,
+               uuid_utoa(fd_out->inode->gfid));
+        goto out;
+    }
+
+    /*
+     * Let this be as it is for now. This function collects
+     * information such as open fd count etc. So, even though
+     * is_append does not apply to copy_file_range, for now,
+     * allowing it to be recorded in the dict as _gf_false.
+     *
+     * NOTE(review): rsp_xdata may already have been set through
+     * posix_cs_maintenance() above and is overwritten here; check
+     * whether a reference can be leaked.
+     */
+    rsp_xdata = _fill_writev_xdata(fd_out, xdata, this, is_append);
+
+    /* copy_file_range successful, we also need to get the stat of
+     * the file we wrote to (i.e. destination file or fd_out).
+     */
+    ret = posix_fdstat(this, fd_out->inode, _fd_out, &postop_dst);
+    if (ret == -1) {
+        op_ret = -1;
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+               "post-operation fstat failed on fd=%p", fd_out);
+        goto out;
+    }
+
+    /*
+     * Also perform the stat on the source fd (i.e. fd_in). For now,
+     * allowing it to be done within the locked region if the request
+     * is for atomic operation (and update) of copy_file_range.
+     */
+    ret = posix_fdstat(this, fd_in->inode, _fd_in, &stbuf);
+    if (ret == -1) {
+        op_ret = -1;
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+               "post-operation fstat failed on fd=%p", fd_in);
+        goto out;
+    }
+
+    /*
+     * The core logic of what time attributes are to be updated
+     * on a fop is decided at the client side xlator utime.
+     * All the remaining fops call the posix_set_ctime function
+     * to update the {a,m,c}time. But, for all the other fops,
+     * the operation is happening on only one file (or inode).
+     * Here, there are 2 fds (source and destination). Hence
+     * the new function below to update the appropriate times for
+     * both the source and the destination file.
+     * For the source file, if at all anything has to be updated,
+     * it would be atime (as that file is only read, not updated).
+     * For the destination file, the attributes that require
+     * modification would be mtime and ctime.
+     * What times have to be changed is actually determined by the
+     * utime xlator. But, all of them would be in frame->root->flags.
+     * So, currently posix assumes that the atime flag is for
+     * the source file and the other 2 flags are for the destination
+     * file. Since the assumption is rigid (i.e. atime for source
+     * and {m,c}time for destination), the below function is called
+     * posix_set_ctime_cfr (cfr standing for copy_file_range).
+     * FUTURE TODO:
+     * In future, some other functionality or fop might operate
+     * simultaneously on 2 files. Then, depending upon what that new
+     * fop does or what its requirements are, the below function might
+     * require changes to become generic for consumption in case of
+     * simultaneous operations on 2 files.
+     */
+    posix_set_ctime_cfr(frame, this, NULL, pfd_in->fd, fd_in->inode, &stbuf,
+                        NULL, pfd_out->fd, fd_out->inode, &postop_dst);
+
+    if (locked) {
+        pthread_mutex_unlock(&ctx->write_atomic_lock);
+        locked = _gf_false;
+    }
+
+    /*
+     * Record copy_file_range in priv->write_value for now.
+     * If not needed, remove the below section of code along with
+     * this comment (or add a comment to explain why it is not
+     * needed).
+     */
+    LOCK(&priv->lock);
+    {
+        priv->write_value += op_ret;
+    }
+    UNLOCK(&priv->lock);
+
+out:
+
+    /* error paths taken while the atomic lock is held end up here */
+    if (locked) {
+        pthread_mutex_unlock(&ctx->write_atomic_lock);
+        locked = _gf_false;
+    }
+
+    STACK_UNWIND_STRICT(copy_file_range, frame, op_ret, op_errno, &stbuf,
+                        &preop_dst, &postop_dst, rsp_xdata);
+
+    if (rsp_xdata)
+        dict_unref(rsp_xdata);
+    return 0;
+}
+
+
+int32_t
posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
char *real_path = NULL;
diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h
index 62af32ac8fe..928723db8f9 100644
--- a/xlators/storage/posix/src/posix-messages.h
+++ b/xlators/storage/posix/src/posix-messages.h
@@ -67,6 +67,7 @@ GLFS_MSGID(POSIX, P_MSG_XATTR_FAILED, P_MSG_NULL_GFID, P_MSG_FCNTL_FAILED,
P_MSG_ANCESTORY_FAILED, P_MSG_DISK_SPACE_CHECK_FAILED,
P_MSG_FALLOCATE_FAILED, P_MSG_STOREMDATA_FAILED,
P_MSG_FETCHMDATA_FAILED, P_MSG_GETMDATA_FAILED,
- P_MSG_SETMDATA_FAILED, P_MSG_FRESHFILE);
+ P_MSG_SETMDATA_FAILED, P_MSG_FRESHFILE, P_MSG_MUTEX_FAILED,
+ P_MSG_COPY_FILE_RANGE_FAILED);
#endif /* !_GLUSTERD_MESSAGES_H_ */
diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
index 26fae2019b5..62669a0b83f 100644
--- a/xlators/storage/posix/src/posix-metadata.c
+++ b/xlators/storage/posix/src/posix-metadata.c
@@ -663,3 +663,81 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this,
out:
return;
}
+
+/*
+ * Update the stored time attributes after a copy_file_range.
+ *
+ * The time flags are taken from frame->root->flags and split between the
+ * two files: the destination (@real_path_out / @fd_out / @inode_out /
+ * @stbuf_out) gets the mtime and ctime flags, the source (@real_path_in /
+ * @fd_in / @inode_in / @stbuf_in) gets only the atime flag.
+ *
+ * Nothing is done when priv->ctime is off, when none of the three flags
+ * is set, or when frame->root->ctime.tv_sec is 0 (a warning is logged in
+ * the last case). Failures of posix_set_mdata_xattr() are only logged.
+ * The real paths may be NULL.
+ */
+void
+posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this,
+                    const char *real_path_in, int fd_in, inode_t *inode_in,
+                    struct iatt *stbuf_in, const char *real_path_out,
+                    int fd_out, inode_t *inode_out, struct iatt *stbuf_out)
+{
+    posix_mdata_flag_t flag = {
+        0,
+    };
+    posix_mdata_flag_t flag_dup = {
+        0,
+    };
+    int ret = 0;
+    struct posix_private *priv = NULL;
+    /* never hand a NULL pointer to a "%s" conversion in the log calls */
+    const char *path_in = real_path_in ? real_path_in : "(null)";
+    const char *path_out = real_path_out ? real_path_out : "(null)";
+
+    priv = this->private;
+
+    if (!priv->ctime)
+        goto out;
+
+    (void)posix_get_mdata_flag(frame->root->flags, &flag);
+    if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) {
+        goto out;
+    }
+
+    if (frame->root->ctime.tv_sec == 0) {
+        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+               "posix set mdata failed, No ctime : in: %s gfid_in:%s "
+               "out: %s gfid_out:%s",
+               path_in, inode_in ? uuid_utoa(inode_in->gfid) : "No inode",
+               path_out, inode_out ? uuid_utoa(inode_out->gfid) : "No inode");
+        goto out;
+    }
+
+    /*
+     * For the destination file, no need to update atime.
+     * It got modified. Hence the things that need to be
+     * changed are mtime and ctime (provided the utime
+     * xlator from the client has set those flags, which
+     * are just copied to flag_dup).
+     */
+    flag_dup = flag;
+    flag_dup.atime = 0;
+
+    ret = posix_set_mdata_xattr(this, real_path_out, fd_out, inode_out,
+                                &frame->root->ctime, stbuf_out, &flag_dup,
+                                _gf_false);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+               "posix set mdata failed on file: %s gfid:%s", path_out,
+               inode_out ? uuid_utoa(inode_out->gfid) : "No inode");
+    }
+
+    /*
+     * For the source file, no need to change the mtime and ctime.
+     * For the source file, it is only a read operation. So, if at all
+     * anything needs to be updated, it is only the atime.
+     */
+    flag_dup.atime = flag.atime;
+    flag_dup.mtime = 0;
+    flag_dup.ctime = 0;
+
+    /*
+     * This update is for the source file, so it must use the source
+     * fd, inode and iatt (not the destination ones).
+     */
+    ret = posix_set_mdata_xattr(this, real_path_in, fd_in, inode_in,
+                                &frame->root->ctime, stbuf_in, &flag_dup,
+                                _gf_false);
+    if (ret) {
+        gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+               "posix set mdata failed on file: %s gfid:%s", path_in,
+               inode_in ? uuid_utoa(inode_in->gfid) : "No inode");
+    }
+
+out:
+    return;
+}
diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h
index e1b549d55a1..3416148ea97 100644
--- a/xlators/storage/posix/src/posix-metadata.h
+++ b/xlators/storage/posix/src/posix-metadata.h
@@ -48,5 +48,10 @@ void
posix_set_parent_ctime(call_frame_t *frame, xlator_t *this,
const char *real_path, int fd, inode_t *inode,
struct iatt *stbuf);
+/*
+ * Time attribute update for copy_file_range (cfr), which involves two
+ * files: the *_in arguments describe the source file and the *_out
+ * arguments describe the destination file.
+ */
+void
+posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this,
+                    const char *real_path_in, int fd_in, inode_t *inode_in,
+                    struct iatt *stbuf_in, const char *real_path_out,
+                    int fd_out, inode_t *inode_out, struct iatt *stbuf_out);
#endif /* _POSIX_METADATA_H */
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index d6a20135f96..42b965434b9 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -76,6 +76,7 @@ struct xlator_fops fops = {
.seek = posix_seek,
.lease = posix_lease,
.put = posix_put,
+ .copy_file_range = posix_copy_file_range,
};
struct xlator_cbks cbks = {
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index 1f1d4fc2774..a1ec996f4b2 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -638,6 +638,11 @@ posix_put(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata);
+/*
+ * copy_file_range fop of the posix xlator: @fd_in/@off_in are the source
+ * fd and offset, @fd_out/@off_out the destination fd and offset, and
+ * @len the number of bytes requested to be copied.
+ */
int32_t
+posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+                      off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len,
+                      uint32_t flags, dict_t *xdata);
+
+int32_t
posix_set_mode_in_dict(dict_t *in_dict, dict_t *out_dict,
struct iatt *in_stbuf);
@@ -656,5 +661,7 @@ int
posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd,
struct iatt *buf, const char *realpath, dict_t *xattr_req,
dict_t **xattr_rsp, gf_boolean_t ignore_failure);
+/*
+ * Returns -1 and sets *op_errno to EINVAL when @inode is a block or
+ * character device, 0 otherwise. @fop is the fop name used in the log.
+ */
+int
+posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno);
#endif /* _POSIX_H */