diff options
57 files changed, 1911 insertions, 19 deletions
diff --git a/api/src/Makefile.am b/api/src/Makefile.am index 6ed30bc99f6..7f9a7d17b35 100644 --- a/api/src/Makefile.am +++ b/api/src/Makefile.am @@ -19,7 +19,7 @@ libgfapi_la_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \          -I$(top_srcdir)/rpc/xdr/src \          -I$(top_builddir)/rpc/xdr/src \          -DDATADIR=\"$(localstatedir)\" \ -        -D__USE_FILE_OFFSET64 +        -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64  AM_CFLAGS = -Wall $(GF_CFLAGS) diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases index a71422c8577..0e52c38d346 100644 --- a/api/src/gfapi.aliases +++ b/api/src/gfapi.aliases @@ -188,3 +188,4 @@ _pub_glfs_ftruncate _glfs_ftruncate$GFAPI_future  _pub_glfs_ftruncate_async _glfs_ftruncate_async$GFAPI_future  _pub_glfs_discard_async _glfs_discard_async$GFAPI_future  _pub_glfs_zerofill_async _glfs_zerofill_async$GFAPI_future +_pub_glfs_copy_file_range _glfs_copy_file_range$GFAPI_future
\ No newline at end of file diff --git a/api/src/gfapi.map b/api/src/gfapi.map index c47323781fb..1be2953ce9a 100644 --- a/api/src/gfapi.map +++ b/api/src/gfapi.map @@ -255,5 +255,6 @@ GFAPI_future {  		glfs_ftruncate_async;  		glfs_discard_async;  		glfs_zerofill_async; +		glfs_copy_file_range;  } GFAPI_4.1.6; diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c index 2a1cc73ccee..f59990aed1f 100644 --- a/api/src/glfs-fops.c +++ b/api/src/glfs-fops.c @@ -1333,6 +1333,161 @@ invalid_fs:  }  ssize_t +pub_glfs_copy_file_range(struct glfs_fd *glfd_in, off64_t *off_in, +                         struct glfs_fd *glfd_out, off64_t *off_out, size_t len, +                         unsigned int flags, struct stat *statbuf, +                         struct stat *prestat, struct stat *poststat) +{ +    xlator_t *subvol = NULL; +    int ret = -1; +    fd_t *fd_in = NULL; +    fd_t *fd_out = NULL; +    struct iatt preiatt = +                    { +                        0, +                    }, +                iattbuf = +                    { +                        0, +                    }, +                postiatt = { +                    0, +                }; +    dict_t *fop_attr = NULL; +    off64_t pos_in; +    off64_t pos_out; +    /* gfapi entry point; the fop itself is run by syncop_copy_file_range(). */ +    DECLARE_OLD_THIS; +    __GLFS_ENTRY_VALIDATE_FD(glfd_in, invalid_fs); +    __GLFS_ENTRY_VALIDATE_FD(glfd_out, invalid_fs); + +    GF_REF_GET(glfd_in); +    GF_REF_GET(glfd_out); +    /* Both descriptors must belong to the same glfs_t, else EXDEV. */ +    if (glfd_in->fs != glfd_out->fs) { +        ret = -1; +        errno = EXDEV; +        goto out; +    } + +    subvol = glfs_active_subvol(glfd_in->fs); +    if (!subvol) { +        ret = -1; +        errno = EIO; +        goto out; +    } + +    fd_in = glfs_resolve_fd(glfd_in->fs, subvol, glfd_in); +    if (!fd_in) { +        ret = -1; +        errno = EBADFD; +        goto out; +    } + +    fd_out = glfs_resolve_fd(glfd_out->fs, subvol, glfd_out); +    if (!fd_out) { +        ret = -1; +        errno = EBADFD; +        goto out; +    } + +    /* +  
   * This is based on how the vfs layer in the kernel handles +     * copy_file_range call. Upon receiving it follows the +     * below method to consider the offset. +     * if (off_in != NULL) +     *    use the value off_in to perform the op +     * else if off_in == NULL +     *    use the current file offset position to perform the op +     * +     * For gfapi, glfd->offset is used. For a freshly opened +     * fd, the offset is set to 0. +     */ +    if (off_in) +        pos_in = *off_in; +    else +        pos_in = glfd_in->offset; + +    if (off_out) +        pos_out = *off_out; +    else +        pos_out = glfd_out->offset; + +    ret = get_fop_attr_thrd_key(&fop_attr); +    if (ret) +        gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed"); + +    ret = syncop_copy_file_range(subvol, fd_in, pos_in, fd_out, pos_out, len, +                                 flags, &iattbuf, &preiatt, &postiatt, fop_attr, +                                 NULL); +    DECODE_SYNCOP_ERR(ret); + +    if (ret >= 0) { +        pos_in += ret; +        pos_out += ret; + +        if (off_in) +            *off_in = pos_in; +        if (off_out) +            *off_out = pos_out; + +        if (statbuf) +            glfs_iatt_to_stat(glfd_in->fs, &iattbuf, statbuf); +        if (prestat) +            glfs_iatt_to_stat(glfd_in->fs, &preiatt, prestat); +        if (poststat) +            glfs_iatt_to_stat(glfd_in->fs, &postiatt, poststat); +    } + +    if (ret <= 0) +        goto out; + +    /* +     * If off_in is NULL, then there is no offset info that can +     * be obtained from the input argument. Hence follow below method. +     *  If off_in is NULL, then +     *     glfd->offset = offset + ret; +     * else +     *     do nothing. +     * +     * According to the man page of copy_file_range, if off_in is +     * NULL, then the offset of the source file is advanced by +     * the return value of the fop. The same applies to off_out as +     * well. 
Otherwise, if off_in is not NULL, then the offset +     * is not advanced by the filesystem. The entity which sends +     * the copy_file_range call is supposed to advance the offset +     * value in its buffer (pointed to by off_in or off_out) +     * by the return value of copy_file_range. +     */ +    if (!off_in) +        glfd_in->offset += ret; + +    if (!off_out) +        glfd_out->offset += ret; + +out: +    if (fd_in) +        fd_unref(fd_in); +    if (fd_out) +        fd_unref(fd_out); +    if (fop_attr) +        dict_unref(fop_attr); + +    /* glfd_in->fs is read here, so drop the glfd refs only afterwards. */ +    glfs_subvol_done(glfd_in->fs, subvol); + +    GF_REF_PUT(glfd_in); +    GF_REF_PUT(glfd_out); + +    __GLFS_EXIT_FS; + +invalid_fs: +    return ret; +} + +GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_copy_file_range, future); + +ssize_t  pub_glfs_pwritev(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,                   off_t offset, int flags)  { diff --git a/api/src/glfs.h b/api/src/glfs.h index cd642a5ea20..160a784222f 100644 --- a/api/src/glfs.h +++ b/api/src/glfs.h @@ -42,6 +42,38 @@  #include <sys/statvfs.h>  #include <inttypes.h> +/* + * For off64_t to be defined, we need both + * __USE_LARGEFILE64 to be true and __off64_t_defined to be + * false. But, making __USE_LARGEFILE64 true causes other issues + * such as redefinition of stat and fstat to stat64 and fstat64 + * respectively which again causes compilation issues. + * Without off64_t being defined, this will not compile as + * copy_file_range uses off64_t. Hence define it here. First + * check whether __off64_t_defined is true or not. <unistd.h> + * sets that flag when it defines off64_t. If __off64_t_defined + * is false and __USE_FILE_OFFSET64 is true, then go on to define + * off64_t using __off64_t. 
+ */ +#ifndef GF_BSD_HOST_OS +#if defined(__USE_FILE_OFFSET64) && !defined(__off64_t_defined) +typedef __off64_t off64_t; +#endif /* defined(__USE_FILE_OFFSET64) && !defined(__off64_t_defined) */ +#else +#include <stdio.h> +#ifndef _OFF64_T_DECLARED +/* + * Including <stdio.h> (done above) should actually define + * _OFF64_T_DECLARED with off64_t data type being available + * for consumption. But, off64_t data type is not recognizable + * for FreeBSD versions less than 11. Hence, int64_t is typedefed + * to off64_t. + */ +#define _OFF64_T_DECLARED +typedef int64_t off64_t; +#endif /* _OFF64_T_DECLARED */ +#endif /* GF_BSD_HOST_OS */ +  #if defined(HAVE_SYS_ACL_H) || (defined(USE_POSIX_ACLS) && USE_POSIX_ACLS)  #include <sys/acl.h>  #else @@ -594,6 +626,13 @@ off_t  glfs_lseek(glfs_fd_t *fd, off_t offset, int whence) __THROW      GFAPI_PUBLIC(glfs_lseek, 3.4.0); +ssize_t +glfs_copy_file_range(struct glfs_fd *glfd_in, off64_t *off_in, +                     struct glfs_fd *glfd_out, off64_t *off_out, size_t len, +                     unsigned int flags, struct stat *statbuf, +                     struct stat *prestat, struct stat *poststat) __THROW +    GFAPI_PUBLIC(glfs_copy_file_range, future); +  int  glfs_truncate(glfs_t *fs, const char *path, off_t length) __THROW      GFAPI_PUBLIC(glfs_truncate, 3.7.15); diff --git a/configure.ac b/configure.ac index 3ddb6f073a5..d3c8f8b9514 100644 --- a/configure.ac +++ b/configure.ac @@ -1018,6 +1018,25 @@ if test "x${have_posix_fallocate}" = "xyes"; then     AC_DEFINE(HAVE_POSIX_FALLOCATE, 1, [define if posix_fallocate exists])  fi +# On fedora-29, copy_file_range syscall and the libc API both are present. +# Whereas, on some machines such as centos-7, RHEL-7, the API is not there. +# Only the system call is present. So, this change is to determine whether +# the API is present or not. If not, then check whether the system call is +# present or not. 
Accordingly sys_copy_file_range function will first call +# the API if it is there. Otherwise it will call syscall(SYS_copy_file_range). +AC_CHECK_FUNC([copy_file_range], [have_copy_file_range=yes]) +if test "x${have_copy_file_range}" = "xyes"; then +   AC_DEFINE(HAVE_COPY_FILE_RANGE, 1, [define if copy_file_range exists]) +else +   OLD_CFLAGS=${CFLAGS} +   CFLAGS="-D_GNU_SOURCE" +   AC_CHECK_DECL([SYS_copy_file_range], , , [#include <sys/syscall.h>]) +   if test "x${ac_cv_have_decl_SYS_copy_file_range}" = "xyes"; then +      AC_DEFINE(HAVE_COPY_FILE_RANGE_SYS, 1, [define if SYS_copy_file_range is available]) +   fi +   CFLAGS=${OLD_CFLAGS} +fi +  BUILD_NANOSECOND_TIMESTAMPS=no  AC_CHECK_FUNC([utimensat], [have_utimensat=yes])  if test "x${have_utimensat}" = "xyes"; then diff --git a/glusterfs-api.pc.in b/glusterfs-api.pc.in index 6af4e108f7f..4a2edb7bf07 100644 --- a/glusterfs-api.pc.in +++ b/glusterfs-api.pc.in @@ -9,4 +9,4 @@ Description: GlusterFS API  Version: @GFAPI_VERSION@  Requires: @PKGCONFIG_UUID@  Libs: -L${libdir} @GFAPI_LIBS@ -lgfapi -lglusterfs -lgfrpc -lgfxdr -Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -DUSE_POSIX_ACLS=@USE_POSIX_ACLS@ +Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 -DUSE_POSIX_ACLS=@USE_POSIX_ACLS@ diff --git a/libgfchangelog.pc.in b/libgfchangelog.pc.in index e2ff1fb6214..79eac2ad2d3 100644 --- a/libgfchangelog.pc.in +++ b/libgfchangelog.pc.in @@ -9,4 +9,4 @@ Description: GlusterFS Changelog Consumer Library  Version: @LIBGFCHANGELOG_VERSION@  Requires: @PKGCONFIG_UUID@  Libs: -L${libdir} -lgfchangelog -lglusterfs -Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 +Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am index 1d06f1586a9..970f4b74978 100644 --- a/libglusterfs/src/Makefile.am +++ b/libglusterfs/src/Makefile.am @@ -6,7 +6,7 @@ 
libglusterfs_la_CFLAGS = $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \  libglusterfs_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \  	-DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \  	-DXLATORPARENTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)\" \ -	-DXXH_NAMESPACE=GF_ \ +	-DXXH_NAMESPACE=GF_ -D__USE_LARGEFILE64 \  	-I$(top_srcdir)/rpc/xdr/src/ -I$(top_builddir)/rpc/xdr/src/ \  	-I$(top_srcdir)/rpc/rpc-lib/src/ -I$(CONTRIBDIR)/rbtree \  	-I$(CONTRIBDIR)/libexecinfo ${ARGP_STANDALONE_CPPFLAGS} \ diff --git a/libglusterfs/src/call-stub.c b/libglusterfs/src/call-stub.c index 96454dfaeb5..886dfa52ccc 100644 --- a/libglusterfs/src/call-stub.c +++ b/libglusterfs/src/call-stub.c @@ -1818,6 +1818,51 @@ out:  }  call_stub_t * +fop_copy_file_range_stub(call_frame_t *frame, fop_copy_file_range_t fn, +                         fd_t *fd_in, off64_t off_in, fd_t *fd_out, +                         off64_t off_out, size_t len, uint32_t flags, +                         dict_t *xdata) +{ +    call_stub_t *stub = NULL; +    /* Fop-side stub; NULL is returned if a check below jumps to out. */ +    GF_VALIDATE_OR_GOTO("call-stub", frame, out); +    GF_VALIDATE_OR_GOTO("call-stub", fn, out); + +    stub = stub_new(frame, 1, GF_FOP_COPY_FILE_RANGE); +    GF_VALIDATE_OR_GOTO("call-stub", stub, out); + +    stub->fn.copy_file_range = fn; + +    args_copy_file_range_store(&stub->args, fd_in, off_in, fd_out, off_out, len, +                               flags, xdata); + +out: +    return stub; +} +/* Callback-side stub: fn goes to fn_cbk, the results to args_cbk. */ +call_stub_t * +fop_copy_file_range_cbk_stub(call_frame_t *frame, fop_copy_file_range_cbk_t fn, +                             int32_t op_ret, int32_t op_errno, +                             struct iatt *stbuf, struct iatt *prebuf_dst, +                             struct iatt *postbuf_dst, dict_t *xdata) +{ +    call_stub_t *stub = NULL; + +    GF_VALIDATE_OR_GOTO("call-stub", frame, out); +    GF_VALIDATE_OR_GOTO("call-stub", fn, out); + +    stub = stub_new(frame, 0, GF_FOP_COPY_FILE_RANGE); +    GF_VALIDATE_OR_GOTO("call-stub", stub, out); + + 
   stub->fn_cbk.copy_file_range = fn; +    args_copy_file_range_cbk_store(&stub->args_cbk, op_ret, op_errno, stbuf, +                                   prebuf_dst, postbuf_dst, xdata); + +out: +    return stub; +} + +call_stub_t *  fop_put_stub(call_frame_t *frame, fop_put_t fn, loc_t *loc, mode_t mode,               mode_t umask, uint32_t flags, struct iovec *vector, int32_t count,               off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata) @@ -2213,6 +2258,13 @@ call_resume_wind(call_stub_t *stub)                           stub->args.iobref, stub->args.xattr, stub->args.xdata);              break; +        case GF_FOP_COPY_FILE_RANGE: +            stub->fn.copy_file_range( +                stub->frame, stub->frame->this, stub->args.fd, +                stub->args.off_in, stub->args.fd_dst, stub->args.off_out, +                stub->args.size, stub->args.flags, stub->args.xdata); +            break; +          default:              gf_msg_callingfn("call-stub", GF_LOG_ERROR, EINVAL,                               LG_MSG_INVALID_ENTRY, @@ -2439,6 +2491,12 @@ call_resume_unwind(call_stub_t *stub)                          stub->args_cbk.xdata);              break; +        case GF_FOP_COPY_FILE_RANGE: +            STUB_UNWIND(stub, copy_file_range, &stub->args_cbk.stat, +                        &stub->args_cbk.prestat, &stub->args_cbk.poststat, +                        stub->args_cbk.xdata); +            break; +          default:              gf_msg_callingfn("call-stub", GF_LOG_ERROR, EINVAL,                               LG_MSG_INVALID_ENTRY, diff --git a/libglusterfs/src/default-args.c b/libglusterfs/src/default-args.c index 479974e1637..cfceabd1f46 100644 --- a/libglusterfs/src/default-args.c +++ b/libglusterfs/src/default-args.c @@ -1541,6 +1541,48 @@ args_namelink_store(default_args_t *args, loc_t *loc, dict_t *xdata)      return 0;  } +int +args_copy_file_range_store(default_args_t *args, fd_t *fd_in, off64_t off_in, +                      
     fd_t *fd_out, off64_t off_out, size_t len, +                           uint32_t flags, dict_t *xdata) +{ +    if (fd_in) +        args->fd = fd_ref(fd_in); +    if (fd_out) +        args->fd_dst = fd_ref(fd_out); +    args->size = len; +    args->off_in = off_in; +    args->off_out = off_out; +    args->flags = flags; +    /* fd_in, fd_out and xdata are each stored with an extra reference. */ +    if (xdata) +        args->xdata = dict_ref(xdata); + +    return 0; +} +/* Record the callback results; iatts are copied only when op_ret >= 0. */ +int +args_copy_file_range_cbk_store(default_args_cbk_t *args, int32_t op_ret, +                               int32_t op_errno, struct iatt *stbuf, +                               struct iatt *prebuf_dst, +                               struct iatt *postbuf_dst, dict_t *xdata) +{ +    args->op_ret = op_ret; +    args->op_errno = op_errno; +    if (op_ret >= 0) { +        if (postbuf_dst) +            args->poststat = *postbuf_dst; +        if (prebuf_dst) +            args->prestat = *prebuf_dst; +        if (stbuf) +            args->stat = *stbuf; +    } +    if (xdata) +        args->xdata = dict_ref(xdata); + +    return 0; +} +  void  args_cbk_wipe(default_args_cbk_t *args_cbk)  { diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c index 97de8193dcb..5bf64e8c6c6 100644 --- a/libglusterfs/src/defaults-tmpl.c +++ b/libglusterfs/src/defaults-tmpl.c @@ -84,6 +84,7 @@ struct xlator_fops _default_fops = {      .put = default_put,      .icreate = default_icreate,      .namelink = default_namelink, +    .copy_file_range = default_copy_file_range,  };  struct xlator_fops *default_fops = &_default_fops; diff --git a/libglusterfs/src/generator.py b/libglusterfs/src/generator.py index c17d450502d..5b7aa4764a0 100755 --- a/libglusterfs/src/generator.py +++ b/libglusterfs/src/generator.py @@ -599,6 +599,19 @@ ops['namelink'] = (  	('cbk-arg',     'xdata',                 'dict_t *'),  ) +ops['copy_file_range'] = ( +        ('fop-arg',     'fd_in',                 'fd_t *'), +        ('fop-arg',     'off_in',                'off64_t '), +        
('fop-arg',     'fd_out',                'fd_t *'), +        ('fop-arg',     'off_out',               'off64_t '), +        ('fop-arg',     'len',                   'size_t'), +        ('fop-arg',     'flags',                 'uint32_t'), +        ('fop-arg',     'xdata',                 'dict_t *'), +        ('cbk-arg',     'stbuf',                 'struct iatt *'), +        ('cbk-arg',     'prebuf_dst',            'struct iatt *'), +        ('cbk-arg',     'postbuf_dst',           'struct iatt *'), +        ('cbk-arg',     'xdata',                 'dict_t *'), +)  #####################################################################  xlator_cbks['forget'] = (  	('fn-arg',      'this',        'xlator_t *'), diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c index 35482545ab3..4fec0638926 100644 --- a/libglusterfs/src/globals.c +++ b/libglusterfs/src/globals.c @@ -77,6 +77,7 @@ const char *gf_fop_list[GF_FOP_MAXVALUE] = {      [GF_FOP_PUT] = "PUT",      [GF_FOP_ICREATE] = "ICREATE",      [GF_FOP_NAMELINK] = "NAMELINK", +    [GF_FOP_COPY_FILE_RANGE] = "COPY_FILE_RANGE",  };  const char *gf_upcall_list[GF_UPCALL_FLAGS_MAXVALUE] = { diff --git a/libglusterfs/src/glusterfs/call-stub.h b/libglusterfs/src/glusterfs/call-stub.h index bfed0fbc14a..c01c935e73d 100644 --- a/libglusterfs/src/glusterfs/call-stub.h +++ b/libglusterfs/src/glusterfs/call-stub.h @@ -81,6 +81,7 @@ typedef struct _call_stub {          fop_put_t put;          fop_icreate_t icreate;          fop_namelink_t namelink; +        fop_copy_file_range_t copy_file_range;      } fn;      union { @@ -136,6 +137,7 @@ typedef struct _call_stub {          fop_put_cbk_t put;          fop_icreate_cbk_t icreate;          fop_namelink_cbk_t namelink; +        fop_copy_file_range_cbk_t copy_file_range;      } fn_cbk;      default_args_t args; @@ -589,6 +591,18 @@ fop_namelink_cbk_stub(call_frame_t *frame, fop_namelink_cbk_t fn,                        int32_t op_ret, int32_t op_errno, struct iatt 
*prebuf,                        struct iatt *postbuf, dict_t *xdata); +call_stub_t * +fop_copy_file_range_stub(call_frame_t *frame, fop_copy_file_range_t fn, +                         fd_t *fd_in, off64_t off_in, fd_t *fd_out, +                         off64_t off_out, size_t len, uint32_t flags, +                         dict_t *xdata); + +call_stub_t * +fop_copy_file_range_cbk_stub(call_frame_t *frame, fop_copy_file_range_cbk_t fn, +                             int32_t op_ret, int32_t op_errno, +                             struct iatt *stbuf, struct iatt *prebuf_dst, +                             struct iatt *postbuf_dst, dict_t *xdata); +  void  call_resume(call_stub_t *stub);  void diff --git a/libglusterfs/src/glusterfs/compat.h b/libglusterfs/src/glusterfs/compat.h index 38c07b5ae7c..9374b79f9af 100644 --- a/libglusterfs/src/glusterfs/compat.h +++ b/libglusterfs/src/glusterfs/compat.h @@ -116,6 +116,25 @@  #include <limits.h>  #include <libgen.h> +/* + * This is where things like off64_t are defined. + * So include it before declaring _OFF64_T_DECLARED. + * If the freebsd version has support for off64_t + * including stdio.h should be sufficient. + */ +#include <stdio.h> + +#ifndef _OFF64_T_DECLARED +/* + * Including <stdio.h> (done above) should actually define + * _OFF64_T_DECLARED with off64_t data type being available + * for consumption. But, off64_t data type is not recognizable + * for FreeBSD versions less than 11. Hence, int64_t is typedefed + * to off64_t. 
+ */ +#define _OFF64_T_DECLARED +typedef int64_t off64_t; +#endif /* _OFF64_T_DECLARED */  #ifndef XATTR_CREATE  enum { diff --git a/libglusterfs/src/glusterfs/default-args.h b/libglusterfs/src/glusterfs/default-args.h index f15f558202b..ca7526fcab6 100644 --- a/libglusterfs/src/glusterfs/default-args.h +++ b/libglusterfs/src/glusterfs/default-args.h @@ -234,6 +234,12 @@ void  args_lease_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,                       struct gf_lease *lease, dict_t *xdata); +int +args_copy_file_range_cbk_store(default_args_cbk_t *args, int32_t op_ret, +                               int32_t op_errno, struct iatt *stbuf, +                               struct iatt *prebuf_dst, +                               struct iatt *postbuf_dst, dict_t *xdata); +  void  args_cbk_wipe(default_args_cbk_t *args_cbk); @@ -439,6 +445,11 @@ args_icreate_store(default_args_t *args, loc_t *loc, mode_t mode,  int  args_namelink_store(default_args_t *args, loc_t *loc, dict_t *xdata); +int +args_copy_file_range_store(default_args_t *args, fd_t *fd_in, off64_t off_in, +                           fd_t *fd_out, off64_t off_out, size_t len, +                           uint32_t flags, dict_t *xdata); +  void  args_cbk_init(default_args_cbk_t *args_cbk);  #endif /* _DEFAULT_ARGS_H */ diff --git a/libglusterfs/src/glusterfs/defaults.h b/libglusterfs/src/glusterfs/defaults.h index 5d6b8e28a51..5a818eeb91a 100644 --- a/libglusterfs/src/glusterfs/defaults.h +++ b/libglusterfs/src/glusterfs/defaults.h @@ -48,10 +48,20 @@ typedef struct {  } default_args_cbk_t;  typedef struct { -    loc_t loc;  /* @old in rename(), link() */ -    loc_t loc2; /* @new in rename(), link() */ -    fd_t *fd; +    loc_t loc;    /* @old in rename(), link() */ +    loc_t loc2;   /* @new in rename(), link() */ +    fd_t *fd;     /* for all the fd based ops */ +    fd_t *fd_dst; /* Only for copy_file_range destination */      off_t offset; +    /* +     * According to the man page 
of copy_file_range, +     * the offsets for source and destination file +     * are of type loff_t. But the type loff_t is +     * linux specific and is actually a typedef of +     * off64_t. +     */ +    off64_t off_in;  /* For copy_file_range source fd */ +    off64_t off_out; /* For copy_file_range destination fd only */      int mask;      size_t size;      mode_t mode; @@ -323,6 +333,11 @@ int32_t  default_namelink(call_frame_t *frame, xlator_t *this, loc_t *loc,                   dict_t *xdata); +int32_t +default_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, +                        off64_t off_in, fd_t *fd_out, off64_t off_out, +                        size_t len, uint32_t flags, dict_t *xdata); +  /* Resume */  int32_t  default_getspec_resume(call_frame_t *frame, xlator_t *this, const char *key, @@ -542,6 +557,11 @@ default_put_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,                     int32_t count, off_t off, struct iobref *iobref,                     dict_t *xattr, dict_t *xdata); +int32_t +default_copy_file_range_resume(call_frame_t *frame, xlator_t *this, fd_t *fd_in, +                               off64_t off_in, fd_t *fd_out, off64_t off_out, +                               size_t len, uint32_t flags, dict_t *xdata); +  /* _cbk_resume */  int32_t @@ -813,6 +833,13 @@ int32_t  default_namelink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,                          dict_t *xdata); +int32_t +default_copy_file_range_cbk_resume(call_frame_t *frame, void *cookie, +                                   xlator_t *this, int32_t op_ret, +                                   int32_t op_errno, struct iatt *stbuf, +                                   struct iatt *prebuf_dst, +                                   struct iatt *postbuf_dst, dict_t *xdata); +  /* _CBK */  int32_t  default_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, @@ -1072,6 +1099,12 @@ default_namelink_cbk(call_frame_t *frame, 
void *cookie, xlator_t *this,                       struct iatt *postbuf, dict_t *xdata);  int32_t +default_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                            int32_t op_ret, int32_t op_errno, +                            struct iatt *stbuf, struct iatt *prebuf_dst, +                            struct iatt *postbuf_dst, dict_t *xdata); + +int32_t  default_lookup_failure_cbk(call_frame_t *frame, int32_t op_errno);  int32_t @@ -1231,6 +1264,9 @@ int32_t  default_namelink_failure_cbk(call_frame_t *frame, int32_t op_errno);  int32_t +default_copy_file_range_failure_cbk(call_frame_t *frame, int32_t op_errno); + +int32_t  default_mem_acct_init(xlator_t *this);  void diff --git a/libglusterfs/src/glusterfs/syncop.h b/libglusterfs/src/glusterfs/syncop.h index 203abe92b57..7a6167b0488 100644 --- a/libglusterfs/src/glusterfs/syncop.h +++ b/libglusterfs/src/glusterfs/syncop.h @@ -138,8 +138,19 @@ typedef struct syncbarrier syncbarrier_t;  struct syncargs {      int op_ret;      int op_errno; + +    /* +     * The below 3 iatt structures are used in the fops +     * whose callbacks get struct iatt as one of the +     * return arguments. Currently, the maximum number +     * of iatt structures returned is 3 for some fops +     * such as mknod, copy_file_range, mkdir etc. So +     * all the following 3 iatt structures would be used +     * for those fops. 
+     */      struct iatt iatt1;      struct iatt iatt2; +    struct iatt iatt3;      dict_t *xattr;      struct statvfs statvfs_buf;      struct iovec *vector; @@ -634,4 +645,17 @@ syncop_entrylk(xlator_t *subvol, const char *volume, loc_t *loc,                 const char *basename, entrylk_cmd cmd, entrylk_type type,                 dict_t *xdata_in, dict_t **xdata_out); +int +syncop_copy_file_range(xlator_t *subvol, fd_t *fd_in, off64_t off_in, +                       fd_t *fd_out, off64_t off_out, size_t len, +                       uint32_t flags, struct iatt *stbuf, +                       struct iatt *preiatt_dst, struct iatt *postiatt_dst, +                       dict_t *xdata_in, dict_t **xdata_out); + +int +syncop_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                           int op_ret, int op_errno, struct iatt *stbuf, +                           struct iatt *prebuf_dst, struct iatt *postbuf_dst, +                           dict_t *xdata); +  #endif /* _SYNCOP_H */ diff --git a/libglusterfs/src/glusterfs/syscall.h b/libglusterfs/src/glusterfs/syscall.h index faaf694b22c..6b33c141a5e 100644 --- a/libglusterfs/src/glusterfs/syscall.h +++ b/libglusterfs/src/glusterfs/syscall.h @@ -17,6 +17,7 @@  #include <sys/stat.h>  #include <sys/time.h>  #include <sys/socket.h> +#include <stdio.h>  /* GF follows the Linux XATTR definition, which differs in Darwin. */  #define GF_XATTR_CREATE 0x1  /* set value, fail if attr already exists */ @@ -228,4 +229,32 @@ sys_socket(int domain, int type, int protocol);  int  sys_accept(int sock, struct sockaddr *sockaddr, socklen_t *socklen, int flags); +#ifdef GF_BSD_HOST_OS +#ifndef _OFF64_T_DECLARED +/* + * Including <stdio.h> (done above) should actually define + * _OFF64_T_DECLARED with off64_t data type being available + * for consumption. But, off64_t data type is not recognizable + * for FreeBSD versions less than 11. Hence, int64_t is typedefed + * to off64_t. 
+ */ +#define _OFF64_T_DECLARED +typedef int64_t off64_t; +#endif /* _OFF64_T_DECLARED */ +#endif /* GF_BSD_HOST_OS */ + +/* + * According to the man page of copy_file_range, both off_in and off_out are + * pointers to the data type loff_t (i.e. loff_t *). But, freebsd does not + * have (and recognize) loff_t. Since loff_t is 64 bits, use off64_t + * instead.  Since it's a pointer type it should be okay. It just needs + * to be a pointer-to-64-bit pointer for both 32- and 64-bit platforms. + * off64_t is recognized by freebsd. + * TODO: In future, when freebsd can recognize loff_t, probably revisit this + *       and change the off_in and off_out to (loff_t *). + */ +ssize_t +sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out, +                    size_t len, unsigned int flags); +  #endif /* __SYSCALL_H__ */ diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h index 4137d12eb27..12d507bc021 100644 --- a/libglusterfs/src/glusterfs/xlator.h +++ b/libglusterfs/src/glusterfs/xlator.h @@ -23,6 +23,7 @@  #include "glusterfs/list.h"  #include "glusterfs/latency.h"  #include "glusterfs/compat-uuid.h" +#include "glusterfs/syscall.h"  #define FIRST_CHILD(xl) (xl->children->xlator)  #define SECOND_CHILD(xl) (xl->children->next->xlator) @@ -354,6 +355,11 @@ typedef int32_t (*fop_namelink_cbk_t)(call_frame_t *frame, void *cookie,                                        int32_t op_errno, struct iatt *prebuf,                                        struct iatt *postbuf, dict_t *xdata); +typedef int32_t (*fop_copy_file_range_cbk_t)( +    call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, +    int32_t op_errno, struct iatt *stbuf, struct iatt *prebuf_dst, +    struct iatt *postbuf_dst, dict_t *xdata); +  typedef int32_t (*fop_lookup_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,                                  dict_t *xdata); @@ -544,6 +550,11 @@ typedef int32_t (*fop_icreate_t)(call_frame_t *frame, 
xlator_t *this,  typedef int32_t (*fop_namelink_t)(call_frame_t *frame, xlator_t *this,                                    loc_t *loc, dict_t *xdata); +typedef int32_t (*fop_copy_file_range_t)(call_frame_t *frame, xlator_t *this, +                                         fd_t *fd_in, off64_t off_in, +                                         fd_t *fd_out, off64_t off_out, +                                         size_t len, uint32_t flags, +                                         dict_t *xdata);  /* WARNING: make sure the list is in order with FOP definition in     `rpc/xdr/src/glusterfs-fops.x`. @@ -609,6 +620,7 @@ struct xlator_fops {      fop_put_t put;      fop_icreate_t icreate;      fop_namelink_t namelink; +    fop_copy_file_range_t copy_file_range;      /* these entries are used for a typechecking hack in STACK_WIND _only_ */      /* make sure to add _cbk variables only after defining regular fops as @@ -673,6 +685,7 @@ struct xlator_fops {      fop_put_cbk_t put_cbk;      fop_icreate_cbk_t icreate_cbk;      fop_namelink_cbk_t namelink_cbk; +    fop_copy_file_range_cbk_t copy_file_range_cbk;  };  typedef int32_t (*cbk_forget_t)(xlator_t *this, inode_t *inode); diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym index baf44de64ad..6ca6a639456 100644 --- a/libglusterfs/src/libglusterfs.sym +++ b/libglusterfs/src/libglusterfs.sym @@ -92,6 +92,8 @@ args_xattrop_cbk_store  args_xattrop_store  args_zerofill_cbk_store  args_zerofill_store +args_copy_file_range_cbk_store +args_copy_file_range_store  bin_to_data  call_resume  call_resume_keep_stub @@ -351,6 +353,10 @@ default_put  default_put_cbk  default_put_failure_cbk  default_put_resume +default_copy_file_range +default_copy_file_range_cbk +default_copy_file_range_failure_cbk +default_copy_file_range_resume  __dentry_grep  dht_is_linkfile  dict_add @@ -471,6 +477,8 @@ fd_unref  _fini  fop_access_stub  fop_create_stub +fop_copy_file_range_stub +fop_copy_file_range_cbk_stub  
fop_discard_stub  fop_entrylk_stub  fop_enum_to_pri_string @@ -933,6 +941,7 @@ synclock_unlock  syncop_access  syncop_close  syncop_create +syncop_copy_file_range  syncopctx_getctx  syncopctx_setfsgid  syncopctx_setfsgroups @@ -1006,6 +1015,7 @@ sys_chmod  sys_chown  sys_close  sys_closedir +sys_copy_file_range  sys_creat  sys_fallocate  sys_fchmod diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c index b70953725ce..bf70daf95c3 100644 --- a/libglusterfs/src/syncop.c +++ b/libglusterfs/src/syncop.c @@ -3397,4 +3397,65 @@ syncop_namelink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,      __wake(args);      return 0; -}
\ No newline at end of file +} + +int +syncop_copy_file_range(xlator_t *subvol, fd_t *fd_in, off64_t off_in, +                       fd_t *fd_out, off64_t off_out, size_t len, +                       uint32_t flags, struct iatt *stbuf, +                       struct iatt *preiatt_dst, struct iatt *postiatt_dst, +                       dict_t *xdata_in, dict_t **xdata_out) +{ +    struct syncargs args = { +        0, +    }; + +    SYNCOP(subvol, (&args), syncop_copy_file_range_cbk, +           subvol->fops->copy_file_range, fd_in, off_in, fd_out, off_out, len, +           flags, xdata_in); + +    if (stbuf) { +        *stbuf = args.iatt1; +    } +    if (preiatt_dst) { +        *preiatt_dst = args.iatt2; +    } +    if (postiatt_dst) { +        *postiatt_dst = args.iatt3; +    } + +    if (xdata_out) { +        *xdata_out = args.xdata; +    } else if (args.xdata) { +        dict_unref(args.xdata); +    } + +    errno = args.op_errno; +    return args.op_ret; +} + +int +syncop_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                           int op_ret, int op_errno, struct iatt *stbuf, +                           struct iatt *prebuf_dst, struct iatt *postbuf_dst, +                           dict_t *xdata) +{ +    struct syncargs *args = NULL; + +    args = cookie; + +    args->op_ret = op_ret; +    args->op_errno = op_errno; +    if (xdata) +        args->xdata = dict_ref(xdata); + +    if (op_ret >= 0) { +        args->iatt1 = *stbuf; +        args->iatt2 = *prebuf_dst; +        args->iatt3 = *postbuf_dst; +    } + +    __wake(args); + +    return 0; +} diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c index c72a8e16b34..1d88c8adac1 100644 --- a/libglusterfs/src/syscall.c +++ b/libglusterfs/src/syscall.c @@ -8,8 +8,8 @@    cases as published by the Free Software Foundation.  
*/ -#include "glusterfs/syscall.h"  #include "glusterfs/compat.h" +#include "glusterfs/syscall.h"  #include "glusterfs/mem-pool.h"  #include "glusterfs/libglusterfs-messages.h" @@ -19,6 +19,9 @@  #include <fcntl.h>  #include <unistd.h>  #include <stdarg.h> +#ifdef HAVE_COPY_FILE_RANGE_SYS +#include <sys/syscall.h> +#endif  #define FS_ERROR_LOG(result)                                                   \      do {                                                                       \ @@ -802,3 +805,31 @@ err:  #endif      return newsock;  } + +ssize_t +sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out, +                    size_t len, unsigned int flags) +{ +    /* +     * TODO: Add check for other platforms like FreeBSD etc if this syscall is +     *       not generic. +     * This is what the function does. +     *       1) Check whether copy_file_range API is present. If so call it. +     *       2) If copy_file_range API is not present, then check whether +     *          the system call is there. If so, then use syscall to invoke +     *          SYS_copy_file_range system call. +     *       3) If neither is present, fail: return -1 with errno = ENOSYS. 
+     */ +#ifdef HAVE_COPY_FILE_RANGE +    return FS_RET_CHECK( +        copy_file_range(fd_in, off_in, fd_out, off_out, len, flags), errno); +#else +#ifdef HAVE_COPY_FILE_RANGE_SYS +    return syscall(SYS_copy_file_range, fd_in, off_in, fd_out, off_out, len, +                   flags); +#else +    errno = ENOSYS; +    return -1; +#endif /* HAVE_COPY_FILE_RANGE_SYS */ +#endif /* HAVE_COPY_FILE_RANGE */ +} diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c index 1b7c9d46f88..b50848b3476 100644 --- a/libglusterfs/src/xlator.c +++ b/libglusterfs/src/xlator.c @@ -143,6 +143,7 @@ fill_defaults(xlator_t *xl)      SET_DEFAULT_FOP(getspec);      SET_DEFAULT_FOP(icreate);      SET_DEFAULT_FOP(namelink); +    SET_DEFAULT_FOP(copy_file_range);      if (!xl->cbks)          xl->cbks = &default_cbks; diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h index 4950857ae9e..779878f52be 100644 --- a/rpc/rpc-lib/src/protocol-common.h +++ b/rpc/rpc-lib/src/protocol-common.h @@ -68,6 +68,7 @@ enum gf_fop_procnum {      GFS3_OP_ICREATE,      GFS3_OP_NAMELINK,      GFS3_OP_PUT, +    GFS3_OP_COPY_FILE_RANGE,      GFS3_OP_MAXVALUE,  }; diff --git a/rpc/xdr/src/glusterfs-fops.x b/rpc/xdr/src/glusterfs-fops.x index ffb71d6418a..bacf07735f4 100644 --- a/rpc/xdr/src/glusterfs-fops.x +++ b/rpc/xdr/src/glusterfs-fops.x @@ -77,6 +77,7 @@ enum glusterfs_fop_t {          GF_FOP_PUT,          GF_FOP_ICREATE,          GF_FOP_NAMELINK, +        GF_FOP_COPY_FILE_RANGE,          GF_FOP_MAXVALUE  }; diff --git a/rpc/xdr/src/glusterfs4-xdr.x b/rpc/xdr/src/glusterfs4-xdr.x index c183dbcd704..dcea17fac68 100644 --- a/rpc/xdr/src/glusterfs4-xdr.x +++ b/rpc/xdr/src/glusterfs4-xdr.x @@ -628,6 +628,19 @@ struct gfx_seek_rsp {   struct gfx_setvolume_req {          gfx_dict dict;  }  ; + + struct   gfx_copy_file_range_req { +        opaque gfid1[16]; +        opaque gfid2[16]; +        quad_t  fd_in; +        quad_t  fd_out; +        u_quad_t   off_in; +        u_quad_t   off_out; +  
      unsigned int size; +        unsigned int flag; +        gfx_dict xdata; /* Extra data */ +}; +   struct  gfx_setvolume_rsp {          int    op_ret;          int    op_errno; diff --git a/rpc/xdr/src/libgfxdr.sym b/rpc/xdr/src/libgfxdr.sym index bd9131be7c6..22cdf30bfda 100644 --- a/rpc/xdr/src/libgfxdr.sym +++ b/rpc/xdr/src/libgfxdr.sym @@ -344,3 +344,4 @@ xdr_compound_req_v2  xdr_gfx_compound_req  xdr_compound_rsp_v2  xdr_gfx_compound_rsp +xdr_gfx_copy_file_range_req
\ No newline at end of file diff --git a/tests/basic/gfapi/gfapi-copy-file-range.t b/tests/basic/gfapi/gfapi-copy-file-range.t new file mode 100644 index 00000000000..c24c1433edf --- /dev/null +++ b/tests/basic/gfapi/gfapi-copy-file-range.t @@ -0,0 +1,80 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup; + +TEST glusterd + +# for now, a xfs filesystem with reflink support is created. +# In future, better to make changes in MKFS_LOOP so that, +# once can create a xfs filesystem with reflink enabled in +# generic and simple way, instead of doing below steps each +# time. +TEST truncate -s 2G $B0/xfs_image +mkfs.xfs 2>&1 | grep reflink +if [ $? -eq 0 ]; then +    mkfs.xfs -f -i size=512 -m reflink=1 $B0/xfs_image; +else +    mkfs.xfs -f -i size=512 $B0/xfs_image; +fi + +TEST mkdir $B0/bricks +TEST mount -t xfs -o loop $B0/xfs_image $B0/bricks + +# Just a single brick volume. More test cases need to be +# added in future for distribute, replicate, +# distributed replicate and distributed replicated sharded +# volumes. +TEST $CLI volume create $V0 $H0:$B0/bricks/brick1; +EXPECT 'Created' volinfo_field $V0 'Status'; + +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; + +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 + +TEST dd if=/dev/urandom of=$M0/file bs=1M count=555; + +# check for the existence of the created file +TEST stat  $M0/file; + +# grab the size of the file +SRC_SIZE=$(stat -c %s $M0/file); + +logdir=`gluster --print-logdir` + +# TODO: +# For now, do not call copy-file-range utility. This is because, +# the regression machines are centos-7 based which does not have +# copy_file_range API available. So, instead of this testcase +# causing regression failures, for now, this is just a dummy test +# case. Uncomment the below tests (until volume stop) when there +# is support for copy_file_range in the regression machines. 
+# + +TEST build_tester $(dirname $0)/glfs-copy-file-range.c -lgfapi + +TEST ./$(dirname $0)/glfs-copy-file-range $H0 $V0 $logdir/gfapi-copy-file-range.log /file /new + +# check whether the destination file is created or not +TEST stat $M0/new + +# check the size of the destination file +DST_SIZE=$(stat -c %s $M0/new); + +# The sizes of the source and destination should be same. +# Atleast it ensures that, copy_file_range API is working +# as expected. Whether the actual cloning happened via reflink +# or a read/write happened is different matter. +TEST [ $SRC_SIZE == $DST_SIZE ]; + +cleanup_tester $(dirname $0)/glfs-copy-file-range + +TEST $CLI volume stop $V0 +TEST $CLI volume delete $V0 + +UMOUNT_LOOP $B0/bricks; + +cleanup; diff --git a/tests/basic/gfapi/glfs-copy-file-range.c b/tests/basic/gfapi/glfs-copy-file-range.c new file mode 100644 index 00000000000..756c38d21ec --- /dev/null +++ b/tests/basic/gfapi/glfs-copy-file-range.c @@ -0,0 +1,177 @@ +/* + Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <glusterfs/api/glfs.h> +#include <glusterfs/api/glfs-handles.h> +#include <string.h> +#include <time.h> +#include <libgen.h> + +static void +cleanup(glfs_t *fs) +{ +    if (!fs) +        return; +#if 0 +        /* glfs fini path is still racy and crashing the program. Since +         * this program any way has to die, we are not going to call fini +         * in the released versions. i.e. final builds. For all +         * internal testing lets enable this so that glfs_fini code +         * path becomes stable. 
*/ +        glfs_fini (fs); +#endif +} + +int +main(int argc, char **argv) +{ +    glfs_t *fs = NULL; +    int ret = -1; +    char *volname = NULL; +    char *logfilepath = NULL; +    char *path_src = NULL; +    char *path_dst = NULL; +    glfs_fd_t *glfd_in = NULL; +    glfs_fd_t *glfd_out = NULL; +    char *volfile_server = NULL; + +    struct stat stbuf = { +        0, +    }; +    struct stat prestat_dst = { +        0, +    }; +    struct stat poststat_dst = { +        0, +    }; +    size_t len; + +    if (argc < 6) { +        printf("%s <host> <volume> <log file path> <source> <destination>", argv[0]); +        ret = -1; +        goto out; +    } + +    volfile_server = argv[1]; +    volname = argv[2]; +    logfilepath = argv[3]; +    path_src = argv[4]; +    path_dst = argv[5]; + +    if (path_src[0] != '/') { +        fprintf(stderr, "source path %s is not absolute", path_src); +        errno = EINVAL; +        goto out; +    } + +    if (path_dst[0] != '/') { +        fprintf(stderr, "destination path %s is not absolute", path_dst); +        errno = EINVAL; +        goto out; +    } + +    fs = glfs_new(volname); +    if (!fs) { +        ret = -errno; +        fprintf(stderr, "Not able to initialize volume '%s'", volname); +        goto out; +    } + +    ret = glfs_set_volfile_server(fs, "tcp", volfile_server, 24007); +    if (ret < 0) { +        ret = -errno; +        fprintf(stderr, +                "Failed to set the volfile server, " +                "%s", +                strerror(errno)); +        goto out; +    } + +    ret = glfs_set_logging(fs, logfilepath, 7); +    if (ret < 0) { +        ret = -errno; +        fprintf(stderr, +                "Failed to set the log file path, " +                "%s", +                strerror(errno)); +        goto out; +    } + +    ret = glfs_init(fs); +    if (ret < 0) { +        ret = -errno; +        if (errno == ENOENT) { +            fprintf(stderr, "Volume %s does not exist", volname); +        } else { +     
       fprintf(stderr, +                    "%s: Not able to fetch " +                    "volfile from glusterd", +                    volname); +        } +        goto out; +    } + +    glfd_in = glfs_open(fs, path_src, O_RDONLY | O_NONBLOCK); +    if (!glfd_in) { +        ret = -errno; +        goto out; +    } else { +        printf("OPEN_SRC: opening %s is success\n", path_src); +    } + +    glfd_out = glfs_creat(fs, path_dst, O_RDWR, 0644); +    if (!glfd_out) { +        ret = -errno; /* capture before fprintf can clobber errno */ +        fprintf(stderr, +                "FAILED_DST_OPEN: failed to " +                "open (create) %s (%s)\n", +                path_dst, strerror(errno)); +        goto out; +    } else { +        printf("OPEN_DST: opening %s is success\n", path_dst); +    } + +    ret = glfs_fstat(glfd_in, &stbuf); +    if (ret < 0) { +        ret = -errno; +        goto out; +    } else { +        printf("FSTAT_SRC: fstat on %s is success\n", path_src); +    } + +    len = stbuf.st_size; + +    do { +        ret = glfs_copy_file_range(glfd_in, NULL, glfd_out, NULL, len, 0, +                                   &stbuf, &prestat_dst, &poststat_dst); +        if (ret == -1) { +            ret = -errno; /* capture before fprintf can clobber errno */ +            fprintf(stderr, "copy_file_range failed with %s\n", +                    strerror(errno)); +            break; +        } else { +            printf("copy_file_range successful\n"); +            len -= ret; +        } +    } while (len > 0 && ret > 0); /* a 0-byte copy makes no progress: stop */ + +out: +    if (glfd_in) +        glfs_close(glfd_in); +    if (glfd_out) +        glfs_close(glfd_out); + +    cleanup(fs); + +    return (ret < 0) ? ret : 0; /* byte count is not an exit status */ +} diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c index 7bf0d8a8f00..f40b00bba2d 100644 --- a/xlators/debug/io-stats/src/io-stats.c +++ b/xlators/debug/io-stats/src/io-stats.c @@ -2119,6 +2119,19 @@ io_stats_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,  }  int +io_stats_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, 
+                             int32_t op_ret, int32_t op_errno, +                             struct iatt *stbuf, struct iatt *prebuf_dst, +                             struct iatt *postbuf_dst, dict_t *xdata) +{ +    UPDATE_PROFILE_STATS(frame, COPY_FILE_RANGE); + +    STACK_UNWIND_STRICT(copy_file_range, frame, op_ret, op_errno, stbuf, +                        prebuf_dst, postbuf_dst, xdata); +    return 0; +} + +int  io_stats_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,                        int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,                        dict_t *xdata) @@ -2873,6 +2886,19 @@ io_stats_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,  }  int +io_stats_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in, +                         off_t off_in, fd_t *fd_out, off_t off_out, size_t len, +                         uint32_t flags, dict_t *xdata) +{ +    START_FOP_LATENCY(frame); + +    STACK_WIND(frame, io_stats_copy_file_range_cbk, FIRST_CHILD(this), +               FIRST_CHILD(this)->fops->copy_file_range, fd_in, off_in, fd_out, +               off_out, len, flags, xdata); +    return 0; +} + +int  io_stats_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)  {      START_FOP_LATENCY(frame); @@ -4189,6 +4215,7 @@ struct xlator_fops fops = {      .getactivelk = io_stats_getactivelk,      .setactivelk = io_stats_setactivelk,      .compound = io_stats_compound, +    .copy_file_range = io_stats_copy_file_range,  };  struct xlator_cbks cbks = { diff --git a/xlators/features/changelog/lib/src/Makefile.am b/xlators/features/changelog/lib/src/Makefile.am index c4b9a3df692..c933ec53ed2 100644 --- a/xlators/features/changelog/lib/src/Makefile.am +++ b/xlators/features/changelog/lib/src/Makefile.am @@ -1,7 +1,7 @@  libgfchangelog_la_CFLAGS = -Wall $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \  	-DDATADIR=\"$(localstatedir)\" -libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 
-fpic \ +libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 -fpic \  	-I../../../src/ -I$(top_srcdir)/libglusterfs/src \  	-I$(top_srcdir)/xlators/features/changelog/src \  	-I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ diff --git a/xlators/features/utime/src/utime-gen-fops-c.py b/xlators/features/utime/src/utime-gen-fops-c.py index ab56dc9a4b3..8730a51d13e 100755 --- a/xlators/features/utime/src/utime-gen-fops-c.py +++ b/xlators/features/utime/src/utime-gen-fops-c.py @@ -62,6 +62,20 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,  }  """ +FOPS_COPY_FILE_RANGE_TEMPLATE = """ +int32_t +gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, +                @LONG_ARGS@) +{ +        gl_timespec_get(&frame->root->ctime); + +        (void) utime_update_attribute_flags(frame, this, GF_FOP_COPY_FILE_RANGE); +        STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this), +                    FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); +        return 0; +} +""" +  FOPS_SETATTR_TEMPLATE = """  int32_t  gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this, @@ -94,6 +108,7 @@ utime_ops = ['fallocate', 'zerofill', 'opendir', 'mknod', 'mkdir',  utime_read_op = ['readv']  utime_write_op = ['writev']  utime_setattr_ops = ['setattr', 'fsetattr'] +utime_copy_file_range_ops = ['copy_file_range']  def gen_defaults():      for name in ops: @@ -109,6 +124,9 @@ def gen_defaults():          if name in utime_setattr_ops:              print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs))              print(generate(FOPS_SETATTR_TEMPLATE, name, fop_subs)) +        if name in utime_copy_file_range_ops: +            print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs)) +            print(generate(FOPS_COPY_FILE_RANGE_TEMPLATE, name, fop_subs))  for l in open(sys.argv[1], 'r').readlines():      if l.find('#pragma generate') != -1: diff --git a/xlators/features/utime/src/utime-gen-fops-h.py 
b/xlators/features/utime/src/utime-gen-fops-h.py index 3686f2e3c1e..e96274c229a 100755 --- a/xlators/features/utime/src/utime-gen-fops-h.py +++ b/xlators/features/utime/src/utime-gen-fops-h.py @@ -18,7 +18,7 @@ gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,  utime_ops = ['fallocate', 'zerofill', 'opendir', 'mknod', 'mkdir',               'unlink', 'rmdir', 'symlink', 'rename', 'link', 'truncate',               'ftruncate', 'create', 'open', 'removexattr', 'fremovexattr', -             'readv', 'writev', 'setattr', 'fsetattr'] +             'readv', 'writev', 'setattr', 'fsetattr', 'copy_file_range']  def gen_defaults():      for name, value in ops.items(): diff --git a/xlators/features/utime/src/utime-helpers.c b/xlators/features/utime/src/utime-helpers.c index c79e12badfa..79cc0145f50 100644 --- a/xlators/features/utime/src/utime-helpers.c +++ b/xlators/features/utime/src/utime-helpers.c @@ -93,6 +93,15 @@ utime_update_attribute_flags(call_frame_t *frame, xlator_t *this,              frame->root->flags |= MDATA_CTIME;              break; +        case GF_FOP_COPY_FILE_RANGE: +            /* Below 2 are for destination fd */ +            frame->root->flags |= MDATA_CTIME; +            frame->root->flags |= MDATA_MTIME; +            /* Below flag is for the source fd */ +            if (!utime_priv->noatime) { +                frame->root->flags |= MDATA_ATIME; +            } +            break;          default:              frame->root->flags = 0;      } diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 3b2622b431f..3f4e19c211e 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -2993,6 +2993,116 @@ fuse_write(xlator_t *this, fuse_in_header_t *finh, void *msg,      return;  } +#if FUSE_KERNEL_MINOR_VERSION >= 28 +static int +fuse_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this, +                         int32_t op_ret, int32_t op_errno, struct iatt 
*stbuf, +                         struct iatt *prebuf_dst, struct iatt *postbuf_dst, +                         dict_t *xdata) +{ +    fuse_state_t *state = NULL; +    fuse_in_header_t *finh = NULL; +    /* +     * Fuse kernel module uses fuse_write_out itself as the +     * output collector. In fact, fuse_kernel.h in the upstream +     * kernel just defines the input structure fuse_copy_file_range_in +     * for the fop. So, just use the fuse_write_out to send the +     * response back to the kernel. +     */ +    struct fuse_write_out fcfro = { +        0, +    }; + +    char src_gfid[GF_UUID_BUF_SIZE] = {0}; +    char dst_gfid[GF_UUID_BUF_SIZE] = {0}; + +    state = frame->root->state; +    finh = state->finh; + +    fuse_log_eh_fop(this, state, frame, op_ret, op_errno); + +    if (op_ret >= 0) { +        gf_log("glusterfs-fuse", GF_LOG_TRACE, +               "%" PRIu64 ": WRITE => %d/%" GF_PRI_SIZET ",%" PRIu64 +               " , %" PRIu64 " ,%" PRIu64 ",%" PRIu64, +               frame->root->unique, op_ret, state->size, state->off_in, +               state->off_out, stbuf->ia_size, postbuf_dst->ia_size); + +        fcfro.size = op_ret; +        send_fuse_obj(this, finh, &fcfro); +    } else { +        if (state->fd && state->fd->inode) +            uuid_utoa_r(state->fd->inode->gfid, src_gfid); +        else +            snprintf(src_gfid, sizeof(src_gfid), "nil"); + +        if (state->fd_dst && state->fd_dst->inode) +            uuid_utoa_r(state->fd_dst->inode->gfid, dst_gfid); +        else +            snprintf(dst_gfid, sizeof(dst_gfid), "nil"); + +        gf_log("glusterfs-fuse", GF_LOG_WARNING, +               "%" PRIu64 +               ": COPY_FILE_RANGE => -1 gfid_in=%s fd_in=%p " +               "gfid_out=%s fd_out=%p (%s)", +               frame->root->unique, src_gfid, state->fd, dst_gfid, +               state->fd_dst, strerror(op_errno)); + +        send_fuse_err(this, finh, op_errno); +    } + +    free_fuse_state(state); +    
STACK_DESTROY(frame->root); + +    return 0; +} + +void +fuse_copy_file_range_resume(fuse_state_t *state) +{ +    gf_log("glusterfs-fuse", GF_LOG_TRACE, +           "%" PRIu64 +           ": COPY_FILE_RANGE " +           "(input fd: %p (gfid: %s), " +           "output fd: %p (gfid: %s) size=%zu, " +           "offset_in=%" PRIu64 ", offset_out=%" PRIu64 ")", +           state->finh->unique, state->fd, uuid_utoa(state->fd->inode->gfid), +           state->fd_dst, uuid_utoa(state->fd_dst->inode->gfid), state->size, +           state->off_in, state->off_out); + +    FUSE_FOP(state, fuse_copy_file_range_cbk, GF_FOP_COPY_FILE_RANGE, +             copy_file_range, state->fd, state->off_in, state->fd_dst, +             state->off_out, state->size, state->io_flags, state->xdata); +} + +static void +fuse_copy_file_range(xlator_t *this, fuse_in_header_t *finh, void *msg, +                     struct iobuf *iobuf) +{ +    struct fuse_copy_file_range_in *fcfri = msg; +    fuse_state_t *state = NULL; +    fd_t *fd_in = NULL; +    fd_t *fd_out = NULL; + +    GET_STATE(this, finh, state); + +    fd_in = FH_TO_FD(fcfri->fh_in); +    fd_out = FH_TO_FD(fcfri->fh_out); +    state->fd = fd_in; +    state->fd_dst = fd_out; + +    fuse_resolve_fd_init(state, &state->resolve, fd_in); +    fuse_resolve_fd_init(state, &state->resolve2, fd_out); + +    state->size = fcfri->len; +    state->off_in = fcfri->off_in; +    state->off_out = fcfri->off_out; +    state->io_flags = fcfri->flags; + +    fuse_resolve_and_resume(state, fuse_copy_file_range_resume); +} +#endif /* FUSE_KERNEL_MINOR_VERSION >= 28 */ +  #if FUSE_KERNEL_MINOR_VERSION >= 24 && HAVE_SEEK_HOLE  static int  fuse_lseek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, @@ -6087,6 +6197,10 @@ static fuse_handler_t *fuse_std_ops[FUSE_OP_HIGH] = {  #if FUSE_KERNEL_MINOR_VERSION >= 24 && HAVE_SEEK_HOLE      [FUSE_LSEEK] = fuse_lseek,  #endif + +#if FUSE_KERNEL_MINOR_VERSION >= 28 +    [FUSE_COPY_FILE_RANGE] = 
fuse_copy_file_range, +#endif  };  static fuse_handler_t *fuse_dump_ops[FUSE_OP_HIGH]; diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h index 57380786f17..60702ab1da5 100644 --- a/xlators/mount/fuse/src/fuse-bridge.h +++ b/xlators/mount/fuse/src/fuse-bridge.h @@ -41,8 +41,32 @@  #include <glusterfs/gidcache.h>  #if defined(GF_LINUX_HOST_OS) || defined(__FreeBSD__) || defined(__NetBSD__) + +/* + * TODO: + * So, with the addition of copy_file_range support, it might + * require a bump up of fuse kernel minor version (like it was + * done when support for lseek fop was added. But, as of now, + * the copy_file_range support has just landed in upstream + * kernel fuse module. So, until, there is a release of that + * fuse as part of a kernel, the FUSE_KERNEL_MINOR_VERSION + * from fuse_kernel.h in the contrib might not be changed. + * If so, then the highest op available should be based on + * the current minor version (which is 24). So, selectively + * determine. When, the minor version is changed to 28 in + * fuse_kernel.h from contrib (because in upstream linux + * kernel source tree, the kernel minor version which + * contains support for copy_file_range is 28), then remove + * the reference to FUSE_LSEEK below and just determine + * FUSE_OP_HIGH based on copy_file_range. + */ +#if FUSE_KERNEL_MINOR_VERSION >= 28 +#define FUSE_OP_HIGH (FUSE_COPY_FILE_RANGE + 1) +#else  #define FUSE_OP_HIGH (FUSE_LSEEK + 1)  #endif + +#endif  #ifdef GF_DARWIN_HOST_OS  #define FUSE_OP_HIGH (FUSE_DESTROY + 1)  #endif @@ -400,10 +424,22 @@ typedef struct {      loc_t loc2;      fuse_in_header_t *finh;      int32_t flags; +      off_t off; +    /* +     * The man page of copy_file_range tells that the offset +     * arguments are of type loff_t *. Here in fuse state, the values of +     * those offsets are saved instead of pointers as the kernel sends +     * the values of the offsets from those pointers instead of pointers. 
+     * But the type loff_t is linux specific and is actually a typedef of +     * off64_t. Hence using off64_t +     */ +    off64_t off_in;  /* for copy_file_range source fd */ +    off64_t off_out; /* for copy_file_range destination fd */      size_t size;      unsigned long nlookup;      fd_t *fd; +    fd_t *fd_dst; /* for copy_file_range destination */      dict_t *xattr;      dict_t *xdata;      char *name; diff --git a/xlators/protocol/client/src/client-common.c b/xlators/protocol/client/src/client-common.c index 7708c820918..64db98d661b 100644 --- a/xlators/protocol/client/src/client-common.c +++ b/xlators/protocol/client/src/client-common.c @@ -2556,6 +2556,38 @@ out:  }  int +client_pre_copy_file_range_v2(xlator_t *this, gfx_copy_file_range_req *req, +                              fd_t *fd_in, off64_t off_in, fd_t *fd_out, +                              off64_t off_out, size_t size, int32_t flags, +                              dict_t **xdata) +{ +    int64_t remote_fd_in = -1; +    int64_t remote_fd_out = -1; +    int op_errno = ESTALE; + +    CLIENT_GET_REMOTE_FD(this, fd_in, FALLBACK_TO_ANON_FD, remote_fd_in, +                         op_errno, out); + +    CLIENT_GET_REMOTE_FD(this, fd_out, FALLBACK_TO_ANON_FD, remote_fd_out, +                         op_errno, out); +    req->size = size; +    req->off_in = off_in; +    req->off_out = off_out; +    req->fd_in = remote_fd_in; +    req->fd_out = remote_fd_out; +    req->flag = flags; + +    memcpy(req->gfid1, fd_in->inode->gfid, 16); +    memcpy(req->gfid2, fd_out->inode->gfid, 16); + +    dict_to_xdr(*xdata, &req->xdata); + +    return 0; +out: +    return -op_errno; +} + +int  client_pre_statfs_v2(xlator_t *this, gfx_statfs_req *req, loc_t *loc,                       dict_t *xdata)  { diff --git a/xlators/protocol/client/src/client-common.h b/xlators/protocol/client/src/client-common.h index 5214eae128e..a2043d8742a 100644 --- a/xlators/protocol/client/src/client-common.h +++ 
b/xlators/protocol/client/src/client-common.h @@ -621,4 +621,10 @@ client_post_rename_v2(xlator_t *this, gfx_rename_rsp *rsp, struct iatt *stbuf,                        struct iatt *prenewparent, struct iatt *postnewparent,                        dict_t **xdata); +int +client_pre_copy_file_range_v2(xlator_t *this, gfx_copy_file_range_req *req, +                              fd_t *fd_in, off64_t off_in, fd_t *fd_out, +                              off64_t off_out, size_t size, int32_t flags, +                              dict_t **xdata); +  #endif /* __CLIENT_COMMON_H__ */ diff --git a/xlators/protocol/client/src/client-helpers.c b/xlators/protocol/client/src/client-helpers.c index 849fdfca0bc..55e87b3c370 100644 --- a/xlators/protocol/client/src/client-helpers.c +++ b/xlators/protocol/client/src/client-helpers.c @@ -2459,6 +2459,20 @@ client_handle_fop_requirements_v2(                  lease, this, &this_req->compound_req_v2_u.compound_lease_req,                  op_errno, out, &args->loc, &args->lease, args->xdata);              break; +        case GF_FOP_COPY_FILE_RANGE: +            /* +             * Not going to handle the copy_file_range fop in compound +             * operation. This is because, compound operation is going +             * to be removed. In fact, AFR, one of the heavy consumers of +             * compound operations, has stopped using them. +             * https://github.com/gluster/glusterfs/issues/414 +             * Therefore, sending ENOTSUP error for this fop coming as +             * compound request. Though, there was no need of handling +             * "case GF_FOP_COPY_FILE_RANGE" technically, this comment +             * under the label of GF_FOP_COPY_FILE_RANGE will help in +             * understanding that this fop does not handle the compound +             * request and why. 
+             */          default:              return ENOTSUP;      } @@ -2631,6 +2645,14 @@ compound_request_cleanup_v2(gfx_compound_req *req)              case GF_FOP_SEEK:                  CLIENT4_COMPOUND_FOP_CLEANUP(curr_req, seek);                  break; +            case GF_FOP_COPY_FILE_RANGE: +                /* +                 * This fop is not handled in compound operations. +                 * Check the comment added under this fop's section +                 * in client_handle_fop_requirements_v2. Therefore +                 * keeping this label only as a placeholder with +                 * a note that this fop is not handled. +                 */              default:                  break;          } @@ -3004,6 +3026,12 @@ client_process_response_v2(call_frame_t *frame, xlator_t *this,                                    &this_args_cbk->lease, xdata);              break;          } +        case GF_FOP_COPY_FILE_RANGE: +            /* +             * Not handling this fop. Returning ENOTSUP. Check +             * the comment added for this fop in the function +             * client_handle_fop_requirements_v2. 
+             */          default:              return -ENOTSUP;      } diff --git a/xlators/protocol/client/src/client-rpc-fops_v2.c b/xlators/protocol/client/src/client-rpc-fops_v2.c index ca180c1db4b..8f3ee41e5c5 100644 --- a/xlators/protocol/client/src/client-rpc-fops_v2.c +++ b/xlators/protocol/client/src/client-rpc-fops_v2.c @@ -2833,6 +2833,72 @@ out:      return 0;  } +int +client4_0_copy_file_range_cbk(struct rpc_req *req, struct iovec *iov, int count, +                              void *myframe) +{ +    gfx_common_3iatt_rsp rsp = { +        0, +    }; +    call_frame_t *frame = NULL; +    struct iatt stbuf = { +        0, +    }; +    struct iatt prestat = { +        0, +    }; +    struct iatt poststat = { +        0, +    }; +    int ret = 0; +    xlator_t *this = NULL; +    dict_t *xdata = NULL; +    clnt_local_t *local = NULL; + +    this = THIS; + +    frame = myframe; +    local = frame->local; + +    if (-1 == req->rpc_status) { +        rsp.op_ret = -1; +        rsp.op_errno = ENOTCONN; +        goto out; +    } + +    ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gfx_common_3iatt_rsp); +    if (ret < 0) { +        gf_msg(this->name, GF_LOG_ERROR, EINVAL, PC_MSG_XDR_DECODING_FAILED, +               "XDR decoding failed"); +        rsp.op_ret = -1; +        rsp.op_errno = EINVAL; +        goto out; +    } + +    ret = client_post_common_3iatt(this, &rsp, &stbuf, &prestat, &poststat, +                                   &xdata); +    if (ret < 0) +        goto out; +out: +    if (rsp.op_ret == -1) { +        gf_msg(this->name, GF_LOG_WARNING, gf_error_to_errno(rsp.op_errno), +               PC_MSG_REMOTE_OP_FAILED, "remote operation failed"); +    } else if (rsp.op_ret >= 0) { +        if (local->attempt_reopen) +            client_attempt_reopen(local->fd, this); +        if (local->attempt_reopen_out) +            client_attempt_reopen(local->fd_out, this); +    } +    CLIENT_STACK_UNWIND(copy_file_range, frame, rsp.op_ret, +                     
   gf_error_to_errno(rsp.op_errno), &stbuf, &prestat, +                        &poststat, xdata); + +    if (xdata) +        dict_unref(xdata); + +    return 0; +} +  int32_t  client4_0_releasedir(call_frame_t *frame, xlator_t *this, void *data)  { @@ -5846,6 +5912,80 @@ unwind:  }  int32_t +client4_0_copy_file_range(call_frame_t *frame, xlator_t *this, void *data) +{ +    clnt_args_t *args = NULL; +    clnt_conf_t *conf = NULL; +    clnt_local_t *local = NULL; +    gfx_copy_file_range_req req = { +        { +            0, +        }, +    }; +    int op_errno = ESTALE; +    int ret = 0; + +    if (!frame || !this || !data) +        goto unwind; + +    args = data; +    conf = this->private; + +    ret = client_pre_copy_file_range_v2(this, &req, args->fd, args->off_in, +                                        args->fd_out, args->off_out, args->size, +                                        args->flags, &args->xdata); + +    if (ret) { +        op_errno = -ret; +        goto unwind; +    } + +    ret = client_fd_fop_prepare_local(frame, args->fd, req.fd_in); +    if (ret) { +        op_errno = -ret; +        goto unwind; +    } + +    /* +     * Since frame->local is allocated in above function call +     * itself, better to use it (with the assumption that it +     * has been allocated) directly instead of again calling +     * client_fd_fop_prepare_local or modifying it, as doing +     * so requires changes in other places as well. 
+     */
+
+    local = frame->local;
+    local->fd_out = fd_ref(args->fd_out);
+    local->attempt_reopen_out = client_is_reopen_needed(args->fd_out, this,
+                                                        req.fd_out);
+
+    ret = client_submit_request(
+        this, &req, frame, conf->fops, GFS3_OP_COPY_FILE_RANGE,
+        client4_0_copy_file_range_cbk, NULL, NULL, 0, NULL, 0, NULL,
+        (xdrproc_t)xdr_gfx_copy_file_range_req);
+    if (ret) {
+        /*
+         * If the lower layers fail to submit a request, they'll also
+         * do the unwind for us (see rpc_clnt_submit), so don't unwind
+         * here in such cases.
+         */
+        gf_msg(this->name, GF_LOG_WARNING, 0, PC_MSG_FOP_SEND_FAILED,
+               "failed to send the fop");
+    }
+
+    GF_FREE(req.xdata.pairs.pairs_val);
+
+    return 0;
+
+unwind:
+    CLIENT_STACK_UNWIND(copy_file_range, frame, -1, op_errno, NULL, NULL, NULL,
+                        NULL);
+    GF_FREE(req.xdata.pairs.pairs_val);
+
+    return 0;
+}
+
+int32_t
 client4_0_fsetattr(call_frame_t *frame, xlator_t *this, void *data)
 {
     clnt_args_t *args = NULL;
@@ -6257,6 +6397,7 @@ rpc_clnt_procedure_t clnt4_0_fop_actors[GF_FOP_MAXVALUE] = {
     [GF_FOP_COMPOUND] = {"COMPOUND", client4_0_compound},
     [GF_FOP_ICREATE] = {"ICREATE", client4_0_icreate},
     [GF_FOP_NAMELINK] = {"NAMELINK", client4_0_namelink},
+    [GF_FOP_COPY_FILE_RANGE] = {"COPY-FILE-RANGE", client4_0_copy_file_range},
 };
 
 rpc_clnt_prog_t clnt4_0_fop_prog = {
diff --git a/xlators/protocol/client/src/client.c b/xlators/protocol/client/src/client.c
index 38723b43b45..c8e84f6e1b7 100644
--- a/xlators/protocol/client/src/client.c
+++ b/xlators/protocol/client/src/client.c
@@ -1129,6 +1129,56 @@ out:
     return 0;
 }
 
+/*
+ * client_copy_file_range - copy_file_range fop entry of protocol/client.
+ *
+ * Packs both fds, both offsets, the length, flags and xdata into a
+ * clnt_args_t and calls the procedure registered for
+ * GF_FOP_COPY_FILE_RANGE in conf->fops->proctable. When conf or its fop
+ * table is missing, the procedure slot is empty, or the procedure returns
+ * non-zero, the frame is unwound here with op_ret -1 and ENOTCONN.
+ *
+ * Always returns 0.
+ */
+int32_t
+client_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+                       off_t off_in, fd_t *fd_out, off_t off_out, size_t len,
+                       uint32_t flags, dict_t *xdata)
+{
+    int ret = -1;
+    clnt_conf_t *conf = NULL;
+    rpc_clnt_procedure_t *proc = NULL;
+    clnt_args_t args = {
+        0,
+    };
+
+    conf = this->private;
+    if (!conf || !conf->fops)
+        goto out;
+
+    args.fd = fd_in;
+    args.fd_out = fd_out;
+    /*
+     * client4_0_copy_file_range takes the source offset from
+     * args->off_in, so fill that field rather than args->offset.
+     */
+    args.off_in = off_in;
+    args.off_out = off_out;
+    args.size = len;
+    args.flags = flags;
+    args.xdata = xdata;
+
+    proc = &conf->fops->proctable[GF_FOP_COPY_FILE_RANGE];
+    if (proc->fn)
+        ret = proc->fn(frame, this, &args);
+out:
+    if (ret)
+        STACK_UNWIND_STRICT(copy_file_range, frame, -1, ENOTCONN, NULL, NULL,
+                            NULL, NULL);
+
+    return 0;
+}
+
 static gf_boolean_t
 is_client_rpc_init_command(dict_t *dict, xlator_t *this, char **value)
 {
@@ -2898,6 +2948,7 @@ struct xlator_fops fops = {
     .icreate = client_icreate,
     .namelink = client_namelink,
     .put = client_put,
+    .copy_file_range = client_copy_file_range,
 };
 
 struct xlator_dumpops dumpops = {
diff --git a/xlators/protocol/client/src/client.h b/xlators/protocol/client/src/client.h
index 5fc75a84628..71f84f3ca89 100644
--- a/xlators/protocol/client/src/client.h
+++ b/xlators/protocol/client/src/client.h
@@ -269,6 +269,7 @@ typedef struct client_local {
     loc_t loc;
     loc_t loc2;
     fd_t *fd;
+    fd_t *fd_out; /* used in copy_file_range */
     clnt_fd_ctx_t *fdctx;
     uint32_t flags;
     struct iobref *iobref;
@@ -280,6 +281,11 @@ typedef struct client_local {
     pthread_mutex_t mutex;
     char *name;
     gf_boolean_t attempt_reopen;
+    /*
+     * The below boolean variable is used
+     * only for copy_file_range fop
+     */
+    gf_boolean_t attempt_reopen_out;
     /* required for compound fops */
     compound_args_t *compound_args;
     unsigned int length; /* length of a compound fop */
@@ -289,7 +295,13 @@ typedef struct client_local {
 
 typedef struct client_args {
     loc_t *loc;
+    /*
+     * This is the source fd for copy_file_range
and +     * the default fd for any other fd based fop which +     * requires only one fd (i.e. opetates on one fd) +     */      fd_t *fd; +    fd_t *fd_out; /* this is the destination fd for copy_file_range */      const char *linkname;      struct iobref *iobref;      struct iovec *vector; @@ -301,7 +313,17 @@ typedef struct client_args {      struct gf_flock *flock;      const char *volume;      const char *basename; +      off_t offset; +    /* +     * According to the man page of copy_file_range, +     * the offsets for source and destination file +     * are of type loff_t. But the type loff_t is +     * linux specific and is actual a typedef of +     * off64_t. +     */ +    off64_t off_in;  /* used in copy_file_range for source fd */ +    off64_t off_out; /* used in copy_file_range for dst fd */      int32_t mask;      int32_t cmd;      size_t size; diff --git a/xlators/protocol/server/src/server-common.c b/xlators/protocol/server/src/server-common.c index 25b36155065..0639ac3feb3 100644 --- a/xlators/protocol/server/src/server-common.c +++ b/xlators/protocol/server/src/server-common.c @@ -541,6 +541,16 @@ server4_post_common_3iatt(server_state_t *state, gfx_common_3iatt_rsp *rsp,  }  void +server4_post_common_3iatt_noinode(gfx_common_3iatt_rsp *rsp, struct iatt *stbuf, +                                  struct iatt *prebuf_dst, +                                  struct iatt *postbuf_dst) +{ +    gfx_stat_from_iattx(&rsp->stat, stbuf); +    gfx_stat_from_iattx(&rsp->preparent, prebuf_dst); +    gfx_stat_from_iattx(&rsp->postparent, postbuf_dst); +} + +void  server4_post_common_2iatt(gfx_common_2iatt_rsp *rsp, struct iatt *prebuf,                            struct iatt *postbuf)  { diff --git a/xlators/protocol/server/src/server-common.h b/xlators/protocol/server/src/server-common.h index 2844ee95756..6200415e304 100644 --- a/xlators/protocol/server/src/server-common.h +++ b/xlators/protocol/server/src/server-common.h @@ -192,3 +192,8 @@ void  
server4_post_link(server_state_t *state, gfx_common_3iatt_rsp *rsp,                    inode_t *inode, struct iatt *stbuf, struct iatt *pre,                    struct iatt *post); + +void +server4_post_common_3iatt_noinode(gfx_common_3iatt_rsp *rsp, struct iatt *stbuf, +                                  struct iatt *prebuf_dst, +                                  struct iatt *postbuf_dst); diff --git a/xlators/protocol/server/src/server-helpers.c b/xlators/protocol/server/src/server-helpers.c index c55a422679d..8ad2d8492ed 100644 --- a/xlators/protocol/server/src/server-helpers.c +++ b/xlators/protocol/server/src/server-helpers.c @@ -4948,6 +4948,8 @@ server_populate_compound_response_v2(xlator_t *this, gfx_compound_rsp *rsp,              rsp_args->op_errno = gf_errno_to_error(this_args_cbk->op_errno);              break;          } +        case GF_FOP_COPY_FILE_RANGE: +            /* Not handling this fop. */          default:              return ENOTSUP;      } @@ -5380,6 +5382,12 @@ server_get_compound_resolve_v2(server_state_t *state, gfx_compound_req *req)              memcpy(state->resolve.gfid, this_req.gfid, 16);              break;          } +        case GF_FOP_COPY_FILE_RANGE: +            /* +             * Compound operations is not being used anymore and +             * planned for subsequent removal. Hence not handling +             * this fop here. +             */          default:              return ENOTSUP;      } diff --git a/xlators/protocol/server/src/server-resolve.c b/xlators/protocol/server/src/server-resolve.c index 26260a5ee2c..ec768acba44 100644 --- a/xlators/protocol/server/src/server-resolve.c +++ b/xlators/protocol/server/src/server-resolve.c @@ -545,14 +545,39 @@ server_resolve_fd(call_frame_t *frame)          return 0;      } -    state->fd = gf_fd_fdptr_get(serv_ctx->fdtable, fd_no); - +    /* +     * With copy_file_range, there will be 2 fds to resolve. 
+     * This same function is called to resolve both the source +     * fd and the destination fd. As of now, this function does +     * not have any mechanism to distinguish between the 2 fds +     * being resolved except for checking the value of state->fd. +     * The assumption is that, if source fd the one which is +     * being resolved here, then state->fd would be NULL. If it +     * is not NULL, then it is the destination fd which is being +     * resolved. +     * This  method (provided the above assumption is true) is +     * to achieve the ability to distinguish between 2 fds with +     * minimum changes being done to this function. If this way +     * is not correct, then more changes might be needed. +     */      if (!state->fd) { -        gf_msg("", GF_LOG_INFO, EBADF, PS_MSG_FD_NOT_FOUND, -               "fd not " -               "found in context"); -        resolve->op_ret = -1; -        resolve->op_errno = EBADF; +        state->fd = gf_fd_fdptr_get(serv_ctx->fdtable, fd_no); +        if (!state->fd) { +            gf_msg("", GF_LOG_INFO, EBADF, PS_MSG_FD_NOT_FOUND, +                   "fd not " +                   "found in context"); +            resolve->op_ret = -1; +            resolve->op_errno = EBADF; +        } +    } else { +        state->fd_out = gf_fd_fdptr_get(serv_ctx->fdtable, fd_no); +        if (!state->fd_out) { +            gf_msg("", GF_LOG_INFO, EBADF, PS_MSG_FD_NOT_FOUND, +                   "fd not " +                   "found in context"); +            resolve->op_ret = -1; +            resolve->op_errno = EBADF; +        }      }      server_resolve_all(frame); diff --git a/xlators/protocol/server/src/server-rpc-fops_v2.c b/xlators/protocol/server/src/server-rpc-fops_v2.c index c5a8e482621..16570294f6d 100644 --- a/xlators/protocol/server/src/server-rpc-fops_v2.c +++ b/xlators/protocol/server/src/server-rpc-fops_v2.c @@ -2259,6 +2259,64 @@ out:      return 0;  } +int +server4_copy_file_range_cbk(call_frame_t *frame, 
void *cookie, xlator_t *this, +                            int32_t op_ret, int32_t op_errno, +                            struct iatt *stbuf, struct iatt *prebuf_dst, +                            struct iatt *postbuf_dst, dict_t *xdata) +{ +    gfx_common_3iatt_rsp rsp = { +        0, +    }; +    server_state_t *state = NULL; +    rpcsvc_request_t *req = NULL; +    char in_gfid[GF_UUID_BUF_SIZE] = {0}; +    char out_gfid[GF_UUID_BUF_SIZE] = {0}; + +    dict_to_xdr(xdata, &rsp.xdata); + +    if (op_ret < 0) { +        state = CALL_STATE(frame); + +        uuid_utoa_r(state->resolve.gfid, in_gfid); +        uuid_utoa_r(state->resolve2.gfid, out_gfid); + +        gf_msg(this->name, fop_log_level(GF_FOP_COPY_FILE_RANGE, op_errno), +               op_errno, PS_MSG_WRITE_INFO, +               "%" PRId64 ": COPY_FILE_RANGE %" PRId64 " (%s), %" PRId64 +               " (%s) client: %s, " +               "error-xlator: %s", +               frame->root->unique, state->resolve.fd_no, in_gfid, +               state->resolve2.fd_no, out_gfid, STACK_CLIENT_NAME(frame->root), +               STACK_ERR_XL_NAME(frame->root)); +        goto out; +    } + +    /* +     * server4_post_common_3iatt (ex: used by server4_put_cbk and some +     * other cbks) also performs inode linking along with copying of 3 +     * iatt structures to the response. But, for copy_file_range, linking +     * of inode is not needed. Therefore a new function is used to +     * construct the response using 3 iatt structures. 
+     * @stbuf: iatt or stat of the source file (or fd) +     * @prebuf_dst: iatt or stat of destination file (or fd) before the fop +     * @postbuf_dst: iatt or stat of destination file (or fd) after the fop +     */ +    server4_post_common_3iatt_noinode(&rsp, stbuf, prebuf_dst, postbuf_dst); + +out: +    rsp.op_ret = op_ret; +    rsp.op_errno = gf_errno_to_error(op_errno); + +    req = frame->local; +    server_submit_reply(frame, req, &rsp, NULL, 0, NULL, +                        (xdrproc_t)xdr_gfx_common_3iatt_rsp); + +    GF_FREE(rsp.xdata.pairs.pairs_val); + +    return 0; +} +  /* Resume function section */  int @@ -3448,6 +3506,29 @@ err:  }  int +server4_copy_file_range_resume(call_frame_t *frame, xlator_t *bound_xl) +{ +    server_state_t *state = NULL; + +    state = CALL_STATE(frame); + +    if (state->resolve.op_ret != 0) +        goto err; + +    STACK_WIND(frame, server4_copy_file_range_cbk, bound_xl, +               bound_xl->fops->copy_file_range, state->fd, state->off_in, +               state->fd_out, state->off_out, state->size, state->flags, +               state->xdata); + +    return 0; +err: +    server4_copy_file_range_cbk(frame, NULL, frame->this, state->resolve.op_ret, +                                state->resolve.op_errno, NULL, NULL, NULL, +                                NULL); +    return 0; +} + +int  server4_0_stat(rpcsvc_request_t *req)  {      server_state_t *state = NULL; @@ -6104,6 +6185,53 @@ out:      return ret;  } +int +server4_0_copy_file_range(rpcsvc_request_t *req) +{ +    server_state_t *state = NULL; +    call_frame_t *frame = NULL; +    gfx_copy_file_range_req args = { +        { +            0, +        }, +    }; +    ssize_t len = 0; +    int ret = -1; +    int op_errno = 0; + +    if (!req) +        return ret; + +    ret = rpc_receive_common(req, &frame, &state, &len, &args, +                             xdr_gfx_copy_file_range_req, +                             GF_FOP_COPY_FILE_RANGE); +    if (ret != 0) { +  
      goto out; +    } + +    state->resolve.type = RESOLVE_MUST; +    state->resolve.fd_no = args.fd_in; +    state->resolve2.type = RESOLVE_MUST; /*making this resolve must */ +    state->resolve2.fd_no = args.fd_out; +    state->off_in = args.off_in; +    state->off_out = args.off_out; +    state->size = args.size; +    state->flags = args.flag; +    memcpy(state->resolve.gfid, args.gfid1, 16); +    memcpy(state->resolve2.gfid, args.gfid2, 16); + +    xdr_to_dict(&args.xdata, &state->xdata); + +    ret = 0; +    resolve_and_resume(frame, server4_copy_file_range_resume); +out: + +    if (op_errno) +        SERVER_REQ_SET_ERROR(req, ret); + +    return ret; +} +  rpcsvc_actor_t glusterfs4_0_fop_actors[] = {      [GFS3_OP_NULL] = {"NULL", GFS3_OP_NULL, server_null, NULL, 0},      [GFS3_OP_STAT] = {"STAT", GFS3_OP_STAT, server4_0_stat, NULL, 0}, @@ -6195,6 +6323,8 @@ rpcsvc_actor_t glusterfs4_0_fop_actors[] = {                           DRC_NA},      [GFS3_OP_NAMELINK] = {"NAMELINK", GFS3_OP_NAMELINK, server4_0_namelink,                            NULL, 0, DRC_NA}, +    [GFS3_OP_COPY_FILE_RANGE] = {"COPY-FILE-RANGE", GFS3_OP_COPY_FILE_RANGE, +                                 server4_0_copy_file_range, NULL, 0, DRC_NA},  };  struct rpcsvc_program glusterfs4_0_fop_prog = { diff --git a/xlators/protocol/server/src/server.h b/xlators/protocol/server/src/server.h index 2a77aba1f3c..bdf98c96f1c 100644 --- a/xlators/protocol/server/src/server.h +++ b/xlators/protocol/server/src/server.h @@ -180,7 +180,12 @@ struct _server_state {      struct iatt stbuf;      int valid; +    /* +     * this fd is used in all the fd based operations PLUS +     * as a source fd in copy_file_range +     */      fd_t *fd; +    fd_t *fd_out; /* destination fd in copy_file_range */      dict_t *params;      int32_t flags;      int wbflags; @@ -191,6 +196,15 @@ struct _server_state {      size_t size;      off_t offset; +    /* +     * According to the man page of copy_file_range, +     * the 
offsets for source and destination file
+     * are of type loff_t. But the type loff_t is
+     * linux specific and is actually a typedef of
+     * off64_t.
+     */
+    off64_t off_in;  /* source offset in copy_file_range */
+    off64_t off_out; /* destination offset in copy_file_range */
     mode_t mode;
     dev_t dev;
     size_t nr_count;
diff --git a/xlators/storage/posix/src/posix-helpers.c b/xlators/storage/posix/src/posix-helpers.c
index ed0516d4c4a..54fc1dc1195 100644
--- a/xlators/storage/posix/src/posix-helpers.c
+++ b/xlators/storage/posix/src/posix-helpers.c
@@ -3314,3 +3314,23 @@ unlock:
 out:
     return ret;
 }
+
+int
+posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno)
+{
+    int ret = -1;
+
+    if (inode->ia_type == IA_IFBLK || inode->ia_type == IA_IFCHR) {
+        *op_errno = EINVAL;
+        gf_msg(this->name, GF_LOG_ERROR, *op_errno, P_MSG_INVALID_ARGUMENT,
+               "%s received on %s file (%s)", fop,
+               (inode->ia_type == IA_IFBLK) ? "block" : "char",
+               uuid_utoa(inode->gfid));
+        goto out;
+    }
+
+    ret = 0;
+
+out:
+    return ret;
+}
diff --git a/xlators/storage/posix/src/posix-inode-fd-ops.c b/xlators/storage/posix/src/posix-inode-fd-ops.c
index 9e2b37f582c..dafd1855ef9 100644
--- a/xlators/storage/posix/src/posix-inode-fd-ops.c
+++ b/xlators/storage/posix/src/posix-inode-fd-ops.c
@@ -1960,6 +1960,291 @@ out:
 }
 
+/*
+ * posix_copy_file_range - copy_file_range fop of the posix xlator.
+ *
+ * Copies up to len bytes from fd_in at off_in to fd_out at off_out with a
+ * single sys_copy_file_range call and unwinds with the number of bytes
+ * copied, the iatt of the source, and the iatts of the destination taken
+ * before and after the copy. On failure the fop unwinds with op_ret -1.
+ * When xdata carries GLUSTERFS_WRITE_UPDATE_ATOMIC, the destination stats
+ * and the copy are done under the destination inode's write_atomic_lock.
+ */
 int32_t
+posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+                      off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len,
+                      uint32_t flags, dict_t *xdata)
+{
+    int32_t op_ret = -1;
+    int32_t op_errno = 0;
+    int _fd_in = -1;
+    int _fd_out = -1;
+    struct posix_private *priv = NULL;
+    struct posix_fd *pfd_in = NULL;
+    struct posix_fd *pfd_out = NULL;
+    struct iatt preop_dst = {
+        0,
+    };
+    struct iatt postop_dst = {
+        0,
+    };
+    struct iatt stbuf = {
+        0,
+    };
+    int ret = -1;
+    dict_t *rsp_xdata = NULL;
+    int is_append = 0;
+    gf_boolean_t locked = _gf_false;
+    gf_boolean_t update_atomic = _gf_false;
+    posix_inode_ctx_t *ctx = NULL;
+
+    VALIDATE_OR_GOTO(frame, out);
+    VALIDATE_OR_GOTO(this, out);
+    VALIDATE_OR_GOTO(fd_in, out);
+    VALIDATE_OR_GOTO(fd_in->inode, out);
+    VALIDATE_OR_GOTO(fd_out, out);
+    VALIDATE_OR_GOTO(fd_out->inode, out);
+    VALIDATE_OR_GOTO(this->private, out);
+
+    priv = this->private;
+
+    VALIDATE_OR_GOTO(priv, out);
+    DISK_SPACE_CHECK_AND_GOTO(frame, priv, xdata, op_ret, op_errno, out);
+
+    if (posix_check_dev_file(this, fd_in->inode, "copy_file_range", &op_errno))
+        goto out;
+
+    if (posix_check_dev_file(this, fd_out->inode, "copy_file_range", &op_errno))
+        goto out;
+
+    ret = posix_fd_ctx_get(fd_in, this, &pfd_in, &op_errno);
+    if (ret < 0) {
+        gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
+               "pfd is NULL from fd=%p", fd_in);
+        goto out;
+    }
+
+    _fd_in = pfd_in->fd;
+
+    ret = posix_fd_ctx_get(fd_out, this, &pfd_out, &op_errno);
+    if (ret < 0) {
+        gf_msg(this->name, GF_LOG_WARNING, ret, P_MSG_PFD_NULL,
+               "pfd is NULL from fd=%p", fd_out);
+        goto out;
+    }
+
+    _fd_out = pfd_out->fd;
+
+    /*
+     * Currently, the internal write is checked via xdata which
+     * is set by some xlator above. It could be due to several of
+     * the reasons such as healing or a snapshot operation happening
+     * using copy_file_range. As of now (i.e. writing the patch with
+     * this change) none of the xlators above posix are using the
+     * internal write with copy_file_range. In future it might
+     * change. At least as of now the hope is that, when that happens
+     * this function or fop does not require additional changes for
+     * handling internal writes.
+     */
+    ret = posix_check_internal_writes(this, fd_out, _fd_out, xdata);
+    if (ret < 0) {
+        gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+               "possible overwrite from internal client, fd=%p", fd_out);
+        op_ret = -1;
+        op_errno = EBUSY;
+        goto out;
+    }
+
+    if (xdata) {
+        if (dict_get(xdata, GLUSTERFS_WRITE_UPDATE_ATOMIC))
+            update_atomic = _gf_true;
+    }
+
+    /*
+     * The update_atomic option is to instruct posix to do prestat,
+     * write and poststat atomically. This is to prevent any modification to
+     * ia_size and ia_blocks until poststat and the diff in their values
+     * between pre and poststat could be of use for some translators.
+     * This is similar to the atomic write operation. Atomic write
+     * (i.e. prestat + write + poststat) is used by shard as of now. In case
+     * some xlator needs copy_file_range to be atomic from prestat and poststat
+     * perspective (i.e. prestat + copy_file_range + poststat) then it has
+     * to send "GLUSTERFS_WRITE_UPDATE_ATOMIC" key in xdata.
+     */
+
+    op_ret = posix_inode_ctx_get_all(fd_out->inode, this, &ctx);
+    if (op_ret < 0) {
+        op_errno = ENOMEM;
+        goto out;
+    }
+
+    if (update_atomic) {
+        ret = pthread_mutex_lock(&ctx->write_atomic_lock);
+        if (!ret)
+            locked = _gf_true;
+        else {
+            /*
+             * pthread_mutex_lock reports failure through its return
+             * value, not errno. op_ret still holds the non-negative
+             * result of posix_inode_ctx_get_all, so fail the fop here.
+             */
+            op_ret = -1;
+            op_errno = ret;
+            gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_MUTEX_FAILED,
+                   "failed to hold write atomic lock on %s",
+                   uuid_utoa(fd_out->inode->gfid));
+            goto out;
+        }
+    }
+
+    op_ret = posix_fdstat(this, fd_out->inode, _fd_out, &preop_dst);
+    if (op_ret == -1) {
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+               "pre-operation fstat failed on fd=%p", fd_out);
+        goto out;
+    }
+
+    /*
+     * Since, only the destination file (fd_out) is undergoing
+     * modification, the write related tests are done on that.
+     * i.e. this is treated similar to as if the destination file
+     * undergoing write fop from maintenance perspective.
+     */
+    if (xdata) {
+        op_ret = posix_cs_maintenance(this, fd_out, NULL, &_fd_out, &preop_dst,
+                                      NULL, xdata, &rsp_xdata, _gf_false);
+        if (op_ret < 0) {
+            gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+                   "file state check failed, fd %p", fd_out);
+            op_errno = EIO;
+            goto out;
+        }
+    }
+
+    /*
+     * NOTE: This is just doing a single execution of copy_file_range
+     *       system call. If the returned value of this system call is less
+     *       than len, then should we keep doing it in a for loop until the
+     *       copy_file_range of all the len bytes is done?
+     *       Check the example program provided in the man page of
+     *       copy_file_range.
+     *       If so, then separate variables for both off_in and off_out
+     *       should be used which are initialized to off_in and off_out
+     *       that this function call receives, but then advanced by the
+     *       value returned by sys_copy_file_range and then use that as
+     *       off_in and off_out for next instance of copy_file_range execution.
+     */
+    op_ret = sys_copy_file_range(_fd_in, &off_in, _fd_out, &off_out, len,
+                                 flags);
+
+    if (op_ret < 0) {
+        op_errno = -op_ret;
+        op_ret = -1;
+        gf_msg(this->name, GF_LOG_ERROR, op_errno, P_MSG_COPY_FILE_RANGE_FAILED,
+               "copy_file_range failed: fd_in: %p (gfid: %s) ,"
+               " fd_out %p (gfid:%s)",
+               fd_in, uuid_utoa(fd_in->inode->gfid), fd_out,
+               uuid_utoa(fd_out->inode->gfid));
+        goto out;
+    }
+
+    /*
+     * Let this be as it is for now. This function collects
+     * information such as open fd count etc. So, even though
+     * is_append does not apply to copy_file_range, for now,
+     * allowing it to be recorded in the dict as _gf_false.
+     */
+    rsp_xdata = _fill_writev_xdata(fd_out, xdata, this, is_append);
+
+    /* copy_file_range successful, we also need to get the stat of
+     * the file we wrote to (i.e. destination file or fd_out).
+     */
+    ret = posix_fdstat(this, fd_out->inode, _fd_out, &postop_dst);
+    if (ret == -1) {
+        op_ret = -1;
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+               "post-operation fstat failed on fd=%p", fd_out);
+        goto out;
+    }
+
+    /*
+     * Also perform the stat on the source fd (i.e. fd_in). For now,
+     * allowing it to be done within the locked region if the request
+     * is for atomic operation (and update) of copy_file_range.
+     */
+    ret = posix_fdstat(this, fd_in->inode, _fd_in, &stbuf);
+    if (ret == -1) {
+        op_ret = -1;
+        op_errno = errno;
+        gf_msg(this->name, GF_LOG_ERROR, errno, P_MSG_FSTAT_FAILED,
+               "post-operation fstat failed on fd=%p", fd_in);
+        goto out;
+    }
+
+    /*
+     * The core logic of what time attributes are to be updated
+     * on a fop is decided at client side xlator utime.
+     * All the remaining fops call posix_set_ctime function
+     * to update the {a,m,c}time. But, for all the other fops,
+     * the operation is happening on only one file (or inode).
+     * But here, there are 2 fds (source and destination). Hence
+     * the new function below to update the appropriate times for
+     * both the source and the destination file.
+     * For the source file, if at all anything has to be updated,
+     * it would be atime (as that file is only read, not updated).
+     * For the destination file, the attributes that require the
+     * modification would be mtime and ctime.
+     * What times have to be changed is actually determined by
+     * utime xlator. But, all of them would be in frame->root->flags.
+     * So, currently posix assumes that, the atime flag is for
+     * the source file and the other 2 flags are for the destination
+     * file. Since, the assumption is rigid (i.e. atime for source
+     * and {m,c}time for destination), the below function is called
+     * posix_set_ctime_cfr (cfr standing for copy_file_range).
+     * FUTURE TODO:
+     * In future, some other functionality or fop might operate
+     * simultaneously on 2 files. Then, depending upon what that new
+     * fop does or what are its requirements, the below function might
+     * require changes to become generic for consumption in case of
+     * simultaneous operations on 2 files.
+     */
+    posix_set_ctime_cfr(frame, this, NULL, pfd_in->fd, fd_in->inode, &stbuf,
+                        NULL, pfd_out->fd, fd_out->inode, &postop_dst);
+
+    if (locked) {
+        pthread_mutex_unlock(&ctx->write_atomic_lock);
+        locked = _gf_false;
+    }
+
+    /*
+     * Record copy_file_range in priv->write_value for now.
+     * If not needed, remove below section of code along with
+     * this comment (or add comment to explain why it is not
+     * needed).
+     */
+    LOCK(&priv->lock);
+    {
+        priv->write_value += op_ret;
+    }
+    UNLOCK(&priv->lock);
+
+out:
+
+    if (locked) {
+        pthread_mutex_unlock(&ctx->write_atomic_lock);
+        locked = _gf_false;
+    }
+
+    STACK_UNWIND_STRICT(copy_file_range, frame, op_ret, op_errno, &stbuf,
+                        &preop_dst, &postop_dst, rsp_xdata);
+
+    if (rsp_xdata)
+        dict_unref(rsp_xdata);
+    return 0;
+}
+
+int32_t
 posix_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
 {
     char *real_path = NULL;
diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h
index 62af32ac8fe..928723db8f9 100644
--- a/xlators/storage/posix/src/posix-messages.h
+++ b/xlators/storage/posix/src/posix-messages.h
@@ -67,6 +67,7 @@ GLFS_MSGID(POSIX, P_MSG_XATTR_FAILED, P_MSG_NULL_GFID, P_MSG_FCNTL_FAILED,
            P_MSG_ANCESTORY_FAILED, P_MSG_DISK_SPACE_CHECK_FAILED,
            P_MSG_FALLOCATE_FAILED, P_MSG_STOREMDATA_FAILED,
            P_MSG_FETCHMDATA_FAILED, P_MSG_GETMDATA_FAILED,
-           P_MSG_SETMDATA_FAILED, P_MSG_FRESHFILE);
+           P_MSG_SETMDATA_FAILED, P_MSG_FRESHFILE, P_MSG_MUTEX_FAILED,
+           P_MSG_COPY_FILE_RANGE_FAILED);
 
 #endif /* !_GLUSTERD_MESSAGES_H_ */
diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
index 26fae2019b5..62669a0b83f 100644
--- a/xlators/storage/posix/src/posix-metadata.c
+++ 
b/xlators/storage/posix/src/posix-metadata.c
@@ -663,3 +663,93 @@ posix_set_parent_ctime(call_frame_t *frame, xlator_t *this,
 out:
     return;
 }
+
+/*
+ * posix_set_ctime_cfr - update time metadata after a copy_file_range.
+ *
+ * Does nothing unless priv->ctime is set, and returns early when none of
+ * the ctime/mtime/atime flags decoded from frame->root->flags is set or
+ * when frame->root->ctime.tv_sec is 0.
+ *
+ * The destination (real_path_out/fd_out/inode_out/stbuf_out) is updated
+ * with the requested flags minus atime; the source
+ * (real_path_in/fd_in/inode_in/stbuf_in) is updated with atime only.
+ * A failing posix_set_mdata_xattr call is logged and otherwise ignored.
+ */
+void
+posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this,
+                    const char *real_path_in, int fd_in, inode_t *inode_in,
+                    struct iatt *stbuf_in, const char *real_path_out,
+                    int fd_out, inode_t *inode_out, struct iatt *stbuf_out)
+{
+    posix_mdata_flag_t flag = {
+        0,
+    };
+    posix_mdata_flag_t flag_dup = {
+        0,
+    };
+    int ret = 0;
+    struct posix_private *priv = NULL;
+
+    priv = this->private;
+
+    if (priv->ctime) {
+        (void)posix_get_mdata_flag(frame->root->flags, &flag);
+        if ((flag.ctime == 0) && (flag.mtime == 0) && (flag.atime == 0)) {
+            goto out;
+        }
+
+        if (frame->root->ctime.tv_sec == 0) {
+            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+                   "posix set mdata failed, No ctime : in: %s gfid_in:%s "
+                   "out: %s gfid_out:%s",
+                   real_path_in,
+                   inode_in ? uuid_utoa(inode_in->gfid) : "No inode",
+                   real_path_out,
+                   inode_out ? uuid_utoa(inode_out->gfid) : "No inode");
+            goto out;
+        }
+
+        flag_dup = flag;
+
+        /*
+         * For the destination file, no need to update atime.
+         * It got modified. Hence the things that need to be
+         * changed are mtime and ctime (provided the utime
+         * xlator from the client has set those flags, which
+         * are just copied to flag_dup).
+         */
+        if (flag.atime)
+            flag_dup.atime = 0;
+
+        ret = posix_set_mdata_xattr(this, real_path_out, fd_out, inode_out,
+                                    &frame->root->ctime, stbuf_out, &flag_dup,
+                                    _gf_false);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+                   "posix set mdata failed on file: %s gfid:%s", real_path_out,
+                   inode_out ? uuid_utoa(inode_out->gfid) : "No inode");
+        }
+
+        /*
+         * For the source file, no need to change the mtime and ctime.
+         * For source file, it is only read operation. So, if at all
+         * anything needs to be updated, it is only the atime.
+         */
+        if (flag.atime)
+            flag_dup.atime = flag.atime;
+        flag_dup.mtime = 0;
+        flag_dup.ctime = 0;
+
+        ret = posix_set_mdata_xattr(this, real_path_in, fd_in, inode_in,
+                                    &frame->root->ctime, stbuf_in, &flag_dup,
+                                    _gf_false);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_WARNING, errno, P_MSG_SETMDATA_FAILED,
+                   "posix set mdata failed on file: %s gfid:%s", real_path_in,
+                   inode_in ? uuid_utoa(inode_in->gfid) : "No inode");
+        }
+    }
+out:
+    return;
+}
diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h
index e1b549d55a1..3416148ea97 100644
--- a/xlators/storage/posix/src/posix-metadata.h
+++ b/xlators/storage/posix/src/posix-metadata.h
@@ -48,5 +48,10 @@ void
 posix_set_parent_ctime(call_frame_t *frame, xlator_t *this,
                        const char *real_path, int fd, inode_t *inode,
                        struct iatt *stbuf);
+void
+posix_set_ctime_cfr(call_frame_t *frame, xlator_t *this,
+                    const char *real_path_in, int fd_in, inode_t *inode_in,
+                    struct iatt *stbuf_in, const char *real_path_out,
+                    int fd_out, inode_t *inode_out, struct iatt *stbuf_out);
 
 #endif /* _POSIX_METADATA_H */
diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c
index d6a20135f96..42b965434b9 100644
--- a/xlators/storage/posix/src/posix.c
+++ b/xlators/storage/posix/src/posix.c
@@ -76,6 +76,7 @@ struct xlator_fops fops = {
     .seek = posix_seek,
     .lease = posix_lease,
     .put = posix_put,
+    .copy_file_range = posix_copy_file_range,
 };
 
 struct xlator_cbks cbks = {
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index 1f1d4fc2774..a1ec996f4b2 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -638,6 +638,11 @@ posix_put(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
           off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata);
 
 int32_t
+posix_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+                      off64_t off_in, fd_t *fd_out, off64_t off_out, size_t len,
+                      uint32_t flags, dict_t *xdata);
+
+int32_t
 posix_set_mode_in_dict(dict_t *in_dict, dict_t *out_dict,
                        struct iatt *in_stbuf);
 
@@ -656,5 +661,7 @@ int
posix_cs_maintenance(xlator_t *this, fd_t *fd, loc_t *loc, int *pfd,                       struct iatt *buf, const char *realpath, dict_t *xattr_req,                       dict_t **xattr_rsp, gf_boolean_t ignore_failure); +int +posix_check_dev_file(xlator_t *this, inode_t *inode, char *fop, int *op_errno);  #endif /* _POSIX_H */  | 
