summary | refs | log | tree | commit | diff | stats
path: root/xlators/storage
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/storage')
-rw-r--r-- xlators/storage/posix/src/Makefile.am | 5
-rw-r--r-- xlators/storage/posix/src/posix-entry-ops.c | 18
-rw-r--r-- xlators/storage/posix/src/posix-mem-types.h | 1
-rw-r--r-- xlators/storage/posix/src/posix-messages.h | 4
-rw-r--r-- xlators/storage/posix/src/posix-metadata-disk.h | 31
-rw-r--r-- xlators/storage/posix/src/posix-metadata.c | 510
-rw-r--r-- xlators/storage/posix/src/posix-metadata.h | 49
-rw-r--r-- xlators/storage/posix/src/posix.h | 17
8 files changed, 614 insertions, 21 deletions
diff --git a/xlators/storage/posix/src/Makefile.am b/xlators/storage/posix/src/Makefile.am
index 59d462336d1..d8af6221e4e 100644
--- a/xlators/storage/posix/src/Makefile.am
+++ b/xlators/storage/posix/src/Makefile.am
@@ -7,12 +7,13 @@ posix_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
posix_la_SOURCES = posix.c posix-helpers.c posix-handle.c posix-aio.c \
posix-gfid-path.c posix-entry-ops.c posix-inode-fd-ops.c \
- posix-common.c
+ posix-common.c posix-metadata.c
posix_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIBAIO) \
$(ACL_LIBS)
noinst_HEADERS = posix.h posix-mem-types.h posix-handle.h posix-aio.h \
- posix-messages.h posix-gfid-path.h posix-inode-handle.h
+ posix-messages.h posix-gfid-path.h posix-inode-handle.h \
+ posix-metadata.h posix-metadata-disk.h
AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
diff --git a/xlators/storage/posix/src/posix-entry-ops.c b/xlators/storage/posix/src/posix-entry-ops.c
index 0abe380ee43..438fbe509e4 100644
--- a/xlators/storage/posix/src/posix-entry-ops.c
+++ b/xlators/storage/posix/src/posix-entry-ops.c
@@ -88,24 +88,6 @@ extern char *marker_xattrs[];
#endif
-/* Setting microseconds or nanoseconds depending on what's supported:
- The passed in `tv` can be
- struct timespec
- if supported (better, because it supports nanosecond resolution) or
- struct timeval
- otherwise. */
-#if HAVE_UTIMENSAT
-#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \
- tv.tv_nsec = nanosecs
-#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \
- (sys_utimensat (AT_FDCWD, path, tv, AT_SYMLINK_NOFOLLOW))
-#else
-#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \
- tv.tv_usec = nanosecs / 1000
-#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \
- (lutimes (path, tv))
-#endif
-
gf_boolean_t
posix_symlinks_match (xlator_t *this, loc_t *loc, uuid_t gfid)
{
diff --git a/xlators/storage/posix/src/posix-mem-types.h b/xlators/storage/posix/src/posix-mem-types.h
index b463c086be5..0180900ee8e 100644
--- a/xlators/storage/posix/src/posix-mem-types.h
+++ b/xlators/storage/posix/src/posix-mem-types.h
@@ -22,6 +22,7 @@ enum gf_posix_mem_types_ {
gf_posix_mt_trash_path,
gf_posix_mt_paiocb,
gf_posix_mt_inode_ctx_t,
+ gf_posix_mt_mdata_attr,
gf_posix_mt_end
};
#endif
diff --git a/xlators/storage/posix/src/posix-messages.h b/xlators/storage/posix/src/posix-messages.h
index a05f6739958..6b5332b6d09 100644
--- a/xlators/storage/posix/src/posix-messages.h
+++ b/xlators/storage/posix/src/posix-messages.h
@@ -136,7 +136,9 @@ GLFS_MSGID(POSIX,
P_MSG_LEASE_DISABLED,
P_MSG_ANCESTORY_FAILED,
P_MSG_DISK_SPACE_CHECK_FAILED,
- P_MSG_FALLOCATE_FAILED
+ P_MSG_FALLOCATE_FAILED,
+ P_MSG_STOREMDATA_FAILED,
+ P_MSG_FETCHMDATA_FAILED
);
#endif /* !_GLUSTERD_MESSAGES_H_ */
diff --git a/xlators/storage/posix/src/posix-metadata-disk.h b/xlators/storage/posix/src/posix-metadata-disk.h
new file mode 100644
index 00000000000..b25ad04a633
--- /dev/null
+++ b/xlators/storage/posix/src/posix-metadata-disk.h
@@ -0,0 +1,31 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _POSIX_METADATA_DISK_H
+#define _POSIX_METADATA_DISK_H
+
+/* The fixed-width integer types below come from <stdint.h>; include it
+ * here so this header does not depend on what was included before it.
+ */
+#include <stdint.h>
+
+/* On-disk form of a single timestamp: seconds and nanoseconds, each held
+ * in an unsigned 64-bit field (16 bytes, no padding).
+ */
+typedef struct gf_timespec_disk {
+        uint64_t tv_sec;
+        uint64_t tv_nsec;
+} gf_timespec_disk_t;
+
+/* posix_mdata_t on disk structure.
+ *
+ * The structure is packed, so the 1-byte version is immediately followed
+ * by the 8-byte flags and the three timestamps (57 bytes in total). Because
+ * of the packing the multi-byte members are not naturally aligned: access
+ * them through the structure, do not take pointers to them.
+ */
+typedef struct __attribute__ ((__packed__)) posix_mdata_disk {
+        /* version of structure, bumped up if any new member is added */
+        uint8_t version;
+        /* flags indicates valid fields in the structure */
+        uint64_t flags;
+        gf_timespec_disk_t ctime;
+        gf_timespec_disk_t mtime;
+        gf_timespec_disk_t atime;
+} posix_mdata_disk_t;
+
+#endif /* _POSIX_METADATA_DISK_H */
diff --git a/xlators/storage/posix/src/posix-metadata.c b/xlators/storage/posix/src/posix-metadata.c
new file mode 100644
index 00000000000..4e75a4f1411
--- /dev/null
+++ b/xlators/storage/posix/src/posix-metadata.c
@@ -0,0 +1,510 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "xlator.h"
+#include "posix-metadata.h"
+#include "posix-metadata-disk.h"
+#include "posix-handle.h"
+#include "posix-messages.h"
+#include "syscall.h"
+#include "compat-errno.h"
+#include "compat.h"
+
+/* Counter passed to GF_LOG_OCCASIONALLY by posix_fetch_mdata_xattr() when
+ * getxattr reports ENOTSUP/ENOSYS ("Extended attributes not supported").
+ */
+static int gf_posix_xattr_enotsup_log;
+
+/* posix_mdata_to_disk fills the on-disk structure `out` from the in-memory
+ * `in`. The one-byte version is copied as is; flags and every timestamp
+ * field go through htobe64() so that the stored bytes are big-endian
+ * whatever the host byte order is.
+ *
+ * `out` is a packed structure, so its members are only ever written through
+ * `out` itself, never through a pointer to a member.
+ */
+static inline void
+posix_mdata_to_disk (posix_mdata_disk_t *out, posix_mdata_t *in)
+{
+        const struct timespec *changed  = &in->ctime;
+        const struct timespec *modified = &in->mtime;
+        const struct timespec *accessed = &in->atime;
+
+        out->atime.tv_nsec = htobe64(accessed->tv_nsec);
+        out->atime.tv_sec  = htobe64(accessed->tv_sec);
+
+        out->mtime.tv_nsec = htobe64(modified->tv_nsec);
+        out->mtime.tv_sec  = htobe64(modified->tv_sec);
+
+        out->ctime.tv_nsec = htobe64(changed->tv_nsec);
+        out->ctime.tv_sec  = htobe64(changed->tv_sec);
+
+        out->flags   = htobe64(in->flags);
+        out->version = in->version;
+}
+
+/* posix_mdata_from_disk is the inverse of posix_mdata_to_disk: it decodes
+ * the big-endian on-disk structure `in` into the host-order in-memory
+ * structure `out` using be64toh(). The version byte is copied unchanged.
+ *
+ * `in` is a packed structure; its members are read through `in` directly.
+ */
+static inline void
+posix_mdata_from_disk (posix_mdata_t *out, posix_mdata_disk_t *in)
+{
+        struct timespec *changed  = &out->ctime;
+        struct timespec *modified = &out->mtime;
+        struct timespec *accessed = &out->atime;
+
+        out->version = in->version;
+        out->flags   = be64toh(in->flags);
+
+        changed->tv_sec   = be64toh(in->ctime.tv_sec);
+        changed->tv_nsec  = be64toh(in->ctime.tv_nsec);
+
+        modified->tv_sec  = be64toh(in->mtime.tv_sec);
+        modified->tv_nsec = be64toh(in->mtime.tv_nsec);
+
+        accessed->tv_sec  = be64toh(in->atime.tv_sec);
+        accessed->tv_nsec = be64toh(in->atime.tv_nsec);
+}
+
+/* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk.
+ *
+ * The GF_XATTR_MDATA_KEY xattr is read through _fd when _fd != -1,
+ * otherwise through real_path_arg, otherwise through the handle path built
+ * from inode->gfid. The value is decoded into *metadata.
+ *
+ * Returns 0 on success. Returns -1 when metadata is NULL, the handle path
+ * cannot be built, getxattr fails, memory cannot be allocated, or the
+ * stored value is shorter than posix_mdata_disk_t. *metadata is only
+ * written on success.
+ */
+static int
+posix_fetch_mdata_xattr (xlator_t *this, const char *real_path_arg, int _fd,
+                         inode_t *inode, posix_mdata_t *metadata)
+{
+        /* getxattr returns a signed length (-1 on error); keep it signed
+         * instead of relying on unsigned wrap-around for the error test.
+         */
+        ssize_t       size         = -1;
+        int           op_errno     = 0;
+        int           op_ret       = -1;
+        char         *value        = NULL;
+        gf_boolean_t  fd_based_fop = _gf_false;
+        char          gfid_str[64] = {0};
+        char         *real_path    = NULL;
+        /* Path used for the path based calls and for log messages. */
+        const char   *path         = real_path_arg;
+        char         *key          = GF_XATTR_MDATA_KEY;
+
+        if (!metadata) {
+                op_ret = -1;
+                goto out;
+        }
+
+        if (_fd != -1) {
+                fd_based_fop = _gf_true;
+        }
+        if (!(fd_based_fop || real_path_arg)) {
+                /* MAKE_HANDLE_PATH must be expanded in this function: the
+                 * resulting path is used until the function returns.
+                 */
+                MAKE_HANDLE_PATH (real_path, this, inode->gfid, NULL);
+                if (!real_path) {
+                        uuid_utoa_r (inode->gfid, gfid_str);
+                        gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                                P_MSG_LSTAT_FAILED, "lstat on gfid %s failed",
+                                gfid_str);
+                        op_ret = -1;
+                        goto out;
+                }
+                path = real_path;
+        }
+
+        /* First call with a NULL buffer only asks for the value length. */
+        if (fd_based_fop) {
+                size = sys_fgetxattr (_fd, key, NULL, 0);
+        } else if (path) {
+                size = sys_lgetxattr (path, key, NULL, 0);
+        }
+
+        if (size == -1) {
+                op_errno = errno;
+                if ((op_errno == ENOTSUP) || (op_errno == ENOSYS)) {
+                        GF_LOG_OCCASIONALLY (gf_posix_xattr_enotsup_log,
+                                             this->name, GF_LOG_WARNING,
+                                             "Extended attributes not "
+                                             "supported (try remounting"
+                                             " brick with 'user_xattr' "
+                                             "flag)");
+                } else if (op_errno == ENOATTR ||
+                           op_errno == ENODATA) {
+                        gf_msg_debug (this->name, 0,
+                                      "No such attribute:%s for file %s "
+                                      "gfid: %s",
+                                      key, path ? path : "null",
+                                      uuid_utoa(inode->gfid));
+                } else {
+                        gf_msg (this->name, GF_LOG_DEBUG, op_errno,
+                                P_MSG_XATTR_FAILED, "getxattr failed"
+                                " on %s gfid: %s key: %s ",
+                                path ? path : "null",
+                                uuid_utoa(inode->gfid), key);
+                }
+                op_ret = -1;
+                goto out;
+        }
+
+        value = GF_CALLOC (size + 1, sizeof(char), gf_posix_mt_char);
+        if (!value) {
+                op_ret = -1;
+                op_errno = ENOMEM;
+                goto out;
+        }
+
+        if (fd_based_fop) {
+                size = sys_fgetxattr (_fd, key, value, size);
+        } else if (path) {
+                size = sys_lgetxattr (path, key, value, size);
+        }
+        if (size == -1) {
+                op_ret = -1;
+                op_errno = errno;
+                gf_msg (this->name, GF_LOG_ERROR, op_errno,
+                        P_MSG_XATTR_FAILED, "getxattr failed on "
+                        " on %s gfid: %s key: %s ",
+                        path ? path : "null",
+                        uuid_utoa(inode->gfid), key);
+                goto out;
+        }
+
+        /* The buffer is about to be read as a posix_mdata_disk_t. Refuse a
+         * value that is shorter than that structure instead of reading
+         * past the end of the allocation.
+         */
+        if ((size_t) size < sizeof (posix_mdata_disk_t)) {
+                op_ret = -1;
+                op_errno = EINVAL;
+                gf_msg (this->name, GF_LOG_WARNING, op_errno,
+                        P_MSG_XATTR_FAILED, "xattr %s on %s gfid: %s is "
+                        "%zd bytes, shorter than the %zu byte on-disk "
+                        "structure", key, path ? path : "null",
+                        uuid_utoa(inode->gfid), size,
+                        sizeof (posix_mdata_disk_t));
+                goto out;
+        }
+
+        posix_mdata_from_disk (metadata, (posix_mdata_disk_t*)value);
+
+        op_ret = 0;
+out:
+        GF_FREE (value);
+        return op_ret;
+}
+
+/* posix_store_mdata_xattr stores the posix_mdata_t on disk.
+ *
+ * *metadata is converted with posix_mdata_to_disk() and written as the
+ * GF_XATTR_MDATA_KEY xattr through fd when fd != -1, otherwise through
+ * real_path_arg, otherwise through the handle path built from inode->gfid.
+ *
+ * Returns the setxattr result (0 on success), or -1 when metadata is NULL
+ * or the handle path cannot be built. On failure the error is logged here
+ * and errno is left holding the error that was logged, because callers
+ * report errno after this function returns.
+ */
+static int
+posix_store_mdata_xattr (xlator_t *this, const char *real_path_arg, int fd,
+                         inode_t *inode, posix_mdata_t *metadata)
+{
+        char               *real_path    = NULL;
+        /* Path used for the path based call and for log messages. */
+        const char         *path         = real_path_arg;
+        int                 op_ret       = 0;
+        int                 op_errno     = 0;
+        gf_boolean_t        fd_based_fop = _gf_false;
+        char               *key          = GF_XATTR_MDATA_KEY;
+        char                gfid_str[64] = {0};
+        posix_mdata_disk_t  disk_metadata;
+
+        if (!metadata) {
+                op_ret = -1;
+                /* No syscall failed here, so errno would be stale. */
+                op_errno = EINVAL;
+                goto out;
+        }
+
+        if (fd != -1) {
+                fd_based_fop = _gf_true;
+        }
+        if (!(fd_based_fop || real_path_arg)) {
+                /* MAKE_HANDLE_PATH must be expanded in this function: the
+                 * resulting path is used until the function returns.
+                 */
+                MAKE_HANDLE_PATH (real_path, this, inode->gfid, NULL);
+                if (!real_path) {
+                        op_errno = errno;
+                        uuid_utoa_r (inode->gfid, gfid_str);
+                        gf_msg (this->name, GF_LOG_DEBUG, op_errno,
+                                P_MSG_LSTAT_FAILED, "lstat on gfid %s failed",
+                                gfid_str);
+                        op_ret = -1;
+                        goto out;
+                }
+                path = real_path;
+        }
+
+        /* The version written is whatever the caller put in *metadata. */
+        posix_mdata_to_disk (&disk_metadata, metadata);
+
+        if (fd_based_fop) {
+                op_ret = sys_fsetxattr (fd, key,
+                                        (void *) &disk_metadata,
+                                        sizeof (posix_mdata_disk_t), 0);
+        } else if (path) {
+                op_ret = sys_lsetxattr (path, key,
+                                        (void *) &disk_metadata,
+                                        sizeof (posix_mdata_disk_t), 0);
+        }
+        if (op_ret < 0) {
+                /* Save errno before any other call can overwrite it. */
+                op_errno = errno;
+        }
+
+#ifdef GF_DARWIN_HOST_OS
+        /* NOTE(review): `value` is not declared anywhere in this function,
+         * so this block cannot compile when GF_DARWIN_HOST_OS is defined.
+         * Confirm what posix_dump_buffer() expects and pass the encoded
+         * disk_metadata accordingly.
+         */
+        if (path) {
+                posix_dump_buffer(this, path, key, value, 0);
+        }
+#endif
+out:
+        if (op_ret < 0) {
+                gf_msg (this->name, GF_LOG_ERROR, op_errno, P_MSG_XATTR_FAILED,
+                        "file: %s: gfid: %s key:%s ",
+                        path ? path : "null",
+                        uuid_utoa(inode->gfid), key);
+                errno = op_errno;
+        }
+        return op_ret;
+}
+
+/* __posix_get_mdata_xattr gets posix_mdata_t from inode context. If it
+ * fails to get it from inode context, gets it from disk. This function
+ * does not take inode->lock itself; posix_get_mdata_xattr() is the variant
+ * that calls it with the lock held.
+ *
+ * When the xattr is missing on disk and stbuf is given, the times in stbuf
+ * seed a new version 1 xattr which is stored on disk and cached in the
+ * inode context. On success with a non-NULL stbuf, the c/m/a times of
+ * stbuf are overwritten from the metadata.
+ *
+ * Returns 0 on success (including the "no xattr and no stbuf" case, which
+ * only logs a warning) and -1 on validation, allocation or store failure.
+ */
+int
+__posix_get_mdata_xattr (xlator_t *this, const char *real_path, int _fd,
+                         inode_t *inode, struct iatt *stbuf)
+{
+        posix_mdata_t *mdata = NULL;
+        /* The inode context slot is a uint64_t. Go through a real uint64_t
+         * rather than casting &mdata, which would access 8 bytes of a
+         * pointer-sized object and overrun it where pointers are 32-bit.
+         */
+        uint64_t       ctx   = 0;
+        int            ret   = -1;
+
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        ret = __inode_ctx_get1 (inode, this, &ctx);
+        if (ret == 0) {
+                mdata = (posix_mdata_t *)(uintptr_t) ctx;
+        }
+
+        if (!mdata) {
+                mdata = GF_CALLOC (1, sizeof (posix_mdata_t),
+                                   gf_posix_mt_mdata_attr);
+                if (!mdata) {
+                        ret = -1;
+                        goto out;
+                }
+
+                ret = posix_fetch_mdata_xattr (this, real_path, _fd, inode,
+                                               mdata);
+                if (ret != 0) {
+                        /* Failed to get mdata from disk, xattr missing
+                         * Even new file creation hits here first as
+                         * posix_pstat is generally done before
+                         * posix_set_ctime
+                         */
+                        if (!stbuf) {
+                                /* This case should not be hit. If it hits,
+                                 * don't fail, log warning, free mdata and
+                                 * move on
+                                 */
+                                gf_msg (this->name, GF_LOG_WARNING, errno,
+                                        P_MSG_FETCHMDATA_FAILED,
+                                        "file: %s: gfid: %s key:%s ",
+                                        real_path ? real_path : "null",
+                                        uuid_utoa(inode->gfid),
+                                        GF_XATTR_MDATA_KEY);
+                                GF_FREE (mdata);
+                                ret = 0;
+                                goto out;
+                        }
+
+                        mdata->version = 1;
+                        mdata->flags = 0;
+                        mdata->ctime.tv_sec = stbuf->ia_ctime;
+                        mdata->ctime.tv_nsec = stbuf->ia_ctime_nsec;
+                        mdata->mtime.tv_sec = stbuf->ia_mtime;
+                        mdata->mtime.tv_nsec = stbuf->ia_mtime_nsec;
+                        mdata->atime.tv_sec = stbuf->ia_atime;
+                        mdata->atime.tv_nsec = stbuf->ia_atime_nsec;
+                        ret = posix_store_mdata_xattr (this, real_path,
+                                                       _fd, inode,
+                                                       mdata);
+                        if (ret) {
+                                gf_msg (this->name, GF_LOG_ERROR, errno,
+                                        P_MSG_STOREMDATA_FAILED,
+                                        "file: %s: gfid: %s key:%s ",
+                                        real_path ? real_path : "null",
+                                        uuid_utoa(inode->gfid),
+                                        GF_XATTR_MDATA_KEY);
+                                /* mdata was never handed to the inode
+                                 * context, so it must be released here.
+                                 */
+                                GF_FREE (mdata);
+                                goto out;
+                        }
+                }
+
+                /* Either read from disk (in-memory state was lost, e.g.
+                 * after the brick went down) or freshly created and
+                 * stored: cache it in the inode context.
+                 */
+                ctx = (uint64_t)(uintptr_t) mdata;
+                __inode_ctx_set1 (inode, this, &ctx);
+        }
+
+        ret = 0;
+
+        if (stbuf) {
+                stbuf->ia_ctime = mdata->ctime.tv_sec;
+                stbuf->ia_ctime_nsec = mdata->ctime.tv_nsec;
+                stbuf->ia_mtime = mdata->mtime.tv_sec;
+                stbuf->ia_mtime_nsec = mdata->mtime.tv_nsec;
+                stbuf->ia_atime = mdata->atime.tv_sec;
+                stbuf->ia_atime_nsec = mdata->atime.tv_nsec;
+        }
+
+out:
+        return ret;
+}
+
+/* posix_get_mdata_xattr is the locking front end of
+ * __posix_get_mdata_xattr: it validates inode, takes inode->lock, delegates
+ * the lookup (inode context first, disk second) and releases the lock.
+ *
+ * Returns -1 when inode is NULL, otherwise whatever
+ * __posix_get_mdata_xattr returns.
+ */
+int
+posix_get_mdata_xattr (xlator_t *this, const char *real_path, int _fd,
+                       inode_t *inode, struct iatt *stbuf)
+{
+        int status = -1;
+
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+
+        LOCK (&inode->lock);
+        status = __posix_get_mdata_xattr (this, real_path, _fd, inode,
+                                          stbuf);
+        UNLOCK (&inode->lock);
+
+out:
+        return status;
+}
+
+/* posix_compare_timespec orders two timestamps.
+ *
+ * Returns a positive value when *first is later than *second, a negative
+ * value when it is earlier and 0 when both are equal. Seconds are compared
+ * first, nanoseconds only break ties.
+ *
+ * The result is derived from comparisons, not from a subtraction: the
+ * difference of two time_t (or long) values does not fit in an int and
+ * would be truncated, giving a wrong sign or 0 for timestamps far apart.
+ */
+static int
+posix_compare_timespec (struct timespec *first, struct timespec *second)
+{
+        if (first->tv_sec != second->tv_sec)
+                return (first->tv_sec > second->tv_sec) ? 1 : -1;
+
+        if (first->tv_nsec != second->tv_nsec)
+                return (first->tv_nsec > second->tv_nsec) ? 1 : -1;
+
+        return 0;
+}
+
+/* posix_set_mdata_xattr updates the posix_mdata_t based on the flag
+ * in inode context and stores it on disk.
+ *
+ * For every bit set in *flag (ctime, mtime, atime) the matching cached
+ * timestamp is replaced by *time, but only when *time is later than the
+ * cached value. The result is written to disk on every call. If the
+ * metadata is not cached it is read from disk; if that fails too, a new
+ * version 1 record is started, seeded from stbuf when stbuf is given.
+ *
+ * On success with a non-NULL stbuf, the c/m/a times of stbuf are
+ * overwritten with the stored values. Returns 0 on success, -1 on
+ * validation, allocation or store failure.
+ */
+int
+posix_set_mdata_xattr (xlator_t *this, const char *real_path, int fd,
+                       inode_t *inode, struct timespec *time,
+                       struct iatt *stbuf, posix_mdata_flag_t *flag)
+{
+        posix_mdata_t *mdata = NULL;
+        /* The inode context slot is a uint64_t. Go through a real uint64_t
+         * rather than casting &mdata, which would access 8 bytes of a
+         * pointer-sized object and overrun it where pointers are 32-bit.
+         */
+        uint64_t       ctx   = 0;
+        int            ret   = -1;
+
+        GF_VALIDATE_OR_GOTO ("posix", this, out);
+        GF_VALIDATE_OR_GOTO (this->name, inode, out);
+        /* Both are dereferenced unconditionally below. */
+        GF_VALIDATE_OR_GOTO (this->name, time, out);
+        GF_VALIDATE_OR_GOTO (this->name, flag, out);
+
+        LOCK (&inode->lock);
+        {
+                ret = __inode_ctx_get1 (inode, this, &ctx);
+                if (ret == 0) {
+                        mdata = (posix_mdata_t *)(uintptr_t) ctx;
+                }
+
+                if (!mdata) {
+                        /*
+                         * Do we need to fetch the data from xattr
+                         * If we does we can compare the value and store
+                         * the largest data in inode ctx.
+                         */
+                        mdata = GF_CALLOC (1, sizeof (posix_mdata_t),
+                                           gf_posix_mt_mdata_attr);
+                        if (!mdata) {
+                                ret = -1;
+                                goto unlock;
+                        }
+
+                        ret = posix_fetch_mdata_xattr (this, real_path, fd,
+                                                       inode, mdata);
+                        if (ret != 0) {
+                                /*
+                                 * This is the first time creating the time
+                                 * attr. This happens when you activate this
+                                 * feature, and the legacy file will not have
+                                 * any xattr set.
+                                 *
+                                 * New files will create extended attributes.
+                                 */
+
+                                /*
+                                 * TODO: This is wrong approach, because before
+                                 * creating fresh xattr, we should consult
+                                 * to all replica and/or distribution set.
+                                 *
+                                 * We should contact the time management
+                                 * xlators, and ask them to create an xattr.
+                                 */
+                                mdata->version = 1;
+                                mdata->flags = 0;
+                                if (stbuf) {
+                                        mdata->ctime.tv_sec = stbuf->ia_ctime;
+                                        mdata->ctime.tv_nsec =
+                                                stbuf->ia_ctime_nsec;
+                                        mdata->atime.tv_sec = stbuf->ia_atime;
+                                        mdata->atime.tv_nsec =
+                                                stbuf->ia_atime_nsec;
+                                        mdata->mtime.tv_sec = stbuf->ia_mtime;
+                                        mdata->mtime.tv_nsec =
+                                                stbuf->ia_mtime_nsec;
+                                }
+                                /* Without stbuf the times stay zero (from
+                                 * GF_CALLOC) and only the flagged ones are
+                                 * raised to *time below.
+                                 * NOTE(review): confirm whether unflagged
+                                 * times should be seeded from *time instead.
+                                 */
+                        }
+
+                        /* Cache the record in every case, whether it was
+                         * read from disk (in-memory state lost, e.g. after
+                         * the brick went down) or newly created. Leaving it
+                         * out of the inode context would leak it.
+                         */
+                        ctx = (uint64_t)(uintptr_t) mdata;
+                        __inode_ctx_set1 (inode, this, &ctx);
+                }
+
+                if (flag->ctime &&
+                    posix_compare_timespec (time, &mdata->ctime) > 0) {
+                        mdata->ctime = *time;
+                }
+                if (flag->mtime &&
+                    posix_compare_timespec (time, &mdata->mtime) > 0) {
+                        mdata->mtime = *time;
+                }
+                if (flag->atime &&
+                    posix_compare_timespec (time, &mdata->atime) > 0) {
+                        mdata->atime = *time;
+                }
+
+                /*
+                 * TODO: For a non-linked inode (inode->ia_type == IA_INVAL)
+                 *       we have to sync the data into backend. Because
+                 *       inode_link may return a different inode.
+                 */
+
+                /*
+                 * With this patch set, we are setting the xattr for each update
+                 * We should evaluate the performance, and based on that we can
+                 * decide on asynchronous updation.
+                 */
+                ret = posix_store_mdata_xattr (this, real_path, fd, inode,
+                                               mdata);
+                if (ret) {
+                        gf_msg (this->name, GF_LOG_ERROR, errno,
+                                P_MSG_STOREMDATA_FAILED,
+                                "file: %s: gfid: %s key:%s ",
+                                real_path ? real_path : "null",
+                                uuid_utoa(inode->gfid), GF_XATTR_MDATA_KEY);
+                        /* Leave through unlock: jumping straight to out
+                         * would return with inode->lock still held.
+                         */
+                        goto unlock;
+                }
+
+                /* Report the stored times while the lock still protects
+                 * mdata against concurrent updates.
+                 */
+                if (stbuf) {
+                        stbuf->ia_ctime = mdata->ctime.tv_sec;
+                        stbuf->ia_ctime_nsec = mdata->ctime.tv_nsec;
+                        stbuf->ia_mtime = mdata->mtime.tv_sec;
+                        stbuf->ia_mtime_nsec = mdata->mtime.tv_nsec;
+                        stbuf->ia_atime = mdata->atime.tv_sec;
+                        stbuf->ia_atime_nsec = mdata->atime.tv_nsec;
+                }
+        }
+unlock:
+        UNLOCK (&inode->lock);
+out:
+        return ret;
+}
+
+/* posix_update_utime_in_mdata updates the posix_mdata_t when mtime/atime
+ * is modified using syscall.
+ *
+ * valid selects which times of stbuf are applied:
+ *   GF_SET_ATTR_ATIME - stbuf's atime is applied to atime only.
+ *   GF_SET_ATTR_MTIME - stbuf's mtime is applied to mtime and ctime.
+ * Each requested time gets its own posix_set_mdata_xattr() call, because
+ * that function takes a single timestamp per call; sharing one call would
+ * drop the atime update whenever mtime is requested as well.
+ *
+ * The fd argument is accepted but not used: -1 is always passed down, so
+ * the update goes through real_path or the gfid handle.
+ *
+ * Returns the result of the last posix_set_mdata_xattr() call made,
+ * stopping at the first failure.
+ */
+int
+posix_update_utime_in_mdata (xlator_t *this, const char *real_path, int fd,
+                             inode_t *inode,
+                             struct iatt *stbuf, int valid)
+{
+        int32_t             ret       = -1;
+        /* posix_set_mdata_xattr takes a struct timespec, whether or not
+         * utimensat is available, so never build a struct timeval here.
+         */
+        struct timespec     tv        = {0, };
+        posix_mdata_flag_t  flag      = {0, };
+        gf_boolean_t        set_atime = _gf_false;
+        gf_boolean_t        set_mtime = _gf_false;
+
+        if ((valid & GF_SET_ATTR_ATIME) == GF_SET_ATTR_ATIME) {
+                set_atime = _gf_true;
+        }
+        if ((valid & GF_SET_ATTR_MTIME) == GF_SET_ATTR_MTIME) {
+                set_mtime = _gf_true;
+        }
+
+        if (!set_atime && !set_mtime) {
+                /* Nothing selected: keep the previous behaviour of one
+                 * call with no flag set and a zero timestamp.
+                 */
+                return posix_set_mdata_xattr (this, real_path, -1, inode,
+                                              &tv, NULL, &flag);
+        }
+
+        if (set_atime) {
+                tv.tv_sec = stbuf->ia_atime;
+                tv.tv_nsec = stbuf->ia_atime_nsec;
+
+                flag.ctime = 0;
+                flag.mtime = 0;
+                flag.atime = 1;
+
+                ret = posix_set_mdata_xattr (this, real_path, -1, inode,
+                                             &tv, NULL, &flag);
+                if (ret) {
+                        return ret;
+                }
+        }
+
+        if (set_mtime) {
+                tv.tv_sec = stbuf->ia_mtime;
+                tv.tv_nsec = stbuf->ia_mtime_nsec;
+
+                flag.ctime = 1;
+                flag.mtime = 1;
+                flag.atime = 0;
+
+                ret = posix_set_mdata_xattr (this, real_path, -1, inode,
+                                             &tv, NULL, &flag);
+        }
+
+        return ret;
+}
diff --git a/xlators/storage/posix/src/posix-metadata.h b/xlators/storage/posix/src/posix-metadata.h
new file mode 100644
index 00000000000..b654c83230c
--- /dev/null
+++ b/xlators/storage/posix/src/posix-metadata.h
@@ -0,0 +1,49 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _POSIX_METADATA_H
+#define _POSIX_METADATA_H
+
+#include "posix-metadata-disk.h"
+
+/* In-memory representation of the posix metadata xattr. It holds the same
+ * fields as posix_mdata_disk_t (see posix-metadata-disk.h), with the
+ * timestamps kept as host-order struct timespec values.
+ */
+typedef struct {
+ /* version of structure, bumped up if any new member is added */
+ uint8_t version;
+ /* flags indicates valid fields in the structure */
+ uint64_t flags;
+ /* change, modification and access time, in that order */
+ struct timespec ctime;
+ struct timespec mtime;
+ struct timespec atime;
+} posix_mdata_t;
+
+/* Selects which timestamps posix_set_mdata_xattr() may update: a bit set
+ * to 1 means the matching time is a candidate for replacement.
+ */
+typedef struct {
+ unsigned short ctime : 1;
+ unsigned short mtime : 1;
+ unsigned short atime : 1;
+} posix_mdata_flag_t;
+
+/* Gets the metadata from the inode context, falling back to disk, and
+ * copies the c/m/a times into stbuf when stbuf is given. Takes inode->lock
+ * around the lookup.
+ */
+int
+posix_get_mdata_xattr (xlator_t *this, const char *real_path, int _fd,
+ inode_t *inode, struct iatt *stbuf);
+/* Same lookup without taking inode->lock; posix_get_mdata_xattr() calls
+ * this with the lock held.
+ */
+int
+__posix_get_mdata_xattr (xlator_t *this, const char *real_path, int _fd,
+ inode_t *inode, struct iatt *stbuf);
+/* Replaces the timestamps selected by flag with time when time is later
+ * than the stored value, then writes the metadata xattr to disk.
+ */
+int
+posix_set_mdata_xattr (xlator_t *this, const char *real_path, int fd,
+ inode_t *inode, struct timespec *time,
+ struct iatt *stbuf, posix_mdata_flag_t *flag);
+/* Applies the atime/mtime carried in stbuf to the metadata, as selected by
+ * the GF_SET_ATTR_ATIME / GF_SET_ATTR_MTIME bits of valid.
+ */
+int
+posix_update_utime_in_mdata (xlator_t *this, const char *real_path, int fd,
+ inode_t *inode, struct iatt *stbuf, int valid);
+
+#endif /* _POSIX_METADATA_H */
diff --git a/xlators/storage/posix/src/posix.h b/xlators/storage/posix/src/posix.h
index 5f28be9e414..60a7132ddcb 100644
--- a/xlators/storage/posix/src/posix.h
+++ b/xlators/storage/posix/src/posix.h
@@ -77,6 +77,23 @@
} \
} while (0)
+/* Setting microseconds or nanoseconds depending on what's supported:
+   The passed in `tv` can be
+       struct timespec
+   if supported (better, because it supports nanosecond resolution) or
+       struct timeval
+   otherwise.
+
+   Arguments and expansions are parenthesised so that an expression such
+   as `a + b` passed as `nanosecs` is converted as a whole. */
+#if HAVE_UTIMENSAT
+#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \
+        ((tv).tv_nsec = (nanosecs))
+#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \
+        (sys_utimensat (AT_FDCWD, (path), (tv), AT_SYMLINK_NOFOLLOW))
+#else
+#define SET_TIMESPEC_NSEC_OR_TIMEVAL_USEC(tv, nanosecs) \
+        ((tv).tv_usec = (nanosecs) / 1000)
+#define PATH_SET_TIMESPEC_OR_TIMEVAL(path, tv) \
+        (lutimes ((path), (tv)))
+#endif
#define GFID_NULL_CHECK_AND_GOTO(frame, this, loc, xattr_req, op_ret, \
op_errno, out) \