summaryrefslogtreecommitdiffstats
path: root/xlators/performance/md-cache/src
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/performance/md-cache/src')
-rw-r--r-- xlators/performance/md-cache/src/Makefile.am 29
-rw-r--r-- xlators/performance/md-cache/src/md-cache-mem-types.h 23
-rw-r--r-- xlators/performance/md-cache/src/md-cache-messages.h 29
-rw-r--r-- xlators/performance/md-cache/src/md-cache.c 4020
4 files changed, 4101 insertions, 0 deletions
diff --git a/xlators/performance/md-cache/src/Makefile.am b/xlators/performance/md-cache/src/Makefile.am
new file mode 100644
index 00000000000..447ff0f30f0
--- /dev/null
+++ b/xlators/performance/md-cache/src/Makefile.am
@@ -0,0 +1,29 @@
+# Build md-cache as a loadable module (libtool -module) that is installed
+# into the "performance" xlator directory.
+xlator_LTLIBRARIES = md-cache.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+
+md_cache_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+md_cache_la_SOURCES = md-cache.c
+md_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = md-cache-mem-types.h md-cache-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(CONTRIBDIR)/rbtree
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
+
+# Compatibility alias: (re)create stat-prefetch.so as a symlink to
+# md-cache.so in the install directory, removing any existing entry first.
+stat-prefetch-compat:
+ mkdir -p $(DESTDIR)$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance
+ rm -rf $(DESTDIR)$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance/stat-prefetch.so
+ ln -s ./md-cache.so $(DESTDIR)$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance/stat-prefetch.so
+
+
+# Create the compatibility symlink as part of the install step.
+install-exec-local: stat-prefetch-compat
+
+# Remove the compatibility symlink on uninstall.
+uninstall-local:
+ rm -f $(DESTDIR)$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance/stat-prefetch.so
diff --git a/xlators/performance/md-cache/src/md-cache-mem-types.h b/xlators/performance/md-cache/src/md-cache-mem-types.h
new file mode 100644
index 00000000000..47a07005717
--- /dev/null
+++ b/xlators/performance/md-cache/src/md-cache-mem-types.h
@@ -0,0 +1,23 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __MDC_MEM_TYPES_H__
+#define __MDC_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+/* Memory-accounting type tags passed to GF_CALLOC by md-cache. Numbering
+ * starts right after gf_common_mt_end; gf_mdc_mt_end marks the end of the
+ * md-cache range. */
+enum gf_mdc_mem_types_ {
+ /* used when allocating mdc_local_t (per-frame state) */
+ gf_mdc_mt_mdc_local_t = gf_common_mt_end + 1,
+ /* used when allocating struct md_cache (per-inode context) */
+ gf_mdc_mt_md_cache_t,
+ gf_mdc_mt_mdc_conf_t,
+ gf_mdc_mt_mdc_ipc,
+ gf_mdc_mt_end
+};
+#endif
diff --git a/xlators/performance/md-cache/src/md-cache-messages.h b/xlators/performance/md-cache/src/md-cache-messages.h
new file mode 100644
index 00000000000..f367bad1991
--- /dev/null
+++ b/xlators/performance/md-cache/src/md-cache-messages.h
@@ -0,0 +1,29 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _MD_CACHE_MESSAGES_H_
+#define _MD_CACHE_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+/* Message identifiers of the MD_CACHE component; these are the msgid
+ * arguments passed to gf_msg()/gf_msg_callingfn() in md-cache.c. */
+GLFS_MSGID(MD_CACHE, MD_CACHE_MSG_NO_MEMORY, MD_CACHE_MSG_DISCARD_UPDATE,
+ MD_CACHE_MSG_CACHE_UPDATE, MD_CACHE_MSG_IPC_UPCALL_FAILED,
+ MD_CACHE_MSG_NO_XATTR_CACHE);
+
+#endif /* _MD_CACHE_MESSAGES_H_ */
diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c
new file mode 100644
index 00000000000..a405be51f02
--- /dev/null
+++ b/xlators/performance/md-cache/src/md-cache.c
@@ -0,0 +1,4020 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/syncop.h>
+#include "md-cache-mem-types.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/glusterfs-acl.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/upcall-utils.h>
+#include <assert.h>
+#include <sys/time.h>
+#include "md-cache-messages.h"
+#include <glusterfs/statedump.h>
+#include <glusterfs/atomic.h>
+
+/* TODO:
+ - cache symlink() link names and nuke symlink-cache
+ - send proper postbuf in setattr_cbk even when op_ret = -1
+*/
+
+/* Single cached statfs result kept inside struct mdc_conf. 'buf' and
+ * 'last_refreshed' are written and read with 'lock' held. */
+struct mdc_statfs_cache {
+ pthread_mutex_t lock;
+ time_t last_refreshed; /* (time_t)-1 if not yet initialized. */
+ struct statvfs buf;
+};
+
+/* Atomic event counters kept in struct mdc_conf (updated with
+ * GF_ATOMIC_INC from the fop paths). */
+struct mdc_statistics {
+ gf_atomic_t stat_hit; /* No. of times lookup/stat was served from
+ mdc */
+
+ gf_atomic_t stat_miss; /* No. of times valid stat wasn't present in
+ mdc */
+
+ gf_atomic_t xattr_hit; /* No. of times getxattr was served from mdc,
+ Note: this doesn't count the xattr served
+ from lookup */
+
+ gf_atomic_t xattr_miss; /* No. of times xattr req was WIND from mdc */
+ gf_atomic_t negative_lookup; /* No. of negative lookups */
+ gf_atomic_t nameless_lookup; /* No. of negative lookups that were sent
+ to bricks */
+
+ gf_atomic_t stat_invals; /* No. of invalidates received from upcall */
+ gf_atomic_t xattr_invals; /* No. of invalidates received from upcall */
+ gf_atomic_t need_lookup; /* No. of lookups issued, because other
+ xlators requested for explicit lookup */
+};
+
+/* Per-translator configuration and shared state (stored in this->private). */
+struct mdc_conf {
+ /* Cache lifetime in seconds; compared against gf_time() deltas. */
+ uint32_t timeout;
+ gf_boolean_t cache_posix_acl;
+ gf_boolean_t cache_glusterfs_acl;
+ gf_boolean_t cache_selinux;
+ gf_boolean_t cache_capability;
+ gf_boolean_t cache_ima;
+ gf_boolean_t force_readdirp;
+ gf_boolean_t cache_swift_metadata;
+ gf_boolean_t cache_samba_metadata;
+ gf_boolean_t mdc_invalidation;
+ /* When set, an iatt update whose prebuf does not match the cached
+ * mtime/ctime triggers inode_invalidate() on regular files. */
+ gf_boolean_t global_invalidation;
+
+ /* Entries cached before this time are treated as invalid; 0 means
+ * "not set" (see __is_cache_valid). Read without taking 'lock'. */
+ time_t last_child_down;
+ gf_lock_t lock;
+ struct mdc_statistics mdc_counter;
+ /* Enables serving statfs from 'statfs_cache'. */
+ gf_boolean_t cache_statfs;
+ struct mdc_statfs_cache statfs_cache;
+ /* Comma-separated list of fnmatch() patterns naming the xattr keys
+ * that may be cached; NULL disables xattr caching. */
+ char *mdc_xattr_str;
+ /* Global generation counter used to order cache updates. */
+ gf_atomic_int32_t generation;
+};
+
+/* Per-frame state; the full definition follows struct md_cache. */
+struct mdc_local;
+typedef struct mdc_local mdc_local_t;
+
+/*
+ * Unwind 'frame' for 'fop' and release the md-cache per-frame state.
+ *
+ * frame->local is detached (set to NULL) before STACK_UNWIND_STRICT runs and
+ * is released with mdc_local_wipe() only afterwards, so the values passed in
+ * 'params' may still point into the local while the unwind executes.
+ * A NULL frame skips the detach; mdc_local_wipe() is then called with NULL.
+ */
+#define MDC_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ mdc_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ if (frame) { \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ mdc_local_wipe(__xl, __local); \
+ } while (0)
+
+/* Per-inode cache context (stored in the inode ctx of this xlator).
+ * The md_* fields mirror the corresponding struct iatt members; see
+ * mdc_from_iatt()/mdc_to_iatt(). Fields are accessed under 'lock'. */
+struct md_cache {
+ ia_prot_t md_prot;
+ uint32_t md_nlink;
+ uint32_t md_uid;
+ uint32_t md_gid;
+ uint32_t md_atime_nsec;
+ uint32_t md_mtime_nsec;
+ uint32_t md_ctime_nsec;
+ int64_t md_atime;
+ int64_t md_mtime;
+ int64_t md_ctime;
+ uint64_t md_rdev;
+ uint64_t md_size;
+ uint64_t md_blocks;
+ /* Generation recorded at the last invalidation (stored masked to its
+ * low 32 bits); reset to 0 when the iatt expires or the counter wraps. */
+ uint64_t generation;
+ /* Cached xattrs; NULL means no matching keys were cached. */
+ dict_t *xattr;
+ char *linkname;
+ /* Time the iatt was cached; 0 means "not valid". */
+ time_t ia_time;
+ /* Time the xattrs were cached; 0 means "not valid". */
+ time_t xa_time;
+ /* Set to force the next lookup to be wound; cleared when consumed. */
+ gf_boolean_t need_lookup;
+ /* False after an invalidation until a fresh iatt is stored. */
+ gf_boolean_t valid;
+ /* Toggled each time the global generation counter wraps to 0. */
+ gf_boolean_t gen_rollover;
+ gf_boolean_t invalidation_rollover;
+ gf_lock_t lock;
+};
+
+/* Per-frame state attached to frame->local; released by mdc_local_wipe(). */
+struct mdc_local {
+ loc_t loc;
+ loc_t loc2;
+ fd_t *fd;
+ char *linkname;
+ char *key;
+ dict_t *xattr;
+ /* Generation snapshot taken when the local was created; passed to
+ * the iatt setters to order this fop against invalidations. */
+ uint64_t incident_time;
+ /* True when the cacheable xattr keys were added to the request, so
+ * the callback may store the returned xattrs. */
+ bool update_cache;
+};
+
+/*
+ * Read this xlator's md_cache context from 'inode' using the unlocked
+ * __inode_ctx_get(); callers in this file hold inode->lock around it.
+ *
+ * Returns the result of __inode_ctx_get(). On 0, and only if 'mdc_p' is
+ * non-NULL, *mdc_p receives the stored context pointer.
+ */
+int
+__mdc_inode_ctx_get(xlator_t *this, inode_t *inode, struct md_cache **mdc_p)
+{
+    uint64_t stored = 0;
+    int status = __inode_ctx_get(inode, this, &stored);
+
+    if ((status == 0) && (mdc_p != NULL))
+        *mdc_p = (void *)(long)(stored);
+
+    return status;
+}
+
+/*
+ * Locked wrapper around __mdc_inode_ctx_get().
+ *
+ * Returns -1 for a NULL inode, otherwise whatever the unlocked getter
+ * returns (0 when a context is present).
+ */
+int
+mdc_inode_ctx_get(xlator_t *this, inode_t *inode, struct md_cache **mdc_p)
+{
+    int status;
+
+    if (inode == NULL)
+        return -1;
+
+    LOCK(&inode->lock);
+    status = __mdc_inode_ctx_get(this, inode, mdc_p);
+    UNLOCK(&inode->lock);
+
+    return status;
+}
+
+/*
+ * Advance the global generation counter on behalf of 'mdc'.
+ * Callers in this file hold mdc->lock.
+ *
+ * Returns a 64-bit value whose low 32 bits are the new generation and whose
+ * bit 32 carries mdc->gen_rollover. When the counter wraps to 0 the rollover
+ * flag of this context is toggled, its cached iatt is dropped (ia_time and
+ * generation reset) and the counter is advanced once more so that 0 is never
+ * handed out.
+ *
+ * conf->generation is a signed 32-bit atomic; the value is narrowed to
+ * uint32_t before being widened so that a negative counter does not
+ * sign-extend into the rollover bits.
+ */
+uint64_t
+__mdc_inc_generation(xlator_t *this, struct md_cache *mdc)
+{
+    struct mdc_conf *conf = this->private;
+    uint64_t gen = 0;
+    uint64_t rollover = 0;
+
+    gen = (uint32_t)GF_ATOMIC_INC(conf->generation);
+    if (gen == 0) {
+        mdc->gen_rollover = !mdc->gen_rollover;
+        gen = (uint32_t)GF_ATOMIC_INC(conf->generation);
+        mdc->ia_time = 0;
+        mdc->generation = 0;
+    }
+
+    rollover = mdc->gen_rollover;
+    gen |= (rollover << 32);
+    return gen;
+}
+
+/*
+ * Advance the global generation counter for 'inode' and return the new
+ * value.
+ *
+ * With an md_cache context present the work is delegated to
+ * __mdc_inc_generation() under mdc->lock (the result then carries the
+ * context's rollover flag in bit 32). Without a context only the bare
+ * counter is returned, skipping the value 0.
+ *
+ * conf->generation is a signed 32-bit atomic; the bare counter is narrowed
+ * to uint32_t before widening so a negative counter cannot set the upper
+ * 32 bits of the result.
+ */
+uint64_t
+mdc_inc_generation(xlator_t *this, inode_t *inode)
+{
+    struct mdc_conf *conf = NULL;
+    uint64_t gen = 0;
+    struct md_cache *mdc = NULL;
+
+    conf = this->private;
+
+    mdc_inode_ctx_get(this, inode, &mdc);
+
+    if (mdc) {
+        LOCK(&mdc->lock);
+        {
+            gen = __mdc_inc_generation(this, mdc);
+        }
+        UNLOCK(&mdc->lock);
+    } else {
+        gen = (uint32_t)GF_ATOMIC_INC(conf->generation);
+        if (gen == 0) {
+            gen = (uint32_t)GF_ATOMIC_INC(conf->generation);
+        }
+    }
+
+    return gen;
+}
+
+/*
+ * Return the generation to stamp a new fop with.
+ *
+ * If 'inode' has an md_cache context, its recorded generation is returned
+ * (read under mdc->lock); otherwise the current value of the global counter
+ * is used.
+ *
+ * conf->generation is a signed 32-bit atomic; it is narrowed to uint32_t
+ * before widening so a negative counter does not populate the upper 32 bits,
+ * which consumers interpret as the rollover flag.
+ */
+uint64_t
+mdc_get_generation(xlator_t *this, inode_t *inode)
+{
+    struct mdc_conf *conf = NULL;
+    uint64_t gen = 0;
+    struct md_cache *mdc = NULL;
+
+    conf = this->private;
+
+    mdc_inode_ctx_get(this, inode, &mdc);
+
+    if (mdc) {
+        LOCK(&mdc->lock);
+        {
+            gen = mdc->generation;
+        }
+        UNLOCK(&mdc->lock);
+    } else {
+        gen = (uint32_t)GF_ATOMIC_GET(conf->generation);
+    }
+
+    return gen;
+}
+
+/*
+ * Store 'mdc' as this xlator's context on 'inode' via the unlocked
+ * __inode_ctx_set(); callers in this file hold inode->lock.
+ * Returns the result of __inode_ctx_set().
+ */
+int
+__mdc_inode_ctx_set(xlator_t *this, inode_t *inode, struct md_cache *mdc)
+{
+    uint64_t encoded = (long)mdc;
+
+    return __inode_ctx_set(inode, this, &encoded);
+}
+
+/*
+ * Locked wrapper around __mdc_inode_ctx_set(): takes inode->lock, stores
+ * 'mdc' as the context and returns the setter's result.
+ */
+int
+mdc_inode_ctx_set(xlator_t *this, inode_t *inode, struct md_cache *mdc)
+{
+    int status;
+
+    LOCK(&inode->lock);
+    status = __mdc_inode_ctx_set(this, inode, mdc);
+    UNLOCK(&inode->lock);
+
+    return status;
+}
+
+/*
+ * Return the md-cache local of 'frame', creating it on first use.
+ *
+ * A newly created local is zero-filled, stamped with the current generation
+ * of 'inode' (incident_time) and attached to frame->local.
+ * Returns NULL if the allocation fails.
+ */
+mdc_local_t *
+mdc_local_get(call_frame_t *frame, inode_t *inode)
+{
+    mdc_local_t *state = frame->local;
+
+    if (state != NULL)
+        return state;
+
+    state = GF_CALLOC(sizeof(*state), 1, gf_mdc_mt_mdc_local_t);
+    if (state != NULL) {
+        state->incident_time = mdc_get_generation(frame->this, inode);
+        frame->local = state;
+    }
+
+    return state;
+}
+
+/*
+ * Release everything owned by a per-frame local and the local itself:
+ * both locs, the fd reference, the linkname and key strings and the xattr
+ * dict reference. A NULL 'local' is accepted and ignored.
+ */
+void
+mdc_local_wipe(xlator_t *this, mdc_local_t *local)
+{
+    if (local == NULL)
+        return;
+
+    loc_wipe(&local->loc);
+    loc_wipe(&local->loc2);
+
+    if (local->fd != NULL)
+        fd_unref(local->fd);
+
+    GF_FREE(local->linkname);
+    GF_FREE(local->key);
+
+    if (local->xattr != NULL)
+        dict_unref(local->xattr);
+
+    GF_FREE(local);
+}
+
+/*
+ * Detach and destroy the md_cache context of 'inode'.
+ *
+ * Removes the context from the inode, drops the cached xattr dict, frees the
+ * cached link name, destroys the context lock that mdc_inode_prep() set up
+ * with LOCK_INIT, and frees the context.
+ *
+ * Returns 0 on success, or the non-zero result of inode_ctx_del() when the
+ * inode carries no context for this xlator.
+ */
+int
+mdc_inode_wipe(xlator_t *this, inode_t *inode)
+{
+    int ret = 0;
+    uint64_t mdc_int = 0;
+    struct md_cache *mdc = NULL;
+
+    ret = inode_ctx_del(inode, this, &mdc_int);
+    if (ret != 0)
+        goto out;
+
+    mdc = (void *)(long)mdc_int;
+
+    if (mdc->xattr)
+        dict_unref(mdc->xattr);
+
+    GF_FREE(mdc->linkname);
+
+    /* Pair the LOCK_INIT done when the context was created. */
+    LOCK_DESTROY(&mdc->lock);
+
+    GF_FREE(mdc);
+
+    ret = 0;
+out:
+    return ret;
+}
+
+/*
+ * Return the md_cache context of 'inode', creating and attaching a
+ * zero-filled one (with an initialised lock) if none exists yet.
+ * The lookup and the creation both happen under inode->lock.
+ *
+ * Returns NULL if the allocation fails or the context cannot be stored on
+ * the inode; in the latter case the lock is destroyed again before the
+ * context is freed.
+ */
+struct md_cache *
+mdc_inode_prep(xlator_t *this, inode_t *inode)
+{
+    int ret = 0;
+    struct md_cache *mdc = NULL;
+
+    LOCK(&inode->lock);
+    {
+        ret = __mdc_inode_ctx_get(this, inode, &mdc);
+        if (ret == 0)
+            goto unlock;
+
+        mdc = GF_CALLOC(sizeof(*mdc), 1, gf_mdc_mt_md_cache_t);
+        if (!mdc) {
+            gf_msg(this->name, GF_LOG_ERROR, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+                   "out of memory");
+            goto unlock;
+        }
+
+        LOCK_INIT(&mdc->lock);
+
+        ret = __mdc_inode_ctx_set(this, inode, mdc);
+        if (ret) {
+            gf_msg(this->name, GF_LOG_ERROR, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+                   "out of memory");
+            /* Undo LOCK_INIT before releasing the memory. */
+            LOCK_DESTROY(&mdc->lock);
+            GF_FREE(mdc);
+            mdc = NULL;
+        }
+    }
+unlock:
+    UNLOCK(&inode->lock);
+
+    return mdc;
+}
+
+/* Cache is valid if:
+ * - It is not cached before any brick was down. Brick down case is handled by
+ * invalidating all the cache when any brick went down.
+ * - The cache time is not expired
+ */
+static gf_boolean_t
+__is_cache_valid(xlator_t *this, time_t mdc_time)
+{
+    struct mdc_conf *conf = this->private;
+    time_t child_down_at = 0;
+    uint32_t lifetime = 0;
+
+    /* conf->lock is deliberately not taken for these reads so that
+     * multi-threaded IO does not contend on a global lock. Writers do take
+     * the lock, so the writes themselves stay intact; a read of
+     * last_child_down may observe junk, but only for a very short time.
+     */
+    child_down_at = conf->last_child_down;
+    lifetime = conf->timeout;
+
+    /* A zero timestamp marks an entry that was never cached or was reset. */
+    if (mdc_time == 0)
+        return _gf_false;
+
+    /* Anything cached before the last child-down event is stale. */
+    if ((child_down_at != 0) && (mdc_time < child_down_at))
+        return _gf_false;
+
+    /* Expired once the configured lifetime has elapsed. */
+    if (gf_time() >= (mdc_time + lifetime))
+        return _gf_false;
+
+    return _gf_true;
+}
+
+/*
+ * Report whether the cached iatt of 'mdc' may be served.
+ *
+ * The answer is false when the context is flagged invalid or when
+ * __is_cache_valid() rejects ia_time; in the latter case ia_time and
+ * generation are reset to 0. Runs under mdc->lock.
+ */
+static gf_boolean_t
+is_md_cache_iatt_valid(xlator_t *this, struct md_cache *mdc)
+{
+    gf_boolean_t usable = _gf_false;
+
+    LOCK(&mdc->lock);
+    if (mdc->valid != _gf_false) {
+        usable = __is_cache_valid(this, mdc->ia_time);
+        if (usable == _gf_false) {
+            mdc->ia_time = 0;
+            mdc->generation = 0;
+        }
+    }
+    UNLOCK(&mdc->lock);
+
+    return usable;
+}
+
+/*
+ * Report whether the cached xattrs of 'mdc' may be served, based on
+ * xa_time. When the entry is rejected xa_time is reset to 0.
+ * Runs under mdc->lock.
+ */
+static gf_boolean_t
+is_md_cache_xatt_valid(xlator_t *this, struct md_cache *mdc)
+{
+    gf_boolean_t usable;
+
+    LOCK(&mdc->lock);
+    usable = __is_cache_valid(this, mdc->xa_time);
+    if (usable == _gf_false)
+        mdc->xa_time = 0;
+    UNLOCK(&mdc->lock);
+
+    return usable;
+}
+
+/*
+ * Copy the cacheable attributes of 'iatt' into 'mdc'. Only the md_* fields
+ * are written; timestamps, validity flags and locks are left untouched.
+ * No locking is done here.
+ */
+void
+mdc_from_iatt(struct md_cache *mdc, struct iatt *iatt)
+{
+    /* ownership, permissions and link count */
+    mdc->md_uid = iatt->ia_uid;
+    mdc->md_gid = iatt->ia_gid;
+    mdc->md_prot = iatt->ia_prot;
+    mdc->md_nlink = iatt->ia_nlink;
+
+    /* timestamps (seconds and nanoseconds) */
+    mdc->md_atime = iatt->ia_atime;
+    mdc->md_atime_nsec = iatt->ia_atime_nsec;
+    mdc->md_mtime = iatt->ia_mtime;
+    mdc->md_mtime_nsec = iatt->ia_mtime_nsec;
+    mdc->md_ctime = iatt->ia_ctime;
+    mdc->md_ctime_nsec = iatt->ia_ctime_nsec;
+
+    /* device number and size information */
+    mdc->md_rdev = iatt->ia_rdev;
+    mdc->md_size = iatt->ia_size;
+    mdc->md_blocks = iatt->ia_blocks;
+}
+
+/*
+ * Fill 'iatt' from the md_* fields cached in 'mdc'. Members of 'iatt' that
+ * have no cached counterpart are not written. No locking is done here.
+ */
+void
+mdc_to_iatt(struct md_cache *mdc, struct iatt *iatt)
+{
+    /* ownership, permissions and link count */
+    iatt->ia_uid = mdc->md_uid;
+    iatt->ia_gid = mdc->md_gid;
+    iatt->ia_prot = mdc->md_prot;
+    iatt->ia_nlink = mdc->md_nlink;
+
+    /* timestamps (seconds and nanoseconds) */
+    iatt->ia_atime = mdc->md_atime;
+    iatt->ia_atime_nsec = mdc->md_atime_nsec;
+    iatt->ia_mtime = mdc->md_mtime;
+    iatt->ia_mtime_nsec = mdc->md_mtime_nsec;
+    iatt->ia_ctime = mdc->md_ctime;
+    iatt->ia_ctime_nsec = mdc->md_ctime_nsec;
+
+    /* device number and size information */
+    iatt->ia_rdev = mdc->md_rdev;
+    iatt->ia_size = mdc->md_size;
+    iatt->ia_blocks = mdc->md_blocks;
+}
+
+/*
+ * Store (or invalidate) the cached iatt of 'inode'.
+ *
+ * this          - the md-cache xlator
+ * inode         - inode whose context is updated (created if missing)
+ * prebuf        - attributes before the operation, or NULL; only consulted
+ *                 when conf->global_invalidation is set
+ * iatt          - new attributes; NULL or a zero ia_ctime invalidates the
+ *                 cached iatt instead of updating it
+ * update_time   - when true, ia_time is refreshed on a successful update
+ * incident_time - generation stamp of the fop: low 32 bits are the
+ *                 generation, the bits above carry the rollover flag
+ *
+ * Returns 0 when the request was processed (including the invalidation and
+ * the "not updating cache" cases) and -1 when no context could be prepared
+ * or when 'iatt' is older than the cached ctime and is discarded.
+ */
+int
+mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf,
+ struct iatt *iatt, gf_boolean_t update_time,
+ uint64_t incident_time)
+{
+ int ret = 0;
+ struct md_cache *mdc = NULL;
+ uint32_t rollover = 0;
+ uint64_t gen = 0;
+ gf_boolean_t update_xa_time = _gf_false;
+ struct mdc_conf *conf = this->private;
+
+ mdc = mdc_inode_prep(this, inode);
+ if (!mdc) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Split the stamp into its rollover flag and 32-bit generation. */
+ rollover = incident_time >> 32;
+ incident_time = (incident_time & 0xffffffff);
+
+ LOCK(&mdc->lock);
+ {
+ /* No usable attributes: drop the cached iatt and record a new
+ * generation so that fops stamped earlier cannot repopulate it. */
+ if (!iatt || !iatt->ia_ctime) {
+ gf_msg_callingfn("md-cache", GF_LOG_TRACE, 0, 0,
+ "invalidating iatt(NULL)"
+ "(%s)",
+ uuid_utoa(inode->gfid));
+ mdc->ia_time = 0;
+ mdc->valid = 0;
+
+ gen = __mdc_inc_generation(this, mdc);
+ mdc->generation = (gen & 0xffffffff);
+ goto unlock;
+ }
+
+ /* There could be a race in invalidation, where the
+ * invalidations in order A, B reaches md-cache in the order
+ * B, A. Hence, make sure the invalidation A is discarded if
+ * it comes after B. ctime of a file is always in ascending
+ * order unlike atime and mtime(which can be changed by user
+ * to any date), also ctime gets updates when atime/mtime
+ * changes, hence check for ctime only.
+ */
+ if (mdc->md_ctime > iatt->ia_ctime) {
+ gf_msg_callingfn(this->name, GF_LOG_DEBUG, EINVAL,
+ MD_CACHE_MSG_DISCARD_UPDATE,
+ "discarding the iatt validate "
+ "request (%s)",
+ uuid_utoa(inode->gfid));
+ ret = -1;
+ goto unlock;
+ }
+ /* Same second: fall back to comparing the nanosecond part. */
+ if ((mdc->md_ctime == iatt->ia_ctime) &&
+ (mdc->md_ctime_nsec > iatt->ia_ctime_nsec)) {
+ gf_msg_callingfn(this->name, GF_LOG_DEBUG, EINVAL,
+ MD_CACHE_MSG_DISCARD_UPDATE,
+ "discarding the iatt validate "
+ "request(ctime_nsec) (%s)",
+ uuid_utoa(inode->gfid));
+ ret = -1;
+ goto unlock;
+ }
+
+ /*
+ * Invalidate the inode if the mtime or ctime has changed
+ * and the prebuf doesn't match the value we have cached.
+ * TODO: writev returns with a NULL iatt due to
+ * performance/write-behind, causing invalidation on writes.
+ */
+ if ((iatt->ia_mtime != mdc->md_mtime) ||
+ (iatt->ia_mtime_nsec != mdc->md_mtime_nsec) ||
+ (iatt->ia_ctime != mdc->md_ctime) ||
+ (iatt->ia_ctime_nsec != mdc->md_ctime_nsec)) {
+ if (conf->global_invalidation &&
+ (!prebuf || (prebuf->ia_mtime != mdc->md_mtime) ||
+ (prebuf->ia_mtime_nsec != mdc->md_mtime_nsec) ||
+ (prebuf->ia_ctime != mdc->md_ctime) ||
+ (prebuf->ia_ctime_nsec != mdc->md_ctime_nsec))) {
+ /* NOTE(review): inode_invalidate() is called with mdc->lock
+ * held - confirm it cannot re-enter md-cache for this inode. */
+ if (IA_ISREG(inode->ia_type)) {
+ gf_msg("md-cache", GF_LOG_TRACE, 0,
+ MD_CACHE_MSG_DISCARD_UPDATE,
+ "prebuf doesn't match the value we have cached,"
+ " invalidate the inode(%s)",
+ uuid_utoa(inode->gfid));
+
+ inode_invalidate(inode);
+ }
+ } else {
+ /* The change is accounted for: let the xattr timestamp
+ * follow the refreshed iatt timestamp below. */
+ update_xa_time = _gf_true;
+ }
+ }
+
+ /* Accept the update only if the fop was stamped in the same
+ * rollover epoch and not before the last recorded invalidation. */
+ if ((mdc->gen_rollover == rollover) &&
+ (incident_time >= mdc->generation)) {
+ mdc_from_iatt(mdc, iatt);
+ mdc->valid = _gf_true;
+ if (update_time) {
+ mdc->ia_time = gf_time();
+ /* Only extend an xattr cache that is currently populated. */
+ if (mdc->xa_time && update_xa_time)
+ mdc->xa_time = mdc->ia_time;
+ }
+
+ gf_msg_callingfn(
+ "md-cache", GF_LOG_TRACE, 0, MD_CACHE_MSG_CACHE_UPDATE,
+ "Updated iatt(%s)"
+ " time:%lld generation=%lld",
+ uuid_utoa(iatt->ia_gfid), (unsigned long long)mdc->ia_time,
+ (unsigned long long)mdc->generation);
+ } else {
+ gf_msg_callingfn("md-cache", GF_LOG_TRACE, 0, 0,
+ "not updating cache (%s)"
+ "mdc-rollover=%u rollover=%u "
+ "mdc-generation=%llu "
+ "mdc-ia_time=%llu incident_time=%llu ",
+ uuid_utoa(iatt->ia_gfid), mdc->gen_rollover,
+ rollover, (unsigned long long)mdc->generation,
+ (unsigned long long)mdc->ia_time,
+ (unsigned long long)incident_time);
+ }
+ }
+unlock:
+ UNLOCK(&mdc->lock);
+
+out:
+ return ret;
+}
+
+/*
+ * Convenience form of mdc_inode_iatt_set_validate() for callers that have
+ * no pre-operation attributes: passes a NULL prebuf and always asks for the
+ * cache timestamp to be refreshed.
+ */
+int
+mdc_inode_iatt_set(xlator_t *this, inode_t *inode, struct iatt *iatt,
+                   uint64_t incident_time)
+{
+    struct iatt *no_prebuf = NULL;
+    gf_boolean_t refresh_time = _gf_true;
+
+    return mdc_inode_iatt_set_validate(this, inode, no_prebuf, iatt,
+                                       refresh_time, incident_time);
+}
+
+/*
+ * Fill 'iatt' from the cache of 'inode'.
+ *
+ * Returns -1 if the inode has no md_cache context or its cached iatt is not
+ * valid. On success (0) the cached attributes are copied under mdc->lock
+ * and the identity fields (gfid, ino, type) are taken from the inode itself;
+ * ia_dev is set to the constant 42.
+ */
+int
+mdc_inode_iatt_get(xlator_t *this, inode_t *inode, struct iatt *iatt)
+{
+    struct md_cache *cached = NULL;
+
+    if (mdc_inode_ctx_get(this, inode, &cached) != 0) {
+        gf_msg_trace("md-cache", 0, "mdc_inode_ctx_get failed (%s)",
+                     uuid_utoa(inode->gfid));
+        return -1;
+    }
+
+    if (!is_md_cache_iatt_valid(this, cached)) {
+        gf_msg_trace("md-cache", 0, "iatt cache not valid for (%s)",
+                     uuid_utoa(inode->gfid));
+        return -1;
+    }
+
+    LOCK(&cached->lock);
+    mdc_to_iatt(cached, iatt);
+    UNLOCK(&cached->lock);
+
+    gf_uuid_copy(iatt->ia_gfid, inode->gfid);
+    iatt->ia_ino = gfid_to_ino(inode->gfid);
+    iatt->ia_dev = 42;
+    iatt->ia_type = inode->ia_type;
+
+    return 0;
+}
+
+/* Accumulator handed to updatefn() through dict_foreach(): 'dict' is the
+ * target dictionary (created on demand when NULL) and 'ret' is set to -1 if
+ * creating the dictionary or storing a key fails. */
+struct updatedict {
+ dict_t *dict;
+ int ret;
+};
+
+/*
+ * Decide whether xattr 'key' is one md-cache is configured to cache.
+ *
+ * conf->mdc_xattr_str holds a comma-separated list of fnmatch() patterns.
+ * The list is duplicated, split, each pattern is trimmed and matched
+ * against 'key'.
+ *
+ * Returns 1 on the first matching pattern; 0 if nothing matches, if 'key'
+ * or the pattern list is NULL, or if duplicating the list fails.
+ */
+static int
+is_mdc_key_satisfied(xlator_t *this, const char *key)
+{
+    struct mdc_conf *conf = this->private;
+    char *configured = NULL;
+    char *scratch = NULL;
+    char *saveptr = NULL;
+    char *pat = NULL;
+    int matched = 0;
+
+    if (key == NULL)
+        return 0;
+
+    /* conf->mdc_xattr_str is never freed, so the pointer read here can be
+     * used safely outside of any lock. It is read exactly once. */
+    configured = conf->mdc_xattr_str;
+    if (configured == NULL)
+        return 0;
+
+    /* strtok_r() modifies its input, so work on a private copy. */
+    scratch = gf_strdup(configured);
+    if (scratch == NULL)
+        return 0;
+
+    for (pat = strtok_r(scratch, ",", &saveptr); pat != NULL;
+         pat = strtok_r(NULL, ",", &saveptr)) {
+        gf_strTrim(&pat);
+        if (fnmatch(pat, key, 0) == 0) {
+            matched = 1;
+            break;
+        }
+
+        gf_msg_trace("md-cache", 0,
+                     "xattr key %s doesn't satisfy "
+                     "caching requirements",
+                     key);
+    }
+
+    GF_FREE(scratch);
+    return matched;
+}
+
+/*
+ * dict_foreach() callback: copy 'key'/'value' into the accumulator's
+ * dictionary when the key is cacheable, creating that dictionary on first
+ * use. 'data' is a struct updatedict.
+ *
+ * Returns 0 to continue, or -1 (also recorded in the accumulator's 'ret')
+ * when the dictionary cannot be created or the key cannot be stored.
+ */
+static int
+updatefn(dict_t *dict, char *key, data_t *value, void *data)
+{
+    struct updatedict *acc = data;
+
+    if (!is_mdc_key_satisfied(THIS, key))
+        return 0;
+
+    if (acc->dict == NULL) {
+        acc->dict = dict_new();
+        if (acc->dict == NULL) {
+            acc->ret = -1;
+            return -1;
+        }
+    }
+
+    if (dict_set(acc->dict, key, value) < 0) {
+        acc->ret = -1;
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Merge the cacheable keys of 'src' into '*tgt'.
+ *
+ * If '*tgt' already exists it is updated in place. If it is NULL, a new
+ * dictionary is created on demand and handed over through '*tgt'; on
+ * failure a partially built new dictionary is released and '*tgt' is left
+ * NULL.
+ *
+ * Returns 0 on success and -1 if any key could not be stored.
+ */
+static int
+mdc_dict_update(dict_t **tgt, dict_t *src)
+{
+    struct updatedict acc;
+
+    acc.dict = *tgt;
+    acc.ret = 0;
+
+    dict_foreach(src, updatefn, &acc);
+
+    if (*tgt == NULL) {
+        if ((acc.ret < 0) && (acc.dict != NULL))
+            dict_unref(acc.dict);
+        else
+            *tgt = acc.dict;
+    }
+
+    return acc.ret;
+}
+
+/*
+ * Replace the cached xattrs of 'inode' with the cacheable keys of 'dict'.
+ *
+ * The previous cached dictionary is dropped first. On success the filtered
+ * copy (possibly none, if no key qualified) becomes the cache and xa_time is
+ * refreshed. If building the filtered copy fails, xa_time is reset to 0:
+ * the old dictionary is already gone, and leaving a still-valid xa_time
+ * next to a NULL dictionary would make mdc_inode_xatt_get() report "no
+ * xattrs present" from the cache.
+ *
+ * Returns 0 on success; -1 if no context could be prepared, 'dict' is NULL,
+ * or the filtered copy could not be built.
+ */
+int
+mdc_inode_xatt_set(xlator_t *this, inode_t *inode, dict_t *dict)
+{
+    int ret = -1;
+    struct md_cache *mdc = NULL;
+    dict_t *newdict = NULL;
+
+    mdc = mdc_inode_prep(this, inode);
+    if (!mdc)
+        goto out;
+
+    if (!dict) {
+        gf_msg_trace("md-cache", 0,
+                     "mdc_inode_xatt_set failed (%s) "
+                     "dict NULL",
+                     uuid_utoa(inode->gfid));
+        goto out;
+    }
+
+    LOCK(&mdc->lock);
+    {
+        if (mdc->xattr) {
+            gf_msg_trace("md-cache", 0,
+                         "deleting the old xattr "
+                         "cache (%s)",
+                         uuid_utoa(inode->gfid));
+            dict_unref(mdc->xattr);
+            mdc->xattr = NULL;
+        }
+
+        ret = mdc_dict_update(&newdict, dict);
+        if (ret < 0) {
+            /* Nothing usable is cached any more; force a miss. */
+            mdc->xa_time = 0;
+        } else {
+            if (newdict)
+                mdc->xattr = newdict;
+
+            mdc->xa_time = gf_time();
+            gf_msg_trace("md-cache", 0, "xatt cache set for (%s) time:%lld",
+                         uuid_utoa(inode->gfid), (long long)mdc->xa_time);
+            ret = 0;
+        }
+    }
+    UNLOCK(&mdc->lock);
+out:
+    return ret;
+}
+
+/*
+ * Merge the cacheable keys of 'dict' into the cached xattrs of 'inode'
+ * (under mdc->lock), creating the cached dictionary if needed.
+ *
+ * Returns 0 on success; -1 if no context could be prepared or 'dict' is
+ * NULL; otherwise the negative result of mdc_dict_update().
+ */
+int
+mdc_inode_xatt_update(xlator_t *this, inode_t *inode, dict_t *dict)
+{
+    struct md_cache *mdc = mdc_inode_prep(this, inode);
+    int status;
+
+    if ((mdc == NULL) || (dict == NULL))
+        return -1;
+
+    LOCK(&mdc->lock);
+    status = mdc_dict_update(&mdc->xattr, dict);
+    UNLOCK(&mdc->lock);
+
+    return (status < 0) ? status : 0;
+}
+
+/*
+ * Remove xattr 'name' from the cached xattrs of 'inode'.
+ *
+ * The presence check on the cached dictionary is done under mdc->lock,
+ * together with the deletion, because other paths replace or drop
+ * mdc->xattr while holding that lock.
+ *
+ * Returns 0 if a cached dictionary existed and the delete was issued;
+ * -1 if no context could be prepared, 'name' is NULL, or nothing is cached.
+ */
+int
+mdc_inode_xatt_unset(xlator_t *this, inode_t *inode, char *name)
+{
+    int ret = -1;
+    struct md_cache *mdc = NULL;
+
+    mdc = mdc_inode_prep(this, inode);
+    if (!mdc)
+        goto out;
+
+    if (!name)
+        goto out;
+
+    LOCK(&mdc->lock);
+    {
+        if (mdc->xattr) {
+            dict_del(mdc->xattr, name);
+            ret = 0;
+        }
+    }
+    UNLOCK(&mdc->lock);
+
+out:
+    return ret;
+}
+
+/*
+ * Hand out the cached xattrs of 'inode'.
+ *
+ * Returns -1 if the inode has no context or its xattr cache is not valid.
+ * Returns 0 otherwise: if a dictionary is cached and 'dict' is non-NULL,
+ * *dict receives a new reference to it; if nothing is cached, *dict is left
+ * untouched, which callers treat as a negative cache for the loaded keys.
+ */
+int
+mdc_inode_xatt_get(xlator_t *this, inode_t *inode, dict_t **dict)
+{
+    struct md_cache *cached = NULL;
+
+    if (mdc_inode_ctx_get(this, inode, &cached) != 0) {
+        gf_msg_trace("md-cache", 0, "mdc_inode_ctx_get failed (%s)",
+                     uuid_utoa(inode->gfid));
+        return -1;
+    }
+
+    if (!is_md_cache_xatt_valid(this, cached)) {
+        gf_msg_trace("md-cache", 0, "xattr cache not valid for (%s)",
+                     uuid_utoa(inode->gfid));
+        return -1;
+    }
+
+    LOCK(&cached->lock);
+    if (cached->xattr == NULL) {
+        /* A missing dictionary only means that none of the loaded keys
+         * were present, i.e. a negative cache. */
+        gf_msg_trace("md-cache", 0, "xattr not present (%s)",
+                     uuid_utoa(inode->gfid));
+    } else if (dict != NULL) {
+        *dict = dict_ref(cached->xattr);
+    }
+    UNLOCK(&cached->lock);
+
+    return 0;
+}
+
+/*
+ * Consume the "need lookup" flag of 'inode': return its current value and
+ * clear it, atomically with respect to mdc->lock.
+ * Returns _gf_false when the inode has no md_cache context.
+ */
+gf_boolean_t
+mdc_inode_reset_need_lookup(xlator_t *this, inode_t *inode)
+{
+    struct md_cache *mdc = NULL;
+    gf_boolean_t was_needed = _gf_false;
+
+    if (mdc_inode_ctx_get(this, inode, &mdc) != 0)
+        return _gf_false;
+
+    LOCK(&mdc->lock);
+    was_needed = mdc->need_lookup;
+    mdc->need_lookup = _gf_false;
+    UNLOCK(&mdc->lock);
+
+    return was_needed;
+}
+
+/*
+ * Set the "need lookup" flag of 'inode' to 'need' under mdc->lock.
+ * Does nothing when the inode has no md_cache context.
+ */
+void
+mdc_inode_set_need_lookup(xlator_t *this, inode_t *inode, gf_boolean_t need)
+{
+    struct md_cache *mdc = NULL;
+
+    if (mdc_inode_ctx_get(this, inode, &mdc) != 0)
+        return;
+
+    LOCK(&mdc->lock);
+    mdc->need_lookup = need;
+    UNLOCK(&mdc->lock);
+}
+
+/*
+ * Invalidate the cached iatt of 'inode'.
+ *
+ * A new generation is drawn first (outside mdc->lock, since
+ * mdc_inc_generation() takes that lock itself); then, under the lock, the
+ * iatt timestamp is cleared, the context is flagged invalid and the low
+ * 32 bits of the new generation are recorded.
+ * Does nothing when the inode has no md_cache context.
+ */
+void
+mdc_inode_iatt_invalidate(xlator_t *this, inode_t *inode)
+{
+    struct md_cache *mdc = NULL;
+    uint32_t next_gen = 0;
+
+    if (mdc_inode_ctx_get(this, inode, &mdc) != 0)
+        return;
+
+    next_gen = mdc_inc_generation(this, inode) & 0xffffffff;
+
+    LOCK(&mdc->lock);
+    mdc->ia_time = 0;
+    mdc->valid = _gf_false;
+    mdc->generation = next_gen;
+    UNLOCK(&mdc->lock);
+}
+
+/*
+ * Invalidate the cached xattrs of 'inode' by clearing xa_time under
+ * mdc->lock. The cached dictionary itself is left in place.
+ *
+ * Returns 0 when the cache was invalidated and -1 when the inode has no
+ * md_cache context.
+ */
+int
+mdc_inode_xatt_invalidate(xlator_t *this, inode_t *inode)
+{
+    int ret = -1;
+    struct md_cache *mdc = NULL;
+
+    if (mdc_inode_ctx_get(this, inode, &mdc) != 0)
+        goto out;
+
+    LOCK(&mdc->lock);
+    {
+        mdc->xa_time = 0;
+    }
+    UNLOCK(&mdc->lock);
+
+    /* Report success; previously the initial -1 was returned here too. */
+    ret = 0;
+out:
+    return ret;
+}
+
+/*
+ * Update the cached iatt of the inode identified by iatt->ia_gfid.
+ *
+ * The inode is looked up in the inode table of the graph's top xlator. The
+ * update is stamped with a freshly drawn generation. The reference that
+ * inode_find() returns is released before returning.
+ *
+ * Returns -1 if the gfid is not in the inode table, otherwise the result of
+ * mdc_inode_iatt_set_validate().
+ */
+static int
+mdc_update_gfid_stat(xlator_t *this, struct iatt *iatt)
+{
+    int ret = 0;
+    inode_table_t *itable = NULL;
+    inode_t *inode = NULL;
+
+    itable = ((xlator_t *)this->graph->top)->itable;
+    inode = inode_find(itable, iatt->ia_gfid);
+    if (!inode) {
+        ret = -1;
+        goto out;
+    }
+    ret = mdc_inode_iatt_set_validate(this, inode, NULL, iatt, _gf_true,
+                                      mdc_inc_generation(this, inode));
+
+    /* inode_find() hands back a referenced inode; drop that reference. */
+    inode_unref(inode);
+out:
+    return ret;
+}
+
+/*
+ * Add every configured xattr pattern to request dictionary 'dict' (as an
+ * int8 value of 0) so that the reply carries the xattrs md-cache wants to
+ * cache.
+ *
+ * Returns true when all patterns were added. Returns false when no pattern
+ * list is configured, when duplicating it fails, or when a dict_set fails;
+ * in the last case xattr caching is switched off by clearing
+ * conf->mdc_xattr_str.
+ */
+static bool
+mdc_load_reqs(xlator_t *this, dict_t *dict)
+{
+    struct mdc_conf *conf = this->private;
+    char *configured = conf->mdc_xattr_str;
+    char *scratch = NULL;
+    char *saveptr = NULL;
+    char *pat = NULL;
+    bool loaded = true;
+
+    if (configured == NULL)
+        return false;
+
+    /* strtok_r() modifies its input, so work on a private copy. */
+    scratch = gf_strdup(configured);
+    if (scratch == NULL)
+        return false;
+
+    for (pat = strtok_r(scratch, ",", &saveptr); pat != NULL;
+         pat = strtok_r(NULL, ",", &saveptr)) {
+        gf_strTrim(&pat);
+        if (dict_set_int8(dict, pat, 0) != 0) {
+            conf->mdc_xattr_str = NULL;
+            gf_msg("md-cache", GF_LOG_ERROR, 0, MD_CACHE_MSG_NO_XATTR_CACHE,
+                   "Disabled cache for xattrs, dict_set failed");
+            loaded = false;
+            break;
+        }
+    }
+
+    GF_FREE(scratch);
+
+    return loaded;
+}
+
+/* State handed to checkfn() through dict_foreach(): 'ret' starts at 1 and
+ * is cleared when a requested key is not cacheable; 'rsp' carries the cached
+ * response dictionary (checkfn() does not read it). */
+struct checkpair {
+ int ret;
+ dict_t *rsp;
+};
+
+/*
+ * dict_foreach() callback: clear the verdict in 'data' (a struct checkpair)
+ * when 'key' is not one of the cacheable xattr keys. Always returns 0, so
+ * the iteration visits every key.
+ */
+static int
+checkfn(dict_t *this, char *key, data_t *value, void *data)
+{
+    struct checkpair *verdict = data;
+    int cacheable = is_mdc_key_satisfied(THIS, key);
+
+    if (cacheable == 0)
+        verdict->ret = 0;
+
+    return 0;
+}
+
+/*
+ * Check whether every key requested in 'req' is an xattr key md-cache
+ * caches. Returns 1 if so and 0 if at least one requested key is not
+ * cacheable.
+ */
+int
+mdc_xattr_satisfied(xlator_t *this, dict_t *req, dict_t *rsp)
+{
+    struct checkpair verdict;
+
+    verdict.ret = 1;
+    verdict.rsp = rsp;
+
+    dict_foreach(req, checkfn, &verdict);
+
+    return verdict.ret;
+}
+
+/*
+ * Store 'buf' as the cached statfs result and stamp it with the current
+ * time, under the statfs cache mutex.
+ */
+static void
+mdc_cache_statfs(xlator_t *this, struct statvfs *buf)
+{
+    struct mdc_conf *conf = this->private;
+    struct mdc_statfs_cache *cache = &conf->statfs_cache;
+
+    pthread_mutex_lock(&cache->lock);
+    memcpy(&cache->buf, buf, sizeof(struct statvfs));
+    cache->last_refreshed = gf_time();
+    pthread_mutex_unlock(&cache->lock);
+}
+
+/*
+ * Look up the cached statfs result.
+ *
+ * On success (0) *buf points at the buffer inside the translator's statfs
+ * cache (no copy is made). Returns -1, with *buf set to NULL where 'buf' is
+ * usable, when 'buf' or the configuration is NULL, the cache has never been
+ * filled, or its age exceeds conf->timeout seconds.
+ */
+int
+mdc_load_statfs_info_from_cache(xlator_t *this, struct statvfs **buf)
+{
+    struct mdc_conf *conf = this->private;
+    struct mdc_statfs_cache *cache = NULL;
+    uint32_t age = 0;
+    int status = 0;
+
+    if ((buf == NULL) || (conf == NULL))
+        return -1;
+
+    *buf = NULL;
+    cache = &conf->statfs_cache;
+
+    pthread_mutex_lock(&cache->lock);
+    if (cache->last_refreshed == (time_t)-1) {
+        /* The cache has not been initialized yet. */
+        status = -1;
+    } else {
+        age = (gf_time() - cache->last_refreshed);
+
+        gf_log(this->name, GF_LOG_DEBUG, "STATFS cache age = %u secs", age);
+        if (age > conf->timeout) {
+            /* The cached value has expired. */
+            gf_log(this->name, GF_LOG_DEBUG,
+                   "Cache age %u secs exceeded timeout %u secs", age,
+                   conf->timeout);
+            status = -1;
+        } else {
+            *buf = &cache->buf;
+        }
+    }
+    pthread_mutex_unlock(&cache->lock);
+
+    return status;
+}
+
+/*
+ * Build the request dictionary for a fop that is about to be wound.
+ *
+ * The returned dictionary carries a reference owned by the caller: either
+ * an extra reference on 'xdata' or a newly created dictionary when 'xdata'
+ * is NULL and a local is available. With a local, the configured xattr keys
+ * are added and local->update_cache records whether that succeeded.
+ *
+ * Returns NULL when 'xdata' is NULL and either 'local' is NULL or no
+ * dictionary could be created.
+ */
+static dict_t *
+mdc_prepare_request(xlator_t *this, mdc_local_t *local, dict_t *xdata)
+{
+    dict_t *req = xdata;
+
+    if (req != NULL)
+        dict_ref(req);
+
+    /* Without a local nothing can be recorded; pass the request through. */
+    if (local == NULL)
+        return req;
+
+    if (req == NULL) {
+        req = dict_new();
+        if (req == NULL) {
+            local->update_cache = false;
+            return NULL;
+        }
+    }
+
+    local->update_cache = mdc_load_reqs(this, req);
+
+    return req;
+}
+
+/*
+ * statfs callback.
+ *
+ * With a local present: on failure with ENOENT or ESTALE the cached iatt of
+ * the inode is invalidated; on success the result is stored in the statfs
+ * cache when that feature is enabled. The reply is always unwound unchanged.
+ */
+int
+mdc_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+               dict_t *xdata)
+{
+    struct mdc_conf *conf = this->private;
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret != 0) {
+            if ((op_errno == ENOENT) || (op_errno == ESTALE))
+                mdc_inode_iatt_invalidate(this, local->loc.inode);
+        } else if (conf && conf->cache_statfs) {
+            mdc_cache_statfs(this, buf);
+        }
+    }
+
+    MDC_STACK_UNWIND(statfs, frame, op_ret, op_errno, buf, xdata);
+
+    return 0;
+}
+
+/*
+ * statfs fop: answer from the statfs cache when caching is enabled and the
+ * cached value has not expired; otherwise wind to the first child.
+ *
+ * If the per-frame local cannot be allocated the fop is failed with ENOMEM
+ * instead of being passed through.
+ */
+int
+mdc_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int ret = 0, op_ret = 0, op_errno = 0;
+ struct statvfs *buf = NULL;
+ mdc_local_t *local = NULL;
+ struct mdc_conf *conf = this->private;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (!local) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ /* The callback needs the inode to invalidate it on ENOENT/ESTALE. */
+ loc_copy(&local->loc, loc);
+
+ if (!conf) {
+ goto uncached;
+ }
+
+ if (!conf->cache_statfs) {
+ goto uncached;
+ }
+
+ /* NOTE(review): on a hit 'buf' points into conf->statfs_cache and is
+ * read by the unwind below after the cache mutex was released - confirm
+ * a concurrent mdc_cache_statfs() cannot tear the value. */
+ ret = mdc_load_statfs_info_from_cache(this, &buf);
+ if (ret == 0 && buf) {
+ op_ret = 0;
+ op_errno = 0;
+ goto out;
+ }
+
+uncached:
+ STACK_WIND(frame, mdc_statfs_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ return 0;
+
+out:
+ MDC_STACK_UNWIND(statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+/*
+ * lookup callback.
+ *
+ * Failure: ENOENT bumps the negative-lookup counter; ESTALE invalidates the
+ * cached iatt of the parent if the request had one, otherwise of the inode.
+ * Success: the parent's and the inode's cached iatt are refreshed, and the
+ * returned xattrs are cached when the request had been loaded with the
+ * cacheable keys. The reply is always unwound unchanged.
+ */
+int
+mdc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, inode_t *inode,
+               struct iatt *stbuf, dict_t *dict, struct iatt *postparent)
+{
+    struct mdc_conf *conf = this->private;
+    mdc_local_t *local = frame->local;
+    inode_t *stale = NULL;
+
+    if (local == NULL)
+        goto unwind;
+
+    if (op_ret != 0) {
+        switch (op_errno) {
+            case ENOENT:
+                /* fuse-bridge unlinks the dentry itself for ENOENT. */
+                GF_ATOMIC_INC(conf->mdc_counter.negative_lookup);
+                break;
+            case ESTALE:
+                stale = local->loc.parent ? local->loc.parent
+                                          : local->loc.inode;
+                mdc_inode_iatt_invalidate(this, stale);
+                break;
+            default:
+                break;
+        }
+
+        goto unwind;
+    }
+
+    if (local->loc.parent != NULL) {
+        mdc_inode_iatt_set(this, local->loc.parent, postparent,
+                           local->incident_time);
+    }
+
+    if (local->loc.inode != NULL) {
+        mdc_inode_iatt_set(this, local->loc.inode, stbuf, local->incident_time);
+        if (local->update_cache)
+            mdc_inode_xatt_set(this, local->loc.inode, dict);
+    }
+
+unwind:
+    MDC_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, dict,
+                     postparent);
+    return 0;
+}
+
+/*
+ * lookup fop.
+ *
+ * Served from the cache only when all of the following hold: the per-frame
+ * local could be allocated, the inode is linked, no explicit lookup was
+ * requested for it, its cached iatt is valid and - if the caller sent a
+ * request dict - the xattr cache is valid and every requested key is one
+ * md-cache caches. On a hit the parent attributes are returned zeroed.
+ *
+ * Otherwise the lookup is wound to the first child with a request dict
+ * prepared by mdc_prepare_request().
+ */
+int
+mdc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int ret = 0;
+ struct iatt stbuf = {
+ 0,
+ };
+ struct iatt postparent = {
+ 0,
+ };
+ dict_t *xattr_rsp = NULL;
+ mdc_local_t *local = NULL;
+ struct mdc_conf *conf = this->private;
+
+ local = mdc_local_get(frame, loc->inode);
+ if (!local) {
+ GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
+ goto uncached;
+ }
+
+ loc_copy(&local->loc, loc);
+
+ if (!inode_is_linked(loc->inode)) {
+ GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
+ goto uncached;
+ }
+
+ /* Reading the flag also clears it, so it forces exactly one lookup. */
+ if (mdc_inode_reset_need_lookup(this, loc->inode)) {
+ GF_ATOMIC_INC(conf->mdc_counter.need_lookup);
+ goto uncached;
+ }
+
+ ret = mdc_inode_iatt_get(this, loc->inode, &stbuf);
+ if (ret != 0) {
+ GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
+ goto uncached;
+ }
+
+ if (xdata) {
+ /* On success xattr_rsp holds a reference that is dropped below;
+ * it stays NULL when nothing is cached for this inode. */
+ ret = mdc_inode_xatt_get(this, loc->inode, &xattr_rsp);
+ if (ret != 0) {
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+ goto uncached;
+ }
+
+ if (!mdc_xattr_satisfied(this, xdata, xattr_rsp)) {
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+ goto uncached;
+ }
+ }
+
+ GF_ATOMIC_INC(conf->mdc_counter.stat_hit);
+ MDC_STACK_UNWIND(lookup, frame, 0, 0, loc->inode, &stbuf, xattr_rsp,
+ &postparent);
+
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+
+ return 0;
+
+uncached:
+ /* Returns a referenced dict (or NULL); the reference is dropped after
+ * the wind. 'local' may be NULL here if its allocation failed. */
+ xdata = mdc_prepare_request(this, local, xdata);
+
+ STACK_WIND(frame, mdc_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ return 0;
+}
+
+/*
+ * stat callback.
+ *
+ * With a local present: on success the inode's cached iatt is refreshed and,
+ * if the request had been loaded with the cacheable keys, the returned
+ * xattrs are cached; on failure with ESTALE or ENOENT the cached iatt is
+ * invalidated. The reply is always unwound unchanged.
+ */
+int
+mdc_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+             int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret == 0) {
+            mdc_inode_iatt_set(this, local->loc.inode, buf,
+                               local->incident_time);
+            if (local->update_cache)
+                mdc_inode_xatt_set(this, local->loc.inode, xdata);
+        } else if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+            mdc_inode_iatt_invalidate(this, local->loc.inode);
+        }
+    }
+
+    MDC_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata);
+
+    return 0;
+}
+
+/*
+ * stat fop: answer from the cached iatt when possible, otherwise wind to
+ * the first child.
+ *
+ * A cached answer requires a frame-local, a linked inode
+ * (inode_is_linked()) and a successful mdc_inode_iatt_get(). On a hit the
+ * stat_hit counter is bumped and the cached iatt is unwound with the
+ * caller's xdata. Every other path falls through to "uncached", which
+ * bumps stat_miss exactly once and winds the call with mdc_stat_cbk as
+ * the callback.
+ *
+ * Always returns 0; the result is delivered through the unwind.
+ */
+int
+mdc_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+    int ret;
+    struct iatt stbuf;
+    mdc_local_t *local = NULL;
+    dict_t *req = NULL;
+    struct mdc_conf *conf = this->private;
+
+    local = mdc_local_get(frame, loc->inode);
+    if (!local)
+        goto uncached;
+
+    loc_copy(&local->loc, loc);
+
+    /* The miss is accounted for once under "uncached"; counting it here
+     * as well would record two misses for a single request. */
+    if (!inode_is_linked(loc->inode))
+        goto uncached;
+
+    ret = mdc_inode_iatt_get(this, loc->inode, &stbuf);
+    if (ret != 0)
+        goto uncached;
+
+    GF_ATOMIC_INC(conf->mdc_counter.stat_hit);
+    MDC_STACK_UNWIND(stat, frame, 0, 0, &stbuf, xdata);
+
+    return 0;
+
+uncached:
+    /* Only prepare the request when there is a local to carry the result
+     * to the callback: mdc_stat_cbk does nothing without frame->local,
+     * and mdc_getxattr likewise never calls mdc_prepare_request without
+     * one. NOTE(review): mdc_prepare_request is not visible here; it is
+     * assumed to return a referenced dict (hence the unref below) and
+     * not to be NULL-safe for 'local' - confirm. */
+    if (local != NULL)
+        xdata = req = mdc_prepare_request(this, local, xdata);
+
+    GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
+    STACK_WIND(frame, mdc_stat_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->stat, loc, xdata);
+
+    /* Drop only the reference handed back by mdc_prepare_request. */
+    if (req != NULL) {
+        dict_unref(req);
+    }
+
+    return 0;
+}
+
+/*
+ * Completion handler for the fstat wound by mdc_fstat().
+ *
+ * Works on the inode of the fd saved in the frame-local. A failure with
+ * ENOENT or ESTALE calls mdc_inode_iatt_invalidate(); a success stores
+ * the returned iatt with mdc_inode_iatt_set() and, when
+ * local->update_cache is set, passes xdata to mdc_inode_xatt_set().
+ * Without a frame-local the reply is simply forwarded.
+ */
+int
+mdc_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+              int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret == 0) {
+            mdc_inode_iatt_set(this, local->fd->inode, buf,
+                               local->incident_time);
+            if (local->update_cache)
+                mdc_inode_xatt_set(this, local->fd->inode, xdata);
+        } else if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+            mdc_inode_iatt_invalidate(this, local->fd->inode);
+        }
+    }
+
+    MDC_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata);
+
+    return 0;
+}
+
+/*
+ * fstat fop: answer from the cached iatt of fd->inode when possible,
+ * otherwise wind to the first child.
+ *
+ * On a hit (frame-local obtained and mdc_inode_iatt_get() returned 0) the
+ * stat_hit counter is bumped and the cached iatt is unwound with the
+ * caller's xdata. Otherwise stat_miss is bumped and the call is wound
+ * with mdc_fstat_cbk as the callback.
+ *
+ * Always returns 0; the result is delivered through the unwind.
+ */
+int
+mdc_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+    int ret;
+    struct iatt stbuf;
+    mdc_local_t *local = NULL;
+    dict_t *req = NULL;
+    struct mdc_conf *conf = this->private;
+
+    local = mdc_local_get(frame, fd->inode);
+    if (!local)
+        goto uncached;
+
+    /* The callback reaches the inode through this fd reference. */
+    local->fd = __fd_ref(fd);
+
+    ret = mdc_inode_iatt_get(this, fd->inode, &stbuf);
+    if (ret != 0)
+        goto uncached;
+
+    GF_ATOMIC_INC(conf->mdc_counter.stat_hit);
+    MDC_STACK_UNWIND(fstat, frame, 0, 0, &stbuf, xdata);
+
+    return 0;
+
+uncached:
+    /* Only prepare the request when there is a local to carry the result
+     * to the callback: mdc_fstat_cbk does nothing without frame->local,
+     * and mdc_getxattr likewise never calls mdc_prepare_request without
+     * one. NOTE(review): mdc_prepare_request is not visible here; it is
+     * assumed to return a referenced dict (hence the unref below) and
+     * not to be NULL-safe for 'local' - confirm. */
+    if (local != NULL)
+        xdata = req = mdc_prepare_request(this, local, xdata);
+
+    GF_ATOMIC_INC(conf->mdc_counter.stat_miss);
+    STACK_WIND(frame, mdc_fstat_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fstat, fd, xdata);
+
+    /* Drop only the reference handed back by mdc_prepare_request. */
+    if (req != NULL) {
+        dict_unref(req);
+    }
+
+    return 0;
+}
+
+/*
+ * Completion handler for the truncate wound by mdc_truncate().
+ *
+ * With a frame-local: success passes prebuf/postbuf to
+ * mdc_inode_iatt_set_validate() for the saved inode; a failure with
+ * ENOENT or ESTALE invalidates the cached iatt instead. The reply is
+ * forwarded unchanged in every case.
+ */
+int
+mdc_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                 struct iatt *postbuf, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret == 0) {
+            mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf,
+                                        postbuf, _gf_true,
+                                        local->incident_time);
+        } else if ((op_errno == ENOENT) || (op_errno == ESTALE)) {
+            mdc_inode_iatt_invalidate(this, local->loc.inode);
+        }
+    }
+
+    MDC_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+    return 0;
+}
+
+/*
+ * truncate fop: remember a reference to loc->inode in the frame-local
+ * (if one could be obtained) so mdc_truncate_cbk can refresh the cache,
+ * then wind to the first child. Always returns 0.
+ */
+int
+mdc_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+             dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, loc->inode);
+
+    if (local)
+        local->loc.inode = inode_ref(loc->inode);
+
+    STACK_WIND(frame, mdc_truncate_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+    return 0;
+}
+
+/*
+ * Completion handler for the ftruncate wound by mdc_ftruncate().
+ *
+ * With a frame-local: success passes prebuf/postbuf to
+ * mdc_inode_iatt_set_validate() for the saved fd's inode; a failure with
+ * ENOENT or ESTALE invalidates that inode's cached iatt. The reply is
+ * forwarded unchanged in every case.
+ */
+int
+mdc_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                  struct iatt *postbuf, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret == 0) {
+            mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf,
+                                        postbuf, _gf_true,
+                                        local->incident_time);
+        } else if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+            mdc_inode_iatt_invalidate(this, local->fd->inode);
+        }
+    }
+
+    MDC_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+                     xdata);
+
+    return 0;
+}
+
+/*
+ * ftruncate fop: keep a reference to fd in the frame-local (if one could
+ * be obtained) for mdc_ftruncate_cbk, then wind to the first child.
+ * Always returns 0.
+ */
+int
+mdc_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+              dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, fd->inode);
+
+    if (local)
+        local->fd = __fd_ref(fd);
+
+    STACK_WIND(frame, mdc_ftruncate_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+    return 0;
+}
+
+/*
+ * Completion handler for the mknod wound by mdc_mknod().
+ *
+ * With a frame-local: on failure with ENOENT or ESTALE the parent's
+ * cached iatt is invalidated; on success the parent (if recorded) is
+ * updated with postparent and the new inode (if recorded) with buf, both
+ * through mdc_inode_iatt_set(). The reply is forwarded unchanged.
+ */
+int
+mdc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+              int32_t op_errno, inode_t *inode, struct iatt *buf,
+              struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret != 0) {
+            if ((op_errno == ENOENT) || (op_errno == ESTALE))
+                mdc_inode_iatt_invalidate(this, local->loc.parent);
+        } else {
+            if (local->loc.parent)
+                mdc_inode_iatt_set(this, local->loc.parent, postparent,
+                                   local->incident_time);
+
+            if (local->loc.inode)
+                mdc_inode_iatt_set(this, local->loc.inode, buf,
+                                   local->incident_time);
+        }
+    }
+
+    MDC_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent,
+                     postparent, xdata);
+    return 0;
+}
+
+/*
+ * mknod fop: when a frame-local can be obtained, copy loc into it and
+ * keep a reference to xdata in local->xattr; then wind to the first
+ * child with mdc_mknod_cbk as the callback. Always returns 0.
+ */
+int
+mdc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+          dev_t rdev, mode_t umask, dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, loc->inode);
+
+    if (local) {
+        loc_copy(&local->loc, loc);
+        local->xattr = dict_ref(xdata);
+    }
+
+    STACK_WIND(frame, mdc_mknod_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
+    return 0;
+}
+
+/*
+ * Completion handler for the mkdir wound by mdc_mkdir().
+ *
+ * With a frame-local: on failure with ENOENT or ESTALE the parent's
+ * cached iatt is invalidated; on success the parent (if recorded) is
+ * updated with postparent and the new directory inode (if recorded) with
+ * buf, both through mdc_inode_iatt_set(). The reply is forwarded
+ * unchanged.
+ */
+int
+mdc_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+              int32_t op_errno, inode_t *inode, struct iatt *buf,
+              struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret != 0) {
+            if ((op_errno == ENOENT) || (op_errno == ESTALE))
+                mdc_inode_iatt_invalidate(this, local->loc.parent);
+        } else {
+            if (local->loc.parent)
+                mdc_inode_iatt_set(this, local->loc.parent, postparent,
+                                   local->incident_time);
+
+            if (local->loc.inode)
+                mdc_inode_iatt_set(this, local->loc.inode, buf,
+                                   local->incident_time);
+        }
+    }
+
+    MDC_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, buf, preparent,
+                     postparent, xdata);
+    return 0;
+}
+
+/*
+ * mkdir fop: when a frame-local can be obtained, copy loc into it and
+ * keep a reference to xdata in local->xattr; then wind to the first
+ * child with mdc_mkdir_cbk as the callback. Always returns 0.
+ */
+int
+mdc_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+          mode_t umask, dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, loc->inode);
+
+    if (local) {
+        loc_copy(&local->loc, loc);
+        local->xattr = dict_ref(xdata);
+    }
+
+    STACK_WIND(frame, mdc_mkdir_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+    return 0;
+}
+
+/*
+ * Completion handler for the unlink wound by mdc_unlink().
+ *
+ * Failure: ESTALE means the parent is gone, which implies the child is
+ * gone too; and per man 2 unlink, ENOENT can be returned when a
+ * component of the pathname does not exist or is a dangling symbolic
+ * link. For either errno both the child and the parent are invalidated.
+ *
+ * Success: the parent (if recorded) is updated with postparent, and
+ * mdc_inode_iatt_set() is called with a NULL iatt for the child (if
+ * recorded).
+ */
+int
+mdc_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+               struct iatt *postparent, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret != 0) {
+            if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+                mdc_inode_iatt_invalidate(this, local->loc.inode);
+                mdc_inode_iatt_invalidate(this, local->loc.parent);
+            }
+        } else {
+            if (local->loc.parent)
+                mdc_inode_iatt_set(this, local->loc.parent, postparent,
+                                   local->incident_time);
+
+            if (local->loc.inode)
+                mdc_inode_iatt_set(this, local->loc.inode, NULL,
+                                   local->incident_time);
+        }
+    }
+
+    MDC_STACK_UNWIND(unlink, frame, op_ret, op_errno, preparent, postparent,
+                     xdata);
+    return 0;
+}
+
+/*
+ * unlink fop: copy loc into the frame-local (if one could be obtained)
+ * for mdc_unlink_cbk, then wind to the first child. Always returns 0.
+ */
+int
+mdc_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag,
+           dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, loc->inode);
+
+    if (local)
+        loc_copy(&local->loc, loc);
+
+    STACK_WIND(frame, mdc_unlink_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+    return 0;
+}
+
+/*
+ * Completion handler for the rmdir wound by mdc_rmdir().
+ *
+ * Failure: ESTALE means the parent is gone, which implies the child is
+ * gone too; and per man 2 rmdir, ENOENT can be returned when a directory
+ * component of the pathname does not exist or is a dangling symbolic
+ * link. For either errno both the child and the parent are invalidated.
+ *
+ * Success: the parent (if recorded) is updated with postparent.
+ */
+int
+mdc_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+              int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
+              dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret != 0) {
+            if ((op_errno == ENOENT) || (op_errno == ESTALE)) {
+                mdc_inode_iatt_invalidate(this, local->loc.inode);
+                mdc_inode_iatt_invalidate(this, local->loc.parent);
+            }
+        } else if (local->loc.parent) {
+            mdc_inode_iatt_set(this, local->loc.parent, postparent,
+                               local->incident_time);
+        }
+    }
+
+    MDC_STACK_UNWIND(rmdir, frame, op_ret, op_errno, preparent, postparent,
+                     xdata);
+    return 0;
+}
+
+/*
+ * rmdir fop: copy loc into the frame-local (if one could be obtained)
+ * for mdc_rmdir_cbk, then wind to the first child. Always returns 0.
+ */
+int
+mdc_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+          dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, loc->inode);
+
+    if (local)
+        loc_copy(&local->loc, loc);
+
+    STACK_WIND(frame, mdc_rmdir_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->rmdir, loc, flag, xdata);
+    return 0;
+}
+
+/*
+ * Completion handler for the symlink wound by mdc_symlink().
+ *
+ * With a frame-local: on failure with ENOENT or ESTALE the parent's
+ * cached iatt is invalidated; on success the parent (if recorded) is
+ * updated with postparent and the new inode (if recorded) with buf, both
+ * through mdc_inode_iatt_set(). The reply is forwarded unchanged.
+ */
+int
+mdc_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, inode_t *inode,
+                struct iatt *buf, struct iatt *preparent,
+                struct iatt *postparent, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret != 0) {
+            if ((op_errno == ENOENT) || (op_errno == ESTALE))
+                mdc_inode_iatt_invalidate(this, local->loc.parent);
+        } else {
+            if (local->loc.parent)
+                mdc_inode_iatt_set(this, local->loc.parent, postparent,
+                                   local->incident_time);
+
+            if (local->loc.inode)
+                mdc_inode_iatt_set(this, local->loc.inode, buf,
+                                   local->incident_time);
+        }
+    }
+
+    MDC_STACK_UNWIND(symlink, frame, op_ret, op_errno, inode, buf, preparent,
+                     postparent, xdata);
+    return 0;
+}
+
+/*
+ * symlink fop: duplicate linkname and, if both the duplicate and a
+ * frame-local could be obtained, store the copy in local->linkname and
+ * copy loc into the local. If the local cannot be obtained the duplicate
+ * is freed again. The call is wound to the first child in every case.
+ * Always returns 0.
+ */
+int
+mdc_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+            loc_t *loc, mode_t umask, dict_t *xdata)
+{
+    mdc_local_t *local = NULL;
+    char *linkname_copy = gf_strdup(linkname);
+
+    if (linkname_copy != NULL) {
+        local = mdc_local_get(frame, loc->inode);
+        if (local != NULL) {
+            loc_copy(&local->loc, loc);
+            /* Ownership of the copy moves to the frame-local. */
+            local->linkname = linkname_copy;
+        } else {
+            GF_FREE(linkname_copy);
+        }
+    }
+
+    STACK_WIND(frame, mdc_symlink_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata);
+    return 0;
+}
+
+/*
+ * Completion handler for the rename wound by mdc_rename().
+ *
+ * Failure with ENOENT or ESTALE: the renamed inode (local->loc.inode)
+ * and the destination parent (local->loc2.parent) are invalidated.
+ *
+ * Success: the old parent is updated with postoldparent, the renamed
+ * inode gets mdc_inode_iatt_set() with a NULL iatt, and the new parent
+ * is updated with postnewparent - each only if recorded in the local.
+ */
+int
+mdc_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct iatt *buf,
+               struct iatt *preoldparent, struct iatt *postoldparent,
+               struct iatt *prenewparent, struct iatt *postnewparent,
+               dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret != 0) {
+            if ((op_errno == ENOENT) || (op_errno == ESTALE)) {
+                mdc_inode_iatt_invalidate(this, local->loc.inode);
+                mdc_inode_iatt_invalidate(this, local->loc2.parent);
+            }
+        } else {
+            if (local->loc.parent)
+                mdc_inode_iatt_set(this, local->loc.parent, postoldparent,
+                                   local->incident_time);
+
+            /* TODO: fix dht_rename() not to return linkfile
+               attributes before setting attributes here
+            */
+            if (local->loc.inode)
+                mdc_inode_iatt_set(this, local->loc.inode, NULL,
+                                   local->incident_time);
+
+            if (local->loc2.parent)
+                mdc_inode_iatt_set(this, local->loc2.parent, postnewparent,
+                                   local->incident_time);
+        }
+    }
+
+    MDC_STACK_UNWIND(rename, frame, op_ret, op_errno, buf, preoldparent,
+                     postoldparent, prenewparent, postnewparent, xdata);
+    return 0;
+}
+
+/*
+ * rename fop: copy oldloc into local->loc and newloc into local->loc2
+ * (if a frame-local could be obtained) for mdc_rename_cbk, then wind to
+ * the first child. Always returns 0.
+ */
+int
+mdc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+           dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, oldloc->inode);
+
+    if (local) {
+        loc_copy(&local->loc, oldloc);
+        loc_copy(&local->loc2, newloc);
+    }
+
+    STACK_WIND(frame, mdc_rename_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+    return 0;
+}
+
+/*
+ * Completion handler for the link wound by mdc_link().
+ *
+ * Failure with ENOENT or ESTALE: the linked inode (local->loc.inode) and
+ * the new name's parent (local->loc2.parent) are invalidated.
+ *
+ * Success: the linked inode is updated with buf and the new name's
+ * parent with postparent - each only if recorded in the local.
+ */
+int
+mdc_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+             int32_t op_errno, inode_t *inode, struct iatt *buf,
+             struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret != 0) {
+            if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+                mdc_inode_iatt_invalidate(this, local->loc.inode);
+                mdc_inode_iatt_invalidate(this, local->loc2.parent);
+            }
+        } else {
+            if (local->loc.inode)
+                mdc_inode_iatt_set(this, local->loc.inode, buf,
+                                   local->incident_time);
+
+            if (local->loc2.parent)
+                mdc_inode_iatt_set(this, local->loc2.parent, postparent,
+                                   local->incident_time);
+        }
+    }
+
+    MDC_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent,
+                     postparent, xdata);
+    return 0;
+}
+
+/*
+ * link fop: copy oldloc into local->loc and newloc into local->loc2 (if
+ * a frame-local could be obtained) for mdc_link_cbk, then wind to the
+ * first child. Always returns 0.
+ */
+int
+mdc_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+         dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, oldloc->inode);
+
+    if (local) {
+        loc_copy(&local->loc, oldloc);
+        loc_copy(&local->loc2, newloc);
+    }
+
+    STACK_WIND(frame, mdc_link_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+    return 0;
+}
+
+/*
+ * Completion handler for the create wound by mdc_create().
+ *
+ * Failure with ENOENT or ESTALE: the parent's cached iatt is
+ * invalidated.
+ *
+ * Success: the parent (if recorded) is updated with postparent; when the
+ * local recorded an inode, the inode returned in the reply is updated
+ * with buf.
+ */
+int
+mdc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+               struct iatt *buf, struct iatt *preparent,
+               struct iatt *postparent, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret != 0) {
+            if ((op_errno == ENOENT) || (op_errno == ESTALE))
+                mdc_inode_iatt_invalidate(this, local->loc.parent);
+        } else {
+            if (local->loc.parent)
+                mdc_inode_iatt_set(this, local->loc.parent, postparent,
+                                   local->incident_time);
+
+            /* The guard looks at the recorded inode, the update goes to
+             * the inode delivered with the reply. */
+            if (local->loc.inode)
+                mdc_inode_iatt_set(this, inode, buf, local->incident_time);
+        }
+    }
+
+    MDC_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, buf, preparent,
+                     postparent, xdata);
+    return 0;
+}
+
+/*
+ * create fop: when a frame-local can be obtained, copy loc into it and
+ * keep a reference to xdata in local->xattr; then wind to the first
+ * child with mdc_create_cbk as the callback. Always returns 0.
+ */
+int
+mdc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+           mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, loc->inode);
+
+    if (local) {
+        loc_copy(&local->loc, loc);
+        local->xattr = dict_ref(xdata);
+    }
+
+    STACK_WIND(frame, mdc_create_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+               xdata);
+    return 0;
+}
+
+/*
+ * Completion handler for the open wound by mdc_open().
+ *
+ * A frame-local only exists when mdc_open() saw a regular file opened
+ * with O_TRUNC, and in that case the only thing it records is a
+ * reference to the fd (local->fd); local->loc is never filled in by
+ * mdc_open(). All cache operations here therefore go through
+ * local->fd->inode.
+ *
+ * Failure with ENOENT or ESTALE: invalidate the inode's cached iatt.
+ * Success with O_TRUNC set on the fd: invalidate as well, because the
+ * truncation changes the file size.
+ *
+ * The reply is forwarded unchanged in every case.
+ */
+static int
+mdc_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+             int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+    mdc_local_t *local = NULL;
+
+    local = frame->local;
+
+    if (!local)
+        goto out;
+
+    if (op_ret != 0) {
+        /* Use the fd's inode: local->loc.inode is not populated on the
+         * open path, so invalidating through it would miss the inode. */
+        if ((op_errno == ESTALE) || (op_errno == ENOENT))
+            mdc_inode_iatt_invalidate(this, local->fd->inode);
+        goto out;
+    }
+
+    if (local->fd->flags & O_TRUNC) {
+        /* O_TRUNC modifies file size. Hence invalidate the
+         * cache entry to fetch latest attributes. */
+        mdc_inode_iatt_invalidate(this, local->fd->inode);
+    }
+
+out:
+    MDC_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata);
+    return 0;
+}
+
+/*
+ * open fop: a frame-local holding a reference to fd is set up only when
+ * fd is non-NULL, its inode is a regular file and fd->flags contains
+ * O_TRUNC; mdc_open_cbk acts only when that local exists. The call is
+ * wound to the first child in every case. Always returns 0.
+ */
+static int
+mdc_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
+         dict_t *xdata)
+{
+    mdc_local_t *local = NULL;
+
+    if (fd && IA_ISREG(fd->inode->ia_type) && (fd->flags & O_TRUNC)) {
+        local = mdc_local_get(frame, loc->inode);
+        if (local)
+            local->fd = __fd_ref(fd);
+    }
+
+    STACK_WIND(frame, mdc_open_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+    return 0;
+}
+
+/*
+ * Completion handler for the readv wound by mdc_readv().
+ *
+ * With a frame-local: a negative op_ret with ENOENT or ESTALE
+ * invalidates the cached iatt of the fd's inode; a non-negative op_ret
+ * stores stbuf through mdc_inode_iatt_set(). The reply is forwarded
+ * unchanged.
+ */
+int
+mdc_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+              int32_t op_errno, struct iovec *vector, int32_t count,
+              struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret >= 0) {
+            mdc_inode_iatt_set(this, local->fd->inode, stbuf,
+                               local->incident_time);
+        } else if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+            mdc_inode_iatt_invalidate(this, local->fd->inode);
+        }
+    }
+
+    MDC_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, stbuf,
+                     iobref, xdata);
+
+    return 0;
+}
+
+/*
+ * readv fop: keep a reference to fd in the frame-local (if one could be
+ * obtained) for mdc_readv_cbk, then wind to the first child. Always
+ * returns 0.
+ */
+int
+mdc_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+          off_t offset, uint32_t flags, dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, fd->inode);
+
+    if (local)
+        local->fd = __fd_ref(fd);
+
+    STACK_WIND(frame, mdc_readv_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+    return 0;
+}
+
+/*
+ * Completion handler for the writev wound by mdc_writev().
+ *
+ * With a frame-local: op_ret == -1 is the failure case, and only ENOENT
+ * or ESTALE invalidates the cached iatt of the fd's inode. Any other
+ * op_ret passes prebuf/postbuf to mdc_inode_iatt_set_validate(). The
+ * reply is forwarded unchanged.
+ */
+int
+mdc_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+               struct iatt *postbuf, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret != -1) {
+            mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf,
+                                        postbuf, _gf_true,
+                                        local->incident_time);
+        } else if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+            mdc_inode_iatt_invalidate(this, local->fd->inode);
+        }
+    }
+
+    MDC_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+    return 0;
+}
+
+/*
+ * writev fop: keep a reference to fd in the frame-local (if one could be
+ * obtained) for mdc_writev_cbk, then wind to the first child. Always
+ * returns 0.
+ */
+int
+mdc_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+           int count, off_t offset, uint32_t flags, struct iobref *iobref,
+           dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, fd->inode);
+
+    if (local)
+        local->fd = __fd_ref(fd);
+
+    STACK_WIND(frame, mdc_writev_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+               flags, iobref, xdata);
+    return 0;
+}
+
+/*
+ * Completion handler for the setattr wound by mdc_setattr().
+ *
+ * With a frame-local: any failure (regardless of errno) calls
+ * mdc_inode_iatt_set() with a NULL iatt for the saved inode; success
+ * passes prebuf/postbuf to mdc_inode_iatt_set_validate() and xdata to
+ * mdc_inode_xatt_update(). The reply is forwarded unchanged.
+ */
+int
+mdc_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                struct iatt *postbuf, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret == 0) {
+            mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf,
+                                        postbuf, _gf_true,
+                                        local->incident_time);
+            mdc_inode_xatt_update(this, local->loc.inode, xdata);
+        } else {
+            mdc_inode_iatt_set(this, local->loc.inode, NULL,
+                               local->incident_time);
+        }
+    }
+
+    MDC_STACK_UNWIND(setattr, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+    return 0;
+}
+
+/*
+ * setattr fop.
+ *
+ * When a frame-local can be obtained, loc is copied into it. If the
+ * request changes the mode (GF_SET_ATTR_MODE) the ACL keys selected by
+ * the configuration (cache_glusterfs_acl / cache_posix_acl) are added to
+ * xdata with dict_set_int8(), allocating a dict when the caller passed
+ * none. If adding a key pair fails, the inode's cached xattrs are
+ * invalidated.
+ *
+ * The call is then wound to the first child; a dict allocated here is
+ * released after the wind. Always returns 0.
+ */
+int
+mdc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+            int valid, dict_t *xdata)
+{
+    struct mdc_conf *conf = this->private;
+    dict_t *xattr_alloc = NULL;
+    mdc_local_t *local = mdc_local_get(frame, loc->inode);
+
+    if (local != NULL) {
+        loc_copy(&local->loc, loc);
+
+        if (valid & GF_SET_ATTR_MODE) {
+            if (conf->cache_glusterfs_acl) {
+                if (xdata == NULL)
+                    xdata = xattr_alloc = dict_new();
+
+                /* Second key is only attempted when the first one was
+                 * stored; either failure invalidates. */
+                if ((xdata != NULL) &&
+                    (dict_set_int8(xdata, GF_POSIX_ACL_ACCESS, 0) ||
+                     dict_set_int8(xdata, GF_POSIX_ACL_DEFAULT, 0)))
+                    mdc_inode_xatt_invalidate(this, local->loc.inode);
+            }
+
+            if (conf->cache_posix_acl) {
+                if (xdata == NULL)
+                    xdata = xattr_alloc = dict_new();
+
+                if ((xdata != NULL) &&
+                    (dict_set_int8(xdata, POSIX_ACL_ACCESS_XATTR, 0) ||
+                     dict_set_int8(xdata, POSIX_ACL_DEFAULT_XATTR, 0)))
+                    mdc_inode_xatt_invalidate(this, local->loc.inode);
+            }
+        }
+    }
+
+    STACK_WIND(frame, mdc_setattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+
+    if (xattr_alloc != NULL)
+        dict_unref(xattr_alloc);
+    return 0;
+}
+
+/*
+ * Completion handler for the fsetattr wound by mdc_fsetattr().
+ *
+ * With a frame-local: a failure with ENOENT or ESTALE invalidates the
+ * cached iatt of the fd's inode; success passes prebuf/postbuf to
+ * mdc_inode_iatt_set_validate() and xdata to mdc_inode_xatt_update().
+ * The reply is forwarded unchanged.
+ */
+int
+mdc_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                 struct iatt *postbuf, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret == 0) {
+            mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf,
+                                        postbuf, _gf_true,
+                                        local->incident_time);
+            mdc_inode_xatt_update(this, local->fd->inode, xdata);
+        } else if ((op_errno == ENOENT) || (op_errno == ESTALE)) {
+            mdc_inode_iatt_invalidate(this, local->fd->inode);
+        }
+    }
+
+    MDC_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+    return 0;
+}
+
+/*
+ * fsetattr fop.
+ *
+ * When a frame-local can be obtained, a reference to fd is kept in it.
+ * If the request changes the mode (GF_SET_ATTR_MODE) the ACL keys
+ * selected by the configuration (cache_glusterfs_acl / cache_posix_acl)
+ * are added to xdata with dict_set_int8(), allocating a dict when the
+ * caller passed none. If adding a key pair fails, the cached xattrs of
+ * the fd's inode are invalidated.
+ *
+ * The call is then wound to the first child; a dict allocated here is
+ * released after the wind. Always returns 0.
+ */
+int
+mdc_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+             int valid, dict_t *xdata)
+{
+    struct mdc_conf *conf = this->private;
+    dict_t *xattr_alloc = NULL;
+    mdc_local_t *local = mdc_local_get(frame, fd->inode);
+
+    if (local != NULL) {
+        local->fd = __fd_ref(fd);
+
+        if (valid & GF_SET_ATTR_MODE) {
+            if (conf->cache_glusterfs_acl) {
+                if (xdata == NULL)
+                    xdata = xattr_alloc = dict_new();
+
+                /* Second key is only attempted when the first one was
+                 * stored; either failure invalidates. */
+                if ((xdata != NULL) &&
+                    (dict_set_int8(xdata, GF_POSIX_ACL_ACCESS, 0) ||
+                     dict_set_int8(xdata, GF_POSIX_ACL_DEFAULT, 0)))
+                    mdc_inode_xatt_invalidate(this, local->fd->inode);
+            }
+
+            if (conf->cache_posix_acl) {
+                if (xdata == NULL)
+                    xdata = xattr_alloc = dict_new();
+
+                if ((xdata != NULL) &&
+                    (dict_set_int8(xdata, POSIX_ACL_ACCESS_XATTR, 0) ||
+                     dict_set_int8(xdata, POSIX_ACL_DEFAULT_XATTR, 0)))
+                    mdc_inode_xatt_invalidate(this, local->fd->inode);
+            }
+        }
+    }
+
+    STACK_WIND(frame, mdc_fsetattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+
+    if (xattr_alloc != NULL)
+        dict_unref(xattr_alloc);
+    return 0;
+}
+
+/*
+ * Completion handler for the fsync wound by mdc_fsync().
+ *
+ * With a frame-local: a failure with ENOENT or ESTALE invalidates the
+ * cached iatt of the fd's inode; success passes prebuf/postbuf to
+ * mdc_inode_iatt_set_validate(). The reply is forwarded unchanged.
+ */
+int
+mdc_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+              int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+              dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret == 0) {
+            mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf,
+                                        postbuf, _gf_true,
+                                        local->incident_time);
+        } else if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+            mdc_inode_iatt_invalidate(this, local->fd->inode);
+        }
+    }
+
+    MDC_STACK_UNWIND(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+    return 0;
+}
+
+/*
+ * fsync fop: keep a reference to fd in the frame-local (if one could be
+ * obtained) for mdc_fsync_cbk, then wind to the first child. Always
+ * returns 0.
+ */
+int
+mdc_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+          dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, fd->inode);
+
+    if (local)
+        local->fd = __fd_ref(fd);
+
+    STACK_WIND(frame, mdc_fsync_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+    return 0;
+}
+
+/*
+ * Completion handler for the setxattr wound by mdc_setxattr().
+ *
+ * Failure with ENOENT or ESTALE: invalidate the saved inode's cached
+ * iatt.
+ *
+ * Success: merge the xattrs that were sent (local->xattr) into the cache
+ * with mdc_inode_xatt_update(), then look for GF_PRESTAT / GF_POSTSTAT
+ * in the reply's xdata. When the pre-stat is present the pair is passed
+ * to mdc_inode_iatt_set_validate(); if either lookup fails the cached
+ * iatt is invalidated.
+ */
+int
+mdc_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+    struct iatt prestat = {0};
+    struct iatt poststat = {0};
+    int ret = 0;
+
+    if (local != NULL) {
+        if (op_ret == 0) {
+            mdc_inode_xatt_update(this, local->loc.inode, local->xattr);
+
+            ret = dict_get_iatt(xdata, GF_PRESTAT, &prestat);
+            if (ret >= 0) {
+                ret = dict_get_iatt(xdata, GF_POSTSTAT, &poststat);
+                mdc_inode_iatt_set_validate(this, local->loc.inode, &prestat,
+                                            &poststat, _gf_true,
+                                            local->incident_time);
+            }
+
+            if (ret < 0)
+                mdc_inode_iatt_invalidate(this, local->loc.inode);
+        } else if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+            mdc_inode_iatt_invalidate(this, local->loc.inode);
+        }
+    }
+
+    MDC_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+
+    return 0;
+}
+
+/*
+ * setxattr fop: when a frame-local can be obtained, copy loc into it and
+ * keep a reference to the xattr dict being set, so mdc_setxattr_cbk can
+ * update the cache; then wind to the first child. Always returns 0.
+ */
+int
+mdc_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
+             int flags, dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, loc->inode);
+
+    if (local) {
+        loc_copy(&local->loc, loc);
+        local->xattr = dict_ref(xattr);
+    }
+
+    STACK_WIND(frame, mdc_setxattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->setxattr, loc, xattr, flags, xdata);
+
+    return 0;
+}
+
+/*
+ * Completion handler for the fsetxattr wound by mdc_fsetxattr().
+ *
+ * Failure with ENOENT or ESTALE: invalidate the cached iatt of the fd's
+ * inode.
+ *
+ * Success: merge the xattrs that were sent (local->xattr) into the cache
+ * with mdc_inode_xatt_update(), then look for GF_PRESTAT / GF_POSTSTAT
+ * in the reply's xdata. When the pre-stat is present the pair is passed
+ * to mdc_inode_iatt_set_validate(); if either lookup fails the cached
+ * iatt is invalidated.
+ */
+int
+mdc_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+    struct iatt prestat = {0};
+    struct iatt poststat = {0};
+    int ret = 0;
+
+    if (local != NULL) {
+        if (op_ret == 0) {
+            mdc_inode_xatt_update(this, local->fd->inode, local->xattr);
+
+            ret = dict_get_iatt(xdata, GF_PRESTAT, &prestat);
+            if (ret >= 0) {
+                ret = dict_get_iatt(xdata, GF_POSTSTAT, &poststat);
+                mdc_inode_iatt_set_validate(this, local->fd->inode, &prestat,
+                                            &poststat, _gf_true,
+                                            local->incident_time);
+            }
+
+            if (ret < 0)
+                mdc_inode_iatt_invalidate(this, local->fd->inode);
+        } else if ((op_errno == ENOENT) || (op_errno == ESTALE)) {
+            mdc_inode_iatt_invalidate(this, local->fd->inode);
+        }
+    }
+
+    MDC_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata);
+
+    return 0;
+}
+
+/*
+ * fsetxattr fop: when a frame-local can be obtained, keep references to
+ * fd and to the xattr dict being set, so mdc_fsetxattr_cbk can update
+ * the cache; then wind to the first child. Always returns 0.
+ */
+int
+mdc_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr,
+              int flags, dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, fd->inode);
+
+    if (local) {
+        local->fd = __fd_ref(fd);
+        local->xattr = dict_ref(xattr);
+    }
+
+    STACK_WIND(frame, mdc_fsetxattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fsetxattr, fd, xattr, flags, xdata);
+
+    return 0;
+}
+
+/*
+ * Completion handler for the getxattr wound by mdc_getxattr().
+ *
+ * With a frame-local:
+ *  - negative op_ret with ENOENT or ESTALE invalidates the saved inode's
+ *    cached iatt;
+ *  - a reply whose xattr dict carries "glusterfs.skip-cache" is logged
+ *    at debug level and leaves the cache untouched;
+ *  - otherwise, when local->update_cache is set, xdata is handed to
+ *    mdc_inode_xatt_set().
+ * The reply is forwarded unchanged.
+ */
+int
+mdc_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, dict_t *xattr, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret < 0) {
+            if ((op_errno == ESTALE) || (op_errno == ENOENT))
+                mdc_inode_iatt_invalidate(this, local->loc.inode);
+        } else if (dict_get(xattr, "glusterfs.skip-cache")) {
+            gf_msg(this->name, GF_LOG_DEBUG, 0, 0,
+                   "Skipping xattr update due to empty value");
+        } else if (local->update_cache) {
+            mdc_inode_xatt_set(this, local->loc.inode, xdata);
+        }
+    }
+
+    MDC_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata);
+
+    return 0;
+}
+
+/*
+ * getxattr fop: serve the request from the cached xattrs when possible.
+ *
+ * The cache is consulted only when a frame-local was obtained, the key
+ * passes is_mdc_key_satisfied() and mdc_inode_xatt_get() returns 0. In
+ * that case xattr_hit is bumped and the cached dict is unwound; if the
+ * dict is missing or lacks the key, the unwind carries -1 / ENODATA.
+ *
+ * Otherwise xattr_miss is bumped and the call is wound to the first
+ * child. Only when the key was found cacheable is the request dict run
+ * through mdc_prepare_request(), and only then is the returned dict
+ * unreferenced after the wind.
+ *
+ * Always returns 0.
+ */
+int
+mdc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
+             dict_t *xdata)
+{
+    struct mdc_conf *conf = this->private;
+    gf_boolean_t key_satisfied = _gf_false;
+    mdc_local_t *local = NULL;
+    dict_t *xattr = NULL;
+    int op_errno = ENODATA;
+    int ret;
+
+    local = mdc_local_get(frame, loc->inode);
+    if (local != NULL) {
+        loc_copy(&local->loc, loc);
+
+        if (is_mdc_key_satisfied(this, key)) {
+            key_satisfied = _gf_true;
+
+            ret = mdc_inode_xatt_get(this, loc->inode, &xattr);
+            if (ret == 0) {
+                /* Cache is authoritative: an absent key is answered
+                 * here instead of asking the child. */
+                if (!xattr || !dict_get(xattr, (char *)key)) {
+                    ret = -1;
+                    op_errno = ENODATA;
+                }
+
+                GF_ATOMIC_INC(conf->mdc_counter.xattr_hit);
+                MDC_STACK_UNWIND(getxattr, frame, ret, op_errno, xattr, xdata);
+
+                if (xattr)
+                    dict_unref(xattr);
+
+                return 0;
+            }
+        }
+    }
+
+    /* Not answerable from the cache. */
+    if (key_satisfied)
+        xdata = mdc_prepare_request(this, local, xdata);
+
+    GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+    STACK_WIND(frame, mdc_getxattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->getxattr, loc, key, xdata);
+
+    if (key_satisfied && (xdata != NULL))
+        dict_unref(xdata);
+
+    return 0;
+}
+
+/*
+ * Completion handler for the fgetxattr wound by mdc_fgetxattr().
+ *
+ * With a frame-local:
+ *  - negative op_ret with ENOENT or ESTALE invalidates the cached iatt
+ *    of the fd's inode;
+ *  - a reply whose xattr dict carries "glusterfs.skip-cache" is logged
+ *    at debug level and leaves the cache untouched;
+ *  - otherwise, when local->update_cache is set, xdata is handed to
+ *    mdc_inode_xatt_set().
+ * The reply is forwarded unchanged.
+ */
+int
+mdc_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, dict_t *xattr,
+                  dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local != NULL) {
+        if (op_ret < 0) {
+            if ((op_errno == ESTALE) || (op_errno == ENOENT))
+                mdc_inode_iatt_invalidate(this, local->fd->inode);
+        } else if (dict_get(xattr, "glusterfs.skip-cache")) {
+            gf_msg(this->name, GF_LOG_DEBUG, 0, 0,
+                   "Skipping xattr update due to empty value");
+        } else if (local->update_cache) {
+            mdc_inode_xatt_set(this, local->fd->inode, xdata);
+        }
+    }
+
+    MDC_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, xattr, xdata);
+
+    return 0;
+}
+
+/*
+ * fgetxattr fop: serve the request from the cached xattrs of fd->inode
+ * when possible.
+ *
+ * The cache is consulted only when a frame-local was obtained, the key
+ * passes is_mdc_key_satisfied() and mdc_inode_xatt_get() returns 0. In
+ * that case xattr_hit is bumped and the cached dict is unwound; if the
+ * dict is missing or lacks the key, the unwind carries -1 / ENODATA.
+ *
+ * Otherwise xattr_miss is bumped and the call is wound to the first
+ * child. The request dict is run through mdc_prepare_request() only when
+ * a local exists and the key is cacheable, and only then is the returned
+ * dict unreferenced after the wind.
+ *
+ * Always returns 0.
+ */
+int
+mdc_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+              dict_t *xdata)
+{
+    int ret;
+    mdc_local_t *local = NULL;
+    dict_t *xattr = NULL;
+    int op_errno = ENODATA;
+    struct mdc_conf *conf = this->private;
+    /* Starts false, as in mdc_getxattr: it becomes true only once a
+     * local exists and the key is known to be cacheable, so the
+     * "uncached" path never hands a NULL local to
+     * mdc_prepare_request(). */
+    gf_boolean_t key_satisfied = _gf_false;
+
+    local = mdc_local_get(frame, fd->inode);
+    if (!local)
+        goto uncached;
+
+    local->fd = __fd_ref(fd);
+
+    if (!is_mdc_key_satisfied(this, key)) {
+        goto uncached;
+    }
+    key_satisfied = _gf_true;
+
+    ret = mdc_inode_xatt_get(this, fd->inode, &xattr);
+    if (ret != 0)
+        goto uncached;
+
+    /* Cache is authoritative: an absent key is answered here instead of
+     * asking the child. */
+    if (!xattr || !dict_get(xattr, (char *)key)) {
+        ret = -1;
+        op_errno = ENODATA;
+    }
+
+    GF_ATOMIC_INC(conf->mdc_counter.xattr_hit);
+    MDC_STACK_UNWIND(fgetxattr, frame, ret, op_errno, xattr, xdata);
+
+    if (xattr)
+        dict_unref(xattr);
+
+    return 0;
+
+uncached:
+    if (key_satisfied) {
+        xdata = mdc_prepare_request(this, local, xdata);
+    }
+
+    GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+    STACK_WIND(frame, mdc_fgetxattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata);
+
+    /* Drop only the reference handed back by mdc_prepare_request. */
+    if (key_satisfied && (xdata != NULL)) {
+        dict_unref(xdata);
+    }
+
+    return 0;
+}
+
+/*
+ * Completion handler for the removexattr wound by mdc_removexattr().
+ *
+ * Failure with ENOENT or ESTALE: invalidate the saved inode's cached
+ * iatt.
+ *
+ * Success: drop the removed key from the cached xattrs with
+ * mdc_inode_xatt_unset() when the key was recorded in the local,
+ * otherwise invalidate the cached xattrs. Then look for GF_PRESTAT /
+ * GF_POSTSTAT in the reply's xdata: when the pre-stat is present the
+ * pair is passed to mdc_inode_iatt_set_validate(); if either lookup
+ * fails the cached iatt is invalidated.
+ */
+int
+mdc_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                    int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+    struct iatt prestat = {0};
+    struct iatt poststat = {0};
+    int ret = 0;
+
+    if (local != NULL) {
+        if (op_ret == 0) {
+            if (local->key)
+                mdc_inode_xatt_unset(this, local->loc.inode, local->key);
+            else
+                mdc_inode_xatt_invalidate(this, local->loc.inode);
+
+            ret = dict_get_iatt(xdata, GF_PRESTAT, &prestat);
+            if (ret >= 0) {
+                ret = dict_get_iatt(xdata, GF_POSTSTAT, &poststat);
+                mdc_inode_iatt_set_validate(this, local->loc.inode, &prestat,
+                                            &poststat, _gf_true,
+                                            local->incident_time);
+            }
+
+            if (ret < 0)
+                mdc_inode_iatt_invalidate(this, local->loc.inode);
+        } else if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+            mdc_inode_iatt_invalidate(this, local->loc.inode);
+        }
+    }
+
+    MDC_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata);
+
+    return 0;
+}
+
+/*
+ * removexattr fop.
+ *
+ * A copy of the key name is stored in the frame-local (local->key) along
+ * with a copy of loc, so mdc_removexattr_cbk can update the cache. If
+ * the name cannot be duplicated no local is set up; if the local cannot
+ * be obtained the duplicate is freed.
+ *
+ * When the key passes is_mdc_key_satisfied() and mdc_inode_xatt_get()
+ * returns 0, xattr_hit is bumped and the cached dict decides: a key
+ * that is not cached is answered right away with -1 / ENODATA, a cached
+ * key is wound to the first child. In every other case xattr_miss is
+ * bumped and the call is wound to the first child.
+ *
+ * Always returns 0.
+ */
+int
+mdc_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+                const char *name, dict_t *xdata)
+{
+    struct mdc_conf *conf = this->private;
+    gf_boolean_t cache_consulted = _gf_false;
+    mdc_local_t *local = NULL;
+    dict_t *xattr = NULL;
+    char *key_copy = gf_strdup(name);
+
+    if (key_copy != NULL) {
+        local = mdc_local_get(frame, loc->inode);
+        if (local == NULL) {
+            GF_FREE(key_copy);
+        } else {
+            loc_copy(&local->loc, loc);
+            /* Ownership of the copy moves to the frame-local. */
+            local->key = key_copy;
+
+            if (is_mdc_key_satisfied(this, name) &&
+                (mdc_inode_xatt_get(this, loc->inode, &xattr) == 0))
+                cache_consulted = _gf_true;
+        }
+    }
+
+    if (!cache_consulted) {
+        GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+        STACK_WIND(frame, mdc_removexattr_cbk, FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+        return 0;
+    }
+
+    GF_ATOMIC_INC(conf->mdc_counter.xattr_hit);
+
+    if (xattr && dict_get(xattr, (char *)name)) {
+        STACK_WIND(frame, mdc_removexattr_cbk, FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+    } else {
+        /* Key is not in the cached xattrs: answer without winding. */
+        MDC_STACK_UNWIND(removexattr, frame, -1, ENODATA, xdata);
+    }
+
+    if (xattr)
+        dict_unref(xattr);
+
+    return 0;
+}
+
+/*
+ * Completion handler for the fremovexattr wound by mdc_fremovexattr().
+ *
+ * Failure with ENOENT or ESTALE: invalidate the cached iatt of the fd's
+ * inode.
+ *
+ * Success: drop the removed key from the cached xattrs with
+ * mdc_inode_xatt_unset() when the key was recorded in the local,
+ * otherwise invalidate the cached xattrs. Then look for GF_PRESTAT /
+ * GF_POSTSTAT in the reply's xdata: when the pre-stat is present the
+ * pair is passed to mdc_inode_iatt_set_validate(); if either lookup
+ * fails the cached iatt is invalidated.
+ */
+int
+mdc_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                     int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+    struct iatt prestat = {0};
+    struct iatt poststat = {0};
+    int ret = 0;
+
+    if (local != NULL) {
+        if (op_ret == 0) {
+            if (local->key)
+                mdc_inode_xatt_unset(this, local->fd->inode, local->key);
+            else
+                mdc_inode_xatt_invalidate(this, local->fd->inode);
+
+            ret = dict_get_iatt(xdata, GF_PRESTAT, &prestat);
+            if (ret >= 0) {
+                ret = dict_get_iatt(xdata, GF_POSTSTAT, &poststat);
+                mdc_inode_iatt_set_validate(this, local->fd->inode, &prestat,
+                                            &poststat, _gf_true,
+                                            local->incident_time);
+            }
+
+            if (ret < 0)
+                mdc_inode_iatt_invalidate(this, local->fd->inode);
+        } else if ((op_errno == ESTALE) || (op_errno == ENOENT)) {
+            mdc_inode_iatt_invalidate(this, local->fd->inode);
+        }
+    }
+
+    MDC_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata);
+
+    return 0;
+}
+
+/*
+ * fremovexattr fop, the fd based twin of mdc_removexattr().
+ *
+ * A private copy of @name and a reference on @fd are stored in the frame's
+ * local. If is_mdc_key_satisfied() accepts the key and mdc_inode_xatt_get()
+ * succeeds, the request counts as an xattr cache hit: a key absent from the
+ * cached dict is answered immediately with -1/ENODATA, a present key is
+ * wound to the first child. Everything else counts as a miss and is wound
+ * to the first child.
+ *
+ * Always returns 0; the result is delivered through the unwind.
+ */
+int
+mdc_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+                 const char *name, dict_t *xdata)
+{
+    struct mdc_conf *conf = this->private;
+    mdc_local_t *local = NULL;
+    dict_t *cached = NULL;
+    char *key_copy = NULL;
+    int op_errno = ENODATA;
+    int ret = 0;
+
+    key_copy = gf_strdup(name);
+    if (!key_copy)
+        goto uncached;
+
+    local = mdc_local_get(frame, fd->inode);
+    if (!local) {
+        /* no local took ownership of the copy, release it here */
+        GF_FREE(key_copy);
+        goto uncached;
+    }
+
+    local->fd = __fd_ref(fd);
+    local->key = key_copy;
+
+    if (!is_mdc_key_satisfied(this, name))
+        goto uncached;
+
+    ret = mdc_inode_xatt_get(this, fd->inode, &cached);
+    if (ret != 0)
+        goto uncached;
+
+    GF_ATOMIC_INC(conf->mdc_counter.xattr_hit);
+
+    if (cached && dict_get(cached, (char *)name)) {
+        /* the key is cached: let the child actually remove it */
+        STACK_WIND(frame, mdc_fremovexattr_cbk, FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+    } else {
+        /* the cache has no such key: answer without going to the child */
+        ret = -1;
+        op_errno = ENODATA;
+
+        MDC_STACK_UNWIND(fremovexattr, frame, ret, op_errno, xdata);
+    }
+
+    if (cached)
+        dict_unref(cached);
+
+    return 0;
+
+uncached:
+    GF_ATOMIC_INC(conf->mdc_counter.xattr_miss);
+    STACK_WIND(frame, mdc_fremovexattr_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+    return 0;
+}
+
+/*
+ * Callback for opendir: a failed reply carrying ESTALE or ENOENT drops the
+ * cached iatt of the directory inode; the reply itself is passed on as is.
+ */
+int32_t
+mdc_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local && (op_ret != 0) &&
+        ((op_errno == ESTALE) || (op_errno == ENOENT))) {
+        mdc_inode_iatt_invalidate(this, local->loc.inode);
+    }
+
+    MDC_STACK_UNWIND(opendir, frame, op_ret, op_errno, fd, xdata);
+    return 0;
+}
+
+/*
+ * opendir fop: remembers @loc in the frame's local when one could be
+ * obtained, and winds the call with the xdata returned by
+ * mdc_prepare_request().
+ */
+int
+mdc_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+            dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, loc->inode);
+
+    if (local)
+        loc_copy(&local->loc, loc);
+
+    /* Tell readdir-ahead to include these keys in xdata when it
+     * internally issues readdirp() in its opendir_cbk.
+     * NOTE(review): local may be NULL here; mdc_prepare_request() is
+     * presumably prepared for that - confirm against its definition. */
+    xdata = mdc_prepare_request(this, local, xdata);
+
+    STACK_WIND(frame, mdc_opendir_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+
+    /* drop the dict returned by mdc_prepare_request() */
+    if (xdata)
+        dict_unref(xdata);
+
+    return 0;
+}
+
+/*
+ * Callback for readdirp (also used by mdc_readdir() when readdir is
+ * converted to readdirp).
+ *
+ * For a reply with op_ret > 0, every entry that carries an inode has its
+ * iatt cached and, when local->update_cache is set, its xattr dict cached
+ * too. A reply with op_ret == -1 and ENOENT/ESTALE invalidates the cached
+ * iatt of the directory. The reply is unwound unchanged.
+ */
+int
+mdc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+                 int op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+    gf_dirent_t *cur = NULL;
+
+    if (local) {
+        if (op_ret > 0) {
+            list_for_each_entry(cur, &entries->list, list)
+            {
+                if (cur->inode) {
+                    mdc_inode_iatt_set(this, cur->inode, &cur->d_stat,
+                                       local->incident_time);
+                    if (local->update_cache)
+                        mdc_inode_xatt_set(this, cur->inode, cur->dict);
+                }
+            }
+        } else if ((op_ret == -1) &&
+                   ((op_errno == ENOENT) || (op_errno == ESTALE))) {
+            mdc_inode_iatt_invalidate(this, local->fd->inode);
+        }
+    }
+
+    MDC_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata);
+    return 0;
+}
+
+/*
+ * readdirp fop: takes a reference on @fd in the frame's local and winds the
+ * call with the xdata returned by mdc_prepare_request(). Fails the fop with
+ * ENOMEM when no local can be obtained.
+ */
+int
+mdc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+             off_t offset, dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, fd->inode);
+
+    if (!local) {
+        MDC_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+    }
+
+    local->fd = __fd_ref(fd);
+
+    xdata = mdc_prepare_request(this, local, xdata);
+
+    STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata);
+
+    /* drop the dict returned by mdc_prepare_request() */
+    if (xdata)
+        dict_unref(xdata);
+
+    return 0;
+}
+
+/*
+ * Callback for readdir.
+ *
+ * Only a failed reply (op_ret < 0) with ESTALE or ENOENT invalidates the
+ * cached iatt of the directory. Replies with op_ret >= 0 are treated as
+ * success and op_errno is not inspected for them; this matches
+ * mdc_readdirp_cbk(), which handles the same reply shape and treats
+ * op_ret > 0 as "entries returned" and op_ret == -1 as failure.
+ * The reply is unwound unchanged.
+ */
+int
+mdc_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+                int op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+    mdc_local_t *local = NULL;
+
+    local = frame->local;
+    if (!local)
+        goto out;
+
+    /* success, possibly with a positive op_ret: nothing to invalidate */
+    if (op_ret >= 0)
+        goto out;
+
+    if ((op_errno == ESTALE) || (op_errno == ENOENT))
+        mdc_inode_iatt_invalidate(this, local->fd->inode);
+out:
+    MDC_STACK_UNWIND(readdir, frame, op_ret, op_errno, entries, xdata);
+    return 0;
+}
+
+/*
+ * readdir fop. With conf->force_readdirp unset the call is wound as a plain
+ * readdir; otherwise it is converted into a readdirp on the first child
+ * (handled by mdc_readdirp_cbk) using the xdata returned by
+ * mdc_prepare_request(). Fails with ENOMEM when no local can be obtained.
+ */
+int
+mdc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+            off_t offset, dict_t *xdata)
+{
+    struct mdc_conf *conf = this->private;
+    mdc_local_t *local = mdc_local_get(frame, fd->inode);
+
+    if (!local) {
+        MDC_STACK_UNWIND(readdir, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+    }
+
+    local->fd = __fd_ref(fd);
+
+    if (conf->force_readdirp) {
+        xdata = mdc_prepare_request(this, local, xdata);
+
+        STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata);
+
+        /* drop the dict returned by mdc_prepare_request() */
+        if (xdata)
+            dict_unref(xdata);
+    } else {
+        STACK_WIND(frame, mdc_readdir_cbk, FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata);
+    }
+
+    return 0;
+}
+
+/*
+ * Callback for fallocate: on success the iatt cache of the fd's inode is
+ * updated from @prebuf/@postbuf; on failure with ENOENT/ESTALE the cached
+ * iatt is invalidated. The reply is unwound unchanged.
+ */
+int
+mdc_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                  int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                  struct iatt *postbuf, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local) {
+        if (op_ret == 0) {
+            mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf,
+                                        postbuf, _gf_true,
+                                        local->incident_time);
+        } else if ((op_errno == ENOENT) || (op_errno == ESTALE)) {
+            mdc_inode_iatt_invalidate(this, local->fd->inode);
+        }
+    }
+
+    MDC_STACK_UNWIND(fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+                     xdata);
+
+    return 0;
+}
+
+/*
+ * fallocate fop: references @fd in the frame's local (when a local could be
+ * obtained) and winds the call to the first child.
+ */
+int
+mdc_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+              off_t offset, size_t len, dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, fd->inode);
+
+    /* without a local the call is still wound, just not tracked */
+    if (local)
+        local->fd = __fd_ref(fd);
+
+    STACK_WIND(frame, mdc_fallocate_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+               xdata);
+
+    return 0;
+}
+
+/*
+ * Callback for discard: on success the iatt cache of the fd's inode is
+ * updated from @prebuf/@postbuf; on failure with ENOENT/ESTALE the cached
+ * iatt is invalidated. The reply is unwound unchanged.
+ */
+int
+mdc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                struct iatt *postbuf, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local) {
+        if (op_ret == 0) {
+            mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf,
+                                        postbuf, _gf_true,
+                                        local->incident_time);
+        } else if ((op_errno == ENOENT) || (op_errno == ESTALE)) {
+            mdc_inode_iatt_invalidate(this, local->fd->inode);
+        }
+    }
+
+    MDC_STACK_UNWIND(discard, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+    return 0;
+}
+
+/*
+ * discard fop: references @fd in the frame's local (when a local could be
+ * obtained) and winds the call to the first child.
+ */
+int
+mdc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+            size_t len, dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, fd->inode);
+
+    /* without a local the call is still wound, just not tracked */
+    if (local)
+        local->fd = __fd_ref(fd);
+
+    STACK_WIND(frame, mdc_discard_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+
+    return 0;
+}
+
+/*
+ * Callback for zerofill: on success the iatt cache of the fd's inode is
+ * updated from @prebuf/@postbuf; on failure with ENOENT/ESTALE the cached
+ * iatt is invalidated. The reply is unwound unchanged.
+ */
+int
+mdc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+                 struct iatt *postbuf, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local) {
+        if (op_ret == 0) {
+            mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf,
+                                        postbuf, _gf_true,
+                                        local->incident_time);
+        } else if ((op_errno == ENOENT) || (op_errno == ESTALE)) {
+            mdc_inode_iatt_invalidate(this, local->fd->inode);
+        }
+    }
+
+    MDC_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+    return 0;
+}
+
+/*
+ * zerofill fop: references @fd in the frame's local (when a local could be
+ * obtained) and winds the call to the first child.
+ */
+int
+mdc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+             off_t len, dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, fd->inode);
+
+    /* without a local the call is still wound, just not tracked */
+    if (local)
+        local->fd = __fd_ref(fd);
+
+    STACK_WIND(frame, mdc_zerofill_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+
+    return 0;
+}
+
+/*
+ * Callback for readlink.
+ *
+ * Only a failed reply (op_ret < 0) with ENOENT or ESTALE invalidates the
+ * cached iatt of the link inode. op_errno is not inspected for replies with
+ * op_ret >= 0: a successful readlink is expected to report a positive
+ * length rather than 0 (NOTE(review): as POSIX readlink() does - confirm
+ * for the child xlators), so testing for op_ret == 0 alone would let a
+ * stale op_errno on a successful reply drop a valid cache entry.
+ * The reply is unwound unchanged.
+ */
+int32_t
+mdc_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, const char *path,
+                 struct iatt *buf, dict_t *xdata)
+{
+    mdc_local_t *local = NULL;
+
+    local = frame->local;
+    if (!local)
+        goto out;
+
+    /* success: nothing to invalidate */
+    if (op_ret >= 0)
+        goto out;
+
+    if ((op_errno == ENOENT) || (op_errno == ESTALE))
+        mdc_inode_iatt_invalidate(this, local->loc.inode);
+
+out:
+    MDC_STACK_UNWIND(readlink, frame, op_ret, op_errno, path, buf, xdata);
+    return 0;
+}
+
+/*
+ * readlink fop: remembers @loc in the frame's local and winds the call to
+ * the first child. Fails with ENOMEM when no local can be obtained.
+ */
+int32_t
+mdc_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+             dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, loc->inode);
+
+    if (!local) {
+        MDC_STACK_UNWIND(readlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+        return 0;
+    }
+
+    loc_copy(&local->loc, loc);
+
+    STACK_WIND(frame, mdc_readlink_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
+    return 0;
+}
+
+/*
+ * Callback for fsyncdir: a failed reply carrying ESTALE or ENOENT drops the
+ * cached iatt of the fd's inode; the reply itself is passed on as is.
+ */
+int32_t
+mdc_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                 int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local && (op_ret != 0) &&
+        ((op_errno == ESTALE) || (op_errno == ENOENT))) {
+        mdc_inode_iatt_invalidate(this, local->fd->inode);
+    }
+
+    MDC_STACK_UNWIND(fsyncdir, frame, op_ret, op_errno, xdata);
+    return 0;
+}
+
+/*
+ * fsyncdir fop: references @fd in the frame's local and winds the call to
+ * the first child. Fails with ENOMEM when no local can be obtained.
+ */
+int32_t
+mdc_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+             dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, fd->inode);
+
+    if (!local) {
+        MDC_STACK_UNWIND(fsyncdir, frame, -1, ENOMEM, NULL);
+        return 0;
+    }
+
+    local->fd = __fd_ref(fd);
+
+    STACK_WIND(frame, mdc_fsyncdir_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->fsyncdir, fd, flags, xdata);
+    return 0;
+}
+
+/*
+ * Callback for access: a failed reply carrying ESTALE or ENOENT drops the
+ * cached iatt of the inode; the reply itself is passed on as is.
+ */
+int32_t
+mdc_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+               int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+    mdc_local_t *local = frame->local;
+
+    if (local && (op_ret != 0) &&
+        ((op_errno == ESTALE) || (op_errno == ENOENT))) {
+        mdc_inode_iatt_invalidate(this, local->loc.inode);
+    }
+
+    MDC_STACK_UNWIND(access, frame, op_ret, op_errno, xdata);
+    return 0;
+}
+
+/*
+ * access fop: remembers @loc in the frame's local and winds the call to the
+ * first child. Fails with ENOMEM when no local can be obtained.
+ */
+int32_t
+mdc_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+           dict_t *xdata)
+{
+    mdc_local_t *local = mdc_local_get(frame, loc->inode);
+
+    if (!local) {
+        MDC_STACK_UNWIND(access, frame, -1, ENOMEM, NULL);
+        return 0;
+    }
+
+    loc_copy(&local->loc, loc);
+
+    STACK_WIND(frame, mdc_access_cbk, FIRST_CHILD(this),
+               FIRST_CHILD(this)->fops->access, loc, mask, xdata);
+    return 0;
+}
+
+/*
+ * Statedump hook (registered as mdc_dumpops.priv): writes the md-cache hit,
+ * miss, lookup and invalidation counters into a section named
+ * "<type>.<name>".
+ *
+ * Returns 0 in all cases. Nothing is written when the xlator has no private
+ * configuration; mdc_dump_metrics() applies the same guard.
+ */
+int
+mdc_priv_dump(xlator_t *this)
+{
+    struct mdc_conf *conf = NULL;
+    char key_prefix[GF_DUMP_MAX_BUF_LEN];
+
+    conf = this->private;
+    if (!conf)
+        return 0;
+
+    snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
+    gf_proc_dump_add_section("%s", key_prefix);
+
+    gf_proc_dump_write("stat_hit_count", "%" PRId64,
+                       GF_ATOMIC_GET(conf->mdc_counter.stat_hit));
+    gf_proc_dump_write("stat_miss_count", "%" PRId64,
+                       GF_ATOMIC_GET(conf->mdc_counter.stat_miss));
+    gf_proc_dump_write("xattr_hit_count", "%" PRId64,
+                       GF_ATOMIC_GET(conf->mdc_counter.xattr_hit));
+    gf_proc_dump_write("xattr_miss_count", "%" PRId64,
+                       GF_ATOMIC_GET(conf->mdc_counter.xattr_miss));
+    gf_proc_dump_write("nameless_lookup_count", "%" PRId64,
+                       GF_ATOMIC_GET(conf->mdc_counter.nameless_lookup));
+    gf_proc_dump_write("negative_lookup_count", "%" PRId64,
+                       GF_ATOMIC_GET(conf->mdc_counter.negative_lookup));
+    gf_proc_dump_write("stat_invalidations_received", "%" PRId64,
+                       GF_ATOMIC_GET(conf->mdc_counter.stat_invals));
+    gf_proc_dump_write("xattr_invalidations_received", "%" PRId64,
+                       GF_ATOMIC_GET(conf->mdc_counter.xattr_invals));
+
+    return 0;
+}
+
+/*
+ * Metrics hook (registered as xlator_api.dump_metrics): prints one
+ * "<name>.<counter> <value>" line per md-cache counter to @fd. Does nothing
+ * when the xlator has no private configuration. Always returns 0.
+ */
+static int32_t
+mdc_dump_metrics(xlator_t *this, int fd)
+{
+    struct mdc_conf *conf = this->private;
+
+    if (conf == NULL)
+        return 0;
+
+    dprintf(fd, "%s.stat_cache_hit_count %" PRId64 "\n", this->name,
+            GF_ATOMIC_GET(conf->mdc_counter.stat_hit));
+    dprintf(fd, "%s.stat_cache_miss_count %" PRId64 "\n", this->name,
+            GF_ATOMIC_GET(conf->mdc_counter.stat_miss));
+    dprintf(fd, "%s.xattr_cache_hit_count %" PRId64 "\n", this->name,
+            GF_ATOMIC_GET(conf->mdc_counter.xattr_hit));
+    dprintf(fd, "%s.xattr_cache_miss_count %" PRId64 "\n", this->name,
+            GF_ATOMIC_GET(conf->mdc_counter.xattr_miss));
+    dprintf(fd, "%s.nameless_lookup_count %" PRId64 "\n", this->name,
+            GF_ATOMIC_GET(conf->mdc_counter.nameless_lookup));
+    dprintf(fd, "%s.negative_lookup_count %" PRId64 "\n", this->name,
+            GF_ATOMIC_GET(conf->mdc_counter.negative_lookup));
+    dprintf(fd, "%s.stat_cache_invalidations_received %" PRId64 "\n",
+            this->name, GF_ATOMIC_GET(conf->mdc_counter.stat_invals));
+    dprintf(fd, "%s.xattr_cache_invalidations_received %" PRId64 "\n",
+            this->name, GF_ATOMIC_GET(conf->mdc_counter.xattr_invals));
+
+    return 0;
+}
+
+/*
+ * forget callback (registered as mdc_cbks.forget): hands the inode to
+ * mdc_inode_wipe(). Always returns 0.
+ */
+int
+mdc_forget(xlator_t *this, inode_t *inode)
+{
+    (void)mdc_inode_wipe(this, inode);
+    return 0;
+}
+
+/*
+ * Returns 1 when one of the two strings is a prefix of the other (equal
+ * strings and empty strings included), 0 otherwise.
+ */
+int
+is_strpfx(const char *str1, const char *str2)
+{
+    const char *a = str1;
+    const char *b = str2;
+
+    /* walk the common part; stop at the first difference or terminator */
+    while (*a && *b && (*a == *b)) {
+        a++;
+        b++;
+    }
+
+    /* a prefix relation holds iff at least one string was exhausted */
+    return (*a == '\0') || (*b == '\0');
+}
+
+/*
+ * Clears conf->mdc_xattr_str. The previous string is only detached here,
+ * not freed. Always returns 0.
+ */
+static int
+mdc_key_unload_all(struct mdc_conf *conf)
+{
+    char *none = NULL;
+
+    conf->mdc_xattr_str = none;
+    return 0;
+}
+
+/*
+ * Builds the comma separated list of xattr names md-cache should cache and
+ * publishes it in conf->mdc_xattr_str.
+ *
+ * The list consists of one fixed group of names per enabled conf->cache_*
+ * flag, followed by @tmp_str (the user supplied list). A NULL @tmp_str is
+ * treated as an empty list.
+ *
+ * Returns 0 on success; on allocation failure returns the value
+ * GF_CHECK_ALLOC() stores in ret and leaves conf->mdc_xattr_str untouched.
+ */
+int
+mdc_xattr_list_populate(struct mdc_conf *conf, char *tmp_str)
+{
+    char *mdc_xattr_str = NULL;
+    size_t max_size = 0;
+    int ret = 0;
+
+    /* strlen()/strcat() below must never see a NULL pointer */
+    if (tmp_str == NULL)
+        tmp_str = "";
+
+    /* Upper bound: every built-in name, the user list and some slack for
+     * the trailing separator and the terminating NUL. */
+    max_size = SLEN(
+                   "security.capability,security.selinux,security."
+                   "ima," POSIX_ACL_ACCESS_XATTR "," POSIX_ACL_DEFAULT_XATTR
+                   "," GF_POSIX_ACL_ACCESS "," GF_POSIX_ACL_DEFAULT
+                   ","
+                   "user.swift.metadata,user.DOSATTRIB,user.DosStream.*"
+                   ",user.org.netatalk.Metadata,security.NTACL,"
+                   "user.org.netatalk.ResourceFork") +
+               strlen(tmp_str) + 5; /*Some buffer bytes*/
+
+    mdc_xattr_str = GF_MALLOC(max_size, gf_common_mt_char);
+    GF_CHECK_ALLOC(mdc_xattr_str, ret, out);
+    mdc_xattr_str[0] = '\0';
+
+    if (conf->cache_capability)
+        strcat(mdc_xattr_str, "security.capability,");
+
+    if (conf->cache_selinux)
+        strcat(mdc_xattr_str, "security.selinux,");
+
+    if (conf->cache_ima)
+        strcat(mdc_xattr_str, "security.ima,");
+
+    if (conf->cache_posix_acl)
+        strcat(mdc_xattr_str,
+               POSIX_ACL_ACCESS_XATTR "," POSIX_ACL_DEFAULT_XATTR ",");
+
+    if (conf->cache_glusterfs_acl)
+        strcat(mdc_xattr_str, GF_POSIX_ACL_ACCESS "," GF_POSIX_ACL_DEFAULT ",");
+
+    if (conf->cache_swift_metadata)
+        strcat(mdc_xattr_str, "user.swift.metadata,");
+
+    if (conf->cache_samba_metadata)
+        strcat(mdc_xattr_str,
+               "user.DOSATTRIB,user.DosStream.*,"
+               "user.org.netatalk.Metadata,user.org.netatalk."
+               "ResourceFork,security.NTACL,");
+
+    strcat(mdc_xattr_str, tmp_str);
+
+    LOCK(&conf->lock);
+    {
+        /* The previous string is not freed, else is_mdc_key_satisfied,
+         * which is called by every fop, has to take lock, and will lead
+         * to lock contention
+         */
+        conf->mdc_xattr_str = mdc_xattr_str;
+    }
+    UNLOCK(&conf->lock);
+
+out:
+    return ret;
+}
+
+/* Context handed through dict_foreach() to mdc_inval_xatt(): the inode
+ * whose cached xattrs are being unset and the xlator doing it. */
+struct set {
+ inode_t *inode;
+ xlator_t *this;
+};
+
+/*
+ * dict_foreach() visitor: removes key @k from the xattr cache of the inode
+ * described by @tmp (a struct set) and returns the result of
+ * mdc_inode_xatt_unset(). The dict and the value are not used.
+ */
+static int
+mdc_inval_xatt(dict_t *d, char *k, data_t *v, void *tmp)
+{
+    struct set *ctx = (struct set *)tmp;
+
+    return mdc_inode_xatt_unset(ctx->this, ctx->inode, k);
+}
+
+/*
+ * Applies one upcall notification (@data, a struct gf_upcall) to the cache.
+ *
+ * Events other than GF_UPCALL_CACHE_INVALIDATION are ignored. The inode is
+ * looked up by gfid in the inode table of the graph's top xlator; -1 is
+ * returned when it is not found. The flags are then processed in this
+ * order, the first three groups ending the processing:
+ *   - UP_PARENT_DENTRY_FLAGS: parent stat(s) are passed to
+ *     mdc_update_gfid_stat() (processing continues),
+ *   - UP_EXPLICIT_LOOKUP: the inode is marked as needing a lookup,
+ *   - UP_NLINK / UP_RENAME_FLAGS / UP_FORGET / UP_INVAL_ATTR: iatt and
+ *     xattr caches are invalidated,
+ *   - IATT_UPDATE_FLAGS: the cached iatt is updated from the upcall stat,
+ *   - UP_XATTR / UP_XATTR_RM: cached xattrs are updated, invalidated or
+ *     unset key by key.
+ *
+ * Returns 0, or the negative result of the failing step.
+ */
+static int
+mdc_invalidate(xlator_t *this, void *data)
+{
+ struct gf_upcall *up_data = NULL;
+ struct gf_upcall_cache_invalidation *up_ci = NULL;
+ inode_t *inode = NULL;
+ int ret = 0;
+ struct set tmp = {
+ 0,
+ };
+ inode_table_t *itable = NULL;
+ struct mdc_conf *conf = this->private;
+ uint64_t gen = 0;
+
+ up_data = (struct gf_upcall *)data;
+
+ if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION)
+ goto out;
+
+ up_ci = (struct gf_upcall_cache_invalidation *)up_data->data;
+
+ itable = ((xlator_t *)this->graph->top)->itable;
+ /* inode_find() result is released with inode_unref() at "out" */
+ inode = inode_find(itable, up_data->gfid);
+ if (!inode) {
+ ret = -1;
+ goto out;
+ }
+
+ if (up_ci->flags & UP_PARENT_DENTRY_FLAGS) {
+ mdc_update_gfid_stat(this, &up_ci->p_stat);
+ if (up_ci->flags & UP_RENAME_FLAGS)
+ mdc_update_gfid_stat(this, &up_ci->oldp_stat);
+ }
+
+ if (up_ci->flags & UP_EXPLICIT_LOOKUP) {
+ mdc_inode_set_need_lookup(this, inode, _gf_true);
+ goto out;
+ }
+
+ if (up_ci->flags &
+ (UP_NLINK | UP_RENAME_FLAGS | UP_FORGET | UP_INVAL_ATTR)) {
+ mdc_inode_iatt_invalidate(this, inode);
+ mdc_inode_xatt_invalidate(this, inode);
+ GF_ATOMIC_INC(conf->mdc_counter.stat_invals);
+ goto out;
+ }
+
+ if (up_ci->flags & IATT_UPDATE_FLAGS) {
+ gen = mdc_inc_generation(this, inode);
+ ret = mdc_inode_iatt_set_validate(this, inode, NULL, &up_ci->stat,
+ _gf_false, gen);
+ /* one of the scenarios where ret < 0 is when this invalidate
+ * is older than the current stat, in that case do not
+ * update the xattrs as well
+ */
+ if (ret < 0)
+ goto out;
+ GF_ATOMIC_INC(conf->mdc_counter.stat_invals);
+ }
+
+ if (up_ci->flags & UP_XATTR) {
+ /* with a dict the listed xattrs are updated, without one the
+ * whole xattr cache of the inode is invalidated */
+ if (up_ci->dict)
+ ret = mdc_inode_xatt_update(this, inode, up_ci->dict);
+ else
+ ret = mdc_inode_xatt_invalidate(this, inode);
+
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_invals);
+ } else if (up_ci->flags & UP_XATTR_RM) {
+ /* unset every key named in the upcall dict, one by one */
+ tmp.inode = inode;
+ tmp.this = this;
+ ret = dict_foreach(up_ci->dict, mdc_inval_xatt, &tmp);
+
+ GF_ATOMIC_INC(conf->mdc_counter.xattr_invals);
+ }
+
+out:
+ if (inode)
+ inode_unref(inode);
+
+ return ret;
+}
+
+/* Argument block of the xattr registration synctask: the md-cache xlator
+ * and the dict sent through syncop_ipc(). Allocated in
+ * mdc_register_xattr_inval() and released in mdc_send_xattrs_cbk(). */
+struct mdc_ipc {
+ xlator_t *this;
+ dict_t *xattr;
+};
+
+/*
+ * Completion callback of the xattr registration synctask. When the task
+ * failed (@ret < 0) xattr caching is switched off through
+ * mdc_key_unload_all(). In all cases the frame, the dict and the argument
+ * block @data (a struct mdc_ipc) are released. Always returns 0.
+ */
+static int
+mdc_send_xattrs_cbk(int ret, call_frame_t *frame, void *data)
+{
+    struct mdc_ipc *ipc = data;
+
+    if (ret < 0) {
+        mdc_key_unload_all(THIS->private);
+        gf_msg("md-cache", GF_LOG_INFO, 0, MD_CACHE_MSG_NO_XATTR_CACHE,
+               "Disabled cache for all xattrs, as registering for "
+               "xattr cache invalidation failed");
+    }
+
+    /* the task is over: release everything that was handed to it */
+    STACK_DESTROY(frame->root);
+    dict_unref(ipc->xattr);
+    GF_FREE(ipc);
+
+    return 0;
+}
+
+/*
+ * Synctask body: sends the xattr dict in @data (a struct mdc_ipc) to the
+ * first child via syncop_ipc() with target GF_IPC_TARGET_UPCALL. Logs a
+ * warning on failure and returns the decoded syncop result.
+ */
+static int
+mdc_send_xattrs(void *data)
+{
+    struct mdc_ipc *ipc = data;
+    int ret = 0;
+
+    ret = syncop_ipc(FIRST_CHILD(ipc->this), GF_IPC_TARGET_UPCALL, ipc->xattr,
+                     NULL);
+    DECODE_SYNCOP_ERR(ret);
+    if (ret >= 0)
+        return ret;
+
+    gf_msg(ipc->this->name, GF_LOG_WARNING, errno,
+           MD_CACHE_MSG_IPC_UPCALL_FAILED,
+           "Registering the list "
+           "of xattrs that needs invalidaton, with upcall, failed");
+
+    return ret;
+}
+
+/*
+ * Starts a synctask (mdc_send_xattrs / mdc_send_xattrs_cbk) that sends the
+ * dict filled by mdc_load_reqs() to the first child.
+ *
+ * Does nothing and returns 0 when conf->mdc_invalidation is off. On any
+ * failure (ret < 0) xattr caching is disabled via mdc_key_unload_all() and
+ * everything allocated here is released; once synctask_new() succeeded the
+ * dict, the frame and the argument block are released by
+ * mdc_send_xattrs_cbk() instead.
+ */
+static int
+mdc_register_xattr_inval(xlator_t *this)
+{
+ dict_t *xattr = NULL;
+ int ret = 0;
+ struct mdc_conf *conf = NULL;
+ call_frame_t *frame = NULL;
+ struct mdc_ipc *data = NULL;
+
+ conf = this->private;
+
+ /* the flag is read under conf->lock; the lock is dropped on both
+ * paths before leaving the block */
+ LOCK(&conf->lock);
+ {
+ if (!conf->mdc_invalidation) {
+ UNLOCK(&conf->lock);
+ goto out;
+ }
+ }
+ UNLOCK(&conf->lock);
+
+ xattr = dict_new();
+ if (!xattr) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "dict_new failed");
+ ret = -1;
+ goto out;
+ }
+
+ if (!mdc_load_reqs(this, xattr)) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "failed to populate cache entries");
+ ret = -1;
+ goto out;
+ }
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "failed to create the frame");
+ ret = -1;
+ goto out;
+ }
+
+ data = GF_CALLOC(1, sizeof(struct mdc_ipc), gf_mdc_mt_mdc_ipc);
+ if (!data) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "failed to allocate memory");
+ ret = -1;
+ goto out;
+ }
+
+ data->this = this;
+ data->xattr = xattr;
+ ret = synctask_new(this->ctx->env, mdc_send_xattrs, mdc_send_xattrs_cbk,
+ frame, data);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno,
+ MD_CACHE_MSG_IPC_UPCALL_FAILED,
+ "Registering the list "
+ "of xattrs that needs invalidaton, with upcall, failed");
+ }
+
+out:
+ /* failure: nothing was handed over to a task, clean up locally */
+ if (ret < 0) {
+ mdc_key_unload_all(conf);
+ if (xattr)
+ dict_unref(xattr);
+ if (frame)
+ STACK_DESTROY(frame->root);
+ GF_FREE(data);
+ gf_msg(this->name, GF_LOG_INFO, 0, MD_CACHE_MSG_NO_XATTR_CACHE,
+ "Disabled cache for all xattrs, as registering for "
+ "xattr cache invalidation failed");
+ }
+
+ return ret;
+}
+
+/*
+ * reconfigure entry point: re-reads every md-cache option from @options
+ * into the private configuration, rebuilds the xattr list with
+ * mdc_xattr_list_populate(), applies the timeout cap and finally calls
+ * mdc_register_xattr_inval().
+ *
+ * Each GF_OPTION_RECONF is given the "out" label; options handled before
+ * such a jump have already been written into conf.
+ * Returns the result of the last step executed.
+ */
+int
+mdc_reconfigure(xlator_t *this, dict_t *options)
+{
+ struct mdc_conf *conf = NULL;
+ int timeout = 0, ret = 0;
+ char *tmp_str = NULL;
+
+ conf = this->private;
+
+ GF_OPTION_RECONF("md-cache-timeout", timeout, options, int32, out);
+
+ GF_OPTION_RECONF("cache-selinux", conf->cache_selinux, options, bool, out);
+
+ GF_OPTION_RECONF("cache-capability-xattrs", conf->cache_capability, options,
+ bool, out);
+
+ GF_OPTION_RECONF("cache-ima-xattrs", conf->cache_ima, options, bool, out);
+
+ GF_OPTION_RECONF("cache-posix-acl", conf->cache_posix_acl, options, bool,
+ out);
+
+ GF_OPTION_RECONF("cache-glusterfs-acl", conf->cache_glusterfs_acl, options,
+ bool, out);
+
+ GF_OPTION_RECONF("cache-swift-metadata", conf->cache_swift_metadata,
+ options, bool, out);
+
+ GF_OPTION_RECONF("cache-samba-metadata", conf->cache_samba_metadata,
+ options, bool, out);
+
+ GF_OPTION_RECONF("force-readdirp", conf->force_readdirp, options, bool,
+ out);
+
+ GF_OPTION_RECONF("cache-invalidation", conf->mdc_invalidation, options,
+ bool, out);
+
+ GF_OPTION_RECONF("global-cache-invalidation", conf->global_invalidation,
+ options, bool, out);
+
+ GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, out);
+
+ GF_OPTION_RECONF("md-cache-statfs", conf->cache_statfs, options, bool, out);
+
+ GF_OPTION_RECONF("xattr-cache-list", tmp_str, options, str, out);
+
+ /* the list depends on the cache-* flags read above */
+ ret = mdc_xattr_list_populate(conf, tmp_str);
+ if (ret < 0)
+ goto out;
+
+ /* If timeout is greater than 60s (default before the patch that added
+ * cache invalidation support was added) then, cache invalidation
+ * feature for md-cache needs to be enabled, if not set timeout to the
+ * previous max which is 60s
+ */
+ if ((timeout > 60) && (!conf->mdc_invalidation)) {
+ conf->timeout = 60;
+ goto out;
+ }
+ conf->timeout = timeout;
+
+ ret = mdc_register_xattr_inval(this);
+out:
+ return ret;
+}
+
+/*
+ * Memory accounting hook: registers gf_mdc_mt_end + 1 memory types for
+ * this xlator and returns the result of xlator_mem_acct_init().
+ */
+int32_t
+mdc_mem_acct_init(xlator_t *this)
+{
+    int32_t ret = xlator_mem_acct_init(this, gf_mdc_mt_end + 1);
+
+    return ret;
+}
+
+/*
+ * init entry point: allocates the private configuration, reads all
+ * options, builds the xattr list, initializes the counters and stores the
+ * configuration in this->private.
+ *
+ * Returns -1 only when the configuration cannot be allocated, 0 otherwise.
+ *
+ * NOTE(review): every GF_OPTION_INIT is given the "out" label, which
+ * publishes conf and returns 0. If one of them takes that jump, the
+ * statements after it (counter initialization, last_child_down,
+ * statfs_cache.last_refreshed, timeout, possibly the statfs mutex) are
+ * skipped - confirm this lenient behaviour is intended.
+ */
+int
+mdc_init(xlator_t *this)
+{
+ struct mdc_conf *conf = NULL;
+ uint32_t timeout = 0;
+ char *tmp_str = NULL;
+
+ conf = GF_CALLOC(sizeof(*conf), 1, gf_mdc_mt_mdc_conf_t);
+ if (!conf) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, MD_CACHE_MSG_NO_MEMORY,
+ "out of memory");
+ return -1;
+ }
+
+ LOCK_INIT(&conf->lock);
+
+ GF_OPTION_INIT("md-cache-timeout", timeout, uint32, out);
+
+ GF_OPTION_INIT("cache-selinux", conf->cache_selinux, bool, out);
+
+ GF_OPTION_INIT("cache-capability-xattrs", conf->cache_capability, bool,
+ out);
+
+ GF_OPTION_INIT("cache-ima-xattrs", conf->cache_ima, bool, out);
+
+ GF_OPTION_INIT("cache-posix-acl", conf->cache_posix_acl, bool, out);
+
+ GF_OPTION_INIT("cache-glusterfs-acl", conf->cache_glusterfs_acl, bool, out);
+
+ GF_OPTION_INIT("cache-swift-metadata", conf->cache_swift_metadata, bool,
+ out);
+
+ GF_OPTION_INIT("cache-samba-metadata", conf->cache_samba_metadata, bool,
+ out);
+
+ GF_OPTION_INIT("force-readdirp", conf->force_readdirp, bool, out);
+
+ GF_OPTION_INIT("cache-invalidation", conf->mdc_invalidation, bool, out);
+
+ GF_OPTION_INIT("global-cache-invalidation", conf->global_invalidation, bool,
+ out);
+
+ GF_OPTION_INIT("pass-through", this->pass_through, bool, out);
+
+ pthread_mutex_init(&conf->statfs_cache.lock, NULL);
+ GF_OPTION_INIT("md-cache-statfs", conf->cache_statfs, bool, out);
+
+ GF_OPTION_INIT("xattr-cache-list", tmp_str, str, out);
+ /* the return value is not checked: on failure conf->mdc_xattr_str
+ * simply stays NULL */
+ mdc_xattr_list_populate(conf, tmp_str);
+
+ conf->last_child_down = gf_time();
+ conf->statfs_cache.last_refreshed = (time_t)-1;
+
+ /* initialize gf_atomic_t counters */
+ GF_ATOMIC_INIT(conf->mdc_counter.stat_hit, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.stat_miss, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.xattr_hit, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.xattr_miss, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.negative_lookup, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.nameless_lookup, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.stat_invals, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.xattr_invals, 0);
+ GF_ATOMIC_INIT(conf->mdc_counter.need_lookup, 0);
+ GF_ATOMIC_INIT(conf->generation, 0);
+
+ /* If timeout is greater than 60s (default before the patch that added
+ * cache invalidation support was added) then, cache invalidation
+ * feature for md-cache needs to be enabled, if not set timeout to the
+ * previous max which is 60s
+ */
+ if ((timeout > 60) && (!conf->mdc_invalidation)) {
+ conf->timeout = 60;
+ goto out;
+ }
+ conf->timeout = timeout;
+
+out:
+ this->private = conf;
+
+ return 0;
+}
+
+/*
+ * Records @now as the time of the most recent child-down event
+ * (conf->last_child_down), under conf->lock.
+ */
+void
+mdc_update_child_down_time(xlator_t *this, time_t now)
+{
+    struct mdc_conf *priv = this->private;
+
+    LOCK(&priv->lock);
+    priv->last_child_down = now;
+    UNLOCK(&priv->lock);
+}
+
+/*
+ * notify entry point.
+ *   - child/descendant down: the child-down time is refreshed,
+ *   - upcall: forwarded to mdc_invalidate() when cache invalidation is on,
+ *   - child/descendant up: the xattr list is (re)registered through
+ *     mdc_register_xattr_inval().
+ * Every event is then passed to default_notify(); a failure there makes the
+ * function return -1, otherwise the result of the handler above is
+ * returned.
+ */
+int
+mdc_notify(xlator_t *this, int event, void *data, ...)
+{
+    struct mdc_conf *conf = this->private;
+    int ret = 0;
+
+    if ((event == GF_EVENT_CHILD_DOWN) ||
+        (event == GF_EVENT_SOME_DESCENDENT_DOWN)) {
+        mdc_update_child_down_time(this, gf_time());
+    } else if (event == GF_EVENT_UPCALL) {
+        if (conf->mdc_invalidation)
+            ret = mdc_invalidate(this, data);
+    } else if ((event == GF_EVENT_CHILD_UP) ||
+               (event == GF_EVENT_SOME_DESCENDENT_UP)) {
+        ret = mdc_register_xattr_inval(this);
+    }
+
+    if (default_notify(this, event, data) != 0)
+        ret = -1;
+
+    return ret;
+}
+
+/*
+ * fini entry point: releases the private configuration created by
+ * mdc_init().
+ *
+ * Besides the configuration itself this frees the current xattr list
+ * string (allocated by mdc_xattr_list_populate()), destroys conf->lock and
+ * clears this->private so that no dangling pointer is left behind. Freeing
+ * the string here adds no new hazard: the structure that holds the pointer
+ * is released in the same call.
+ *
+ * NOTE(review): conf->statfs_cache.lock is deliberately left alone because
+ * mdc_init() can return before initializing it.
+ */
+void
+mdc_fini(xlator_t *this)
+{
+    struct mdc_conf *conf = this->private;
+
+    if (!conf)
+        return;
+
+    this->private = NULL;
+
+    GF_FREE(conf->mdc_xattr_str);
+    conf->mdc_xattr_str = NULL;
+
+    LOCK_DESTROY(&conf->lock);
+
+    GF_FREE(conf);
+}
+
+/* File operation table of md-cache: maps each fop it intercepts to its
+ * mdc_* handler. Referenced from xlator_api.fops. */
+struct xlator_fops mdc_fops = {
+ .lookup = mdc_lookup,
+ .stat = mdc_stat,
+ .fstat = mdc_fstat,
+ .truncate = mdc_truncate,
+ .ftruncate = mdc_ftruncate,
+ .mknod = mdc_mknod,
+ .mkdir = mdc_mkdir,
+ .unlink = mdc_unlink,
+ .rmdir = mdc_rmdir,
+ .symlink = mdc_symlink,
+ .rename = mdc_rename,
+ .link = mdc_link,
+ .create = mdc_create,
+ .open = mdc_open,
+ .readv = mdc_readv,
+ .writev = mdc_writev,
+ .setattr = mdc_setattr,
+ .fsetattr = mdc_fsetattr,
+ .fsync = mdc_fsync,
+ .setxattr = mdc_setxattr,
+ .fsetxattr = mdc_fsetxattr,
+ .getxattr = mdc_getxattr,
+ .fgetxattr = mdc_fgetxattr,
+ .removexattr = mdc_removexattr,
+ .fremovexattr = mdc_fremovexattr,
+ .opendir = mdc_opendir,
+ .readdirp = mdc_readdirp,
+ .readdir = mdc_readdir,
+ .fallocate = mdc_fallocate,
+ .discard = mdc_discard,
+ .zerofill = mdc_zerofill,
+ .statfs = mdc_statfs,
+ .readlink = mdc_readlink,
+ .fsyncdir = mdc_fsyncdir,
+ .access = mdc_access,
+};
+
+/* Callback table of md-cache: only the forget callback is provided.
+ * Referenced from xlator_api.cbks. */
+struct xlator_cbks mdc_cbks = {
+ .forget = mdc_forget,
+};
+
+/* Statedump table of md-cache: only the private-data dump is provided.
+ * Referenced from xlator_api.dumpops. */
+struct xlator_dumpops mdc_dumpops = {
+ .priv = mdc_priv_dump,
+};
+
+/* Option table of md-cache, terminated by an entry with a NULL key. The
+ * keys listed here are the ones read by mdc_init() and mdc_reconfigure()
+ * (plus "md-cache" itself). Referenced from xlator_api.options. */
+struct volume_options mdc_options[] = {
+ {
+ .key = {"md-cache"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable md-cache",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {
+ .key = {"cache-selinux"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {2},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache selinux xattr(security.selinux) on client side",
+ },
+ {
+ .key = {"cache-capability-xattrs"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .op_version = {GD_OP_VERSION_3_10_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache capability xattr(security.capability) on "
+ "client side",
+ },
+ {
+ .key = {"cache-ima-xattrs"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .op_version = {GD_OP_VERSION_3_10_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache Linux integrity subsystem xattr(security.ima) "
+ "on client side",
+ },
+ {
+ .key = {"cache-swift-metadata"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_3_7_10},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache swift metadata (user.swift.metadata xattr)",
+ },
+ {
+ .key = {"cache-samba-metadata"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_3_9_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache samba metadata (user.DOSATTRIB, security.NTACL,"
+ " org.netatalk.Metadata, org.netatalk.ResourceFork, "
+ "and user.DosStream. xattrs)",
+ },
+ {
+ .key = {"cache-posix-acl"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {2},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache posix ACL xattrs (system.posix_acl_access, "
+ "system.posix_acl_default) on client side",
+ },
+ {
+ .key = {"cache-glusterfs-acl"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache virtual glusterfs ACL xattrs "
+ "(glusterfs.posix.acl, glusterfs.posix.default_acl) "
+ "on client side",
+ },
+ {
+ .key = {"md-cache-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 600,
+ .default_value = SITE_H_MD_CACHE_TIMEOUT,
+ .op_version = {2},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Time period after which cache has to be refreshed",
+ },
+ {
+ .key = {"force-readdirp"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .op_version = {2},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Convert all readdir requests to readdirplus to "
+ "collect stat info on each entry.",
+ },
+ {
+ .key = {"cache-invalidation"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_3_9_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "When \"on\", invalidates/updates the metadata cache,"
+ " on receiving the cache-invalidation notifications",
+ },
+ {
+ .key = {"global-cache-invalidation"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description =
+ "When \"on\", purges all read caches in kernel and glusterfs stack "
+ "whenever a stat change is detected. Stat changes can be detected "
+ "while processing responses to file operations (fop) or through "
+ "upcall notifications. Since purging caches can be an expensive "
+ "operation, it's advised to have this option \"on\" only when a "
+ "file "
+ "can be accessed from multiple different Glusterfs mounts and "
+ "caches across these different mounts are required to be coherent. "
+ "If a file is not accessed across different mounts "
+ "(simple example is having only one mount for a volume), its "
+ "advised to keep "
+ "this option \"off\" as all file modifications go through caches "
+ "keeping them "
+ "coherent. This option overrides value of "
+ "performance.cache-invalidation.",
+ },
+ {
+ .key = {"md-cache-statfs"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "Cache statfs information of filesystem on the client",
+ },
+ {
+ .key = {"xattr-cache-list"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .description = "A comma separated list of xattrs that shall be "
+ "cached by md-cache. The only wildcard allowed is '*'",
+ },
+ {.key = {"pass-through"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"md-cache"},
+ .description = "Enable/Disable md cache translator"},
+ {.key = {NULL}},
+};
+
+/* Descriptor of the md-cache xlator: ties together its lifecycle entry
+ * points (init, fini, notify, reconfigure), the accounting and dump hooks,
+ * and the fop, callback and option tables defined in this file. */
+xlator_api_t xlator_api = {
+ .init = mdc_init,
+ .fini = mdc_fini,
+ .notify = mdc_notify,
+ .reconfigure = mdc_reconfigure,
+ .mem_acct_init = mdc_mem_acct_init,
+ .dump_metrics = mdc_dump_metrics,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &mdc_dumpops,
+ .fops = &mdc_fops,
+ .cbks = &mdc_cbks,
+ .options = mdc_options,
+ .identifier = "md-cache",
+ .category = GF_MAINTAINED,
+};