summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPoornima G <pgurusid@redhat.com>2015-12-11 05:12:07 -0500
committerRaghavendra G <rgowdapp@redhat.com>2016-07-20 05:12:05 -0700
commit1f97d7101b3313ce647638310e1028da8dac6785 (patch)
tree869593e615ad5c7eff936a3dac72eab7aecebf0a
parent1929141da34d36f537e9798e3618e0e3bdc61eb6 (diff)
md-cache: Add cache invalidation support to invalidate the meta data cache
Problem: md-cache currently updates its stat in cbks of selected fops. The default cache time is 1 second, if this is increasd to reap the benefits of caching, we may end up with stale cache for long time, as there is no logic yet to notify md-cache of backend changes by another client. Solution: Use the existing upcall mechanism to invalidate the cache. For this feature to work, "features.cache-invalidation" volume option should be enabled. This patch as is doesn't improve any performance, the benifit of the patch is that it provides coherency for stat cache, hence the cache timeout can be quite longer which in turn can improve the performance. Change-Id: I2dbb0afa7b5e4a5a248f910188e0918e02f18692 BUG: 1211863 Signed-off-by: Poornima G <pgurusid@redhat.com> Reviewed-on: http://review.gluster.org/12951 Smoke: Gluster Build System <jenkins@build.gluster.org> CentOS-regression: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
-rw-r--r--tests/bugs/md-cache/bug-1211863.t60
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c6
-rw-r--r--xlators/performance/md-cache/src/md-cache-messages.h10
-rw-r--r--xlators/performance/md-cache/src/md-cache.c258
4 files changed, 304 insertions, 30 deletions
diff --git a/tests/bugs/md-cache/bug-1211863.t b/tests/bugs/md-cache/bug-1211863.t
index b969fbb4b29..4fa9e33d7a1 100644
--- a/tests/bugs/md-cache/bug-1211863.t
+++ b/tests/bugs/md-cache/bug-1211863.t
@@ -5,18 +5,64 @@
cleanup;
-TEST glusterd
+## 1. Start glusterd
+TEST glusterd;
-TEST $CLI volume create $V0 $H0:$B0/$V0
+## 2. Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+## 3. Start the volume
TEST $CLI volume start $V0
-TEST $CLI volume set $V0 cache-samba-metadata on
-EXPECT 'on' volinfo_field $V0 'performance.cache-samba-metadata'
+## 4. Enable the upcall xlator, and increase the md-cache timeout to max
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume set $V0 performance.cache-samba-metadata on
-TEST $CLI volume set $V0 cache-samba-metadata off
-EXPECT 'off' volinfo_field $V0 'performance.cache-samba-metadata'
+## 6. Create two gluster mounts
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+## 8. Create a file
+TEST touch $M0/file1
+
+## 9. Setxattr from mount-0
+TEST "setfattr -n user.DOSATTRIB -v "abc" $M0/file1"
+## 10. Getxattr from mount-1, this should return the correct value as it is a fresh getxattr
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc"
+
+## 11. Now modify the same xattr from mount-0 again
+TEST "setfattr -n user.DOSATTRIB -v "xyz" $M0/file1"
+## 12. Since the xattr is already cached in mount-1 it returns the old xattr
+ #value, until the timeout (600)
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc"
+
+## 13. Unmount to clean all the cache
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+
+## 18. Restart the volume to restart the bick process
TEST $CLI volume stop $V0
-TEST $CLI volume delete $V0
+TEST $CLI volume start $V0
+
+## 20. Create two gluster mounts
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+## 22. Repeat the tests 11-14, but this time since cache invalidation is on,
+ #the getxattr will reflect the new value
+TEST "setfattr -n user.DOSATTRIB -v "abc" $M0/file1"
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc"
+TEST "setfattr -n user.DOSATTRIB -v "xyz" $M0/file1"
+sleep 2; #There can be a very very small window where the next getxattr
+ #reaches md-cache, before the cache-invalidation caused by previous
+ #setxattr, reaches md-cache. Hence sleeping for 2 sec.
+ #Also it should not be > 600.
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q xyz"
+
+TEST $CLI volume set $V0 cache-samba-metadata off
+EXPECT 'off' volinfo_field $V0 'performance.cache-samba-metadata'
cleanup;
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 2e6aa378097..3e12c4f5fb0 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -1967,6 +1967,12 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.op_version = 2,
.flags = OPT_FLAG_CLIENT_OPT
},
+ { .key = "features.cache-invalidation",
+ .voltype = "performance/md-cache",
+ .option = "cache-invalidation",
+ .op_version = GD_OP_VERSION_3_9_0,
+ .flags = OPT_FLAG_CLIENT_OPT
+ },
/* Feature translators */
{ .key = "features.uss",
diff --git a/xlators/performance/md-cache/src/md-cache-messages.h b/xlators/performance/md-cache/src/md-cache-messages.h
index 2fe8d457a9a..a4259bacf1b 100644
--- a/xlators/performance/md-cache/src/md-cache-messages.h
+++ b/xlators/performance/md-cache/src/md-cache-messages.h
@@ -40,7 +40,7 @@
*/
#define GLFS_MD_CACHE_BASE GLFS_MSGID_COMP_MD_CACHE
-#define GLFS_MD_CACHE_NUM_MESSAGES 1
+#define GLFS_MD_CACHE_NUM_MESSAGES 2
#define GLFS_MSGID_END (GLFS_MD_CACHE_BASE + GLFS_MD_CACHE_NUM_MESSAGES + 1)
/* Messages with message IDs */
@@ -58,6 +58,14 @@
#define MD_CACHE_MSG_NO_MEMORY (GLFS_MD_CACHE_BASE + 1)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction None
+ *
+ */
+
+#define MD_CACHE_MSG_DISCARD_UPDATE (GLFS_MD_CACHE_BASE + 2)
/*------------*/
#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c
index 0a4d3c78ead..57a1a0d1a4e 100644
--- a/xlators/performance/md-cache/src/md-cache.c
+++ b/xlators/performance/md-cache/src/md-cache.c
@@ -16,6 +16,8 @@
#include "md-cache-mem-types.h"
#include "compat-errno.h"
#include "glusterfs-acl.h"
+#include "defaults.h"
+#include "upcall-utils.h"
#include <assert.h>
#include <sys/time.h>
#include "md-cache-messages.h"
@@ -34,6 +36,9 @@ struct mdc_conf {
gf_boolean_t force_readdirp;
gf_boolean_t cache_swift_metadata;
gf_boolean_t cache_samba_metadata;
+ gf_boolean_t mdc_invalidation;
+ time_t last_child_down;
+ gf_lock_t lock;
};
@@ -312,20 +317,58 @@ unlock:
}
+/* Cache is valid if:
+ * - It is not cached before any brick was down. Brick down case is handled by
+ * invalidating all the cache when any brick went down.
+ * - The cache time is not expired
+ */
+static gf_boolean_t
+__is_cache_valid (xlator_t *this, time_t mdc_time)
+{
+ time_t now = 0;
+ gf_boolean_t ret = _gf_true;
+ struct mdc_conf *conf = NULL;
+ int timeout = 0;
+ time_t last_child_down = 0;
+
+ conf = this->private;
+
+ /* conf->lock here is not taken deliberately, so that the multi
+ * threaded IO doesn't contend on a global lock. While updating
+ * the variable, the lock is taken, so that atleast the writes are
+ * intact. The read of last_child_down may return junk, but that
+ * is for a very short period of time.
+ */
+ last_child_down = conf->last_child_down;
+ timeout = conf->timeout;
+
+ time (&now);
+
+ if ((mdc_time == 0) ||
+ ((last_child_down != 0) && (mdc_time < last_child_down))) {
+ ret = _gf_false;
+ goto out;
+ }
+
+ if (now >= (mdc_time + timeout)) {
+ ret = _gf_false;
+ }
+
+out:
+ return ret;
+}
+
+
static gf_boolean_t
is_md_cache_iatt_valid (xlator_t *this, struct md_cache *mdc)
{
- struct mdc_conf *conf = NULL;
- time_t now = 0;
gf_boolean_t ret = _gf_true;
- conf = this->private;
-
- time (&now);
LOCK (&mdc->lock);
{
- if (now >= (mdc->ia_time + conf->timeout))
- ret = _gf_false;
+ ret = __is_cache_valid (this, mdc->ia_time);
+ if (ret == _gf_false)
+ mdc->ia_time = 0;
}
UNLOCK (&mdc->lock);
@@ -336,18 +379,13 @@ is_md_cache_iatt_valid (xlator_t *this, struct md_cache *mdc)
static gf_boolean_t
is_md_cache_xatt_valid (xlator_t *this, struct md_cache *mdc)
{
- struct mdc_conf *conf = NULL;
- time_t now = 0;
gf_boolean_t ret = _gf_true;
- conf = this->private;
-
- time (&now);
-
LOCK (&mdc->lock);
{
- if (now >= (mdc->xa_time + conf->timeout))
- ret = _gf_false;
+ ret = __is_cache_valid (this, mdc->xa_time);
+ if (ret == _gf_false)
+ mdc->xa_time = 0;
}
UNLOCK (&mdc->lock);
@@ -397,12 +435,14 @@ int
mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf,
struct iatt *iatt)
{
- int ret = -1;
+ int ret = 0;
struct md_cache *mdc = NULL;
mdc = mdc_inode_prep (this, inode);
- if (!mdc)
+ if (!mdc) {
+ ret = -1;
goto out;
+ }
LOCK (&mdc->lock);
{
@@ -411,6 +451,33 @@ mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf,
goto unlock;
}
+ /* There could be a race in invalidation, where the
+ * invalidations in order A, B reaches md-cache in the order
+ * B, A. Hence, make sure the invalidation A is discarded if
+ * it comes after B. ctime of a file is always in ascending
+ * order unlike atime and mtime(which can be changed by user
+ * to any date), also ctime gets updates when atime/mtime
+ * changes, hence check for ctime only.
+ */
+ if (mdc->md_ctime > iatt->ia_ctime) {
+ gf_msg_callingfn (this->name, GF_LOG_DEBUG, EINVAL,
+ MD_CACHE_MSG_DISCARD_UPDATE,
+ "discarding the iatt validate "
+ "request");
+ ret = -1;
+ goto unlock;
+
+ }
+ if ((mdc->md_ctime == iatt->ia_ctime) &&
+ (mdc->md_ctime_nsec > iatt->ia_ctime_nsec)) {
+ gf_msg_callingfn (this->name, GF_LOG_DEBUG, EINVAL,
+ MD_CACHE_MSG_DISCARD_UPDATE,
+ "discarding the iatt validate "
+ "request(ctime_nsec)");
+ ret = -1;
+ goto unlock;
+ }
+
/*
* Invalidate the inode if the mtime or ctime has changed
* and the prebuf doesn't match the value we have cached.
@@ -434,7 +501,7 @@ mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf,
}
unlock:
UNLOCK (&mdc->lock);
- ret = 0;
+
out:
return ret;
}
@@ -2284,15 +2351,81 @@ mdc_key_load_set (struct mdc_key *keys, char *pattern, gf_boolean_t val)
return 0;
}
+struct set {
+ inode_t *inode;
+ xlator_t *this;
+};
+
+static int
+mdc_inval_xatt (dict_t *d, char *k, data_t *v, void *tmp)
+{
+ struct set *tmp1 = NULL;
+ int ret = 0;
+
+ tmp1 = (struct set *)tmp;
+ ret = mdc_inode_xatt_unset (tmp1->this, tmp1->inode, k);
+ return ret;
+}
+
+static int
+mdc_invalidate (xlator_t *this, void *data)
+{
+ struct gf_upcall *up_data = NULL;
+ struct gf_upcall_cache_invalidation *up_ci = NULL;
+ inode_t *inode = NULL;
+ int ret = 0;
+ struct set tmp = {0, };
+ inode_table_t *itable = NULL;
+
+ up_data = (struct gf_upcall *)data;
+
+ if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION)
+ goto out;
+
+ up_ci = (struct gf_upcall_cache_invalidation *)up_data->data;
+
+ itable = ((xlator_t *)this->graph->top)->itable;
+ inode = inode_find (itable, up_data->gfid);
+ if (!inode) {
+ ret = -1;
+ goto out;
+ }
+
+ if (up_ci->flags & IATT_UPDATE_FLAGS) {
+ ret = mdc_inode_iatt_set_validate (this, inode, NULL,
+ &up_ci->stat);
+ /* one of the scenarios where ret < 0 is when this invalidate
+ * is older than the current stat, in that case do not
+ * update the xattrs as well
+ */
+ if (ret < 0)
+ goto out;
+ }
+ if (up_ci->flags & UP_XATTR) {
+ ret = mdc_inode_xatt_update (this, inode, up_ci->dict);
+ } else if (up_ci->flags & UP_XATTR_RM) {
+ tmp.inode = inode;
+ tmp.this = this;
+ ret = dict_foreach (up_ci->dict, mdc_inval_xatt, &tmp);
+ }
+
+out:
+ if (inode)
+ inode_unref (inode);
+
+ return ret;
+}
+
int
reconfigure (xlator_t *this, dict_t *options)
{
struct mdc_conf *conf = NULL;
+ int timeout = 0;
conf = this->private;
- GF_OPTION_RECONF ("md-cache-timeout", conf->timeout, options, int32, out);
+ GF_OPTION_RECONF ("md-cache-timeout", timeout, options, int32, out);
GF_OPTION_RECONF ("cache-selinux", conf->cache_selinux, options, bool, out);
mdc_key_load_set (mdc_keys, "security.", conf->cache_selinux);
@@ -2314,7 +2447,19 @@ reconfigure (xlator_t *this, dict_t *options)
conf->cache_samba_metadata);
GF_OPTION_RECONF("force-readdirp", conf->force_readdirp, options, bool, out);
-
+ GF_OPTION_RECONF("cache-invalidation", conf->mdc_invalidation, options,
+ bool, out);
+
+ /* If timeout is greater than 60s (default before the patch that added
+ * cache invalidation support was added) then, cache invalidation
+ * feature for md-cache needs to be enabled, if not set timeout to the
+ * previous max which is 60s
+ */
+ if ((timeout > 60) && (!conf->mdc_invalidation)) {
+ conf->timeout = 60;
+ goto out;
+ }
+ conf->timeout = timeout;
out:
return 0;
}
@@ -2332,6 +2477,7 @@ int
init (xlator_t *this)
{
struct mdc_conf *conf = NULL;
+ int timeout = 0;
conf = GF_CALLOC (sizeof (*conf), 1, gf_mdc_mt_mdc_conf_t);
if (!conf) {
@@ -2340,7 +2486,7 @@ init (xlator_t *this)
return -1;
}
- GF_OPTION_INIT ("md-cache-timeout", conf->timeout, int32, out);
+ GF_OPTION_INIT ("md-cache-timeout", timeout, int32, out);
GF_OPTION_INIT ("cache-selinux", conf->cache_selinux, bool, out);
mdc_key_load_set (mdc_keys, "security.", conf->cache_selinux);
@@ -2362,6 +2508,22 @@ init (xlator_t *this)
conf->cache_samba_metadata);
GF_OPTION_INIT("force-readdirp", conf->force_readdirp, bool, out);
+ GF_OPTION_INIT("cache-invalidation", conf->mdc_invalidation, bool, out);
+
+ LOCK_INIT (&conf->lock);
+ time (&conf->last_child_down);
+
+ /* If timeout is greater than 60s (default before the patch that added
+ * cache invalidation support was added) then, cache invalidation
+ * feature for md-cache needs to be enabled, if not set timeout to the
+ * previous max which is 60s
+ */
+ if ((timeout > 60) && (!conf->mdc_invalidation)) {
+ conf->timeout = 60;
+ goto out;
+ }
+ conf->timeout = timeout;
+
out:
this->private = conf;
@@ -2370,6 +2532,52 @@ out:
void
+mdc_update_child_down_time (xlator_t *this, time_t *now)
+{
+ struct mdc_conf *conf = NULL;
+
+ conf = this->private;
+
+ LOCK (&conf->lock);
+ {
+ conf->last_child_down = *now;
+ }
+ UNLOCK (&conf->lock);
+}
+
+
+int
+notify (xlator_t *this, int event, void *data, ...)
+{
+ int ret = 0;
+ struct mdc_conf *conf = NULL;
+ time_t now = 0;
+
+ conf = this->private;
+ switch (event) {
+ case GF_EVENT_CHILD_DOWN:
+ case GF_EVENT_SOME_CHILD_DOWN:
+ case GF_EVENT_CHILD_MODIFIED:
+ time (&now);
+ mdc_update_child_down_time (this, &now);
+ ret = default_notify (this, event, data);
+ break;
+ case GF_EVENT_UPCALL:
+ if (conf->mdc_invalidation)
+ ret = mdc_invalidate (this, data);
+ if (default_notify (this, event, data) != 0)
+ ret = -1;
+ break;
+ default:
+ ret = default_notify (this, event, data);
+ break;
+ }
+
+ return ret;
+}
+
+
+void
fini (xlator_t *this)
{
return;
@@ -2437,7 +2645,7 @@ struct volume_options options[] = {
{ .key = {"md-cache-timeout"},
.type = GF_OPTION_TYPE_INT,
.min = 0,
- .max = 60,
+ .max = 600,
.default_value = "1",
.description = "Time period after which cache has to be refreshed",
},
@@ -2447,5 +2655,11 @@ struct volume_options options[] = {
.description = "Convert all readdir requests to readdirplus to "
"collect stat info on each entry.",
},
+ { .key = {"cache-invalidation"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .description = "When \"on\", invalidates/updates the metadata cache "
+ "on receiving of the cache-invalidation notifications",
+ },
{ .key = {NULL} },
};