summary | refs | log | tree | commit | diff | stats
path: root/xlators
diff options
context:
space:
mode:
author: Brian Foster <bfoster@redhat.com> 2012-06-13 12:08:38 -0400
committer: Anand Avati <avati@redhat.com> 2012-07-13 09:46:09 -0700
commit: 32ffb79f18cbaebcbe6bba51599ca234f44675cc (patch)
tree: 3f7589078e618cf0f575e5ad03a67afecb50d006 /xlators
parent: ca4900497142127c31d0dba7a53a921200aaf790 (diff)
fuse/md-cache: add support for the 'fopen-keep-cache' mount option
fopen-keep-cache disables unconditional page-cache invalidations on file open in fuse (via FOPEN_KEEP_CACHE) and replaces that behavior with detection of remote changes and explicit invalidations from mount/fuse. This option improves local caching through the page cache and native client. This change defines a new 'invalidate' translator callback to identify when an inode's cache mapping has been determined to be invalid. md-cache implements the policy to detect and invoke inode invalidations. fuse-bridge and io-cache implement invalidate handlers to invalidate the respective caches (page cache in the case of fuse). BUG: 833564 Change-Id: I99818da5777eaf06276c1c0b194669f5bab92d48 Signed-off-by: Brian Foster <bfoster@redhat.com> Reviewed-on: http://review.gluster.com/3584 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anand Avati <avati@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r-- xlators/mount/fuse/src/fuse-bridge.c 113
-rw-r--r-- xlators/mount/fuse/src/fuse-bridge.h 1
-rwxr-xr-x xlators/mount/fuse/utils/mount.glusterfs.in 5
-rw-r--r-- xlators/performance/io-cache/src/io-cache.c 15
-rw-r--r-- xlators/performance/md-cache/src/md-cache.c 32
5 files changed, 145 insertions, 21 deletions
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c
index 3b4c6c68c..21e14efb3 100644
--- a/xlators/mount/fuse/src/fuse-bridge.c
+++ b/xlators/mount/fuse/src/fuse-bridge.c
@@ -25,6 +25,34 @@ static int gf_fuse_xattr_enotsup_log;
void fini (xlator_t *this_xl);
+static void fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino);
+
+/*
+ * Invalidate callback of the fuse xlator (wired up as cbks.invalidate).
+ * Send an invalidate notification up to fuse to purge the file from local
+ * page cache.
+ *
+ * @this:  the fuse xlator; this->private is its fuse_private_t
+ * @inode: inode whose cached pages are to be dropped
+ *
+ * Always returns 0: the notification is best-effort and a delivery failure
+ * is not reported back to the caller.
+ */
+static int32_t
+fuse_invalidate (xlator_t *this, inode_t *inode)
+{
+        fuse_private_t *priv = this->private;
+        uint64_t nodeid = 0;
+
+        /*
+         * NOTE: We only invalidate at the moment if fopen_keep_cache is
+         * enabled because otherwise this is a departure from default
+         * behavior. Specifically, the performance/write-behind xlator
+         * causes unconditional invalidations on write requests.
+         */
+        if (!priv->fopen_keep_cache)
+                return 0;
+
+        nodeid = inode_to_fuse_nodeid (inode);
+        /* nodeid is a uint64_t: PRIu64 keeps the format correct where
+         * unsigned long is only 32 bits wide. */
+        gf_log (this->name, GF_LOG_DEBUG,
+                "Invalidate inode id %"PRIu64".", nodeid);
+        fuse_invalidate_inode (this, nodeid);
+
+        return 0;
+}
+
fuse_fd_ctx_t *
__fuse_fd_ctx_check_n_create (xlator_t *this, fd_t *fd)
{
@@ -161,7 +189,7 @@ send_fuse_data (xlator_t *this, fuse_in_header_t *finh, void *data, size_t size)
static void
-fuse_invalidate (xlator_t *this, uint64_t fuse_ino)
+fuse_invalidate_entry (xlator_t *this, uint64_t fuse_ino)
{
struct fuse_out_header *fouh = NULL;
struct fuse_notify_inval_entry_out *fnieo = NULL;
@@ -207,6 +235,47 @@ fuse_invalidate (xlator_t *this, uint64_t fuse_ino)
}
}
+/*
+ * Send an inval inode notification to fuse. This causes an invalidation of the
+ * entire page cache mapping on the inode.
+ *
+ * @this:     the fuse xlator; this->private is its fuse_private_t
+ * @fuse_ino: fuse node id of the inode to invalidate
+ *
+ * Nothing is sent when priv->revchan_out is negative. A short or failed
+ * write is logged and priv->fd is closed.
+ */
+static void
+fuse_invalidate_inode (xlator_t *this, uint64_t fuse_ino)
+{
+        struct fuse_out_header *fouh = NULL;
+        struct fuse_notify_inval_inode_out *fniio = NULL;
+        fuse_private_t *priv = this->private;
+        ssize_t rv = 0;
+        char inval_buf[INVAL_BUF_SIZE] = {0};
+
+        if (priv->revchan_out < 0)
+                return;
+
+        /* The message is the out-header immediately followed by the
+         * inval-inode payload, both laid out in inval_buf. */
+        fouh = (struct fuse_out_header *) inval_buf;
+        fniio = (struct fuse_notify_inval_inode_out *) (fouh + 1);
+
+        /* unique == 0 with the notify code carried in the error field */
+        fouh->unique = 0;
+        fouh->error = FUSE_NOTIFY_INVAL_INODE;
+        fouh->len = sizeof (struct fuse_out_header) +
+                    sizeof (struct fuse_notify_inval_inode_out);
+
+        /* inval the entire mapping until we learn how to be more granular */
+        fniio->ino = fuse_ino;
+        fniio->off = 0;
+        fniio->len = -1;
+
+        /* write() returns ssize_t; test for failure explicitly instead of
+         * relying on a signed/unsigned comparison against fouh->len. */
+        rv = write (priv->revchan_out, inval_buf, fouh->len);
+        if (rv < 0 || (size_t) rv != fouh->len) {
+                gf_log ("glusterfs-fuse", GF_LOG_ERROR, "kernel notification "
+                        "daemon defunct");
+                /* NOTE(review): this closes priv->fd rather than
+                 * priv->revchan_out - presumably intentional, confirm. */
+                close (priv->fd);
+        }
+
+        /* fuse_ino is a uint64_t: use PRIu64 rather than %lu */
+        gf_log ("glusterfs-fuse", GF_LOG_TRACE,
+                "INVALIDATE inode: %"PRIu64, fuse_ino);
+}
+
int
send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error)
{
@@ -670,17 +739,27 @@ fuse_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
|| (priv->direct_io_mode == 1))
foo.open_flags |= FOPEN_DIRECT_IO;
#ifdef GF_DARWIN_HOST_OS
- /* In Linux: by default, buffer cache
- * is purged upon open, setting
- * FOPEN_KEEP_CACHE implies no-purge
- *
- * In MacFUSE: by default, buffer cache
- * is left intact upon open, setting
- * FOPEN_PURGE_UBC implies purge
- *
- * [[Interesting...]]
- */
- foo.open_flags |= FOPEN_PURGE_UBC;
+ /* In Linux: by default, buffer cache
+ * is purged upon open, setting
+ * FOPEN_KEEP_CACHE implies no-purge
+ *
+ * In MacFUSE: by default, buffer cache
+ * is left intact upon open, setting
+ * FOPEN_PURGE_UBC implies purge
+ *
+ * [[Interesting...]]
+ */
+ if (!priv->fopen_keep_cache)
+ foo.open_flags |= FOPEN_PURGE_UBC;
+#else
+ /*
+ * If fopen-keep-cache is enabled, we set the associated
+ * flag here such that files are not invalidated on open.
+ * File invalidations occur either in fuse or explicitly
+ * when the cache is set invalid on the inode.
+ */
+ if (priv->fopen_keep_cache)
+ foo.open_flags |= FOPEN_KEEP_CACHE;
#endif
}
@@ -2663,7 +2742,7 @@ fuse_setxattr (xlator_t *this, fuse_in_header_t *finh, void *msg)
gf_log ("fuse", GF_LOG_TRACE,
"got request to invalidate %"PRIu64, finh->nodeid);
send_fuse_err (this, finh, 0);
- fuse_invalidate (this, finh->nodeid);
+ fuse_invalidate_entry (this, finh->nodeid);
GF_FREE (finh);
return;
}
@@ -4523,6 +4602,9 @@ init (xlator_t *this_xl)
GF_ASSERT (ret == 0);
}
+ GF_OPTION_INIT("fopen-keep-cache", priv->fopen_keep_cache, bool,
+ cleanup_exit);
+
cmd_args = &this_xl->ctx->cmd_args;
fsname = cmd_args->volfile;
if (!fsname && cmd_args->volfile_server) {
@@ -4644,6 +4726,7 @@ struct xlator_fops fops = {
};
struct xlator_cbks cbks = {
+ .invalidate = fuse_invalidate,
};
@@ -4683,5 +4766,9 @@ struct volume_options options[] = {
{ .key = {"read-only"},
.type = GF_OPTION_TYPE_BOOL
},
+ { .key = {"fopen-keep-cache"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false"
+ },
{ .key = {NULL} },
};
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h
index c6c8438a9..dcd962924 100644
--- a/xlators/mount/fuse/src/fuse-bridge.h
+++ b/xlators/mount/fuse/src/fuse-bridge.h
@@ -109,6 +109,7 @@ struct fuse_private {
gf_boolean_t acl;
gf_boolean_t selinux;
gf_boolean_t read_only;
+ gf_boolean_t fopen_keep_cache;
fdtable_t *fdtable;
/* For fuse-reverse-validation */
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in
index ee956885c..b623d3428 100755
--- a/xlators/mount/fuse/utils/mount.glusterfs.in
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in
@@ -132,6 +132,10 @@ start_glusterfs ()
cmd_line=$(echo "$cmd_line --entry-timeout=$entry_timeout");
fi
+ if [ -n "$fopen_keep_cache" ]; then
+ cmd_line=$(echo "$cmd_line --fopen-keep-cache");
+ fi
+
# for rdma volume, we have to fetch volfile with '.rdma' added
# to volume name, so that it fetches the right client vol file
volume_id_rdma="";
@@ -297,6 +301,7 @@ main ()
"acl") acl=1 ;;
"selinux") selinux=1 ;;
"worm") worm=1 ;;
+ "fopen-keep-cache") fopen_keep_cache=1 ;;
# "mount -t glusterfs" sends this, but it's useless.
"rw") ;;
*)
diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c
index 85e876531..bdaf0f1b8 100644
--- a/xlators/performance/io-cache/src/io-cache.c
+++ b/xlators/performance/io-cache/src/io-cache.c
@@ -313,6 +313,18 @@ ioc_forget (xlator_t *this, inode_t *inode)
return 0;
}
+/*
+ * Invalidate callback of io-cache (wired up as cbks.invalidate).
+ * Looks up the io-cache context stored on the inode for this xlator and,
+ * if one exists, flushes it with ioc_inode_flush().
+ *
+ * @this:  the io-cache xlator
+ * @inode: inode whose cached data is to be dropped
+ *
+ * Always returns 0.
+ */
+static int32_t
+ioc_invalidate (xlator_t *this, inode_t *inode)
+{
+        uint64_t     ioc_addr  = 0;
+        ioc_inode_t *ioc_inode = NULL;
+
+        /*
+         * inode_ctx_get() stores a 64-bit value. Read it into a real
+         * uint64_t and convert afterwards: handing it the address of a
+         * pointer would write 8 bytes into a 4-byte object on 32-bit
+         * targets. ioc_addr stays 0 when no context is set.
+         */
+        inode_ctx_get (inode, this, &ioc_addr);
+        ioc_inode = (ioc_inode_t *) (uintptr_t) ioc_addr;
+
+        if (ioc_inode)
+                ioc_inode_flush (ioc_inode);
+
+        return 0;
+}
/*
* ioc_cache_validate_cbk -
@@ -1977,7 +1989,8 @@ struct xlator_dumpops dumpops = {
struct xlator_cbks cbks = {
.forget = ioc_forget,
- .release = ioc_release
+ .release = ioc_release,
+ .invalidate = ioc_invalidate,
};
struct volume_options options[] = {
diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c
index 9acffba2a..cf1aee9d6 100644
--- a/xlators/performance/md-cache/src/md-cache.c
+++ b/xlators/performance/md-cache/src/md-cache.c
@@ -378,7 +378,8 @@ mdc_to_iatt (struct md_cache *mdc, struct iatt *iatt)
int
-mdc_inode_iatt_set (xlator_t *this, inode_t *inode, struct iatt *iatt)
+mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf,
+ struct iatt *iatt)
{
int ret = -1;
struct md_cache *mdc = NULL;
@@ -394,6 +395,19 @@ mdc_inode_iatt_set (xlator_t *this, inode_t *inode, struct iatt *iatt)
goto unlock;
}
+ /*
+ * Invalidate the inode if the mtime or ctime has changed
+ * and the prebuf doesn't match the value we have cached.
+ * TODO: writev returns with a NULL iatt due to
+ * performance/write-behind, causing invalidation on writes.
+ */
+ if (IA_ISREG(inode->ia_type) &&
+ ((iatt->ia_mtime != mdc->md_mtime) ||
+ (iatt->ia_ctime != mdc->md_ctime)))
+ if (!prebuf || (prebuf->ia_ctime != mdc->md_ctime) ||
+ (prebuf->ia_mtime != mdc->md_mtime))
+ inode_invalidate(inode);
+
mdc_from_iatt (mdc, iatt);
time (&mdc->ia_time);
@@ -405,6 +419,10 @@ out:
return ret;
}
+/*
+ * Cache @iatt on @inode without a pre-operation iatt to compare against.
+ *
+ * Thin wrapper around mdc_inode_iatt_set_validate() that passes a NULL
+ * prebuf. Returns whatever mdc_inode_iatt_set_validate() returns.
+ */
+int
+mdc_inode_iatt_set (xlator_t *this, inode_t *inode, struct iatt *iatt)
+{
+        struct iatt *no_prebuf = NULL;
+
+        return mdc_inode_iatt_set_validate (this, inode, no_prebuf, iatt);
+}
int
mdc_inode_iatt_get (xlator_t *this, inode_t *inode, struct iatt *iatt)
@@ -859,7 +877,7 @@ mdc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!local)
goto out;
- mdc_inode_iatt_set (this, local->loc.inode, postbuf);
+ mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf);
out:
MDC_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, postbuf,
@@ -901,7 +919,7 @@ mdc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!local)
goto out;
- mdc_inode_iatt_set (this, local->fd->inode, postbuf);
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
out:
MDC_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
@@ -1377,7 +1395,7 @@ mdc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!local)
goto out;
- mdc_inode_iatt_set (this, local->fd->inode, postbuf);
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
out:
MDC_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
@@ -1422,7 +1440,7 @@ mdc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!local)
goto out;
- mdc_inode_iatt_set (this, local->loc.inode, postbuf);
+ mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf);
out:
MDC_STACK_UNWIND (setattr, frame, op_ret, op_errno, prebuf, postbuf,
@@ -1464,7 +1482,7 @@ mdc_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!local)
goto out;
- mdc_inode_iatt_set (this, local->fd->inode, postbuf);
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
out:
MDC_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, prebuf, postbuf,
@@ -1506,7 +1524,7 @@ mdc_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!local)
goto out;
- mdc_inode_iatt_set (this, local->fd->inode, postbuf);
+ mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf);
out:
MDC_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,