diff options
author | Brian Foster <bfoster@redhat.com> | 2012-06-13 12:08:38 -0400 |
---|---|---|
committer | Anand Avati <avati@redhat.com> | 2012-07-13 09:46:09 -0700 |
commit | 32ffb79f18cbaebcbe6bba51599ca234f44675cc (patch) | |
tree | 3f7589078e618cf0f575e5ad03a67afecb50d006 /xlators | |
parent | ca4900497142127c31d0dba7a53a921200aaf790 (diff) |
fuse/md-cache: add support for the 'fopen-keep-cache' mount option
The fopen-keep-cache option disables the unconditional page-cache
invalidation that fuse performs on file open (by setting
FOPEN_KEEP_CACHE) and replaces it with detection of remote changes
and explicit invalidations issued from mount/fuse. This option
improves local caching through the page cache and native client.
This change defines a new 'invalidate' translator callback that is
invoked when an inode's cache mapping has been determined to be
invalid. md-cache implements the policy to detect stale inodes (a
regular file whose mtime or ctime no longer matches the cached
values) and invoke inode invalidations. fuse-bridge and io-cache
implement invalidate handlers to invalidate the respective caches
(page cache in the case of fuse).
BUG: 833564
Change-Id: I99818da5777eaf06276c1c0b194669f5bab92d48
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-on: http://review.gluster.com/3584
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.c | 113 | ||||
-rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.h | 1 | ||||
-rwxr-xr-x | xlators/mount/fuse/utils/mount.glusterfs.in | 5 | ||||
-rw-r--r-- | xlators/performance/io-cache/src/io-cache.c | 15 | ||||
-rw-r--r-- | xlators/performance/md-cache/src/md-cache.c | 32 |
5 files changed, 145 insertions, 21 deletions
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 3b4c6c68c..21e14efb3 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -25,6 +25,34 @@ static int gf_fuse_xattr_enotsup_log; void fini (xlator_t *this_xl); +static void fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino); + +/* + * Send an invalidate notification up to fuse to purge the file from local + * page cache. + */ +static int32_t +fuse_invalidate(xlator_t *this, inode_t *inode) +{ + fuse_private_t *priv = this->private; + uint64_t nodeid; + + /* + * NOTE: We only invalidate at the moment if fopen_keep_cache is + * enabled because otherwise this is a departure from default + * behavior. Specifically, the performance/write-behind xlator + * causes unconditional invalidations on write requests. + */ + if (!priv->fopen_keep_cache) + return 0; + + nodeid = inode_to_fuse_nodeid(inode); + gf_log(this->name, GF_LOG_DEBUG, "Invalidate inode id %lu.", nodeid); + fuse_invalidate_inode(this, nodeid); + + return 0; +} + fuse_fd_ctx_t * __fuse_fd_ctx_check_n_create (xlator_t *this, fd_t *fd) { @@ -161,7 +189,7 @@ send_fuse_data (xlator_t *this, fuse_in_header_t *finh, void *data, size_t size) static void -fuse_invalidate (xlator_t *this, uint64_t fuse_ino) +fuse_invalidate_entry (xlator_t *this, uint64_t fuse_ino) { struct fuse_out_header *fouh = NULL; struct fuse_notify_inval_entry_out *fnieo = NULL; @@ -207,6 +235,47 @@ fuse_invalidate (xlator_t *this, uint64_t fuse_ino) } } +/* + * Send an inval inode notification to fuse. This causes an invalidation of the + * entire page cache mapping on the inode. 
+ */ +static void +fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) +{ + struct fuse_out_header *fouh = NULL; + struct fuse_notify_inval_inode_out *fniio = NULL; + fuse_private_t *priv = NULL; + int rv = 0; + char inval_buf[INVAL_BUF_SIZE] = {0}; + + fouh = (struct fuse_out_header *) inval_buf; + fniio = (struct fuse_notify_inval_inode_out *) (fouh + 1); + + priv = this->private; + + if (priv->revchan_out < 0) + return; + + fouh->unique = 0; + fouh->error = FUSE_NOTIFY_INVAL_INODE; + fouh->len = sizeof(struct fuse_out_header) + + sizeof(struct fuse_notify_inval_inode_out); + + /* inval the entire mapping until we learn how to be more granular */ + fniio->ino = fuse_ino; + fniio->off = 0; + fniio->len = -1; + + rv = write(priv->revchan_out, inval_buf, fouh->len); + if (rv != fouh->len) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, "kernel notification " + "daemon defunct"); + close(priv->fd); + } + + gf_log("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE inode: %lu", fuse_ino); +} + int send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error) { @@ -670,17 +739,27 @@ fuse_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, || (priv->direct_io_mode == 1)) foo.open_flags |= FOPEN_DIRECT_IO; #ifdef GF_DARWIN_HOST_OS - /* In Linux: by default, buffer cache - * is purged upon open, setting - * FOPEN_KEEP_CACHE implies no-purge - * - * In MacFUSE: by default, buffer cache - * is left intact upon open, setting - * FOPEN_PURGE_UBC implies purge - * - * [[Interesting...]] - */ - foo.open_flags |= FOPEN_PURGE_UBC; + /* In Linux: by default, buffer cache + * is purged upon open, setting + * FOPEN_KEEP_CACHE implies no-purge + * + * In MacFUSE: by default, buffer cache + * is left intact upon open, setting + * FOPEN_PURGE_UBC implies purge + * + * [[Interesting...]] + */ + if (!priv->fopen_keep_cache) + foo.open_flags |= FOPEN_PURGE_UBC; +#else + /* + * If fopen-keep-cache is enabled, we set the associated + * flag here such that files are not invalidated on 
open. + * File invalidations occur either in fuse or explicitly + * when the cache is set invalid on the inode. + */ + if (priv->fopen_keep_cache) + foo.open_flags |= FOPEN_KEEP_CACHE; #endif } @@ -2663,7 +2742,7 @@ fuse_setxattr (xlator_t *this, fuse_in_header_t *finh, void *msg) gf_log ("fuse", GF_LOG_TRACE, "got request to invalidate %"PRIu64, finh->nodeid); send_fuse_err (this, finh, 0); - fuse_invalidate (this, finh->nodeid); + fuse_invalidate_entry (this, finh->nodeid); GF_FREE (finh); return; } @@ -4523,6 +4602,9 @@ init (xlator_t *this_xl) GF_ASSERT (ret == 0); } + GF_OPTION_INIT("fopen-keep-cache", priv->fopen_keep_cache, bool, + cleanup_exit); + cmd_args = &this_xl->ctx->cmd_args; fsname = cmd_args->volfile; if (!fsname && cmd_args->volfile_server) { @@ -4644,6 +4726,7 @@ struct xlator_fops fops = { }; struct xlator_cbks cbks = { + .invalidate = fuse_invalidate, }; @@ -4683,5 +4766,9 @@ struct volume_options options[] = { { .key = {"read-only"}, .type = GF_OPTION_TYPE_BOOL }, + { .key = {"fopen-keep-cache"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false" + }, { .key = {NULL} }, }; diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h index c6c8438a9..dcd962924 100644 --- a/xlators/mount/fuse/src/fuse-bridge.h +++ b/xlators/mount/fuse/src/fuse-bridge.h @@ -109,6 +109,7 @@ struct fuse_private { gf_boolean_t acl; gf_boolean_t selinux; gf_boolean_t read_only; + gf_boolean_t fopen_keep_cache; fdtable_t *fdtable; /* For fuse-reverse-validation */ diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in index ee956885c..b623d3428 100755 --- a/xlators/mount/fuse/utils/mount.glusterfs.in +++ b/xlators/mount/fuse/utils/mount.glusterfs.in @@ -132,6 +132,10 @@ start_glusterfs () cmd_line=$(echo "$cmd_line --entry-timeout=$entry_timeout"); fi + if [ -n "$fopen_keep_cache" ]; then + cmd_line=$(echo "$cmd_line --fopen-keep-cache"); + fi + # for rdma volume, we have to fetch 
volfile with '.rdma' added # to volume name, so that it fetches the right client vol file volume_id_rdma=""; @@ -297,6 +301,7 @@ main () "acl") acl=1 ;; "selinux") selinux=1 ;; "worm") worm=1 ;; + "fopen-keep-cache") fopen_keep_cache=1 ;; # "mount -t glusterfs" sends this, but it's useless. "rw") ;; *) diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c index 85e876531..bdaf0f1b8 100644 --- a/xlators/performance/io-cache/src/io-cache.c +++ b/xlators/performance/io-cache/src/io-cache.c @@ -313,6 +313,18 @@ ioc_forget (xlator_t *this, inode_t *inode) return 0; } +static int32_t +ioc_invalidate(xlator_t *this, inode_t *inode) +{ + ioc_inode_t *ioc_inode = NULL; + + inode_ctx_get(inode, this, (uint64_t *) &ioc_inode); + + if (ioc_inode) + ioc_inode_flush(ioc_inode); + + return 0; +} /* * ioc_cache_validate_cbk - @@ -1977,7 +1989,8 @@ struct xlator_dumpops dumpops = { struct xlator_cbks cbks = { .forget = ioc_forget, - .release = ioc_release + .release = ioc_release, + .invalidate = ioc_invalidate, }; struct volume_options options[] = { diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c index 9acffba2a..cf1aee9d6 100644 --- a/xlators/performance/md-cache/src/md-cache.c +++ b/xlators/performance/md-cache/src/md-cache.c @@ -378,7 +378,8 @@ mdc_to_iatt (struct md_cache *mdc, struct iatt *iatt) int -mdc_inode_iatt_set (xlator_t *this, inode_t *inode, struct iatt *iatt) +mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf, + struct iatt *iatt) { int ret = -1; struct md_cache *mdc = NULL; @@ -394,6 +395,19 @@ mdc_inode_iatt_set (xlator_t *this, inode_t *inode, struct iatt *iatt) goto unlock; } + /* + * Invalidate the inode if the mtime or ctime has changed + * and the prebuf doesn't match the value we have cached. + * TODO: writev returns with a NULL iatt due to + * performance/write-behind, causing invalidation on writes. 
+ */ + if (IA_ISREG(inode->ia_type) && + ((iatt->ia_mtime != mdc->md_mtime) || + (iatt->ia_ctime != mdc->md_ctime))) + if (!prebuf || (prebuf->ia_ctime != mdc->md_ctime) || + (prebuf->ia_mtime != mdc->md_mtime)) + inode_invalidate(inode); + mdc_from_iatt (mdc, iatt); time (&mdc->ia_time); @@ -405,6 +419,10 @@ out: return ret; } +int mdc_inode_iatt_set(xlator_t *this, inode_t *inode, struct iatt *iatt) +{ + return mdc_inode_iatt_set_validate(this, inode, NULL, iatt); +} int mdc_inode_iatt_get (xlator_t *this, inode_t *inode, struct iatt *iatt) @@ -859,7 +877,7 @@ mdc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!local) goto out; - mdc_inode_iatt_set (this, local->loc.inode, postbuf); + mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf); out: MDC_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, postbuf, @@ -901,7 +919,7 @@ mdc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!local) goto out; - mdc_inode_iatt_set (this, local->fd->inode, postbuf); + mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); out: MDC_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, postbuf, @@ -1377,7 +1395,7 @@ mdc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!local) goto out; - mdc_inode_iatt_set (this, local->fd->inode, postbuf); + mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); out: MDC_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf, @@ -1422,7 +1440,7 @@ mdc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!local) goto out; - mdc_inode_iatt_set (this, local->loc.inode, postbuf); + mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf); out: MDC_STACK_UNWIND (setattr, frame, op_ret, op_errno, prebuf, postbuf, @@ -1464,7 +1482,7 @@ mdc_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!local) goto out; - mdc_inode_iatt_set (this, local->fd->inode, postbuf); + 
mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); out: MDC_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, prebuf, postbuf, @@ -1506,7 +1524,7 @@ mdc_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!local) goto out; - mdc_inode_iatt_set (this, local->fd->inode, postbuf); + mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); out: MDC_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf, |