diff options
author | Brian Foster <bfoster@redhat.com> | 2012-06-13 12:08:38 -0400 |
---|---|---|
committer | Anand Avati <avati@redhat.com> | 2012-07-13 09:46:09 -0700 |
commit | 32ffb79f18cbaebcbe6bba51599ca234f44675cc (patch) | |
tree | 3f7589078e618cf0f575e5ad03a67afecb50d006 | |
parent | ca4900497142127c31d0dba7a53a921200aaf790 (diff) |
fuse/md-cache: add support for the 'fopen-keep-cache' mount option
fopen-keep-cache disables unconditional page-cache invalidations
on file open in fuse (via FOPEN_KEEP_CACHE) and replaces that
behavior with detection of remote changes and explicit
invalidations from mount/fuse. This option improves local caching
through the page cache and native client.
This change defines a new 'invalidate' translator callback to
identify when an inode's cache mapping has been determined to be
invalid. md-cache implements the policy to detect and invoke
inode invalidations. fuse-bridge and io-cache implement
invalidate handlers to invalidate the respective caches (page
cache in the case of fuse).
BUG: 833564
Change-Id: I99818da5777eaf06276c1c0b194669f5bab92d48
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-on: http://review.gluster.com/3584
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
-rw-r--r-- | glusterfsd/src/glusterfsd.c | 19 | ||||
-rw-r--r-- | glusterfsd/src/glusterfsd.h | 1 | ||||
-rw-r--r-- | libglusterfs/src/glusterfs.h | 1 | ||||
-rw-r--r-- | libglusterfs/src/inode.c | 48 | ||||
-rw-r--r-- | libglusterfs/src/inode.h | 3 | ||||
-rw-r--r-- | libglusterfs/src/xlator.h | 3 | ||||
-rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.c | 113 | ||||
-rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.h | 1 | ||||
-rwxr-xr-x | xlators/mount/fuse/utils/mount.glusterfs.in | 5 | ||||
-rw-r--r-- | xlators/performance/io-cache/src/io-cache.c | 15 | ||||
-rw-r--r-- | xlators/performance/md-cache/src/md-cache.c | 32 |
11 files changed, 219 insertions, 22 deletions
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c index 0ab8fcd4c6d..76f01948193 100644 --- a/glusterfsd/src/glusterfsd.c +++ b/glusterfsd/src/glusterfsd.c @@ -168,6 +168,8 @@ static struct argp_option gf_options[] = { "Brick name to be registered with Gluster portmapper" }, {"brick-port", ARGP_BRICK_PORT_KEY, "BRICK-PORT", OPTION_HIDDEN, "Brick Port to be registered with Gluster portmapper" }, + {"fopen-keep-cache", ARGP_FOPEN_KEEP_CACHE_KEY, 0, 0, + "Do not purge the cache on file open"}, {0, 0, 0, 0, "Fuse options:"}, {"direct-io-mode", ARGP_DIRECT_IO_MODE_KEY, "BOOL", OPTION_ARG_OPTIONAL, @@ -368,6 +370,17 @@ create_fuse_mount (glusterfs_ctx_t *ctx) } } + if (cmd_args->fopen_keep_cache) { + ret = dict_set_static_ptr(master->options, "fopen-keep-cache", + "on"); + if (ret < 0) { + gf_log("glusterfsd", GF_LOG_ERROR, + "failed to set dict value for key " + "fopen-keep-cache"); + goto err; + } + } + switch (cmd_args->fuse_direct_io_mode) { case GF_OPTION_DISABLE: /* disable */ ret = dict_set_static_ptr (master->options, ZR_DIRECT_IO_OPT, @@ -814,7 +827,11 @@ parse_opts (int key, char *arg, struct argp_state *state) ctx = glusterfs_ctx_get (); ctx->mem_accounting = 1; break; - } + + case ARGP_FOPEN_KEEP_CACHE_KEY: + cmd_args->fopen_keep_cache = 1; + break; + } return 0; } diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h index 8ec121954bb..382a8cc71fc 100644 --- a/glusterfsd/src/glusterfsd.h +++ b/glusterfsd/src/glusterfsd.h @@ -87,6 +87,7 @@ enum argp_option_keys { ARGP_USER_MAP_ROOT_KEY = 156, ARGP_MEM_ACCOUNTING_KEY = 157, ARGP_SELINUX_KEY = 158, + ARGP_FOPEN_KEEP_CACHE_KEY = 159, }; struct _gfd_vol_top_priv_t { diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h index 780353d29a8..357284e27b4 100644 --- a/libglusterfs/src/glusterfs.h +++ b/libglusterfs/src/glusterfs.h @@ -285,6 +285,7 @@ struct _cmd_args { int selinux; int worm; int mac_compat; + int fopen_keep_cache; struct list_head xlator_options; /* list of xlator_option_t */ /* fuse options */ diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c index e32eddb5d1c..a0088c03c55 100644 --- a/libglusterfs/src/inode.c +++ b/libglusterfs/src/inode.c @@ -950,6 +950,54 @@ inode_forget (inode_t *inode, uint64_t nlookup) return 0; } +/* + * Invalidate an inode. This is invoked when a translator decides that an inode's + * cache is no longer valid. Any translator interested in taking action in this + * situation can define the invalidate callback. + */ +int +inode_invalidate(inode_t *inode) +{ + int ret = 0; + xlator_t *xl = NULL; + xlator_t *old_THIS = NULL; + + if (!inode) { + gf_log_callingfn(THIS->name, GF_LOG_WARNING, "inode not found"); + return -1; + } + + /* + * The master xlator is not in the graph but it can define an invalidate + * handler. + */ + xl = inode->table->xl->ctx->master; + if (xl && xl->cbks->invalidate) { + old_THIS = THIS; + THIS = xl; + ret = xl->cbks->invalidate(xl, inode); + THIS = old_THIS; + if (ret) + return ret; + } + + xl = inode->table->xl->graph->first; + while (xl) { + old_THIS = THIS; + THIS = xl; + if (xl->cbks->invalidate) + ret = xl->cbks->invalidate(xl, inode); + THIS = old_THIS; + + if (ret) + break; + + xl = xl->next; + } + + return ret; +} + static void __inode_unlink (inode_t *inode, inode_t *parent, const char *name) diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h index 41003df71ca..20e28f6820d 100644 --- a/libglusterfs/src/inode.h +++ b/libglusterfs/src/inode.h @@ -131,6 +131,9 @@ int inode_forget (inode_t *inode, uint64_t nlookup); int +inode_invalidate(inode_t *inode); + +int inode_rename (inode_table_t *table, inode_t *olddir, const char *oldname, inode_t *newdir, const char *newname, inode_t *inode, struct iatt *stbuf); diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h index 2fce7dc474a..5162d20e500 100644 --- a/libglusterfs/src/xlator.h +++ b/libglusterfs/src/xlator.h @@ -730,10 +730,13 @@ typedef int32_t (*cbk_forget_t) (xlator_t *this, typedef int32_t (*cbk_release_t) (xlator_t *this, fd_t *fd); +typedef int32_t (*cbk_invalidate_t)(xlator_t *this, inode_t *inode); + struct xlator_cbks { cbk_forget_t forget; cbk_release_t release; cbk_release_t releasedir; + cbk_invalidate_t invalidate; }; typedef int32_t (*dumpop_priv_t) (xlator_t *this); diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 3b4c6c68c97..21e14efb384 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -25,6 +25,34 @@ static int gf_fuse_xattr_enotsup_log; void fini (xlator_t *this_xl); +static void fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino); + +/* + * Send an invalidate notification up to fuse to purge the file from local + * page cache. + */ +static int32_t +fuse_invalidate(xlator_t *this, inode_t *inode) +{ + fuse_private_t *priv = this->private; + uint64_t nodeid; + + /* + * NOTE: We only invalidate at the moment if fopen_keep_cache is + * enabled because otherwise this is a departure from default + * behavior. Specifically, the performance/write-behind xlator + * causes unconditional invalidations on write requests. + */ + if (!priv->fopen_keep_cache) + return 0; + + nodeid = inode_to_fuse_nodeid(inode); + gf_log(this->name, GF_LOG_DEBUG, "Invalidate inode id %lu.", nodeid); + fuse_invalidate_inode(this, nodeid); + + return 0; +} + fuse_fd_ctx_t * __fuse_fd_ctx_check_n_create (xlator_t *this, fd_t *fd) { @@ -161,7 +189,7 @@ send_fuse_data (xlator_t *this, fuse_in_header_t *finh, void *data, size_t size) static void -fuse_invalidate (xlator_t *this, uint64_t fuse_ino) +fuse_invalidate_entry (xlator_t *this, uint64_t fuse_ino) { struct fuse_out_header *fouh = NULL; struct fuse_notify_inval_entry_out *fnieo = NULL; @@ -207,6 +235,47 @@ fuse_invalidate (xlator_t *this, uint64_t fuse_ino) } } +/* + * Send an inval inode notification to fuse. This causes an invalidation of the + * entire page cache mapping on the inode. + */ +static void +fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) +{ + struct fuse_out_header *fouh = NULL; + struct fuse_notify_inval_inode_out *fniio = NULL; + fuse_private_t *priv = NULL; + int rv = 0; + char inval_buf[INVAL_BUF_SIZE] = {0}; + + fouh = (struct fuse_out_header *) inval_buf; + fniio = (struct fuse_notify_inval_inode_out *) (fouh + 1); + + priv = this->private; + + if (priv->revchan_out < 0) + return; + + fouh->unique = 0; + fouh->error = FUSE_NOTIFY_INVAL_INODE; + fouh->len = sizeof(struct fuse_out_header) + + sizeof(struct fuse_notify_inval_inode_out); + + /* inval the entire mapping until we learn how to be more granular */ + fniio->ino = fuse_ino; + fniio->off = 0; + fniio->len = -1; + + rv = write(priv->revchan_out, inval_buf, fouh->len); + if (rv != fouh->len) { + gf_log("glusterfs-fuse", GF_LOG_ERROR, "kernel notification " + "daemon defunct"); + close(priv->fd); + } + + gf_log("glusterfs-fuse", GF_LOG_TRACE, "INVALIDATE inode: %lu", fuse_ino); +} + int send_fuse_err (xlator_t *this, fuse_in_header_t *finh, int error) { @@ -670,17 +739,27 @@ fuse_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, || (priv->direct_io_mode == 1)) foo.open_flags |= FOPEN_DIRECT_IO; #ifdef GF_DARWIN_HOST_OS - /* In Linux: by default, buffer cache - * is purged upon open, setting - * FOPEN_KEEP_CACHE implies no-purge - * - * In MacFUSE: by default, buffer cache - * is left intact upon open, setting - * FOPEN_PURGE_UBC implies purge - * - * [[Interesting...]] - */ - foo.open_flags |= FOPEN_PURGE_UBC; + /* In Linux: by default, buffer cache + * is purged upon open, setting + * FOPEN_KEEP_CACHE implies no-purge + * + * In MacFUSE: by default, buffer cache + * is left intact upon open, setting + * FOPEN_PURGE_UBC implies purge + * + * [[Interesting...]] + */ + if (!priv->fopen_keep_cache) + foo.open_flags |= FOPEN_PURGE_UBC; +#else + /* + * If fopen-keep-cache is enabled, we set the associated + * flag here such that files are not invalidated on open. + * File invalidations occur either in fuse or explicitly + * when the cache is set invalid on the inode. + */ + if (priv->fopen_keep_cache) + foo.open_flags |= FOPEN_KEEP_CACHE; #endif } @@ -2663,7 +2742,7 @@ fuse_setxattr (xlator_t *this, fuse_in_header_t *finh, void *msg) gf_log ("fuse", GF_LOG_TRACE, "got request to invalidate %"PRIu64, finh->nodeid); send_fuse_err (this, finh, 0); - fuse_invalidate (this, finh->nodeid); + fuse_invalidate_entry (this, finh->nodeid); GF_FREE (finh); return; } @@ -4523,6 +4602,9 @@ init (xlator_t *this_xl) GF_ASSERT (ret == 0); } + GF_OPTION_INIT("fopen-keep-cache", priv->fopen_keep_cache, bool, + cleanup_exit); + cmd_args = &this_xl->ctx->cmd_args; fsname = cmd_args->volfile; if (!fsname && cmd_args->volfile_server) { @@ -4644,6 +4726,7 @@ struct xlator_fops fops = { }; struct xlator_cbks cbks = { + .invalidate = fuse_invalidate, }; @@ -4683,5 +4766,9 @@ struct volume_options options[] = { { .key = {"read-only"}, .type = GF_OPTION_TYPE_BOOL }, + { .key = {"fopen-keep-cache"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false" + }, { .key = {NULL} }, }; diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h index c6c8438a9ed..dcd9629246c 100644 --- a/xlators/mount/fuse/src/fuse-bridge.h +++ b/xlators/mount/fuse/src/fuse-bridge.h @@ -109,6 +109,7 @@ struct fuse_private { gf_boolean_t acl; gf_boolean_t selinux; gf_boolean_t read_only; + gf_boolean_t fopen_keep_cache; fdtable_t *fdtable; /* For fuse-reverse-validation */ diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in index ee956885c4c..b623d3428b7 100755 --- a/xlators/mount/fuse/utils/mount.glusterfs.in +++ b/xlators/mount/fuse/utils/mount.glusterfs.in @@ -132,6 +132,10 @@ start_glusterfs () cmd_line=$(echo "$cmd_line --entry-timeout=$entry_timeout"); fi + if [ -n "$fopen_keep_cache" ]; then + cmd_line=$(echo "$cmd_line --fopen-keep-cache"); + fi + # for rdma volume, we have to fetch volfile with '.rdma' added # to volume name, so that it fetches the right client vol file volume_id_rdma=""; @@ -297,6 +301,7 @@ main () "acl") acl=1 ;; "selinux") selinux=1 ;; "worm") worm=1 ;; + "fopen-keep-cache") fopen_keep_cache=1 ;; # "mount -t glusterfs" sends this, but it's useless. "rw") ;; *) diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c index 85e876531a5..bdaf0f1b81a 100644 --- a/xlators/performance/io-cache/src/io-cache.c +++ b/xlators/performance/io-cache/src/io-cache.c @@ -313,6 +313,18 @@ ioc_forget (xlator_t *this, inode_t *inode) return 0; } +static int32_t +ioc_invalidate(xlator_t *this, inode_t *inode) +{ + ioc_inode_t *ioc_inode = NULL; + + inode_ctx_get(inode, this, (uint64_t *) &ioc_inode); + + if (ioc_inode) + ioc_inode_flush(ioc_inode); + + return 0; +} /* * ioc_cache_validate_cbk - @@ -1977,7 +1989,8 @@ struct xlator_dumpops dumpops = { struct xlator_cbks cbks = { .forget = ioc_forget, - .release = ioc_release + .release = ioc_release, + .invalidate = ioc_invalidate, }; struct volume_options options[] = { diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c index 9acffba2a4e..cf1aee9d6b9 100644 --- a/xlators/performance/md-cache/src/md-cache.c +++ b/xlators/performance/md-cache/src/md-cache.c @@ -378,7 +378,8 @@ mdc_to_iatt (struct md_cache *mdc, struct iatt *iatt) int -mdc_inode_iatt_set (xlator_t *this, inode_t *inode, struct iatt *iatt) +mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf, + struct iatt *iatt) { int ret = -1; struct md_cache *mdc = NULL; @@ -394,6 +395,19 @@ mdc_inode_iatt_set (xlator_t *this, inode_t *inode, struct iatt *iatt) goto unlock; } + /* + * Invalidate the inode if the mtime or ctime has changed + * and the prebuf doesn't match the value we have cached. + * TODO: writev returns with a NULL iatt due to + * performance/write-behind, causing invalidation on writes. + */ + if (IA_ISREG(inode->ia_type) && + ((iatt->ia_mtime != mdc->md_mtime) || + (iatt->ia_ctime != mdc->md_ctime))) + if (!prebuf || (prebuf->ia_ctime != mdc->md_ctime) || + (prebuf->ia_mtime != mdc->md_mtime)) + inode_invalidate(inode); + mdc_from_iatt (mdc, iatt); time (&mdc->ia_time); @@ -405,6 +419,10 @@ out: return ret; } +int mdc_inode_iatt_set(xlator_t *this, inode_t *inode, struct iatt *iatt) +{ + return mdc_inode_iatt_set_validate(this, inode, NULL, iatt); +} int mdc_inode_iatt_get (xlator_t *this, inode_t *inode, struct iatt *iatt) @@ -859,7 +877,7 @@ mdc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!local) goto out; - mdc_inode_iatt_set (this, local->loc.inode, postbuf); + mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf); out: MDC_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, postbuf, @@ -901,7 +919,7 @@ mdc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!local) goto out; - mdc_inode_iatt_set (this, local->fd->inode, postbuf); + mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); out: MDC_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, postbuf, @@ -1377,7 +1395,7 @@ mdc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!local) goto out; - mdc_inode_iatt_set (this, local->fd->inode, postbuf); + mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); out: MDC_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf, @@ -1422,7 +1440,7 @@ mdc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!local) goto out; - mdc_inode_iatt_set (this, local->loc.inode, postbuf); + mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf); out: MDC_STACK_UNWIND (setattr, frame, op_ret, op_errno, prebuf, postbuf, @@ -1464,7 +1482,7 @@ mdc_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!local) goto out; - mdc_inode_iatt_set (this, local->fd->inode, postbuf); + mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); out: MDC_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, prebuf, postbuf, @@ -1506,7 +1524,7 @@ mdc_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!local) goto out; - mdc_inode_iatt_set (this, local->fd->inode, postbuf); + mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); out: MDC_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf, |