diff options
author | Krutika Dhananjay <kdhananj@redhat.com> | 2017-01-17 16:40:04 +0530 |
---|---|---|
committer | Raghavendra G <rgowdapp@redhat.com> | 2018-08-18 07:28:53 +0000 |
commit | c9bde3021202f1d5c5a2d19ac05a510fc1f788ac (patch) | |
tree | eba54e5f791b33d079afae9655d5438d007c3091 | |
parent | 045d70a5450daa85aa5564b6e9f93065c342ab12 (diff) |
performance/readdir-ahead: keep stats of cached dentries in sync with modifications
PROBLEM:
Stats of dentries that are readdirp'd ahead can become stale due to
fops like write, truncate, etc. that modify the file pointed to by the
dentries. When a readdir is finally wound at the offset corresponding to
these entries, the iatts that are returned to the application come
from readdir-ahead's cache, which are stale by now. This problem gets
further aggravated when caching translators/modules cache and continue
to serve this stale information.
FIX:
* Store the iatt in context of the inode pointed by dentry.
* Whenever the inode pointed by dentry undergoes modification, in cbk
of modification fop, update the iatt stored in inode-ctx to reflect
the modification.
* When serving a readdirp response to the application, update the iatts
of dentries with the iatts stored in the context of the inodes pointed
to by these dentries.
* Some fops don't have valid iatts in their responses. For example, the
response to a write whose data is still cached in write-behind will
have a zeroed-out stat. In this case, keep only ia_type and ia_gfid and
reset the rest of the iatt members to zero.
- fuse-bridge in this case just sends "entry" information back to
kernel and attr is not sent.
- gfapi sets entry->inode to NULL and zeroes out the entire stat
* There is one tiny race between entry creation and a readdirp on
its parent dir, which could cause the inode-ctx setting and the
inode-ctx reading to happen on two different inode objects. To prevent
this, when entry->inode is not equal to linked_inode,
- fuse-bridge is made to send only "entry" information without
attributes
- gfapi sets entry->inode to NULL and zeroes out the entire stat.
Change-Id: Ia27ff49a61922e88c73a1547ad8aacc9968a69df
BUG: 1390050
Updates: bz#1390050
Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com>
Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
-rw-r--r-- | libglusterfs/src/inode.h | 1 | ||||
-rw-r--r-- | tests/basic/afr/split-brain-healing.t | 1 | ||||
-rw-r--r-- | tests/bugs/readdir-ahead/bug-1390050.c | 70 | ||||
-rw-r--r-- | tests/bugs/readdir-ahead/bug-1390050.t | 29 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 4 | ||||
-rw-r--r-- | xlators/mount/fuse/src/fuse-bridge.c | 17 | ||||
-rw-r--r-- | xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h | 1 | ||||
-rw-r--r-- | xlators/performance/readdir-ahead/src/readdir-ahead.c | 573 | ||||
-rw-r--r-- | xlators/performance/readdir-ahead/src/readdir-ahead.h | 52 |
9 files changed, 721 insertions, 27 deletions
diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h index ffee2a5516a..96f67c05629 100644 --- a/libglusterfs/src/inode.h +++ b/libglusterfs/src/inode.h @@ -260,6 +260,7 @@ inode_ctx_put(inode_t *inode, xlator_t *this, uint64_t v) #define inode_ctx_get(i,x,v) inode_ctx_get0(i,x,v) #define inode_ctx_del(i,x,v) inode_ctx_del2(i,x,v,0) +#define inode_ctx_del1(i, x, v) inode_ctx_del2(i, x, 0, v) gf_boolean_t __is_root_gfid (uuid_t gfid); diff --git a/tests/basic/afr/split-brain-healing.t b/tests/basic/afr/split-brain-healing.t index 773a8b33b12..c80f900b909 100644 --- a/tests/basic/afr/split-brain-healing.t +++ b/tests/basic/afr/split-brain-healing.t @@ -76,7 +76,6 @@ do done BIGGER_FILE_SIZE=$(stat -c %s file1) - TEST $CLI volume start $V0 force EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1 EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3 diff --git a/tests/bugs/readdir-ahead/bug-1390050.c b/tests/bugs/readdir-ahead/bug-1390050.c new file mode 100644 index 00000000000..5593a1d4c0c --- /dev/null +++ b/tests/bugs/readdir-ahead/bug-1390050.c @@ -0,0 +1,70 @@ +#include <stdio.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> +#include <dirent.h> +#include <string.h> +#include <errno.h> + +int +main (int argc, char *argv[]) +{ + const char *glfs_dir = NULL, *filepath = NULL; + DIR *dirfd = NULL; + int filefd = 0, ret = 0; + struct stat stbuf = {0, }; + size_t size_before_write = 0; + + glfs_dir = argv[1]; + filepath = argv[2]; + dirfd = opendir (glfs_dir); + if (dirfd == NULL) { + fprintf (stderr, "opening directory failed (%s)\n", + strerror (errno)); + goto err; + } + + filefd = open (filepath, O_RDWR); + if (filefd < 0) { + fprintf (stderr, "open failed on path %s (%s)\n", filepath, + strerror (errno)); + goto err; + } + + ret = stat (filepath, &stbuf); + if (ret < 0) { + fprintf (stderr, "stat failed on path %s (%s)\n", filepath, + strerror (errno)); + goto err; + } + + 
size_before_write = stbuf.st_size; + + ret = write (filefd, "testdata123", strlen ("testdata123") + 1); + if (ret <= 0) { + fprintf (stderr, "write failed (%s)\n", strerror (errno)); + goto err; + } + + while (readdir (dirfd)) { + /* do nothing: just drain the directory stream */ + } + + ret = stat (filepath, &stbuf); + if (ret < 0) { + fprintf (stderr, "stat failed on path %s (%s)\n", filepath, + strerror (errno)); + goto err; + } + + if (stbuf.st_size == size_before_write) { + fprintf (stderr, "file size (%lu) has not changed even after " + "its written to\n", (unsigned long) stbuf.st_size); + goto err; + } + + return 0; +err: + return -1; +} diff --git a/tests/bugs/readdir-ahead/bug-1390050.t b/tests/bugs/readdir-ahead/bug-1390050.t new file mode 100644 index 00000000000..ab1d7d4ead9 --- /dev/null +++ b/tests/bugs/readdir-ahead/bug-1390050.t @@ -0,0 +1,29 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc +. $(dirname $0)/../../fileio.rc + +cleanup; + +TEST glusterd + +TEST $CLI volume create $V0 $H0:$B{0..1}/$V0 +TEST $CLI volume set $V0 readdir-ahead on + +DIRECTORY="$M0/subdir1/subdir2" + +#Make sure md-cache has large timeout to hold stat from readdirp_cbk in its cache +TEST $CLI volume set $V0 performance.md-cache-timeout 600 +TEST $CLI volume start $V0 +TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0 +rm -rf $M0/* +TEST mkdir -p $DIRECTORY +rm -rf $DIRECTORY/* +TEST touch $DIRECTORY/file{0..10} +rdd_tester=$(dirname $0)/rdd-tester +TEST build_tester $(dirname $0)/bug-1390050.c -o $rdd_tester +TEST $rdd_tester $DIRECTORY $DIRECTORY/file4 +rm -f $rdd_tester +cleanup; + diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index afec1889071..055c92fcbfc 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -6336,9 +6336,9 @@ dht_file_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, out: if (local->fop == GF_FOP_REMOVEXATTR) { - DHT_STACK_UNWIND (removexattr, frame, op_ret, op_errno, 
NULL); + DHT_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata); } else { - DHT_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL); + DHT_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata); } return 0; diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c index 20f015431a0..ee50329f03b 100644 --- a/xlators/mount/fuse/src/fuse-bridge.c +++ b/xlators/mount/fuse/src/fuse-bridge.c @@ -3056,6 +3056,10 @@ fuse_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!linked_inode) goto next_entry; + if (entry->inode != linked_inode) { + memset (&entry->d_stat, 0, sizeof (entry->d_stat)); + } + feo->nodeid = inode_to_fuse_nodeid (linked_inode); if (!((strcmp (entry->d_name, ".") == 0) || @@ -3069,10 +3073,15 @@ fuse_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, calc_timeout_sec (priv->entry_timeout); feo->entry_valid_nsec = calc_timeout_nsec (priv->entry_timeout); - feo->attr_valid = - calc_timeout_sec (priv->attribute_timeout); - feo->attr_valid_nsec = - calc_timeout_nsec (priv->attribute_timeout); + + if (entry->d_stat.ia_ctime) { + feo->attr_valid = + calc_timeout_sec (priv->attribute_timeout); + feo->attr_valid_nsec = + calc_timeout_nsec (priv->attribute_timeout); + } else { + feo->attr_valid = feo->attr_valid_nsec = 0; + } next_entry: if (size == max_size) diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h index 39e2c536975..ae533840c7e 100644 --- a/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h +++ b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h @@ -18,6 +18,7 @@ enum gf_rda_mem_types_ { gf_rda_mt_rda_local = gf_common_mt_end + 1, gf_rda_mt_rda_fd_ctx, gf_rda_mt_rda_priv, + gf_rda_mt_inode_ctx_t, gf_rda_mt_end }; diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c index 
cb2e73f9d4d..72ab64c5974 100644 --- a/xlators/performance/readdir-ahead/src/readdir-ahead.c +++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c @@ -33,6 +33,17 @@ #include "readdir-ahead-messages.h" static int rda_fill_fd(call_frame_t *, xlator_t *, fd_t *); +static void +rda_local_wipe (struct rda_local *local) +{ + if (local->fd) + fd_unref (local->fd); + if (local->xattrs) + dict_unref (local->xattrs); + if (local->inode) + inode_unref (local->inode); +} + /* * Get (or create) the fd context for storing prepopulated directory * entries. @@ -70,6 +81,102 @@ out: return ctx; } +static rda_inode_ctx_t * +__rda_inode_ctx_get (inode_t *inode, xlator_t *this) +{ + int ret = -1; + uint64_t ctx_uint = 0; + rda_inode_ctx_t *ctx_p = NULL; + + ret = __inode_ctx_get1 (inode, this, &ctx_uint); + if (ret == 0) + return (rda_inode_ctx_t *)ctx_uint; + + ctx_p = GF_CALLOC (1, sizeof (*ctx_p), gf_rda_mt_inode_ctx_t); + if (!ctx_p) + return NULL; + + GF_ATOMIC_INIT (ctx_p->generation, 0); + + ret = __inode_ctx_set1 (inode, this, (uint64_t *)&ctx_p); + if (ret < 0) { + GF_FREE (ctx_p); + return NULL; + } + + return ctx_p; +} + +static int +__rda_inode_ctx_update_iatts (inode_t *inode, xlator_t *this, + struct iatt *stbuf_in, struct iatt *stbuf_out, + uint64_t generation) +{ + rda_inode_ctx_t *ctx_p = NULL; + struct iatt tmp_stat = {0, }; + + ctx_p = __rda_inode_ctx_get (inode, this); + if (!ctx_p) + return -1; + + if ((!stbuf_in) || (stbuf_in->ia_ctime == 0)) { + /* A fop modified a file but valid stbuf is not provided. + * Can't update iatt to reflect results of fop and hence + * invalidate the iatt stored in dentry. + * + * An example of this case can be response of write request + * that is cached in write-behind. 
+ */ + tmp_stat = ctx_p->statbuf; + memset (&ctx_p->statbuf, 0, + sizeof (ctx_p->statbuf)); + gf_uuid_copy (ctx_p->statbuf.ia_gfid, + tmp_stat.ia_gfid); + ctx_p->statbuf.ia_type = tmp_stat.ia_type; + GF_ATOMIC_INC (ctx_p->generation); + } else { + if (ctx_p->statbuf.ia_ctime) { + if (stbuf_in->ia_ctime < ctx_p->statbuf.ia_ctime) { + goto out; + } + + if ((stbuf_in->ia_ctime == ctx_p->statbuf.ia_ctime) && + (stbuf_in->ia_ctime_nsec + < ctx_p->statbuf.ia_ctime_nsec)) { + goto out; + } + } else { + if (generation != GF_ATOMIC_GET (ctx_p->generation)) + goto out; + } + + ctx_p->statbuf = *stbuf_in; + } + +out: + if (stbuf_out) + *stbuf_out = ctx_p->statbuf; + + return 0; +} + +static int +rda_inode_ctx_update_iatts (inode_t *inode, xlator_t *this, + struct iatt *stbuf_in, struct iatt *stbuf_out, + uint64_t generation) +{ + int ret = -1; + + LOCK(&inode->lock); + { + ret = __rda_inode_ctx_update_iatts (inode, this, stbuf_in, + stbuf_out, generation); + } + UNLOCK(&inode->lock); + + return ret; +} + /* * Reset the tracking state of the context. */ @@ -112,6 +219,27 @@ rda_can_serve_readdirp(struct rda_fd_ctx *ctx, size_t request_size) return _gf_false; } +void +rda_inode_ctx_get_iatt (inode_t *inode, xlator_t *this, struct iatt *attr) +{ + rda_inode_ctx_t *ctx_p = NULL; + + if (!inode || !this || !attr) + goto out; + + LOCK (&inode->lock); + { + ctx_p = __rda_inode_ctx_get (inode, this); + if (ctx_p) { + *attr = ctx_p->statbuf; + } + } + UNLOCK (&inode->lock); + +out: + return; +} + /* * Serve a request from the fd dentry list based on the size of the request * buffer. ctx must be locked. 
@@ -124,6 +252,7 @@ __rda_fill_readdirp (xlator_t *this, gf_dirent_t *entries, size_t request_size, size_t dirent_size, size = 0; int32_t count = 0; struct rda_priv *priv = NULL; + struct iatt tmp_stat = {0,}; priv = this->private; @@ -132,6 +261,13 @@ __rda_fill_readdirp (xlator_t *this, gf_dirent_t *entries, size_t request_size, if (size + dirent_size > request_size) break; + memset (&tmp_stat, 0, sizeof (tmp_stat)); + + if (dirent->inode) { + rda_inode_ctx_get_iatt (dirent->inode, this, &tmp_stat); + dirent->d_stat = tmp_stat; + } + size += dirent_size; list_del_init(&dirent->list); ctx->cur_size -= dirent_size; @@ -319,6 +455,17 @@ rda_fill_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, list_del_init(&dirent->list); /* must preserve entry order */ list_add_tail(&dirent->list, &ctx->entries.list); + if (dirent->inode) { + /* If ctxp->stat is invalidated, don't update it + * with dirent->d_stat as we don't have + * generation number of the inode when readdirp + * request was initiated. 
So, we pass 0 for + * generation number + */ + rda_inode_ctx_update_iatts (dirent->inode, this, + &dirent->d_stat, + &dirent->d_stat, 0); + } dirent_size = gf_dirent_size (dirent->d_name); @@ -379,6 +526,7 @@ out: ctx->xattrs = NULL; } + rda_local_wipe (ctx->fill_frame->local); STACK_DESTROY(ctx->fill_frame->root); ctx->fill_frame = NULL; } @@ -444,7 +592,7 @@ rda_fill_fd(call_frame_t *frame, xlator_t *this, fd_t *fd) } local->ctx = ctx; - local->fd = fd; + local->fd = fd_ref (fd); nframe->local = local; ctx->fill_frame = nframe; @@ -469,8 +617,10 @@ rda_fill_fd(call_frame_t *frame, xlator_t *this, fd_t *fd) return 0; err: - if (nframe) + if (nframe) { + rda_local_wipe (nframe->local); FRAME_DESTROY(nframe); + } return -1; } @@ -479,24 +629,10 @@ static int32_t rda_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { - struct rda_local *local = frame->local; - if (!op_ret) rda_fill_fd(frame, this, fd); - frame->local = NULL; - - STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, xdata); - - if (local && local->xattrs) { - /* unref for dict_new() done in rda_opendir */ - dict_unref (local->xattrs); - local->xattrs = NULL; - } - - if (local) - mem_put (local); - + RDA_STACK_UNWIND(opendir, frame, op_ret, op_errno, fd, xdata); return 0; } @@ -540,6 +676,374 @@ unwind: } static int32_t +rda_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + struct rda_local *local = NULL; + struct iatt postbuf_out = {0,}; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + rda_inode_ctx_update_iatts (local->inode, this, postbuf, &postbuf_out, + local->generation); + + if (postbuf_out.ia_ctime == 0) + memset (&postbuf_out, 0, sizeof (postbuf_out)); +unwind: + RDA_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, + &postbuf_out, xdata); + return 0; +} + +static int32_t +rda_writev 
(call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t off, uint32_t flags, + struct iobref *iobref, dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP (writev, frame, this, fd->inode, xdata, fd, + vector, count, off, flags, iobref); + return 0; +} + +static int32_t +rda_fallocate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + struct rda_local *local = NULL; + struct iatt postbuf_out = {0,}; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + rda_inode_ctx_update_iatts (local->inode, this, postbuf, &postbuf_out, + local->generation); + + if (postbuf_out.ia_ctime == 0) + memset (&postbuf_out, 0, sizeof (postbuf_out)); + +unwind: + RDA_STACK_UNWIND (fallocate, frame, op_ret, op_errno, prebuf, + &postbuf_out, xdata); + return 0; +} + +static int32_t +rda_fallocate (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size, + off_t offset, size_t len, dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP (fallocate, frame, this, fd->inode, xdata, + fd, keep_size, offset, len); + return 0; +} + +static int32_t +rda_zerofill_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + struct rda_local *local = NULL; + struct iatt postbuf_out = {0,}; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + rda_inode_ctx_update_iatts (local->inode, this, postbuf, &postbuf_out, + local->generation); + + if (postbuf_out.ia_ctime == 0) + memset (&postbuf_out, 0, sizeof (postbuf_out)); + +unwind: + RDA_STACK_UNWIND (zerofill, frame, op_ret, op_errno, prebuf, + &postbuf_out, xdata); + return 0; +} + +static int32_t +rda_zerofill (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP (zerofill, frame, this, fd->inode, xdata, + fd, offset, len); + return 0; 
+} + +static int32_t +rda_discard_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + struct rda_local *local = NULL; + struct iatt postbuf_out = {0,}; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + rda_inode_ctx_update_iatts (local->inode, this, postbuf, &postbuf_out, + local->generation); + + if (postbuf_out.ia_ctime == 0) + memset (&postbuf_out, 0, sizeof (postbuf_out)); +unwind: + RDA_STACK_UNWIND (discard, frame, op_ret, op_errno, prebuf, + &postbuf_out, xdata); + return 0; +} + +static int32_t +rda_discard (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP (discard, frame, this, fd->inode, xdata, + fd, offset, len); + return 0; +} + +static int32_t +rda_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + struct rda_local *local = NULL; + struct iatt postbuf_out = {0,}; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + rda_inode_ctx_update_iatts (local->inode, this, postbuf, &postbuf_out, + local->generation); + + if (postbuf_out.ia_ctime == 0) + memset (&postbuf_out, 0, sizeof (postbuf_out)); + +unwind: + RDA_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, + &postbuf_out, xdata); + return 0; +} + +static int32_t +rda_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP (ftruncate, frame, this, fd->inode, xdata, + fd, offset); + return 0; +} + +static int32_t +rda_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + struct rda_local *local = NULL; + struct iatt postbuf_out = {0,}; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + 
rda_inode_ctx_update_iatts (local->inode, this, postbuf, &postbuf_out, + local->generation); + if (postbuf_out.ia_ctime == 0) + memset (&postbuf_out, 0, sizeof (postbuf_out)); + +unwind: + RDA_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, + &postbuf_out, xdata); + return 0; +} + +static int32_t +rda_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP (truncate, frame, this, loc->inode, xdata, + loc, offset); + return 0; +} + +static int32_t +rda_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + struct rda_local *local = NULL; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + + rda_inode_ctx_update_iatts (local->inode, this, NULL, NULL, + local->generation); +unwind: + RDA_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata); + return 0; +} + +static int32_t +rda_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP (setxattr, frame, this, loc->inode, + xdata, loc, dict, flags); + return 0; +} + +static int32_t +rda_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + struct rda_local *local = NULL; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + + rda_inode_ctx_update_iatts (local->inode, this, NULL, NULL, + local->generation); +unwind: + RDA_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata); + return 0; +} + +static int32_t +rda_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, + int32_t flags, dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP (fsetxattr, frame, this, fd->inode, + xdata, fd, dict, flags); + return 0; +} + +static int32_t +rda_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) +{ + 
struct rda_local *local = NULL; + struct iatt postbuf_out = {0,}; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + rda_inode_ctx_update_iatts (local->inode, this, statpost, &postbuf_out, + local->generation); + if (postbuf_out.ia_ctime == 0) + memset (&postbuf_out, 0, sizeof (postbuf_out)); + +unwind: + RDA_STACK_UNWIND (setattr, frame, op_ret, op_errno, statpre, + &postbuf_out, xdata); + return 0; +} + +static int32_t +rda_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP (setattr, frame, this, loc->inode, xdata, + loc, stbuf, valid); + return 0; +} + +static int32_t +rda_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *statpre, + struct iatt *statpost, dict_t *xdata) +{ + struct rda_local *local = NULL; + struct iatt postbuf_out = {0,}; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + rda_inode_ctx_update_iatts (local->inode, this, statpost, &postbuf_out, + local->generation); + if (postbuf_out.ia_ctime == 0) + memset (&postbuf_out, 0, sizeof (postbuf_out)); + +unwind: + RDA_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, statpre, + &postbuf_out, xdata); + return 0; +} + +static int32_t +rda_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, + int32_t valid, dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP (fsetattr, frame, this, fd->inode, xdata, + fd, stbuf, valid); + return 0; +} + +static int32_t +rda_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + struct rda_local *local = NULL; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + + rda_inode_ctx_update_iatts (local->inode, this, NULL, NULL, + local->generation); +unwind: + RDA_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata); + return 0; +} + +static int32_t +rda_removexattr (call_frame_t *frame, xlator_t 
*this, loc_t *loc, + const char *name, dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP (removexattr, frame, this, loc->inode, + xdata, loc, name); + return 0; +} + +static int32_t +rda_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + struct rda_local *local = NULL; + + if (op_ret < 0) + goto unwind; + + local = frame->local; + + rda_inode_ctx_update_iatts (local->inode, this, NULL, NULL, + local->generation); +unwind: + RDA_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata); + return 0; +} + +static int32_t +rda_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + RDA_COMMON_MODIFICATION_FOP (fremovexattr, frame, this, fd->inode, + xdata, fd, name); + return 0; +} + +static int32_t rda_releasedir(xlator_t *this, fd_t *fd) { uint64_t val; @@ -566,6 +1070,23 @@ rda_releasedir(xlator_t *this, fd_t *fd) return 0; } +static int +rda_forget (xlator_t *this, inode_t *inode) +{ + uint64_t ctx_uint = 0; + rda_inode_ctx_t *ctx = NULL; + + inode_ctx_del1 (inode, this, &ctx_uint); + if (!ctx_uint) + return 0; + + ctx = (rda_inode_ctx_t *)ctx_uint; + + GF_FREE (ctx); + + return 0; +} + int32_t mem_acct_init(xlator_t *this) { @@ -677,10 +1198,28 @@ out: struct xlator_fops fops = { .opendir = rda_opendir, .readdirp = rda_readdirp, + /* inode write */ + /* TODO: invalidate a dentry's stats if its pointing to a directory + * when entry operations happen in that directory + */ + .writev = rda_writev, + .truncate = rda_truncate, + .ftruncate = rda_ftruncate, + .fallocate = rda_fallocate, + .discard = rda_discard, + .zerofill = rda_zerofill, + /* metadata write */ + .setxattr = rda_setxattr, + .fsetxattr = rda_fsetxattr, + .setattr = rda_setattr, + .fsetattr = rda_fsetattr, + .removexattr = rda_removexattr, + .fremovexattr = rda_fremovexattr, }; struct xlator_cbks cbks = { .releasedir = rda_releasedir, + .forget = rda_forget, }; struct volume_options 
options[] = { diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.h b/xlators/performance/readdir-ahead/src/readdir-ahead.h index 3c06cc0f107..7dbed4c60ed 100644 --- a/xlators/performance/readdir-ahead/src/readdir-ahead.h +++ b/xlators/performance/readdir-ahead/src/readdir-ahead.h @@ -19,6 +19,44 @@ #define RDA_FD_BYPASS (1 << 4) #define RDA_FD_PLUGGED (1 << 5) + +#define RDA_COMMON_MODIFICATION_FOP(name, frame, this, __inode, __xdata, args ...)\ + do { \ + struct rda_local *__local = NULL; \ + rda_inode_ctx_t *ctx_p = NULL; \ + \ + __local = mem_get0 (this->local_pool); \ + __local->inode = inode_ref (__inode); \ + LOCK (&__inode->lock); \ + { \ + ctx_p = __rda_inode_ctx_get (__inode, this); \ + } \ + UNLOCK (&__inode->lock); \ + __local->generation = GF_ATOMIC_GET (ctx_p->generation); \ + \ + frame->local = __local; \ + if (__xdata) \ + __local->xattrs = dict_ref (__xdata); \ + \ + STACK_WIND (frame, rda_##name##_cbk, FIRST_CHILD(this), \ + FIRST_CHILD(this)->fops->name, args, __xdata); \ + } while (0) + + +#define RDA_STACK_UNWIND(fop, frame, params ...) 
do { \ + struct rda_local *__local = NULL; \ + if (frame) { \ + __local = frame->local; \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT (fop, frame, params); \ + if (__local) { \ + rda_local_wipe (__local); \ + mem_put (__local); \ + } \ +} while (0) + + struct rda_fd_ctx { off_t cur_offset; /* current head of the ctx */ size_t cur_size; /* current size of the preload */ @@ -34,9 +72,12 @@ struct rda_fd_ctx { struct rda_local { struct rda_fd_ctx *ctx; - fd_t *fd; - off_t offset; - dict_t *xattrs; /* xattrs to be sent in readdirp() */ + fd_t *fd; + dict_t *xattrs; /* md-cache keys to be sent in readdirp() */ + inode_t *inode; + off_t offset; + uint64_t generation; + int32_t skip_dir; }; struct rda_priv { @@ -48,4 +89,9 @@ struct rda_priv { gf_boolean_t parallel_readdir; }; +typedef struct rda_inode_ctx { + struct iatt statbuf; + gf_atomic_t generation; +} rda_inode_ctx_t; + #endif /* __READDIR_AHEAD_H */ |