diff options
Diffstat (limited to 'xlators/performance')
27 files changed, 3143 insertions, 3506 deletions
diff --git a/xlators/performance/Makefile.am b/xlators/performance/Makefile.am index eb94d8d6a..a494190ba 100644 --- a/xlators/performance/Makefile.am +++ b/xlators/performance/Makefile.am @@ -1,3 +1,3 @@ -SUBDIRS = write-behind read-ahead io-threads io-cache symlink-cache quick-read md-cache +SUBDIRS = write-behind read-ahead readdir-ahead io-threads io-cache symlink-cache quick-read md-cache open-behind CLEANFILES = diff --git a/xlators/performance/io-cache/src/Makefile.am b/xlators/performance/io-cache/src/Makefile.am index 838e5f597..155be9988 100644 --- a/xlators/performance/io-cache/src/Makefile.am +++ b/xlators/performance/io-cache/src/Makefile.am @@ -1,7 +1,7 @@ xlator_LTLIBRARIES = io-cache.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance -io_cache_la_LDFLAGS = -module -avoidversion +io_cache_la_LDFLAGS = -module -avoid-version io_cache_la_SOURCES = io-cache.c page.c ioc-inode.c io_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c index 0793e6043..201777b38 100644 --- a/xlators/performance/io-cache/src/io-cache.c +++ b/xlators/performance/io-cache/src/io-cache.c @@ -31,7 +31,7 @@ ioc_get_priority (ioc_table_t *table, const char *path); struct volume_options options[]; -inline uint32_t +static inline uint32_t ioc_hashfn (void *data, int len) { off_t offset; @@ -41,7 +41,7 @@ ioc_hashfn (void *data, int len) return (offset >> ioc_log2_page_size); } -inline ioc_inode_t * +static inline ioc_inode_t * ioc_inode_reupdate (ioc_inode_t *ioc_inode) { ioc_table_t *table = NULL; @@ -54,7 +54,7 @@ ioc_inode_reupdate (ioc_inode_t *ioc_inode) return ioc_inode; } -inline ioc_inode_t * +static inline ioc_inode_t * ioc_get_inode (dict_t *dict, char *name) { ioc_inode_t *ioc_inode = NULL; @@ -316,9 +316,11 @@ ioc_forget (xlator_t *this, inode_t *inode) static int32_t ioc_invalidate(xlator_t *this, inode_t *inode) { + uint64_t 
ioc_addr = 0; ioc_inode_t *ioc_inode = NULL; - inode_ctx_get(inode, this, (uint64_t *) &ioc_inode); + inode_ctx_get(inode, this, (uint64_t *) &ioc_addr); + ioc_inode = (void *) ioc_addr; if (ioc_inode) ioc_inode_flush(ioc_inode); @@ -489,7 +491,7 @@ out: return ret; } -inline uint32_t +static inline uint32_t is_match (const char *path, const char *pattern) { int32_t ret = 0; @@ -551,6 +553,13 @@ ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, inode_ctx_get (fd->inode, this, &tmp_ioc_inode); ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; + //TODO: see why inode context is NULL and handle it. + if (!ioc_inode) { + gf_log (this->name, GF_LOG_ERROR, "inode context is " + "NULL (%s)", uuid_utoa (fd->inode->gfid)); + goto out; + } + ioc_table_lock (ioc_inode->table); { list_move_tail (&ioc_inode->inode_lru, @@ -1415,6 +1424,58 @@ ioc_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, return 0; } +static int32_t +ioc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post, xdata); + return 0; +} + +static int32_t +ioc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + uint64_t ioc_inode = 0; + + inode_ctx_get (fd->inode, this, &ioc_inode); + + if (ioc_inode) + ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + + STACK_WIND(frame, ioc_discard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); + return 0; +} + +static int32_t +ioc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + STACK_UNWIND_STRICT(zerofill, frame, op_ret, + op_errno, pre, post, xdata); + return 0; +} + +static int32_t +ioc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + 
size_t len, dict_t *xdata) +{ + uint64_t ioc_inode = 0; + + inode_ctx_get (fd->inode, this, &ioc_inode); + + if (ioc_inode) + ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + + STACK_WIND(frame, ioc_zerofill_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); + return 0; +} + + int32_t ioc_get_priority_list (const char *opt_str, struct list_head *first) { @@ -1878,11 +1939,11 @@ int ioc_inode_dump (xlator_t *this, inode_t *inode) { - char *path = NULL; + char *path = NULL; int ret = -1; char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; uint64_t tmp_ioc_inode = 0; - ioc_inode_t *ioc_inode = NULL; + ioc_inode_t *ioc_inode = NULL; gf_boolean_t section_added = _gf_false; char uuid_str[64] = {0,}; @@ -1896,9 +1957,6 @@ ioc_inode_dump (xlator_t *this, inode_t *inode) if (ioc_inode == NULL) goto out; - gf_proc_dump_add_section (key_prefix); - section_added = _gf_true; - /* Similar to ioc_page_dump function its better to use * pthread_mutex_trylock and not to use gf_log in statedump * to avoid deadlocks. @@ -1906,24 +1964,30 @@ ioc_inode_dump (xlator_t *this, inode_t *inode) ret = pthread_mutex_trylock (&ioc_inode->inode_lock); if (ret) goto out; - else + { - gf_proc_dump_write ("inode.weight", "%d", ioc_inode->weight); + if (uuid_is_null (ioc_inode->inode->gfid)) + goto unlock; + + gf_proc_dump_add_section (key_prefix); + section_added = _gf_true; - //inode_path takes blocking lock on the itable. 
__inode_path (ioc_inode->inode, NULL, &path); + gf_proc_dump_write ("inode.weight", "%d", ioc_inode->weight); + if (path) { gf_proc_dump_write ("path", "%s", path); GF_FREE (path); } + gf_proc_dump_write ("uuid", "%s", uuid_utoa_r (ioc_inode->inode->gfid, uuid_str)); __ioc_cache_dump (ioc_inode, key_prefix); __ioc_inode_waitq_dump (ioc_inode, key_prefix); - - pthread_mutex_unlock (&ioc_inode->inode_lock); } +unlock: + pthread_mutex_unlock (&ioc_inode->inode_lock); out: if (ret && ioc_inode) { @@ -2037,6 +2101,8 @@ struct xlator_fops fops = { .mknod = ioc_mknod, .readdirp = ioc_readdirp, + .discard = ioc_discard, + .zerofill = ioc_zerofill, }; diff --git a/xlators/performance/io-cache/src/io-cache.h b/xlators/performance/io-cache/src/io-cache.h index 41bbeea8b..46d758a66 100644 --- a/xlators/performance/io-cache/src/io-cache.h +++ b/xlators/performance/io-cache/src/io-cache.h @@ -329,6 +329,4 @@ ioc_prune (ioc_table_t *table); int32_t ioc_need_prune (ioc_table_t *table); -inline uint32_t -ioc_hashfn (void *data, int len); #endif /* __IO_CACHE_H */ diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c index c18c04a0b..b2e20ba65 100644 --- a/xlators/performance/io-cache/src/page.c +++ b/xlators/performance/io-cache/src/page.c @@ -804,7 +804,7 @@ ioc_frame_unwind (call_frame_t *frame) int32_t copied = 0; struct iobref *iobref = NULL; struct iatt stbuf = {0,}; - int32_t op_ret = 0; + int32_t op_ret = 0, op_errno = 0; GF_ASSERT (frame); @@ -813,16 +813,21 @@ ioc_frame_unwind (call_frame_t *frame) gf_log (frame->this->name, GF_LOG_WARNING, "local is NULL"); op_ret = -1; - local->op_errno = ENOMEM; + op_errno = ENOMEM; + goto unwind; + } + + if (local->op_ret < 0) { + op_ret = local->op_ret; + op_errno = local->op_errno; goto unwind; } // ioc_local_lock (local); - frame->local = NULL; iobref = iobref_new (); if (iobref == NULL) { op_ret = -1; - local->op_errno = ENOMEM; + op_errno = ENOMEM; } if (list_empty (&local->fill_list)) { @@ 
-839,7 +844,7 @@ ioc_frame_unwind (call_frame_t *frame) vector = GF_CALLOC (count, sizeof (*vector), gf_ioc_mt_iovec); if (vector == NULL) { op_ret = -1; - local->op_errno = ENOMEM; + op_errno = ENOMEM; } list_for_each_entry_safe (fill, next, &local->fill_list, list) { @@ -869,7 +874,8 @@ unwind: // ioc_local_unlock (local); - STACK_UNWIND_STRICT (readv, frame, op_ret, local->op_errno, vector, + frame->local = NULL; + STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count, &stbuf, iobref, NULL); if (iobref != NULL) { @@ -882,7 +888,8 @@ unwind: } pthread_mutex_destroy (&local->local_lock); - mem_put (local); + if (local) + mem_put (local); return; } diff --git a/xlators/performance/io-threads/src/Makefile.am b/xlators/performance/io-threads/src/Makefile.am index 0f5a3b181..d63042e7c 100644 --- a/xlators/performance/io-threads/src/Makefile.am +++ b/xlators/performance/io-threads/src/Makefile.am @@ -1,7 +1,7 @@ xlator_LTLIBRARIES = io-threads.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance -io_threads_la_LDFLAGS = -module -avoidversion +io_threads_la_LDFLAGS = -module -avoid-version io_threads_la_SOURCES = io-threads.c io_threads_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c index dbf1929e8..bbcf4ed26 100644 --- a/xlators/performance/io-threads/src/io-threads.c +++ b/xlators/performance/io-threads/src/io-threads.c @@ -307,6 +307,9 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub) case GF_FOP_XATTROP: case GF_FOP_FXATTROP: case GF_FOP_RCHECKSUM: + case GF_FOP_FALLOCATE: + case GF_FOP_DISCARD: + case GF_FOP_ZEROFILL: pri = IOT_PRI_LO; break; @@ -321,9 +324,9 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub) break; } out: - ret = do_iot_schedule (this->private, stub, pri); gf_log (this->name, GF_LOG_DEBUG, "%s scheduled as %s fop", gf_fop_list[stub->fop], 
iot_get_pri_meaning (pri)); + ret = do_iot_schedule (this->private, stub, pri); return ret; } @@ -2406,6 +2409,155 @@ out: return 0; } +int +iot_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preop, struct iatt *postop, dict_t *xdata) +{ + STACK_UNWIND_STRICT (fallocate, frame, op_ret, op_errno, preop, postop, + xdata); + return 0; +} + + +int +iot_fallocate_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + STACK_WIND (frame, iot_fallocate_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->fallocate, fd, mode, offset, len, + xdata); + return 0; +} + + +int +iot_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + call_stub_t *stub = NULL; + int ret = -1; + + stub = fop_fallocate_stub(frame, iot_fallocate_wrapper, fd, mode, offset, + len, xdata); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "cannot create fallocate stub" + "(out of memory)"); + ret = -ENOMEM; + goto out; + } + + ret = iot_schedule (frame, this, stub); + +out: + if (ret < 0) { + STACK_UNWIND_STRICT (fallocate, frame, -1, -ret, NULL, NULL, + NULL); + if (stub != NULL) { + call_stub_destroy (stub); + } + } + return 0; +} + +int +iot_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preop, struct iatt *postop, dict_t *xdata) +{ + STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, preop, postop, + xdata); + return 0; +} + + +int +iot_discard_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + STACK_WIND (frame, iot_discard_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->discard, fd, offset, len, xdata); + return 0; +} + + +int +iot_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + call_stub_t *stub = NULL; + int ret = -1; + 
+ stub = fop_discard_stub(frame, iot_discard_wrapper, fd, offset, len, + xdata); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "cannot create discard stub" + "(out of memory)"); + ret = -ENOMEM; + goto out; + } + + ret = iot_schedule (frame, this, stub); + +out: + if (ret < 0) { + STACK_UNWIND_STRICT (discard, frame, -1, -ret, NULL, NULL, + NULL); + if (stub != NULL) { + call_stub_destroy (stub); + } + } + return 0; +} + +int +iot_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preop, struct iatt *postop, dict_t *xdata) +{ + STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, preop, postop, + xdata); + return 0; +} + +int +iot_zerofill_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, + off_t offset, size_t len, dict_t *xdata) +{ + STACK_WIND (frame, iot_zerofill_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->zerofill, fd, offset, len, xdata); + return 0; +} + +int +iot_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + call_stub_t *stub = NULL; + int ret = -1; + + stub = fop_zerofill_stub(frame, iot_zerofill_wrapper, fd, + offset, len, xdata); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "cannot create zerofill stub" + "(out of memory)"); + ret = -ENOMEM; + goto out; + } + + ret = iot_schedule (frame, this, stub); + +out: + if (ret < 0) { + STACK_UNWIND_STRICT (zerofill, frame, -1, -ret, NULL, NULL, + NULL); + if (stub != NULL) { + call_stub_destroy (stub); + } + } + return 0; +} + int __iot_workers_scale (iot_conf_t *conf) @@ -2432,7 +2584,7 @@ __iot_workers_scale (iot_conf_t *conf) while (diff) { diff --; - ret = pthread_create (&thread, &conf->w_attr, iot_worker, conf); + ret = gf_thread_create (&thread, &conf->w_attr, iot_worker, conf); if (ret == 0) { conf->curr_count++; gf_log (conf->this->name, GF_LOG_DEBUG, @@ -2736,10 +2888,12 @@ struct xlator_fops fops = { .xattrop = iot_xattrop, .fxattrop = iot_fxattrop, 
.rchecksum = iot_rchecksum, + .fallocate = iot_fallocate, + .discard = iot_discard, + .zerofill = iot_zerofill, }; -struct xlator_cbks cbks = { -}; +struct xlator_cbks cbks; struct volume_options options[] = { { .key = {"thread-count"}, @@ -2800,6 +2954,7 @@ struct volume_options options[] = { {.key = {"least-rate-limit"}, .type = GF_OPTION_TYPE_INT, .min = 0, + .max = INT_MAX, .default_value = "0", .description = "Max number of least priority operations to handle " "per-second" diff --git a/xlators/performance/md-cache/src/Makefile.am b/xlators/performance/md-cache/src/Makefile.am index bd09c15c2..8c9f5a858 100644 --- a/xlators/performance/md-cache/src/Makefile.am +++ b/xlators/performance/md-cache/src/Makefile.am @@ -1,7 +1,7 @@ xlator_LTLIBRARIES = md-cache.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance -md_cache_la_LDFLAGS = -module -avoidversion +md_cache_la_LDFLAGS = -module -avoid-version md_cache_la_SOURCES = md-cache.c md_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c index 237acab9f..84c363ad9 100644 --- a/xlators/performance/md-cache/src/md-cache.c +++ b/xlators/performance/md-cache/src/md-cache.c @@ -18,6 +18,7 @@ #include "dict.h" #include "xlator.h" #include "md-cache-mem-types.h" +#include "glusterfs-acl.h" #include <assert.h> #include <sys/time.h> @@ -32,6 +33,7 @@ struct mdc_conf { int timeout; gf_boolean_t cache_posix_acl; gf_boolean_t cache_selinux; + gf_boolean_t force_readdirp; }; @@ -41,17 +43,17 @@ static struct mdc_key { int check; } mdc_keys[] = { { - .name = "system.posix_acl_access", + .name = POSIX_ACL_ACCESS_XATTR, .load = 0, .check = 1, }, { - .name = "system.posix_acl_default", + .name = POSIX_ACL_DEFAULT_XATTR, .load = 0, .check = 1, }, { - .name = "security.selinux", + .name = GF_SELINUX_XATTR_KEY, .load = 0, .check = 1, }, @@ -65,7 +67,11 @@ static struct mdc_key { .load = 0, .check = 
1, }, - {}, + { + .name = NULL, + .load = 0, + .check = 0, + } }; @@ -127,6 +133,7 @@ struct mdc_local { loc_t loc2; fd_t *fd; char *linkname; + char *key; dict_t *xattr; }; @@ -169,7 +176,7 @@ __mdc_inode_ctx_set (xlator_t *this, inode_t *inode, struct md_cache *mdc) uint64_t mdc_int = 0; mdc_int = (long) mdc; - ret = __inode_ctx_set2 (inode, this, &mdc_int, 0); + ret = __inode_ctx_set (inode, this, &mdc_int); return ret; } @@ -224,6 +231,8 @@ mdc_local_wipe (xlator_t *this, mdc_local_t *local) GF_FREE (local->linkname); + GF_FREE (local->key); + if (local->xattr) dict_unref (local->xattr); @@ -580,6 +589,31 @@ out: int +mdc_inode_xatt_unset (xlator_t *this, inode_t *inode, char *name) +{ + int ret = -1; + struct md_cache *mdc = NULL; + + mdc = mdc_inode_prep (this, inode); + if (!mdc) + goto out; + + if (!name) + goto out; + + LOCK (&mdc->lock); + { + dict_del (mdc->xattr, name); + } + UNLOCK (&mdc->lock); + + ret = 0; +out: + return ret; +} + + +int mdc_inode_xatt_get (xlator_t *this, inode_t *inode, dict_t **dict) { int ret = -1; @@ -593,13 +627,15 @@ mdc_inode_xatt_get (xlator_t *this, inode_t *inode, dict_t **dict) LOCK (&mdc->lock); { + ret = 0; + /* Missing xattr only means no keys were there, i.e + a negative cache for the "loaded" keys + */ if (!mdc->xattr) goto unlock; if (dict) *dict = dict_ref (mdc->xattr); - - ret = 0; } unlock: UNLOCK (&mdc->lock); @@ -609,6 +645,46 @@ out: } +int +mdc_inode_iatt_invalidate (xlator_t *this, inode_t *inode) +{ + int ret = -1; + struct md_cache *mdc = NULL; + + if (mdc_inode_ctx_get (this, inode, &mdc) != 0) + goto out; + + LOCK (&mdc->lock); + { + mdc->ia_time = 0; + } + UNLOCK (&mdc->lock); + +out: + return ret; +} + + +int +mdc_inode_xatt_invalidate (xlator_t *this, inode_t *inode) +{ + int ret = -1; + struct md_cache *mdc = NULL; + + if (mdc_inode_ctx_get (this, inode, &mdc) != 0) + goto out; + + LOCK (&mdc->lock); + { + mdc->xa_time = 0; + } + UNLOCK (&mdc->lock); + +out: + return ret; +} + + void mdc_load_reqs 
(xlator_t *this, dict_t *dict) { @@ -642,7 +718,7 @@ is_mdc_key_satisfied (const char *key) return 0; for (mdc_key = mdc_keys[i].name; (mdc_key = mdc_keys[i].name); i++) { - if (!mdc_keys[i].check) + if (!mdc_keys[i].load) continue; if (strcmp (mdc_key, key) == 0) return 1; @@ -716,6 +792,7 @@ mdc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt stbuf = {0, }; struct iatt postparent = {0, }; dict_t *xattr_rsp = NULL; + dict_t *xattr_alloc = NULL; mdc_local_t *local = NULL; @@ -723,6 +800,13 @@ mdc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, if (!local) goto uncached; + if (!loc->name) + /* A nameless discovery is dangerous to cache. We + perform nameless lookup with the intention of + re-establishing an inode "properly" + */ + goto uncached; + loc_copy (&local->loc, loc); ret = mdc_inode_iatt_get (this, loc->inode, &stbuf); @@ -747,6 +831,8 @@ mdc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, return 0; uncached: + if (!xdata) + xdata = xattr_alloc = dict_new (); if (xdata) mdc_load_reqs (this, xdata); @@ -755,7 +841,8 @@ uncached: if (xattr_rsp) dict_unref (xattr_rsp); - + if (xattr_alloc) + dict_unref (xattr_alloc); return 0; } @@ -1568,6 +1655,8 @@ mdc_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, mdc_inode_xatt_update (this, local->loc.inode, local->xattr); + mdc_inode_iatt_invalidate (this, local->loc.inode); + out: MDC_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata); @@ -1609,6 +1698,7 @@ mdc_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, mdc_inode_xatt_update (this, local->fd->inode, local->xattr); + mdc_inode_iatt_invalidate (this, local->fd->inode); out: MDC_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata); @@ -1661,6 +1751,7 @@ mdc_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, dict_t *xdata) { int ret; + int op_errno = ENODATA; mdc_local_t *local = NULL; dict_t *xattr = NULL; @@ -1677,10 +1768,12 @@ mdc_getxattr (call_frame_t 
*frame, xlator_t *this, loc_t *loc, const char *key, if (ret != 0) goto uncached; - if (!dict_get (xattr, (char *)key)) - goto uncached; + if (!xattr || !dict_get (xattr, (char *)key)) { + ret = -1; + op_errno = ENODATA; + } - MDC_STACK_UNWIND (getxattr, frame, 0, 0, xattr, xdata); + MDC_STACK_UNWIND (getxattr, frame, ret, op_errno, xattr, xdata); return 0; @@ -1722,6 +1815,7 @@ mdc_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, int ret; mdc_local_t *local = NULL; dict_t *xattr = NULL; + int op_errno = ENODATA; local = mdc_local_get (frame); if (!local) @@ -1736,10 +1830,12 @@ mdc_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, if (ret != 0) goto uncached; - if (!dict_get (xattr, (char *)key)) - goto uncached; + if (!xattr || !dict_get (xattr, (char *)key)) { + ret = -1; + op_errno = ENODATA; + } - MDC_STACK_UNWIND (fgetxattr, frame, 0, 0, xattr, xdata); + MDC_STACK_UNWIND (fgetxattr, frame, ret, op_errno, xattr, xdata); return 0; @@ -1750,6 +1846,97 @@ uncached: return 0; } +int +mdc_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + if (local->key) + mdc_inode_xatt_unset (this, local->loc.inode, local->key); + else + mdc_inode_xatt_invalidate (this, local->loc.inode); + + mdc_inode_iatt_invalidate (this, local->loc.inode); +out: + MDC_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata); + + return 0; +} + + +int +mdc_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + loc_copy (&local->loc, loc); + + local->key = gf_strdup (name); + + STACK_WIND (frame, mdc_removexattr_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->removexattr, + loc, name, xdata); + return 0; +} + + +int +mdc_fremovexattr_cbk 
(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + if (local->key) + mdc_inode_xatt_unset (this, local->fd->inode, local->key); + else + mdc_inode_xatt_invalidate (this, local->fd->inode); + + mdc_inode_iatt_invalidate (this, local->fd->inode); +out: + MDC_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata); + + return 0; +} + + +int +mdc_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + local->fd = fd_ref (fd); + + local->key = gf_strdup (name); + + STACK_WIND (frame, mdc_fremovexattr_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->fremovexattr, + fd, name, xdata); + return 0; +} + int mdc_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -1777,18 +1964,42 @@ int mdc_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, dict_t *xdata) { + dict_t *xattr_alloc = NULL; + + if (!xdata) + xdata = xattr_alloc = dict_new (); + if (xdata) + mdc_load_reqs (this, xdata); + STACK_WIND (frame, mdc_readdirp_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->readdirp, fd, size, offset, xdata); + if (xattr_alloc) + dict_unref (xattr_alloc); return 0; } +int +mdc_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, gf_dirent_t *entries, dict_t *xdata) +{ + STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata); + return 0; +} int mdc_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, dict_t *xdata) { int need_unref = 0; + struct mdc_conf *conf = this->private; + + if (!conf->force_readdirp) { + STACK_WIND(frame, mdc_readdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdir, fd, size, offset, + xdata); + return 0; + } if (!xdata) { xdata = 
dict_new (); @@ -1798,9 +2009,9 @@ mdc_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, if (xdata) mdc_load_reqs (this, xdata); - STACK_WIND (frame, mdc_readdirp_cbk, - FIRST_CHILD (this), FIRST_CHILD (this)->fops->readdirp, - fd, size, offset, xdata); + STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, + xdata); if (need_unref && xdata) dict_unref (xdata); @@ -1808,6 +2019,123 @@ mdc_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, return 0; } +int +mdc_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); + +out: + MDC_STACK_UNWIND (fallocate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; +} + +int mdc_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + mdc_local_t *local; + + local = mdc_local_get(frame); + local->fd = fd_ref(fd); + + STACK_WIND(frame, mdc_fallocate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, + xdata); + + return 0; +} + +int +mdc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); + +out: + MDC_STACK_UNWIND(discard, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; +} + +int mdc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + mdc_local_t *local; + + local = mdc_local_get(frame); + 
local->fd = fd_ref(fd); + + STACK_WIND(frame, mdc_discard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, offset, len, + xdata); + + return 0; +} + +int +mdc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); + +out: + MDC_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; +} + +int mdc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + mdc_local_t *local; + + local = mdc_local_get(frame); + local->fd = fd_ref(fd); + + STACK_WIND(frame, mdc_zerofill_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, len, + xdata); + + return 0; +} + int mdc_forget (xlator_t *this, inode_t *inode) @@ -1862,6 +2190,8 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("cache-posix-acl", conf->cache_posix_acl, options, bool, out); mdc_key_load_set (mdc_keys, "system.posix_acl_", conf->cache_posix_acl); + GF_OPTION_RECONF("force-readdirp", conf->force_readdirp, options, bool, out); + out: return 0; } @@ -1894,6 +2224,8 @@ init (xlator_t *this) GF_OPTION_INIT ("cache-posix-acl", conf->cache_posix_acl, bool, out); mdc_key_load_set (mdc_keys, "system.posix_acl_", conf->cache_posix_acl); + + GF_OPTION_INIT("force-readdirp", conf->force_readdirp, bool, out); out: this->private = conf; @@ -1931,8 +2263,13 @@ struct xlator_fops fops = { .fsetxattr = mdc_fsetxattr, .getxattr = mdc_getxattr, .fgetxattr = mdc_fgetxattr, + .removexattr = mdc_removexattr, + .fremovexattr= mdc_fremovexattr, .readdirp = mdc_readdirp, - .readdir = mdc_readdir + .readdir = mdc_readdir, + .fallocate = mdc_fallocate, + .discard = mdc_discard, + .zerofill = mdc_zerofill, }; @@ 
-1956,4 +2293,11 @@ struct volume_options options[] = { .default_value = "1", .description = "Time period after which cache has to be refreshed", }, + { .key = {"force-readdirp"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "true", + .description = "Convert all readdir requests to readdirplus to " + "collect stat info on each entry.", + }, + { .key = {NULL} }, }; diff --git a/xlators/performance/open-behind/Makefile.am b/xlators/performance/open-behind/Makefile.am new file mode 100644 index 000000000..af437a64d --- /dev/null +++ b/xlators/performance/open-behind/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = src diff --git a/xlators/performance/open-behind/src/Makefile.am b/xlators/performance/open-behind/src/Makefile.am new file mode 100644 index 000000000..125285707 --- /dev/null +++ b/xlators/performance/open-behind/src/Makefile.am @@ -0,0 +1,15 @@ +xlator_LTLIBRARIES = open-behind.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance + +open_behind_la_LDFLAGS = -module -avoid-version + +open_behind_la_SOURCES = open-behind.c +open_behind_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = open-behind-mem-types.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/performance/open-behind/src/open-behind-mem-types.h b/xlators/performance/open-behind/src/open-behind-mem-types.h new file mode 100644 index 000000000..1e94296f4 --- /dev/null +++ b/xlators/performance/open-behind/src/open-behind-mem-types.h @@ -0,0 +1,21 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef __OB_MEM_TYPES_H__ +#define __OB_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_ob_mem_types_ { + gf_ob_mt_fd_t = gf_common_mt_end + 1, + gf_ob_mt_conf_t, + gf_ob_mt_end +}; +#endif diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c new file mode 100644 index 000000000..7e5b57278 --- /dev/null +++ b/xlators/performance/open-behind/src/open-behind.c @@ -0,0 +1,1001 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include "open-behind-mem-types.h" +#include "xlator.h" +#include "statedump.h" +#include "call-stub.h" +#include "defaults.h" + +typedef struct ob_conf { + gf_boolean_t use_anonymous_fd; /* use anonymous FDs wherever safe + e.g - fstat() readv() + + whereas for fops like writev(), lk(), + the fd is important for side effects + like mandatory locks + */ + gf_boolean_t lazy_open; /* delay backend open as much as possible */ +} ob_conf_t; + + +typedef struct ob_fd { + call_frame_t *open_frame; + loc_t loc; + dict_t *xdata; + int flags; + int op_errno; + struct list_head list; +} ob_fd_t; + + +ob_fd_t * +__ob_fd_ctx_get (xlator_t *this, fd_t *fd) +{ + uint64_t value = 0; + int ret = -1; + ob_fd_t *ob_fd = NULL; + + ret = __fd_ctx_get (fd, this, &value); + if (ret) + return NULL; + + ob_fd = (void *) ((long) value); + + return ob_fd; +} + + +ob_fd_t * +ob_fd_ctx_get (xlator_t *this, fd_t *fd) +{ + ob_fd_t *ob_fd = NULL; + + LOCK (&fd->lock); + { + ob_fd = __ob_fd_ctx_get (this, fd); + } + UNLOCK (&fd->lock); + + return ob_fd; +} + + +int +__ob_fd_ctx_set (xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) +{ + uint64_t value = 0; + int ret = -1; + + value = (long) ((void *) 
ob_fd); + + ret = __fd_ctx_set (fd, this, value); + + return ret; +} + + +int +ob_fd_ctx_set (xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) +{ + int ret = -1; + + LOCK (&fd->lock); + { + ret = __ob_fd_ctx_set (this, fd, ob_fd); + } + UNLOCK (&fd->lock); + + return ret; +} + + +ob_fd_t * +ob_fd_new (void) +{ + ob_fd_t *ob_fd = NULL; + + ob_fd = GF_CALLOC (1, sizeof (*ob_fd), gf_ob_mt_fd_t); + + INIT_LIST_HEAD (&ob_fd->list); + + return ob_fd; +} + + +void +ob_fd_free (ob_fd_t *ob_fd) +{ + loc_wipe (&ob_fd->loc); + + if (ob_fd->xdata) + dict_unref (ob_fd->xdata); + + if (ob_fd->open_frame) + STACK_DESTROY (ob_fd->open_frame->root); + + GF_FREE (ob_fd); +} + + +int +ob_wake_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, fd_t *fd_ret, dict_t *xdata) +{ + fd_t *fd = NULL; + struct list_head list; + ob_fd_t *ob_fd = NULL; + call_stub_t *stub = NULL, *tmp = NULL; + + fd = frame->local; + frame->local = NULL; + + INIT_LIST_HEAD (&list); + + LOCK (&fd->lock); + { + ob_fd = __ob_fd_ctx_get (this, fd); + + list_splice_init (&ob_fd->list, &list); + + if (op_ret < 0) { + /* mark fd BAD for ever */ + ob_fd->op_errno = op_errno; + } else { + __fd_ctx_del (fd, this, NULL); + ob_fd_free (ob_fd); + } + } + UNLOCK (&fd->lock); + + list_for_each_entry_safe (stub, tmp, &list, list) { + list_del_init (&stub->list); + + if (op_ret < 0) + call_unwind_error (stub, -1, op_errno); + else + call_resume (stub); + } + + fd_unref (fd); + + STACK_DESTROY (frame->root); + + return 0; +} + + +int +ob_fd_wake (xlator_t *this, fd_t *fd) +{ + call_frame_t *frame = NULL; + ob_fd_t *ob_fd = NULL; + + LOCK (&fd->lock); + { + ob_fd = __ob_fd_ctx_get (this, fd); + if (!ob_fd) + goto unlock; + + frame = ob_fd->open_frame; + ob_fd->open_frame = NULL; + } +unlock: + UNLOCK (&fd->lock); + + if (frame) { + frame->local = fd_ref (fd); + + STACK_WIND (frame, ob_wake_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->open, + &ob_fd->loc, ob_fd->flags, fd, ob_fd->xdata); + } + + 
return 0; +} + + +int +open_and_resume (xlator_t *this, fd_t *fd, call_stub_t *stub) +{ + ob_fd_t *ob_fd = NULL; + int op_errno = 0; + + if (!fd) + goto nofd; + + LOCK (&fd->lock); + { + ob_fd = __ob_fd_ctx_get (this, fd); + if (!ob_fd) + goto unlock; + + if (ob_fd->op_errno) { + op_errno = ob_fd->op_errno; + goto unlock; + } + + list_add_tail (&stub->list, &ob_fd->list); + } +unlock: + UNLOCK (&fd->lock); + +nofd: + if (op_errno) + call_unwind_error (stub, -1, op_errno); + else if (ob_fd) + ob_fd_wake (this, fd); + else + call_resume (stub); + + return 0; +} + + +int +ob_open_behind (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + fd_t *fd, dict_t *xdata) +{ + ob_fd_t *ob_fd = NULL; + int ret = -1; + ob_conf_t *conf = NULL; + + + conf = this->private; + + if (flags & O_TRUNC) { + STACK_WIND (frame, default_open_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->open, + loc, flags, fd, xdata); + return 0; + } + + ob_fd = ob_fd_new (); + if (!ob_fd) + goto enomem; + + ob_fd->open_frame = copy_frame (frame); + if (!ob_fd->open_frame) + goto enomem; + ret = loc_copy (&ob_fd->loc, loc); + if (ret) + goto enomem; + + ob_fd->flags = flags; + if (xdata) + ob_fd->xdata = dict_ref (xdata); + + ret = ob_fd_ctx_set (this, fd, ob_fd); + if (ret) + goto enomem; + + fd_ref (fd); + + STACK_UNWIND_STRICT (open, frame, 0, 0, fd, xdata); + + if (!conf->lazy_open) + ob_fd_wake (this, fd); + + fd_unref (fd); + + return 0; +enomem: + if (ob_fd) { + if (ob_fd->open_frame) + STACK_DESTROY (ob_fd->open_frame->root); + loc_wipe (&ob_fd->loc); + if (ob_fd->xdata) + dict_unref (ob_fd->xdata); + GF_FREE (ob_fd); + } + + return -1; +} + + +int +ob_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + fd_t *fd, dict_t *xdata) +{ + fd_t *old_fd = NULL; + int ret = -1; + int op_errno = 0; + call_stub_t *stub = NULL; + + old_fd = fd_lookup (fd->inode, 0); + if (old_fd) { + /* open-behind only when this is the first FD */ + stub = fop_open_stub (frame, 
default_open_resume, + loc, flags, fd, xdata); + if (!stub) { + op_errno = ENOMEM; + fd_unref (old_fd); + goto err; + } + + open_and_resume (this, old_fd, stub); + + fd_unref (old_fd); + + return 0; + } + + ret = ob_open_behind (frame, this, loc, flags, fd, xdata); + if (ret) { + op_errno = ENOMEM; + goto err; + } + + return 0; +err: + gf_log (this->name, GF_LOG_ERROR, "%s: %s", loc->path, + strerror (op_errno)); + + STACK_UNWIND_STRICT (open, frame, -1, op_errno, 0, 0); + + return 0; +} + + +fd_t * +ob_get_wind_fd (xlator_t *this, fd_t *fd) +{ + ob_conf_t *conf = NULL; + ob_fd_t *ob_fd = NULL; + + conf = this->private; + + ob_fd = ob_fd_ctx_get (this, fd); + + if (ob_fd && conf->use_anonymous_fd) + return fd_anonymous (fd->inode); + + return fd_ref (fd); +} + + +int +ob_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + call_stub_t *stub = NULL; + fd_t *wind_fd = NULL; + + wind_fd = ob_get_wind_fd (this, fd); + + stub = fop_readv_stub (frame, default_readv_resume, wind_fd, + size, offset, flags, xdata); + fd_unref (wind_fd); + + if (!stub) + goto err; + + open_and_resume (this, wind_fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0); + + return 0; +} + + +int +ob_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov, + int count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_writev_stub (frame, default_writev_resume, fd, iov, count, + offset, flags, iobref, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, 0, 0, 0); + + return 0; +} + + +int +ob_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + call_stub_t *stub = NULL; + fd_t *wind_fd = NULL; + + wind_fd = ob_get_wind_fd (this, fd); + + stub = fop_fstat_stub (frame, default_fstat_resume, wind_fd, 
xdata); + + fd_unref (wind_fd); + + if (!stub) + goto err; + + open_and_resume (this, wind_fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, 0, 0); + + return 0; +} + + +int +ob_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + call_stub_t *stub = NULL; + ob_fd_t *ob_fd = NULL; + gf_boolean_t unwind = _gf_false; + + LOCK (&fd->lock); + { + ob_fd = __ob_fd_ctx_get (this, fd); + if (ob_fd && ob_fd->open_frame) + /* if open() was never wound to backend, + no need to wind flush() either. + */ + unwind = _gf_true; + } + UNLOCK (&fd->lock); + + if (unwind) + goto unwind; + + stub = fop_flush_stub (frame, default_flush_resume, fd, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, 0); + + return 0; + +unwind: + STACK_UNWIND_STRICT (flush, frame, 0, 0, 0); + + return 0; +} + + +int +ob_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int flag, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_fsync_stub (frame, default_fsync_resume, fd, flag, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0); + + return 0; +} + + +int +ob_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd, + struct gf_flock *flock, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_lk_stub (frame, default_lk_resume, fd, cmd, flock, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (lk, frame, -1, ENOMEM, 0, 0); + + return 0; +} + +int +ob_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_ftruncate_stub (frame, default_ftruncate_resume, fd, offset, + xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (ftruncate, frame, -1, 
ENOMEM, 0, 0, 0); + + return 0; +} + + +int +ob_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr, + int flags, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_fsetxattr_stub (frame, default_fsetxattr_resume, fd, xattr, + flags, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM, 0); + + return 0; +} + + +int +ob_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume, fd, name, + xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM, 0, 0); + + return 0; +} + + +int +ob_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_fremovexattr_stub (frame, default_fremovexattr_resume, fd, + name, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (fremovexattr, frame, -1, ENOMEM, 0); + + return 0; +} + + +int +ob_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + int cmd, struct gf_flock *flock, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_finodelk_stub (frame, default_finodelk_resume, volume, fd, + cmd, flock, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM, 0); + + return 0; +} + + +int +ob_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_fentrylk_stub (frame, default_fentrylk_resume, volume, fd, + basename, cmd, type, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 
0; +err: + STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM, 0); + + return 0; +} + + +int +ob_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_fxattrop_stub (frame, default_fxattrop_resume, fd, optype, + xattr, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOMEM, 0, 0); + + return 0; +} + + +int +ob_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *iatt, int valid, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_fsetattr_stub (frame, default_fsetattr_resume, fd, + iatt, valid, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, 0, 0, 0); + + return 0; +} + +int +ob_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + call_stub_t *stub; + + stub = fop_fallocate_stub(frame, default_fallocate_resume, fd, mode, + offset, len, xdata); + if (!stub) + goto err; + + open_and_resume(this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; +} + +int +ob_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + call_stub_t *stub; + + stub = fop_discard_stub(frame, default_discard_resume, fd, offset, len, + xdata); + if (!stub) + goto err; + + open_and_resume(this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT(discard, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; +} + +int +ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + call_stub_t *stub; + + stub = fop_zerofill_stub(frame, default_zerofill_resume, fd, + offset, len, xdata); + if (!stub) + goto err; + + open_and_resume(this, fd, stub); + + return 0; +err: + 
STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL);
+        return 0;
+}
+
+
+/* unlink fop: build an unlink stub and pass it to open_and_resume() together
+   with an fd looked up on loc->inode (NULL when fd_lookup finds none).
+   The looked-up fd reference is dropped before returning.  If the stub
+   cannot be created the fop is unwound with ENOMEM. */
+int
+ob_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+           dict_t *xdata)
+{
+        fd_t        *fd = NULL;
+        call_stub_t *stub = NULL;
+
+        stub = fop_unlink_stub (frame, default_unlink_resume, loc,
+                                xflags, xdata);
+        if (!stub)
+                goto err;
+
+        fd = fd_lookup (loc->inode, 0);
+
+        open_and_resume (this, fd, stub);
+        if (fd)
+                fd_unref (fd);
+
+        return 0;
+err:
+        STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, 0, 0, 0);
+
+        return 0;
+}
+
+
+/* rename fop: same scheme as ob_unlink(), but the fd is looked up on the
+   destination inode, and only when dst->inode is set. */
+int
+ob_rename (call_frame_t *frame, xlator_t *this, loc_t *src, loc_t *dst,
+           dict_t *xdata)
+{
+        fd_t        *fd = NULL;
+        call_stub_t *stub = NULL;
+
+        stub = fop_rename_stub (frame, default_rename_resume, src, dst, xdata);
+        if (!stub)
+                goto err;
+
+        if (dst->inode)
+                fd = fd_lookup (dst->inode, 0);
+
+        open_and_resume (this, fd, stub);
+        if (fd)
+                fd_unref (fd);
+
+        return 0;
+err:
+        STACK_UNWIND_STRICT (rename, frame, -1, ENOMEM, 0, 0, 0, 0, 0, 0);
+
+        return 0;
+}
+
+
+/* release callback (registered in cbks): free the ob_fd that is still
+   attached to fd, if any.  Always returns 0. */
+int
+ob_release (xlator_t *this, fd_t *fd)
+{
+        ob_fd_t *ob_fd = NULL;
+
+        ob_fd = ob_fd_ctx_get (this, fd);
+        if (!ob_fd)
+                /* ob_wake_cbk() deletes the context and frees the ob_fd
+                   once the backend open succeeds, so there is nothing
+                   left to free for such an fd */
+                return 0;
+
+        ob_fd_free (ob_fd);
+
+        return 0;
+}
+
+
+/* statedump hook for the translator's private data: writes both
+   configuration flags under the "xlator.performance.open-behind" / "priv"
+   section.  Returns -1 when this->private is not set, 0 otherwise. */
+int
+ob_priv_dump (xlator_t *this)
+{
+        ob_conf_t *conf = NULL;
+        char       key_prefix[GF_DUMP_MAX_BUF_LEN];
+
+        conf = this->private;
+
+        if (!conf)
+                return -1;
+
+        gf_proc_dump_build_key (key_prefix, "xlator.performance.open-behind",
+                                "priv");
+
+        gf_proc_dump_add_section (key_prefix);
+
+        gf_proc_dump_write ("use_anonymous_fd", "%d", conf->use_anonymous_fd);
+
+        gf_proc_dump_write ("lazy_open", "%d", conf->lazy_open);
+
+        return 0;
+}
+
+
+/* statedump hook for a single fd.  Uses TRY_LOCK on fd->lock and silently
+   skips the fd when the lock cannot be taken or no ob_fd is attached. */
+int
+ob_fdctx_dump (xlator_t *this, fd_t *fd)
+{
+        ob_fd_t *ob_fd = NULL;
+        char     key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
+        int      ret = 0;
+
+        ret = TRY_LOCK (&fd->lock);
+        if (ret)
+                return 0;
+
+        ob_fd = __ob_fd_ctx_get (this, fd);
+        if (!ob_fd) {
+                UNLOCK (&fd->lock);
+                return 0;
+        }
+
+        gf_proc_dump_build_key (key_prefix, "xlator.performance.open-behind",
+                                "file");
+
gf_proc_dump_add_section (key_prefix);
+
+        gf_proc_dump_write ("fd", "%p", fd);
+
+        gf_proc_dump_write ("open_frame", "%p", ob_fd->open_frame);
+
+        /* ob_fd_wake() sets open_frame to NULL once the backend open has
+           been wound, while the ob_fd itself stays attached to the fd
+           (until ob_wake_cbk() succeeds, or for good after a failed open),
+           so the frame must not be dereferenced unconditionally.
+           NOTE(review): "%p" is kept for root->unique as in the original;
+           confirm the field's type and switch the conversion if it is an
+           integer. */
+        if (ob_fd->open_frame)
+                gf_proc_dump_write ("open_frame.root.unique", "%p",
+                                    ob_fd->open_frame->root->unique);
+
+        gf_proc_dump_write ("loc.path", "%s", ob_fd->loc.path);
+
+        gf_proc_dump_write ("loc.ino", "%s", uuid_utoa (ob_fd->loc.gfid));
+
+        /* dump the saved open flags (an int), not the frame pointer */
+        gf_proc_dump_write ("flags", "%d", ob_fd->flags);
+
+        UNLOCK (&fd->lock);
+
+        return 0;
+}
+
+
+/* Set up memory accounting for this translator with room for
+   gf_ob_mt_end + 1 types; logs an error and returns the non-zero result
+   of xlator_mem_acct_init() on failure. */
+int
+mem_acct_init (xlator_t *this)
+{
+        int     ret = -1;
+
+        ret = xlator_mem_acct_init (this, gf_ob_mt_end + 1);
+
+        if (ret)
+                gf_log (this->name, GF_LOG_ERROR, "Memory accounting failed");
+
+        return ret;
+}
+
+
+/* Re-read "use-anonymous-fd" and "lazy-open" from the new option dict into
+   the existing configuration.  Returns 0 on success; ret stays -1 when one
+   of the GF_OPTION_RECONF invocations jumps to the out label. */
+int
+reconfigure (xlator_t *this, dict_t *options)
+{
+        ob_conf_t *conf = NULL;
+        int        ret = -1;
+
+        conf = this->private;
+
+        GF_OPTION_RECONF ("use-anonymous-fd", conf->use_anonymous_fd, options,
+                          bool, out);
+
+        GF_OPTION_RECONF ("lazy-open", conf->lazy_open, options, bool, out);
+
+        ret = 0;
+out:
+        return ret;
+}
+
+
+/* Translator entry point: refuses to start unless exactly one child is
+   configured, warns when the volume has no parents. */
+int
+init (xlator_t *this)
+{
+        ob_conf_t *conf = NULL;
+
+        if (!this->children || this->children->next) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "FATAL: volume (%s) not configured with exactly one "
+                        "child", this->name);
+                return -1;
+        }
+
+        if (!this->parents)
+                gf_log (this->name, GF_LOG_WARNING,
+                        "dangling volume. 
check volfile "); + + conf = GF_CALLOC (1, sizeof (*conf), gf_ob_mt_conf_t); + if (!conf) + goto err; + + GF_OPTION_INIT ("use-anonymous-fd", conf->use_anonymous_fd, bool, err); + + GF_OPTION_INIT ("lazy-open", conf->lazy_open, bool, err); + + this->private = conf; + + return 0; +err: + if (conf) + GF_FREE (conf); + + return -1; +} + + +void +fini (xlator_t *this) +{ + ob_conf_t *conf = NULL; + + conf = this->private; + + GF_FREE (conf); + + return; +} + + +struct xlator_fops fops = { + .open = ob_open, + .readv = ob_readv, + .writev = ob_writev, + .flush = ob_flush, + .fsync = ob_fsync, + .fstat = ob_fstat, + .ftruncate = ob_ftruncate, + .fsetxattr = ob_fsetxattr, + .fgetxattr = ob_fgetxattr, + .fremovexattr = ob_fremovexattr, + .finodelk = ob_finodelk, + .fentrylk = ob_fentrylk, + .fxattrop = ob_fxattrop, + .fsetattr = ob_fsetattr, + .fallocate = ob_fallocate, + .discard = ob_discard, + .zerofill = ob_zerofill, + .unlink = ob_unlink, + .rename = ob_rename, + .lk = ob_lk, +}; + +struct xlator_cbks cbks = { + .release = ob_release, +}; + +struct xlator_dumpops dumpops = { + .priv = ob_priv_dump, + .fdctx = ob_fdctx_dump, +}; + + +struct volume_options options[] = { + { .key = {"use-anonymous-fd"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "yes", + .description = "For read operations, use anonymous FD when " + "original FD is open-behind and not yet opened in the backend.", + }, + { .key = {"lazy-open"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "yes", + .description = "Perform open in the backend only when a necessary " + "FOP arrives (e.g writev on the FD, unlink of the file). 
When option " + "is disabled, perform backend open right after unwinding open().", + }, + { .key = {NULL} } + +}; diff --git a/xlators/performance/quick-read/src/Makefile.am b/xlators/performance/quick-read/src/Makefile.am index 790f1e943..4906f408a 100644 --- a/xlators/performance/quick-read/src/Makefile.am +++ b/xlators/performance/quick-read/src/Makefile.am @@ -1,7 +1,7 @@ xlator_LTLIBRARIES = quick-read.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance -quick_read_la_LDFLAGS = -module -avoidversion +quick_read_la_LDFLAGS = -module -avoid-version quick_read_la_SOURCES = quick-read.c quick_read_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la diff --git a/xlators/performance/quick-read/src/quick-read-mem-types.h b/xlators/performance/quick-read/src/quick-read-mem-types.h index 73c87c819..78547f641 100644 --- a/xlators/performance/quick-read/src/quick-read-mem-types.h +++ b/xlators/performance/quick-read/src/quick-read-mem-types.h @@ -15,6 +15,7 @@ enum gf_qr_mem_types_ { gf_qr_mt_qr_inode_t = gf_common_mt_end + 1, + gf_qr_mt_content_t, gf_qr_mt_qr_fd_ctx_t, gf_qr_mt_iovec, gf_qr_mt_qr_conf_t, diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c index 867900b90..445ea8658 100644 --- a/xlators/performance/quick-read/src/quick-read.c +++ b/xlators/performance/quick-read/src/quick-read.c @@ -11,182 +11,102 @@ #include "quick-read.h" #include "statedump.h" -#define QR_DEFAULT_CACHE_SIZE 134217728 +qr_inode_t *qr_inode_ctx_get (xlator_t *this, inode_t *inode); +void __qr_inode_prune (qr_inode_table_t *table, qr_inode_t *qr_inode); -struct volume_options options[]; -void -_fd_unref (fd_t *fd); - -void -qr_local_free (qr_local_t *local) +int +__qr_inode_ctx_set (xlator_t *this, inode_t *inode, qr_inode_t *qr_inode) { - if (local == NULL) { - goto out; - } - - if (local->stub != NULL) { - call_stub_destroy (local->stub); - } + uint64_t value = 0; + int ret = -1; - GF_FREE 
(local->path); + value = (long) qr_inode; - mem_put (local); + ret = __inode_ctx_set (inode, this, &value); -out: - return; + return ret; } -qr_local_t * -qr_local_new (xlator_t *this) -{ - qr_local_t *local = NULL; - - local = mem_get0 (this->local_pool); - if (local == NULL) { - goto out; - } - - LOCK_INIT (&local->lock); - INIT_LIST_HEAD (&local->list); -out: - return local; -} - - -int32_t -qr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, uint32_t flags, dict_t *xdata); - - -static void -qr_loc_wipe (loc_t *loc) +qr_inode_t * +__qr_inode_ctx_get (xlator_t *this, inode_t *inode) { - if (loc == NULL) { - goto out; - } + qr_inode_t *qr_inode = NULL; + uint64_t value = 0; + int ret = -1; - if (loc->path) { - GF_FREE ((char *)loc->path); - loc->path = NULL; - } + ret = __inode_ctx_get (inode, this, &value); + if (ret) + return NULL; - if (loc->inode) { - inode_unref (loc->inode); - loc->inode = NULL; - } - - if (loc->parent) { - inode_unref (loc->parent); - loc->parent = NULL; - } + qr_inode = (void *) ((long) value); -out: - return; + return qr_inode; } -static int32_t -qr_loc_fill (loc_t *loc, inode_t *inode, char *path) +qr_inode_t * +qr_inode_ctx_get (xlator_t *this, inode_t *inode) { - int32_t ret = -1; + qr_inode_t *qr_inode = NULL; - GF_VALIDATE_OR_GOTO_WITH_ERROR ("quick-read", loc, out, errno, EINVAL); - GF_VALIDATE_OR_GOTO_WITH_ERROR ("quick-read", inode, out, errno, - EINVAL); - GF_VALIDATE_OR_GOTO_WITH_ERROR ("quick-read", path, out, errno, EINVAL); + LOCK (&inode->lock); + { + qr_inode = __qr_inode_ctx_get (this, inode); + } + UNLOCK (&inode->lock); - loc->inode = inode_ref (inode); - uuid_copy (loc->gfid, inode->gfid); - - loc->path = gf_strdup (path); - if (!loc->path) - goto out; - - ret = 0; -out: - if (ret == -1) { - qr_loc_wipe (loc); - } - - return ret; + return qr_inode; } -void -qr_resume_pending_ops (qr_fd_ctx_t *qr_fd_ctx, int32_t op_ret, int32_t op_errno) +qr_inode_t * +qr_inode_new (xlator_t *this, 
inode_t *inode) { - call_stub_t *stub = NULL, *tmp = NULL; - struct list_head waiting_ops = {0, }; - - GF_VALIDATE_OR_GOTO ("quick-read", qr_fd_ctx, out); - - INIT_LIST_HEAD (&waiting_ops); - - LOCK (&qr_fd_ctx->lock); - { - qr_fd_ctx->open_in_transit = 0; - list_splice_init (&qr_fd_ctx->waiting_ops, - &waiting_ops); - } - UNLOCK (&qr_fd_ctx->lock); + qr_inode_t *qr_inode = NULL; - if (!list_empty (&waiting_ops)) { - list_for_each_entry_safe (stub, tmp, &waiting_ops, list) { - list_del_init (&stub->list); - if (op_ret < 0) { - qr_local_t *local = NULL; + qr_inode = GF_CALLOC (1, sizeof (*qr_inode), gf_qr_mt_qr_inode_t); + if (!qr_inode) + return NULL; - local = stub->frame->local; - local->op_ret = op_ret; - local->op_errno = op_errno; - } + INIT_LIST_HEAD (&qr_inode->lru); - call_resume (stub); - } - } + qr_inode->priority = 0; /* initial priority */ -out: - return; + return qr_inode; } -static void -qr_fd_ctx_free (qr_fd_ctx_t *qr_fd_ctx) +qr_inode_t * +qr_inode_ctx_get_or_new (xlator_t *this, inode_t *inode) { - GF_VALIDATE_OR_GOTO ("quick-read", qr_fd_ctx, out); + qr_inode_t *qr_inode = NULL; + int ret = -1; + qr_private_t *priv = NULL; - GF_ASSERT (list_empty (&qr_fd_ctx->waiting_ops)); + priv = this->private; - LOCK (&qr_fd_ctx->fd->inode->lock); - { - list_del_init (&qr_fd_ctx->inode_list); - } - UNLOCK (&qr_fd_ctx->fd->inode->lock); - - GF_FREE (qr_fd_ctx->path); - GF_FREE (qr_fd_ctx); - -out: - return; -} - - -static inline uint32_t -is_match (const char *path, const char *pattern) -{ - int32_t ret = 0; - uint32_t match = 0; + LOCK (&inode->lock); + { + qr_inode = __qr_inode_ctx_get (this, inode); + if (qr_inode) + goto unlock; - GF_VALIDATE_OR_GOTO ("quick-read", path, out); - GF_VALIDATE_OR_GOTO ("quick-read", pattern, out); + qr_inode = qr_inode_new (this, inode); + if (!qr_inode) + goto unlock; - ret = fnmatch (pattern, path, FNM_NOESCAPE); - match = (ret == 0); + ret = __qr_inode_ctx_set (this, inode, qr_inode); + if (ret) { + __qr_inode_prune 
(&priv->table, qr_inode); + GF_FREE (qr_inode); + } + } +unlock: + UNLOCK (&inode->lock); -out: - return match; + return qr_inode; } @@ -196,3230 +116,578 @@ qr_get_priority (qr_conf_t *conf, const char *path) uint32_t priority = 0; struct qr_priority *curr = NULL; - GF_VALIDATE_OR_GOTO ("quick-read", conf, out); - GF_VALIDATE_OR_GOTO ("quick-read", path, out); - list_for_each_entry (curr, &conf->priority_list, list) { - if (is_match (path, curr->pattern)) + if (fnmatch (curr->pattern, path, FNM_NOESCAPE) == 0) priority = curr->priority; } -out: return priority; } -/* To be called with this-priv->table.lock held */ -qr_inode_t * -__qr_inode_alloc (xlator_t *this, char *path, inode_t *inode) +void +__qr_inode_register (qr_inode_table_t *table, qr_inode_t *qr_inode) { - qr_inode_t *qr_inode = NULL; - qr_private_t *priv = NULL; - int priority = 0; - - GF_VALIDATE_OR_GOTO ("quick-read", this, out); - GF_VALIDATE_OR_GOTO (this->name, path, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); - - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - - qr_inode = GF_CALLOC (1, sizeof (*qr_inode), gf_qr_mt_qr_inode_t); - if (qr_inode == NULL) { - goto out; - } - - INIT_LIST_HEAD (&qr_inode->lru); - INIT_LIST_HEAD (&qr_inode->fd_list); - - priority = qr_get_priority (&priv->conf, path); + if (!qr_inode->data) + return; - list_add_tail (&qr_inode->lru, &priv->table.lru[priority]); + if (list_empty (&qr_inode->lru)) + /* first time addition of this qr_inode into table */ + table->cache_used += qr_inode->size; + else + list_del_init (&qr_inode->lru); - qr_inode->inode = inode; - qr_inode->priority = priority; -out: - return qr_inode; + list_add_tail (&qr_inode->lru, &table->lru[qr_inode->priority]); } -/* To be called with qr_inode->table->lock held */ void -__qr_inode_free (qr_inode_t *qr_inode) +qr_inode_set_priority (xlator_t *this, inode_t *inode, const char *path) { - qr_fd_ctx_t *fdctx = NULL, *tmp_fdctx = NULL; + uint32_t priority = 0; + 
qr_inode_table_t *table = NULL; + qr_inode_t *qr_inode = NULL; + qr_private_t *priv = NULL; + qr_conf_t *conf = NULL; - GF_VALIDATE_OR_GOTO ("quick-read", qr_inode, out); + qr_inode = qr_inode_ctx_get (this, inode); + if (!qr_inode) + return; - if (qr_inode->xattr) { - dict_unref (qr_inode->xattr); - } + priv = this->private; + table = &priv->table; + conf = &priv->conf; - list_del (&qr_inode->lru); + if (path) + priority = qr_get_priority (conf, path); + else + /* retain existing priority, just bump LRU */ + priority = qr_inode->priority; - LOCK (&qr_inode->inode->lock); - { - list_for_each_entry_safe (fdctx, tmp_fdctx, &qr_inode->fd_list, - inode_list) { - list_del_init (&fdctx->inode_list); - } - } - UNLOCK (&qr_inode->inode->lock); + LOCK (&table->lock); + { + qr_inode->priority = priority; - GF_FREE (qr_inode); -out: - return; + __qr_inode_register (table, qr_inode); + } + UNLOCK (&table->lock); } + /* To be called with priv->table.lock held */ void -__qr_cache_prune (xlator_t *this) +__qr_inode_prune (qr_inode_table_t *table, qr_inode_t *qr_inode) { - qr_private_t *priv = NULL; - qr_conf_t *conf = NULL; - qr_inode_table_t *table = NULL; - qr_inode_t *curr = NULL, *next = NULL; - int32_t index = 0; - uint64_t size_to_prune = 0; - uint64_t size_pruned = 0; - - GF_VALIDATE_OR_GOTO ("quick-read", this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - - table = &priv->table; - conf = &priv->conf; - - size_to_prune = table->cache_used - conf->cache_size; - - for (index=0; index < conf->max_pri; index++) { - list_for_each_entry_safe (curr, next, &table->lru[index], lru) { - size_pruned += curr->stbuf.ia_size; - inode_ctx_del (curr->inode, this, NULL); - __qr_inode_free (curr); - if (size_pruned >= size_to_prune) - goto done; - } - } - -done: - table->cache_used -= size_pruned; - -out: - return; -} - -/* To be called with table->lock held */ -inline char -__qr_need_cache_prune (qr_conf_t *conf, qr_inode_table_t *table) -{ - char 
need_prune = 0; - - GF_VALIDATE_OR_GOTO ("quick-read", conf, out); - GF_VALIDATE_OR_GOTO ("quick-read", table, out); - - need_prune = (table->cache_used > conf->cache_size); - -out: - return need_prune; -} - - -int32_t -qr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *xdata, struct iatt *postparent) -{ - data_t *content = NULL; - qr_inode_t *qr_inode = NULL; - uint64_t value = 0; - int ret = -1; - qr_conf_t *conf = NULL; - qr_inode_table_t *table = NULL; - qr_private_t *priv = NULL; - qr_local_t *local = NULL; - - GF_ASSERT (frame); - - if ((op_ret == -1) || (xdata == NULL)) { - goto out; - } - - if ((this == NULL) || (this->private == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? "xlator object (this) is NULL" - : "quick-read configuration is not found"); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - priv = this->private; - conf = &priv->conf; - table = &priv->table; + GF_FREE (qr_inode->data); + qr_inode->data = NULL; - local = frame->local; + if (!list_empty (&qr_inode->lru)) { + table->cache_used -= qr_inode->size; + qr_inode->size = 0; - if (buf->ia_size > conf->max_file_size) { - goto out; - } - - if (IA_ISDIR (buf->ia_type)) { - goto out; - } - - if (inode == NULL) { - op_ret = -1; - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, - "lookup returned a NULL inode"); - goto out; - } - - content = dict_get (xdata, GF_CONTENT_KEY); - if (content == NULL) { - goto out; - } - - LOCK (&table->lock); - { - ret = inode_ctx_get (inode, this, &value); - if (ret == -1) { - qr_inode = __qr_inode_alloc (this, local->path, inode); - if (qr_inode == NULL) { - op_ret = -1; - op_errno = ENOMEM; - goto unlock; - } - - ret = inode_ctx_put (inode, this, - (uint64_t)(long)qr_inode); - if (ret == -1) { - __qr_inode_free (qr_inode); - qr_inode = NULL; - op_ret = -1; - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, - "cannot set 
quick-read context in " - "inode (gfid:%s)", - uuid_utoa (inode->gfid)); - goto unlock; - } - } else { - qr_inode = (qr_inode_t *)(long)value; - if (qr_inode == NULL) { - op_ret = -1; - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, - "cannot find quick-read context in " - "inode (gfid:%s)", - uuid_utoa (inode->gfid)); - goto unlock; - } - } - - /* - * Create our own internal dict and migrate the file content - * over to it so it isn't floating around in other translator - * caches. - */ - if (qr_inode->xattr) { - dict_unref (qr_inode->xattr); - qr_inode->xattr = NULL; - table->cache_used -= qr_inode->stbuf.ia_size; - } - - qr_inode->xattr = dict_new(); - if (!qr_inode->xattr) { - op_ret = -1; - op_errno = ENOMEM; - goto unlock; - } - - if (dict_set(qr_inode->xattr, GF_CONTENT_KEY, content) < 0) { - op_ret = -1; - op_errno = ENOMEM; - goto unlock; - } - - dict_del(xdata, GF_CONTENT_KEY); - - qr_inode->stbuf = *buf; - table->cache_used += buf->ia_size; - - gettimeofday (&qr_inode->tv, NULL); - if (__qr_need_cache_prune (conf, table)) { - __qr_cache_prune (this); - } - } -unlock: - UNLOCK (&table->lock); + list_del_init (&qr_inode->lru); + } -out: - /* - * FIXME: content size in dict can be greater than the size application - * requested for. Applications need to be careful till this is fixed. 
- */ - QR_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf, xdata, - postparent); - - return 0; + memset (&qr_inode->buf, 0, sizeof (qr_inode->buf)); } -int32_t -qr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *xdata) +void +qr_inode_prune (xlator_t *this, inode_t *inode) { - qr_conf_t *conf = NULL; - dict_t *new_req_dict = NULL; - int32_t op_ret = -1, op_errno = EINVAL; - data_t *content = NULL; - uint64_t requested_size = 0, size = 0, value = 0; - char cached = 0; - qr_inode_t *qr_inode = NULL; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - qr_local_t *local = NULL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, loc, unwind); - - priv = this->private; - GF_VALIDATE_OR_GOTO (frame->this->name, priv, unwind); - - conf = &priv->conf; - if (conf == NULL) { - op_ret = -1; - op_errno = EINVAL; - goto unwind; - } - - table = &priv->table; - - local = qr_local_new (this); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, unwind, op_errno, - ENOMEM); - - frame->local = local; - - local->path = gf_strdup (loc->path); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, unwind, op_errno, - ENOMEM); - LOCK (&table->lock); - { - op_ret = inode_ctx_get (loc->inode, this, &value); - if (op_ret == 0) { - qr_inode = (qr_inode_t *)(long)value; - if (qr_inode != NULL) { - if (qr_inode->xattr) { - cached = 1; - } - } - } - } - UNLOCK (&table->lock); - - if ((xdata == NULL) && (conf->max_file_size > 0)) { - new_req_dict = xdata = dict_new (); - if (xdata == NULL) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - } - - if (!cached) { - if (xdata) { - content = dict_get (xdata, GF_CONTENT_KEY); - if (content) { - requested_size = data_to_uint64 (content); - } - } - - if ((conf->max_file_size > 0) - && (conf->max_file_size != requested_size)) { - size = (conf->max_file_size > requested_size) ? 
- conf->max_file_size : requested_size; - - op_ret = dict_set (xdata, GF_CONTENT_KEY, - data_from_uint64 (size)); - if (op_ret < 0) { - op_ret = -1; - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_WARNING, - "cannot set key in request dict to " - "request file " - "content during lookup cbk"); - goto unwind; - } - } - } - - STACK_WIND (frame, qr_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xdata); - - if (new_req_dict) { - dict_unref (new_req_dict); - } + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + qr_inode_t *qr_inode = NULL; - return 0; + qr_inode = qr_inode_ctx_get (this, inode); + if (!qr_inode) + return; -unwind: - QR_STACK_UNWIND (lookup, frame, op_ret, op_errno, NULL, NULL, - NULL, NULL); + priv = this->private; + table = &priv->table; - if (new_req_dict) { - dict_unref (new_req_dict); - } - - return 0; + LOCK (&table->lock); + { + __qr_inode_prune (table, qr_inode); + } + UNLOCK (&table->lock); } -int32_t -qr_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, fd_t *fd, dict_t *xdata) +/* To be called with priv->table.lock held */ +void +__qr_cache_prune (qr_inode_table_t *table, qr_conf_t *conf) { - uint64_t value = 0; - int32_t ret = -1; - qr_local_t *local = NULL; - qr_inode_t *qr_inode = NULL; - qr_fd_ctx_t *qr_fd_ctx = NULL; - call_stub_t *stub = NULL, *tmp = NULL; - char is_open = 0; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - struct list_head waiting_ops; - - GF_ASSERT (frame); - - priv = this->private; - table = &priv->table; + qr_inode_t *curr = NULL; + qr_inode_t *next = NULL; + int index = 0; + size_t size_pruned = 0; - local = frame->local; - if (local != NULL) { - is_open = local->is_open; - } - - INIT_LIST_HEAD (&waiting_ops); - - ret = fd_ctx_get (fd, this, &value); - if ((ret == -1) && (op_ret != -1)) { - op_ret = -1; - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, - "cannot find quick-read context in fd (%p) opened on 
" - "inode (gfid: %s)", fd, uuid_utoa (fd->inode->gfid)); - goto out; - } - - if (value) { - qr_fd_ctx = (qr_fd_ctx_t *) (long)value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - qr_fd_ctx->open_in_transit = 0; - - if (op_ret == 0) { - qr_fd_ctx->opened = 1; - } - list_splice_init (&qr_fd_ctx->waiting_ops, - &waiting_ops); - } - UNLOCK (&qr_fd_ctx->lock); - - if (local && local->is_open - && ((local->open_flags & O_TRUNC) == O_TRUNC)) { - LOCK (&table->lock); - { - ret = inode_ctx_del (fd->inode, this, &value); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long) value; - - if (qr_inode != NULL) { - __qr_inode_free (qr_inode); - } - } - } - UNLOCK (&table->lock); - } + for (index = 0; index < conf->max_pri; index++) { + list_for_each_entry_safe (curr, next, &table->lru[index], lru) { - if (!list_empty (&waiting_ops)) { - list_for_each_entry_safe (stub, tmp, &waiting_ops, - list) { - list_del_init (&stub->list); - if (op_ret < 0) { - qr_local_t *local = NULL; + size_pruned += curr->size; - local = stub->frame->local; - local->op_ret = op_ret; - local->op_errno = op_errno; - } + __qr_inode_prune (table, curr); - call_resume (stub); - } + if (table->cache_used < conf->cache_size) + return; } } -out: - if (is_open) { - QR_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata); - } else { - STACK_DESTROY (frame->root); - } - return 0; + return; } -int32_t -qr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, dict_t *xdata) +void +qr_cache_prune (xlator_t *this) { - qr_inode_t *qr_inode = NULL; - int32_t ret = -1; - uint64_t filep = 0; - char content_cached = 0; - qr_fd_ctx_t *qr_fd_ctx = NULL, *tmp_fd_ctx = NULL; - int32_t op_ret = -1, op_errno = EINVAL; - qr_local_t *local = NULL; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this->private, unwind); - GF_VALIDATE_OR_GOTO 
(frame->this->name, fd, unwind); + qr_private_t *priv = NULL; + qr_conf_t *conf = NULL; + qr_inode_table_t *table = NULL; priv = this->private; table = &priv->table; + conf = &priv->conf; - tmp_fd_ctx = qr_fd_ctx = GF_CALLOC (1, sizeof (*qr_fd_ctx), - gf_qr_mt_qr_fd_ctx_t); - if (qr_fd_ctx == NULL) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - - LOCK_INIT (&qr_fd_ctx->lock); - INIT_LIST_HEAD (&qr_fd_ctx->waiting_ops); - INIT_LIST_HEAD (&qr_fd_ctx->inode_list); - qr_fd_ctx->fd = fd; - - qr_fd_ctx->path = gf_strdup (loc->path); - if (qr_fd_ctx->path == NULL) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - - qr_fd_ctx->flags = flags; - - ret = fd_ctx_set (fd, this, (uint64_t)(long)qr_fd_ctx); - if (ret == -1) { - op_ret = -1; - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, - "cannot set quick-read context in " - "fd (%p) opened on inode (gfid:%s)", fd, - uuid_utoa (fd->inode->gfid)); - goto unwind; - } - - tmp_fd_ctx = NULL; - - local = qr_local_new (this); - if (local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - - local->is_open = 1; - local->open_flags = flags; - frame->local = local; - LOCK (&table->lock); - { - ret = inode_ctx_get (fd->inode, this, &filep); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long) filep; - if (qr_inode) { - if (qr_inode->xattr) { - content_cached = 1; - } - } - } - } - UNLOCK (&table->lock); - - if (content_cached && (flags & O_DIRECTORY)) { - op_ret = -1; - op_errno = ENOTDIR; - gf_log (this->name, GF_LOG_WARNING, - "open with O_DIRECTORY flag received on non-directory"); - goto unwind; - } - - if (!content_cached || ((flags & O_ACCMODE) == O_WRONLY) - || ((flags & O_TRUNC) == O_TRUNC) - || ((flags & O_DIRECT) == O_DIRECT)) { - LOCK (&qr_fd_ctx->lock); - { - /* - * we really need not set this flag, since open is - * not yet unwound. 
- */ - - qr_fd_ctx->open_in_transit = 1; - if ((flags & O_DIRECT) == O_DIRECT) { - qr_fd_ctx->disabled = 1; - } - } - UNLOCK (&qr_fd_ctx->lock); - goto wind; - } else { - op_ret = 0; - op_errno = 0; - - LOCK (&fd->inode->lock); - { - list_add_tail (&qr_fd_ctx->inode_list, - &qr_inode->fd_list); - } - UNLOCK (&fd->inode->lock); - } - -unwind: - if (tmp_fd_ctx != NULL) { - qr_fd_ctx_free (tmp_fd_ctx); - } - - QR_STACK_UNWIND (open, frame, op_ret, op_errno, fd, NULL); - return 0; - -wind: - STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, - xdata); - return 0; -} - - -static inline time_t -qr_time_elapsed (struct timeval *now, struct timeval *then) -{ - time_t time_elapsed = 0; - - GF_VALIDATE_OR_GOTO ("quick-read", now, out); - GF_VALIDATE_OR_GOTO ("quick-read", then, out); - - time_elapsed = now->tv_sec - then->tv_sec; - -out: - return time_elapsed; + LOCK (&table->lock); + { + if (table->cache_used > conf->cache_size) + __qr_cache_prune (table, conf); + } + UNLOCK (&table->lock); } -static inline char -qr_need_validation (qr_conf_t *conf, qr_inode_t *qr_inode) +void * +qr_content_extract (dict_t *xdata) { - struct timeval now = {0, }; - char need_validation = 0; + data_t *data = NULL; + void *content = NULL; - GF_VALIDATE_OR_GOTO ("quick-read", conf, out); - GF_VALIDATE_OR_GOTO ("quick-read", qr_inode, out); + data = dict_get (xdata, GF_CONTENT_KEY); + if (!data) + return NULL; - gettimeofday (&now, NULL); + content = GF_CALLOC (1, data->len, gf_qr_mt_content_t); + if (!content) + return NULL; - if (qr_time_elapsed (&now, &qr_inode->tv) >= conf->cache_timeout) - need_validation = 1; + memcpy (content, data->data, data->len); -out: - return need_validation; + return content; } -static int32_t -qr_validate_cache_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - dict_t *xdata) +void +qr_content_update (xlator_t *this, qr_inode_t *qr_inode, void *data, + 
struct iatt *buf) { - qr_inode_t *qr_inode = NULL; - qr_local_t *local = NULL; - uint64_t value = 0; - int32_t ret = 0; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - call_stub_t *stub = NULL; - - GF_ASSERT (frame); - if (this == NULL) { - op_ret = -1; - op_errno = EINVAL; - gf_log (frame->this->name, GF_LOG_WARNING, - "xlator object (this) is NULL"); - goto unwind; - } - - local = frame->local; - if ((local == NULL) || ((local->fd) == NULL)) { - op_ret = -1; - op_errno = EINVAL; - gf_log (frame->this->name, GF_LOG_WARNING, - (local == NULL) ? "local is NULL" - : "fd is not stored in local"); - goto unwind; - } - - local->just_validated = 1; - - if (op_ret == -1) { - goto unwind; - } + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; priv = this->private; table = &priv->table; - LOCK (&table->lock); - { - ret = inode_ctx_get (local->fd->inode, this, &value); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long) value; - } - - if (qr_inode != NULL) { - gettimeofday (&qr_inode->tv, NULL); - - if ((qr_inode->stbuf.ia_mtime != buf->ia_mtime) - || (qr_inode->stbuf.ia_mtime_nsec - != buf->ia_mtime_nsec)) { - inode_ctx_del (local->fd->inode, this, NULL); - __qr_inode_free (qr_inode); - } - } - } - UNLOCK (&table->lock); - - stub = local->stub; - local->stub = NULL; - - call_resume (stub); - - return 0; - -unwind: - /* this is actually unwind of readv */ - QR_STACK_UNWIND (readv, frame, op_ret, op_errno, NULL, -1, NULL, NULL, - NULL); - return 0; -} - - -int32_t -qr_validate_cache_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - dict_t *xdata) -{ - qr_local_t *local = NULL; - int32_t op_ret = -1, op_errno = -1; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this->name, this, out); - - local = frame->local; - if (local == NULL) { - op_ret = -1; - op_errno = EINVAL; - } else { - op_ret = local->op_ret; - op_errno = local->op_errno; - } - -out: - if (op_ret == -1) { - qr_validate_cache_cbk (frame, NULL, this, op_ret, op_errno, - NULL, 
NULL); - } else { - STACK_WIND (frame, qr_validate_cache_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fstat, fd, xdata); - } - - return 0; -} - - -int -qr_validate_cache (call_frame_t *frame, xlator_t *this, fd_t *fd, - call_stub_t *stub) -{ - int ret = -1; - int flags = 0; - uint64_t value = 0; - loc_t loc = {0, }; - char *path = NULL; - qr_local_t *local = NULL; - qr_fd_ctx_t *qr_fd_ctx = NULL; - call_stub_t *validate_stub = NULL; - char need_open = 0, can_wind = 0, validate_cbk_called = 0; - call_frame_t *open_frame = NULL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this->name, this, out); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, out); - GF_VALIDATE_OR_GOTO (frame->this->name, stub, out); - - if (frame->local == NULL) { - local = qr_local_new (this); - if (local == NULL) { - goto out; - } - } else { - local = frame->local; - } - - local->fd = fd; - local->stub = stub; - frame->local = local; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - validate_stub = fop_fstat_stub (frame, - qr_validate_cache_helper, - fd, NULL); - if (validate_stub == NULL) { - ret = -1; - if (need_open) { - qr_fd_ctx->open_in_transit = 0; - } - goto unlock; - } - - list_add_tail (&validate_stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - - if (ret == -1) { - goto out; - } - } else { - can_wind = 1; - } + LOCK (&table->lock); + { + __qr_inode_prune (table, qr_inode); - if (need_open) { - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - validate_cbk_called = 1; - goto out; - } + qr_inode->data = data; + 
qr_inode->size = buf->ia_size; - ret = qr_loc_fill (&loc, fd->inode, path); - if (ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - validate_cbk_called = 1; - STACK_DESTROY (open_frame->root); - goto out; - } + qr_inode->ia_mtime = buf->ia_mtime; + qr_inode->ia_mtime_nsec = buf->ia_mtime_nsec; - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, - &loc, flags, fd, NULL); + qr_inode->buf = *buf; - qr_loc_wipe (&loc); - } else if (can_wind) { - STACK_WIND (frame, qr_validate_cache_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fstat, fd, NULL); - } + gettimeofday (&qr_inode->last_refresh, NULL); - ret = 0; -out: - if ((ret < 0) && !validate_cbk_called) { - if (frame->local == NULL) { - call_stub_destroy (stub); - } + __qr_inode_register (table, qr_inode); + } + UNLOCK (&table->lock); - qr_validate_cache_cbk (frame, NULL, this, -1, errno, NULL, NULL); - } - return ret; + qr_cache_prune (this); } -int32_t -qr_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iovec *vector, int32_t count, - struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) +gf_boolean_t +qr_size_fits (qr_conf_t *conf, struct iatt *buf) { - GF_ASSERT (frame); - - QR_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, - stbuf, iobref, xdata); - return 0; + return (buf->ia_size <= conf->max_file_size); } -int32_t -qr_readv_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, uint32_t flags, dict_t *xdata) +gf_boolean_t +qr_mtime_equal (qr_inode_t *qr_inode, struct iatt *buf) { - qr_local_t *local = NULL; - int32_t op_errno = EINVAL, ret = 0; - uint64_t value = 0; - qr_fd_ctx_t *fdctx = NULL; - - GF_ASSERT (frame); - - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - op_errno = 
local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } - - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding read call", - fdctx ? fdctx->path : NULL, strerror (errno)); - goto unwind; - } - - STACK_WIND (frame, qr_readv_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->readv, fd, size, offset, flags, - xdata); - return 0; - -unwind: - QR_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL); - return 0; + return (qr_inode->ia_mtime == buf->ia_mtime && + qr_inode->ia_mtime_nsec == buf->ia_mtime_nsec); } -int32_t -qr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset, uint32_t read_flags, dict_t *xdata) +void +__qr_content_refresh (xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf) { - qr_inode_t *qr_inode = NULL; - int32_t ret = -1, op_ret = -1, op_errno = -1; - uint64_t value = 0; - int count = -1, flags = 0, i = 0; - char content_cached = 0, need_validation = 0; - char need_open = 0, can_wind = 0, need_unwind = 0; - struct iobuf *iobuf = NULL; - struct iobref *iobref = NULL; - struct iatt stbuf = {0, }; - data_t *content = NULL; - qr_fd_ctx_t *qr_fd_ctx = NULL; - call_stub_t *stub = NULL; - loc_t loc = {0, }; - qr_conf_t *conf = NULL; - struct iovec *vector = NULL; - char *path = NULL; - off_t start = 0, end = 0; - size_t len = 0; - struct iobuf_pool *iobuf_pool = NULL; - qr_local_t *local = NULL; - char just_validated = 0; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - call_frame_t *open_frame = NULL; - - op_ret = 0; + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + qr_conf_t *conf = NULL; priv = this->private; - conf = &priv->conf; table = &priv->table; + conf = &priv->conf; - local = frame->local; - - if (local != NULL) { - just_validated = local->just_validated; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - if (qr_fd_ctx 
!= NULL) { - if (qr_fd_ctx->disabled) { - goto out; - } - } - } - - iobuf_pool = this->ctx->iobuf_pool; - - LOCK (&table->lock); - { - ret = inode_ctx_get (fd->inode, this, &value); - if (ret) - goto unlock; - - qr_inode = (qr_inode_t *)(long)value; - if (!qr_inode || !qr_inode->xattr) - goto unlock; - - if (!just_validated - && qr_need_validation (conf, qr_inode)) { - need_validation = 1; - goto unlock; - } - - content = dict_get (qr_inode->xattr, GF_CONTENT_KEY); - - stbuf = qr_inode->stbuf; - content_cached = 1; - list_move_tail (&qr_inode->lru, - &table->lru[qr_inode->priority]); - - if (offset > content->len) { - op_ret = 0; - end = content->len; - } else { - if ((offset + size) > content->len) { - op_ret = content->len - offset; - end = content->len; - } else { - op_ret = size; - end = offset + size; - } - } - - count = (op_ret / iobuf_pool->default_page_size); - if ((op_ret % iobuf_pool->default_page_size) != 0) { - count++; - } - - if (count == 0) { - op_ret = 0; - goto unlock; - } - - vector = GF_CALLOC (count, sizeof (*vector), gf_qr_mt_iovec); - if (vector == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - goto unlock; - } - - iobref = iobref_new (); - if (iobref == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - goto unlock; - } - - for (i = 0; i < count; i++) { - /* TODO: Now that we have support for variable - io-buf-sizes, i guess we need to get rid of - default size here */ - iobuf = iobuf_get (iobuf_pool); - if (iobuf == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - goto unlock; - } - - start = offset + (iobuf_pool->default_page_size * i); - - if (start > end) { - len = 0; - } else { - len = (iobuf_pool->default_page_size > - ((end - start)) ? 
(end - start) : - iobuf_pool->default_page_size); - - memcpy (iobuf->ptr, content->data + start, len); - } - - iobref_add (iobref, iobuf); - iobuf_unref (iobuf); + if (qr_size_fits (conf, buf) && qr_mtime_equal (qr_inode, buf)) { + qr_inode->buf = *buf; - vector[i].iov_base = iobuf->ptr; - vector[i].iov_len = len; - } - } -unlock: - UNLOCK (&table->lock); - -out: - if (content_cached || need_unwind) { - QR_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, - count, &stbuf, iobref, NULL); - - } else if (need_validation) { - stub = fop_readv_stub (frame, qr_readv, fd, size, offset, - read_flags, xdata); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - qr_validate_cache (frame, this, fd, stub); - } else { - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - if (frame->local == NULL) { - frame->local - = qr_local_new (this); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto fdctx_unlock; - } - } - - stub = fop_readv_stub (frame, - qr_readv_helper, - fd, size, - offset, - read_flags, xdata); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto fdctx_unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - fdctx_unlock: - UNLOCK (&qr_fd_ctx->lock); - - if (op_ret == -1) { - need_unwind = 1; - goto out; - } - } else { - can_wind = 1; - } - - if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - 
} + gettimeofday (&qr_inode->last_refresh, NULL); - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, - &loc, flags, fd, NULL); + __qr_inode_register (table, qr_inode); + } else { + __qr_inode_prune (table, qr_inode); + } - qr_loc_wipe (&loc); - } else if (can_wind) { - STACK_WIND (frame, qr_readv_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->readv, fd, size, - offset, read_flags, xdata); - } - } - -ret: - GF_FREE (vector); - - if (iobref) { - iobref_unref (iobref); - } - - return 0; + return; } -int32_t -qr_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) -{ - GF_ASSERT (frame); - QR_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf, - xdata); - return 0; -} - - -int32_t -qr_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t off, - uint32_t flags, struct iobref *iobref, dict_t *xdata) -{ - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; - - GF_ASSERT (frame); - - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } - - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding write call", - fdctx ? 
fdctx->path : NULL, strerror (errno)); - goto unwind; - } - - STACK_WIND (frame, qr_writev_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->writev, fd, vector, count, off, - flags, iobref, xdata); - return 0; - -unwind: - QR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} - - -int32_t -qr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, - int32_t count, off_t off, uint32_t wr_flags, struct iobref *iobref, - dict_t *xdata) +void +qr_content_refresh (xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf) { - uint64_t value = 0; - int flags = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_inode_t *qr_inode = NULL; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t op_ret = -1, op_errno = -1, ret = -1; - char can_wind = 0, need_unwind = 0, need_open = 0; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - call_frame_t *open_frame = NULL; + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; priv = this->private; table = &priv->table; - ret = fd_ctx_get (fd, this, &value); - - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - } - - LOCK (&table->lock); - { - ret = inode_ctx_get (fd->inode, this, &value); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long)value; - if (qr_inode != NULL) { - inode_ctx_del (fd->inode, this, NULL); - __qr_inode_free (qr_inode); - } - } - } - UNLOCK (&table->lock); - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - frame->local = qr_local_new (this); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_writev_stub (frame, qr_writev_helper, - fd, vector, count, off, - wr_flags, iobref, xdata); - if 
(stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } - - if (need_unwind) { - QR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL, NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_writev_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->writev, fd, vector, count, - off, wr_flags, iobref, xdata); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } - - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - NULL); - - qr_loc_wipe (&loc); - } - -ret: - return 0; -} - - -int32_t -qr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *buf, dict_t *xdata) -{ - QR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata); - return 0; + LOCK (&table->lock); + { + __qr_content_refresh (this, qr_inode, buf); + } + UNLOCK (&table->lock); } -int32_t -qr_fstat_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +gf_boolean_t +__qr_cache_is_fresh (xlator_t *this, qr_inode_t *qr_inode) { - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; + qr_conf_t *conf = NULL; + qr_private_t *priv = NULL; + struct timeval now; + struct timeval diff; - GF_ASSERT (frame); + priv = this->private; + conf = &priv->conf; - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, 
fd, unwind); + gettimeofday (&now, NULL); - if (local->op_ret < 0) { - op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } + timersub (&now, &qr_inode->last_refresh, &diff); - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding fstat call", - fdctx ? fdctx->path : NULL, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, qr_fstat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fstat, fd, xdata); - return 0; + if (diff.tv_sec >= conf->cache_timeout) + return _gf_false; -unwind: - QR_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL); - return 0; + return _gf_true; } -int32_t -qr_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) -{ - qr_fd_ctx_t *qr_fd_ctx = NULL; - char need_open = 0, can_wind = 0, need_unwind = 0; - uint64_t value = 0; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - call_stub_t *stub = NULL; - loc_t loc = {0, }; - char *path = NULL; - int flags = 0; - call_frame_t *open_frame = NULL; - - GF_ASSERT (frame); - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? 
"xlator object (this) is NULL" - : "fd is NULL"); - need_unwind = 1; - goto unwind; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - frame->local = qr_local_new (this); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_fstat_stub (frame, qr_fstat_helper, - fd, xdata); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } - -unwind: - if (need_unwind) { - QR_STACK_UNWIND (fstat, frame, op_ret, op_errno, NULL, NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_fstat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fstat, fd, xdata); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } - - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - NULL); - - qr_loc_wipe (&loc); - } - -ret: - return 0; -} - - -int32_t -qr_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop, dict_t *xdata) -{ - GF_ASSERT (frame); - QR_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, preop, postop, - xdata); - return 0; 
-} - - -int32_t -qr_fsetattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid, dict_t *xdata) -{ - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; - - GF_ASSERT (frame); - - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } - - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding fsetattr " - "call", - fdctx ? fdctx->path : NULL, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, qr_fsetattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, - valid, xdata); - return 0; - -unwind: - QR_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} - - -int32_t -qr_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid, dict_t *xdata) -{ - uint64_t value = 0; - int flags = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - char need_open = 0, can_wind = 0, need_unwind = 0; - call_frame_t *open_frame = NULL; - - GF_ASSERT (frame); - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? 
"xlator object (this) is NULL" : - "fd is NULL"); - need_unwind = 1; - goto out; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - frame->local = qr_local_new (this); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_fsetattr_stub (frame, - qr_fsetattr_helper, - fd, stbuf, valid, xdata); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } - -out: - if (need_unwind) { - QR_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, NULL, NULL, - NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_fsetattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, - valid, xdata); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } - - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - NULL); - - qr_loc_wipe (&loc); - } - -ret: - return 0; -} - - -int32_t -qr_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - GF_ASSERT (frame); - QR_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata); - return 0; -} - - 
-int32_t -qr_fsetxattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - dict_t *dict, int32_t flags, dict_t *xdata) -{ - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; - - GF_ASSERT (frame); - - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } - - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding fsetxattr " - "call", - fdctx ? fdctx->path : NULL, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, qr_fsetxattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags, - xdata); - return 0; - -unwind: - QR_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL); - return 0; -} - - -int32_t -qr_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags, dict_t *xdata) -{ - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - int open_flags = 0; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - char need_open = 0, can_wind = 0, need_unwind = 0; - call_frame_t *open_frame = NULL; - - GF_ASSERT (frame); - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? 
"xlator object (this) " - "is NULL" : "fd is NULL"); - need_unwind = 1; - goto out; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - open_flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - frame->local = qr_local_new (this); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_fsetxattr_stub (frame, - qr_fsetxattr_helper, - fd, dict, flags, xdata); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } - -out: - if (need_unwind) { - QR_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_fsetxattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsetxattr, fd, dict, - flags, xdata); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } - - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, open_flags, - fd, NULL); - - qr_loc_wipe (&loc); - } - -ret: - return 0; -} - - -int32_t -qr_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) -{ - GF_ASSERT (frame); - QR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata); 
- return 0; -} - - -int32_t -qr_fgetxattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name, dict_t *xdata) +int +qr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode_ret, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) { - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; - - GF_ASSERT (frame); - - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } - - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding fgetxattr " - "call", - fdctx ? fdctx->path : NULL, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, qr_fgetxattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fgetxattr, fd, name, xdata); - return 0; - -unwind: - QR_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL); - return 0; -} + void *content = NULL; + qr_inode_t *qr_inode = NULL; + inode_t *inode = NULL; + inode = frame->local; + frame->local = NULL; -int32_t -qr_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, - dict_t *xdata) -{ - int flags = 0; - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - char need_open = 0, can_wind = 0, need_unwind = 0; - call_frame_t *open_frame = NULL; - - /* - * FIXME: Can quick-read use the extended attributes stored in the - * cache? this needs to be discussed. - */ - - GF_ASSERT (frame); - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? 
"xlator object (this) is NULL" : - "fd is NULL"); - need_unwind = 1; + if (op_ret == -1) { + qr_inode_prune (this, inode); goto out; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - frame->local = qr_local_new (this); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_fgetxattr_stub (frame, - qr_fgetxattr_helper, - fd, name, xdata); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } - + } + + if (dict_get (xdata, "sh-failed")) { + qr_inode_prune (this, inode); + goto out; + } + + content = qr_content_extract (xdata); + + if (content) { + /* new content came along, always replace old content */ + qr_inode = qr_inode_ctx_get_or_new (this, inode); + if (!qr_inode) + /* no harm done */ + goto out; + + qr_content_update (this, qr_inode, content, buf); + } else { + /* purge old content if necessary */ + qr_inode = qr_inode_ctx_get (this, inode); + if (!qr_inode) + /* usual path for large files */ + goto out; + + qr_content_refresh (this, qr_inode, buf); + } out: - if (need_unwind) { - QR_STACK_UNWIND (open, frame, op_ret, op_errno, NULL, NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_fgetxattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fgetxattr, fd, name, xdata); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, 
errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } - - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - NULL); - - qr_loc_wipe (&loc); - } - -ret: - return 0; -} - - -int32_t -qr_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, dict_t *xdata) -{ - GF_ASSERT (frame); - QR_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata); - return 0; -} - - -int32_t -qr_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) -{ - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; - - GF_ASSERT (frame); - - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } - - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding flush call", - fdctx ? 
fdctx->path : NULL, strerror (op_errno)); - goto unwind; - } + if (inode) + inode_unref (inode); - STACK_WIND (frame, qr_flush_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->flush, fd, xdata); - return 0; - -unwind: - QR_STACK_UNWIND (flush, frame, -1, op_errno, NULL); + STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode_ret, + buf, xdata, postparent); return 0; } -int32_t -qr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +int +qr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - uint64_t value = 0; - call_stub_t *stub = NULL; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - char can_wind = 0, need_unwind = 0; - - GF_ASSERT (frame); - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? "xlator object (this) is NULL" - : "fd is NULL"); - need_unwind = 1; - goto out; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long)value; - } + qr_private_t *priv = NULL; + qr_conf_t *conf = NULL; + qr_inode_t *qr_inode = NULL; + int ret = -1; + dict_t *new_xdata = NULL; - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - if (qr_fd_ctx->opened) { - can_wind = 1; - } else if (qr_fd_ctx->open_in_transit) { - frame->local = qr_local_new (this); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_flush_stub (frame, qr_flush_helper, - fd, xdata); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } else { - op_ret = 0; - need_unwind = 1; - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } + priv = this->private; + conf = &priv->conf; -out: - if (need_unwind) { - QR_STACK_UNWIND (flush, frame, op_ret, op_errno, NULL); - } else if 
(can_wind) { - STACK_WIND (frame, qr_flush_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->flush, fd, xdata); - } + qr_inode = qr_inode_ctx_get (this, loc->inode); + if (qr_inode && qr_inode->data) + /* cached. only validate in qr_lookup_cbk */ + goto wind; + + if (!xdata) + xdata = new_xdata = dict_new (); + + if (!xdata) + goto wind; + + ret = 0; + if (conf->max_file_size) + ret = dict_set (xdata, GF_CONTENT_KEY, + data_from_uint64 (conf->max_file_size)); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "cannot set key in request dict (%s)", + loc->path); +wind: + frame->local = inode_ref (loc->inode); - return 0; -} + STACK_WIND (frame, qr_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + if (new_xdata) + dict_unref (new_xdata); -int32_t -qr_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) -{ - GF_ASSERT (frame); - QR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, xdata); return 0; } -int32_t -qr_fentrylk_helper (call_frame_t *frame, xlator_t *this, const char *volume, - fd_t *fd, const char *basename, entrylk_cmd cmd, - entrylk_type type, dict_t *xdata) +int +qr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata) { - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; + gf_dirent_t *entry = NULL; + qr_inode_t *qr_inode = NULL; - GF_ASSERT (frame); + if (op_ret <= 0) + goto unwind; - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); + list_for_each_entry (entry, &entries->list, list) { + if (!entry->inode) + continue; - if (local->op_ret < 0) { - op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; 
- } + qr_inode = qr_inode_ctx_get (this, entry->inode); + if (!qr_inode) + /* no harm */ + continue; - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding fentrylk " - "call", - fdctx ? fdctx->path : NULL, strerror (op_errno)); - goto unwind; + qr_content_refresh (this, qr_inode, &entry->d_stat); } - STACK_WIND(frame, qr_fentrylk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fentrylk, volume, fd, basename, - cmd, type, xdata); - return 0; - unwind: - QR_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL); - return 0; -} - - -int32_t -qr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, - const char *basename, entrylk_cmd cmd, entrylk_type type, - dict_t *xdata) -{ - int flags = 0; - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - char need_open = 0, can_wind = 0, need_unwind = 0; - call_frame_t *open_frame = NULL; - - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? 
"xlator object (this) is NULL" - : "fd is NULL"); - need_unwind = 1; - goto out; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long)value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - frame->local = qr_local_new (this); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_fentrylk_stub (frame, - qr_fentrylk_helper, - volume, fd, basename, - cmd, type, xdata); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } - -out: - if (need_unwind) { - QR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_fentrylk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fentrylk, volume, fd, - basename, cmd, type, xdata); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } - - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - NULL); - - qr_loc_wipe (&loc); - } - -ret: - return 0; + STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata); + return 0; } -int32_t -qr_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +int 
+qr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, + size_t size, off_t offset, dict_t *xdata) { - GF_ASSERT (frame); - QR_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata); - return 0; + STACK_WIND (frame, qr_readdirp_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->readdirp, + fd, size, offset, xdata); + return 0; } -int32_t -qr_finodelk_helper (call_frame_t *frame, xlator_t *this, const char *volume, - fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata) -{ - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; - - GF_ASSERT (frame); - - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } - - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding finodelk " - "call", - fdctx ? fdctx->path : NULL, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, qr_finodelk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->finodelk, volume, fd, cmd, lock, - xdata); - return 0; - -unwind: - QR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL); - return 0; -} - - -int32_t -qr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, - int32_t cmd, struct gf_flock *lock, dict_t *xdata) -{ - int flags = 0; - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - char need_open = 0, can_wind = 0, need_unwind = 0; - call_frame_t *open_frame = NULL; - - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? 
"xlator object (this) is NULL" - : "fd is NULL"); - need_unwind = 1; - goto out; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long)value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - frame->local = qr_local_new (this); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_finodelk_stub (frame, - qr_finodelk_helper, - volume, fd, cmd, - lock, xdata); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } - -out: - if (need_unwind) { - QR_STACK_UNWIND (finodelk, frame, op_ret, op_errno, NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_finodelk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->finodelk, volume, fd, - cmd, lock, xdata); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } +int +qr_readv_cached (call_frame_t *frame, qr_inode_t *qr_inode, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + xlator_t *this = NULL; + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + int op_ret = -1; + struct iobuf *iobuf = NULL; + struct iobref *iobref = NULL; + struct iovec iov = {0, }; + struct iatt buf = {0, }; + + this = frame->this; + priv = this->private; + table = 
&priv->table; + + LOCK (&table->lock); + { + op_ret = -1; + + if (!qr_inode->data) + goto unlock; - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - NULL); + if (offset >= qr_inode->size) + goto unlock; - qr_loc_wipe (&loc); - } + if (!__qr_cache_is_fresh (this, qr_inode)) + goto unlock; -ret: - return 0; -} + op_ret = min (size, (qr_inode->size - offset)); + iobuf = iobuf_get2 (this->ctx->iobuf_pool, op_ret); + if (!iobuf) { + op_ret = -1; + goto unlock; + } -int32_t -qr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, - dict_t *xdata) -{ - GF_ASSERT (frame); - QR_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata); - return 0; -} + iobref = iobref_new (); + if (!iobref) { + op_ret = -1; + iobuf_unref (iobuf); + goto unlock; + } + iobref_add (iobref, iobuf); -int32_t -qr_fsync_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, - dict_t *xdata) -{ - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; + memcpy (iobuf->ptr, qr_inode->data + offset, op_ret); - GF_ASSERT (frame); + buf = qr_inode->buf; - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); + /* bump LRU */ + __qr_inode_register (table, qr_inode); + } +unlock: + UNLOCK (&table->lock); - if (local->op_ret < 0) { - op_errno = local->op_errno; + if (op_ret > 0) { + iov.iov_base = iobuf->ptr; + iov.iov_len = op_ret; - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } + STACK_UNWIND_STRICT (readv, frame, op_ret, 0, &iov, 1, + &buf, iobref, xdata); + } - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding fsync call", - fdctx ? 
fdctx->path : NULL, strerror (op_errno)); - goto unwind; - } + if (iobuf) + iobuf_unref (iobuf); - STACK_WIND (frame, qr_fsync_cbk, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->fsync, fd, flags, xdata); - return 0; + if (iobref) + iobref_unref (iobref); -unwind: - QR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + return op_ret; } -int32_t -qr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, - dict_t *xdata) +int +qr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - int open_flags = 0; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - char need_open = 0, can_wind = 0, need_unwind = 0; - call_frame_t *open_frame = NULL; - - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? "xlator object (this) is NULL" - : "fd is NULL"); - need_unwind = 1; - goto out; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long)value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - open_flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } + qr_inode_t *qr_inode = NULL; - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - frame->local = qr_local_new (this); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_fsync_stub (frame, qr_fsync_helper, - fd, flags, xdata); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } 
- -out: - if (need_unwind) { - QR_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL, NULL, NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_fsync_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsync, fd, flags, xdata); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } + qr_inode = qr_inode_ctx_get (this, fd->inode); + if (!qr_inode) + goto wind; - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, open_flags, - fd, NULL); + if (qr_readv_cached (frame, qr_inode, size, offset, flags, xdata) <= 0) + goto wind; - qr_loc_wipe (&loc); - } - -ret: - return 0; + return 0; +wind: + STACK_WIND (frame, default_readv_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv, + fd, size, offset, flags, xdata); + return 0; } -int32_t -qr_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +int +qr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov, + int count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) { - int32_t ret = 0; - uint64_t value = 0; - qr_inode_t *qr_inode = NULL; - qr_local_t *local = NULL; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; + qr_inode_prune (this, fd->inode); - GF_ASSERT (frame); - - if (op_ret == -1) { - goto out; - } - - local = frame->local; - if ((local == NULL) || (local->fd == NULL) - || (local->fd->inode == NULL)) { - op_ret = -1; - op_errno = EINVAL; - gf_log (frame->this->name, GF_LOG_WARNING, "cannot get inode"); - goto out; - } - - if ((this == NULL) || (this->private == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == 
NULL) ? "xlator object (this) is NULL" - : "cannot get quick read configuration from xlator " - "object"); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - priv = this->private; - table = &priv->table; - - LOCK (&table->lock); - { - ret = inode_ctx_get (local->fd->inode, this, &value); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long) value; - - if (qr_inode) { - if (qr_inode->stbuf.ia_size != postbuf->ia_size) - { - inode_ctx_del (local->fd->inode, this, - NULL); - __qr_inode_free (qr_inode); - } - } - } - } - UNLOCK (&table->lock); - -out: - QR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, - postbuf, xdata); - return 0; + STACK_WIND (frame, default_writev_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev, + fd, iov, count, offset, flags, iobref, xdata); + return 0; } -int32_t -qr_ftruncate_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - off_t offset, dict_t *xdata) +int +qr_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; - - GF_ASSERT (frame); - - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } + qr_inode_prune (this, loc->inode); - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding ftruncate " - "call", - fdctx ? 
fdctx->path : NULL, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, qr_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); - return 0; - -unwind: - QR_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL); - return 0; + STACK_WIND (frame, default_truncate_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->truncate, + loc, offset, xdata); + return 0; } -int32_t +int qr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - dict_t *xdata) -{ - int flags = 0; - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_local_t *local = NULL; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - char need_open = 0, can_wind = 0, need_unwind = 0; - call_frame_t *open_frame = NULL; - - GF_ASSERT (frame); - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? "xlator object (this) is NULL" - : "fd is NULL"); - need_unwind = 1; - goto out; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long)value; - } - - local = qr_local_new (this); - if (local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - goto out; - } - - local->fd = fd; - frame->local = local; - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - stub = fop_ftruncate_stub (frame, - qr_ftruncate_helper, - fd, offset, xdata); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } - -out: - if (need_unwind) { - 
QR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, - NULL, NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } - - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - NULL); - - qr_loc_wipe (&loc); - } - -ret: - return 0; -} - - -int32_t -qr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct gf_flock *lock, dict_t *xdata) + dict_t *xdata) { - GF_ASSERT (frame); - QR_STACK_UNWIND (lk, frame, op_ret, op_errno, lock, xdata); - return 0; -} + qr_inode_prune (this, fd->inode); - -int32_t -qr_lk_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, - struct gf_flock *lock, dict_t *xdata) -{ - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; - - GF_ASSERT (frame); - - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } - - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding lk call", - fdctx ? 
fdctx->path : NULL, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, qr_lk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata); - - return 0; - -unwind: - QR_STACK_UNWIND (lk, frame, -1, op_errno, lock, xdata); - return 0; -} - - -int32_t -qr_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, - struct gf_flock *lock, dict_t *xdata) -{ - int flags = 0; - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - char need_open = 0, can_wind = 0, need_unwind = 0; - call_frame_t *open_frame = NULL; - - GF_ASSERT (frame); - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? "xlator object (this) is NULL" - : "fd is NULL"); - need_unwind = 1; - goto out; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long)value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - frame->local = qr_local_new (this); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_lk_stub (frame, qr_lk_helper, fd, - cmd, lock, xdata); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } - -out: - if (need_unwind) { - QR_STACK_UNWIND (lk, frame, op_ret, op_errno, NULL, NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_lk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lk, fd, cmd, lock, 
xdata); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } - - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - NULL); - - qr_loc_wipe (&loc); - } - -ret: - return 0; + STACK_WIND (frame, default_ftruncate_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->ftruncate, + fd, offset, xdata); + return 0; } -int32_t -qr_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent, dict_t *xdata) -{ - QR_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent, - postparent, xdata); - return 0; -} - - -int32_t -qr_unlink_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, - dict_t *xdata) -{ - qr_local_t *local = NULL; - uint32_t open_count = 0; - qr_unlink_ctx_t *unlink_ctx = NULL, *tmp = NULL; - - local = frame->local; - - LOCK (&local->lock); - { - open_count = --local->open_count; - } - UNLOCK (&local->lock); - - if (open_count > 0) { - goto out; - } - - list_for_each_entry_safe (unlink_ctx, tmp, &local->list, list) { - fd_unref (unlink_ctx->fdctx->fd); - list_del_init (&unlink_ctx->list); - GF_FREE (unlink_ctx); - } - - if (local->op_ret < 0) { - /* unwind even if we couldn't open one fd */ - QR_STACK_UNWIND (unlink, frame, -1, local->op_errno, NULL, NULL, - NULL); - } else { - STACK_WIND (frame, qr_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); - } - -out: - return 0; -} - - -qr_unlink_ctx_t * -qr_unlink_ctx_new () -{ - qr_unlink_ctx_t *ctx = NULL; - - ctx = GF_CALLOC (1, sizeof (*ctx), gf_qr_mt_qr_unlink_ctx_t); - if (ctx == NULL) { - goto out; - } - - INIT_LIST_HEAD (&ctx->list); -out: - 
return ctx; -} - - -int32_t -qr_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag, - dict_t *xdata) -{ - int32_t op_errno = -1, ret = -1, op_ret = -1; - uint64_t value = 0; - char need_open = 0; - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - call_frame_t *open_frame = NULL; - call_stub_t *stub = NULL; - qr_inode_t *qr_inode = NULL; - uint32_t open_count = 0; - qr_unlink_ctx_t *unlink_ctx = NULL; - - ret = inode_ctx_get (loc->inode, this, &value); - if (ret == 0) { - qr_inode = (qr_inode_t *)(unsigned long)value; - } - - if (qr_inode == NULL) { - goto wind; - } - - local = qr_local_new (this); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, unwind, op_errno, - ENOMEM); - - frame->local = local; - - op_ret = 0; - - LOCK (&loc->inode->lock); - { - list_for_each_entry (fdctx, &qr_inode->fd_list, inode_list) { - need_open = 0; - - LOCK (&fdctx->lock); - { - if (qr_inode->stbuf.ia_nlink == 1) { - fdctx->disabled = 1; - } - - if ((fdctx->opened) - || (strcmp (loc->path, fdctx->path) != 0)) { - goto unlock; - } - - if (!(fdctx->opened - || fdctx->open_in_transit)) { - need_open = 1; - fdctx->open_in_transit = 1; - } - - if (!fdctx->opened) { - unlink_ctx = qr_unlink_ctx_new (); - if (unlink_ctx == NULL) { - op_ret = -1; - op_errno = ENOMEM; - fdctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_unlink_stub (frame, - qr_unlink_helper, - loc, xflag, - xdata); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - fdctx->open_in_transit = 0; - GF_FREE (unlink_ctx); - goto unlock; - } - - list_add_tail (&stub->list, - &fdctx->waiting_ops); - - local->open_count++; - - unlink_ctx->need_open = need_open; - __fd_ref (fdctx->fd); - unlink_ctx->fdctx = fdctx; - list_add_tail (&unlink_ctx->list, - &local->list); - } - } - unlock: - UNLOCK (&fdctx->lock); - - if (op_ret == -1) { - break; - } - } - - open_count = local->open_count; - } - UNLOCK (&loc->inode->lock); - - if (op_ret == -1) { - goto unwind; - } - - if (open_count == 0) { - 
goto wind; - } - - /* no need to hold local->lock, since we are gaurded by condition - * local->open_count cannot be zero till we send open on - * all the required fds. qr_unlink_helper will not modify - * local->list till local->open_count becomes 0. - */ - list_for_each_entry (unlink_ctx, &local->list, list) { - if (!unlink_ctx->need_open) { - continue; - } - - fdctx = unlink_ctx->fdctx; - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (fdctx, -1, ENOMEM); - continue; - } - - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, - loc, fdctx->flags, fdctx->fd, fdctx->xdata); - } - - return 0; - -unwind: - if (local && !list_empty (&local->list)) { - list_for_each_entry (unlink_ctx, &local->list, list) { - qr_resume_pending_ops (unlink_ctx->fdctx, -1, op_errno); - } - } else { - QR_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL); - } - - return 0; - -wind: - STACK_WIND (frame, qr_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata); - return 0; -} - - -int32_t -qr_release (xlator_t *this, fd_t *fd) +int +qr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + fd_t *fd, dict_t *xdata) { - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = 0; - uint64_t value = 0; - - GF_VALIDATE_OR_GOTO ("quick-read", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); + qr_inode_set_priority (this, fd->inode, loc->path); - ret = fd_ctx_del (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - if (qr_fd_ctx) { - qr_fd_ctx_free (qr_fd_ctx); - } - } - -out: - return 0; + STACK_WIND (frame, default_open_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->open, + loc, flags, fd, xdata); + return 0; } - -int32_t +int qr_forget (xlator_t *this, inode_t *inode) { qr_inode_t *qr_inode = NULL; - uint64_t value = 0; - int32_t ret = -1; - qr_private_t *priv = NULL; - GF_VALIDATE_OR_GOTO ("quick-read", this, out); 
- GF_VALIDATE_OR_GOTO (this->name, this->private, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); + qr_inode = qr_inode_ctx_get (this, inode); - priv = this->private; + if (!qr_inode) + return 0; - LOCK (&priv->table.lock); - { - ret = inode_ctx_del (inode, this, &value); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long) value; - __qr_inode_free (qr_inode); - } - } - UNLOCK (&priv->table.lock); + qr_inode_prune (this, inode); -out: - return 0; + GF_FREE (qr_inode); + + return 0; } @@ -3427,32 +695,25 @@ int32_t qr_inodectx_dump (xlator_t *this, inode_t *inode) { qr_inode_t *qr_inode = NULL; - uint64_t value = 0; int32_t ret = -1; char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; char buf[256] = {0, }; - ret = inode_ctx_get (inode, this, &value); - if (ret != 0) { + qr_inode = qr_inode_ctx_get (this, inode); + if (!qr_inode) goto out; - } - - qr_inode = (qr_inode_t *)(long)value; - if (qr_inode == NULL) { - goto out; - } gf_proc_dump_build_key (key_prefix, "xlator.performance.quick-read", "inodectx"); gf_proc_dump_add_section (key_prefix); - gf_proc_dump_write ("entire-file-cached", "%s", qr_inode->xattr ? "yes" : "no"); + gf_proc_dump_write ("entire-file-cached", "%s", qr_inode->data ? 
"yes" : "no"); - if (qr_inode->tv.tv_sec) { - gf_time_fmt (buf, sizeof buf, qr_inode->tv.tv_sec, + if (qr_inode->last_refresh.tv_sec) { + gf_time_fmt (buf, sizeof buf, qr_inode->last_refresh.tv_sec, gf_timefmt_FT); snprintf (buf + strlen (buf), sizeof buf - strlen (buf), - ".%"GF_PRI_SUSECONDS, qr_inode->tv.tv_usec); + ".%"GF_PRI_SUSECONDS, qr_inode->last_refresh.tv_usec); gf_proc_dump_write ("last-cache-validation-time", "%s", buf); } @@ -3462,77 +723,6 @@ out: return ret; } -int32_t -qr_fdctx_dump (xlator_t *this, fd_t *fd) -{ - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0, i = 0; - char key[GF_DUMP_MAX_BUF_LEN] = {0, }; - char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; - call_stub_t *stub = NULL; - gf_boolean_t add_section = _gf_false; - - ret = fd_ctx_get (fd, this, &value); - if (ret != 0) { - goto out; - } - - fdctx = (qr_fd_ctx_t *)(long)value; - if (fdctx == NULL) { - goto out; - } - - gf_proc_dump_build_key (key_prefix, "xlator.performance.quick-read", - "fdctx"); - gf_proc_dump_add_section (key_prefix); - add_section = _gf_true; - - gf_proc_dump_write ("fd", "%p", fd); - - ret = TRY_LOCK (&fdctx->lock); - if (ret) - goto out; - { - gf_proc_dump_write ("path", "%s", fdctx->path); - - gf_proc_dump_write ("opened", "%s", fdctx->opened ? "yes" : "no"); - - gf_proc_dump_write ("open-in-progress", "%s", fdctx->open_in_transit ? - "yes" : "no"); - - gf_proc_dump_write ("caching disabled (for this fd)", "%s", - fdctx->disabled ? 
"yes" : "no"); - - gf_proc_dump_write ("flags", "%d", fdctx->flags); - - list_for_each_entry (stub, &fdctx->waiting_ops, list) { - gf_proc_dump_build_key (key, "", - "waiting-ops[%d].frame", i); - gf_proc_dump_write (key, "%"PRId64, - stub->frame->root->unique); - - gf_proc_dump_build_key (key, "", - "waiting-ops[%d].fop", i); - gf_proc_dump_write (key, "%s", gf_fop_list[stub->fop]); - - i++; - } - } - UNLOCK (&fdctx->lock); - - ret = 0; -out: - if (ret && fdctx) { - if (add_section == _gf_false) - gf_proc_dump_add_section (key_prefix); - - gf_proc_dump_write ("Unable to dump the state of fdctx", - "(Lock acquistion failed) fd: %p, gfid: %s", - fd, uuid_utoa (fd->inode->gfid)); - } - return ret; -} int qr_priv_dump (xlator_t *this) @@ -3553,14 +743,11 @@ qr_priv_dump (xlator_t *this) priv = this->private; conf = &priv->conf; - if (!conf) { - gf_log (this->name, GF_LOG_WARNING, "conf null in xlator"); + if (!conf) return -1; - } table = &priv->table; - gf_proc_dump_build_key (key_prefix, "xlator.performance.quick-read", "priv"); @@ -3570,13 +757,12 @@ qr_priv_dump (xlator_t *this) gf_proc_dump_write ("cache_timeout", "%d", conf->cache_timeout); if (!table) { - gf_log (this->name, GF_LOG_WARNING, "table is NULL"); goto out; } else { for (i = 0; i < conf->max_pri; i++) { list_for_each_entry (curr, &table->lru[i], lru) { file_count++; - total_size += curr->stbuf.ia_size; + total_size += curr->size; } } } @@ -3652,6 +838,7 @@ reconfigure (xlator_t *this, dict_t *options) qr_private_t *priv = NULL; qr_conf_t *conf = NULL; uint64_t cache_size_new = 0; + GF_VALIDATE_OR_GOTO ("quick-read", this, out); GF_VALIDATE_OR_GOTO (this->name, this->private, out); GF_VALIDATE_OR_GOTO (this->name, options, out); @@ -3845,14 +1032,6 @@ init (xlator_t *this) INIT_LIST_HEAD (&priv->table.lru[i]); } - this->local_pool = mem_pool_new (qr_local_t, 64); - if (!this->local_pool) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, - "failed to create local_t's memory pool"); - goto out; - } - 
ret = 0; this->private = priv; @@ -3924,31 +1103,21 @@ out: struct xlator_fops fops = { .lookup = qr_lookup, + .readdirp = qr_readdirp, .open = qr_open, .readv = qr_readv, - .writev = qr_writev, - .fstat = qr_fstat, - .fsetxattr = qr_fsetxattr, - .fgetxattr = qr_fgetxattr, - .flush = qr_flush, - .fentrylk = qr_fentrylk, - .finodelk = qr_finodelk, - .fsync = qr_fsync, - .ftruncate = qr_ftruncate, - .lk = qr_lk, - .fsetattr = qr_fsetattr, - .unlink = qr_unlink, + .writev = qr_writev, + .truncate = qr_truncate, + .ftruncate = qr_ftruncate }; struct xlator_cbks cbks = { .forget = qr_forget, - .release = qr_release, }; struct xlator_dumpops dumpops = { .priv = qr_priv_dump, .inodectx = qr_inodectx_dump, - .fdctx = qr_fdctx_dump }; struct volume_options options[] = { @@ -3974,4 +1143,5 @@ struct volume_options options[] = { .max = 1 * GF_UNIT_KB * 1000, .default_value = "64KB", }, + { .key = {NULL} } }; diff --git a/xlators/performance/quick-read/src/quick-read.h b/xlators/performance/quick-read/src/quick-read.h index 10a04e79c..6f0a05417 100644 --- a/xlators/performance/quick-read/src/quick-read.h +++ b/xlators/performance/quick-read/src/quick-read.h @@ -34,48 +34,20 @@ #include <fnmatch.h> #include "quick-read-mem-types.h" -struct qr_fd_ctx { - char opened; - char disabled; - char open_in_transit; - char *path; - int flags; - int wbflags; - struct list_head waiting_ops; - gf_lock_t lock; - struct list_head inode_list; - fd_t *fd; - dict_t *xdata; -}; -typedef struct qr_fd_ctx qr_fd_ctx_t; - -struct qr_local { - char is_open; - char *path; - char just_validated; - fd_t *fd; - int open_flags; - int32_t op_ret; - int32_t op_errno; - uint32_t open_count; - call_stub_t *stub; - struct list_head list; - gf_lock_t lock; -}; -typedef struct qr_local qr_local_t; struct qr_inode { - dict_t *xattr; - inode_t *inode; + void *data; + size_t size; int priority; - struct iatt stbuf; - struct timeval tv; + uint32_t ia_mtime; + uint32_t ia_mtime_nsec; + struct iatt buf; + struct 
timeval last_refresh; struct list_head lru; - struct list_head fd_list; - struct list_head unlinked_dentries; }; typedef struct qr_inode qr_inode_t; + struct qr_priority { char *pattern; int32_t priority; @@ -105,20 +77,5 @@ struct qr_private { }; typedef struct qr_private qr_private_t; -struct qr_unlink_ctx { - struct list_head list; - qr_fd_ctx_t *fdctx; - char need_open; -}; -typedef struct qr_unlink_ctx qr_unlink_ctx_t; - -void qr_local_free (qr_local_t *local); - -#define QR_STACK_UNWIND(op, frame, params ...) do { \ - qr_local_t *__local = frame->local; \ - frame->local = NULL; \ - STACK_UNWIND_STRICT (op, frame, params); \ - qr_local_free (__local); \ - } while (0) #endif /* #ifndef __QUICK_READ_H */ diff --git a/xlators/performance/read-ahead/src/Makefile.am b/xlators/performance/read-ahead/src/Makefile.am index ae2b1ace9..be80ae7ac 100644 --- a/xlators/performance/read-ahead/src/Makefile.am +++ b/xlators/performance/read-ahead/src/Makefile.am @@ -1,7 +1,7 @@ xlator_LTLIBRARIES = read-ahead.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance -read_ahead_la_LDFLAGS = -module -avoidversion +read_ahead_la_LDFLAGS = -module -avoid-version read_ahead_la_SOURCES = read-ahead.c page.c read_ahead_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la diff --git a/xlators/performance/read-ahead/src/read-ahead.c b/xlators/performance/read-ahead/src/read-ahead.c index 549496755..069ab1f1a 100644 --- a/xlators/performance/read-ahead/src/read-ahead.c +++ b/xlators/performance/read-ahead/src/read-ahead.c @@ -498,7 +498,7 @@ ra_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, "expected offset (%"PRId64") when page_count=%d", offset, file->page_count); - if (file->expected < (conf->page_size * conf->page_count)) { + if (file->expected < (file->page_size * conf->page_count)) { file->expected += size; file->page_count = min ((file->expected / file->page_size), @@ -942,6 +942,106 @@ unwind: return 0; } +int 
+ra_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + GF_ASSERT (frame); + + STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, prebuf, + postbuf, xdata); + return 0; +} + +static int +ra_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + ra_file_t *file = NULL; + fd_t *iter_fd = NULL; + inode_t *inode = NULL; + uint64_t tmp_file = 0; + int32_t op_errno = EINVAL; + + GF_ASSERT (frame); + GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); + GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); + + inode = fd->inode; + + LOCK (&inode->lock); + { + list_for_each_entry (iter_fd, &inode->fd_list, inode_list) { + fd_ctx_get (iter_fd, this, &tmp_file); + file = (ra_file_t *)(long)tmp_file; + if (!file) + continue; + + flush_region(frame, file, offset, len, 1); + } + } + UNLOCK (&inode->lock); + + STACK_WIND (frame, ra_discard_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->discard, fd, offset, len, xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT (discard, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + +int +ra_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + GF_ASSERT (frame); + + STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, prebuf, + postbuf, xdata); + return 0; +} + +static int +ra_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + ra_file_t *file = NULL; + fd_t *iter_fd = NULL; + inode_t *inode = NULL; + uint64_t tmp_file = 0; + int32_t op_errno = EINVAL; + + GF_ASSERT (frame); + GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); + GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); + + inode = fd->inode; + + LOCK (&inode->lock); + { + list_for_each_entry (iter_fd, &inode->fd_list, inode_list) { + 
fd_ctx_get (iter_fd, this, &tmp_file); + file = (ra_file_t *)(long)tmp_file; + if (!file) + continue; + + flush_region(frame, file, offset, len, 1); + } + } + UNLOCK (&inode->lock); + + STACK_WIND (frame, ra_zerofill_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->zerofill, fd, + offset, len, xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT (zerofill, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} int ra_priv_dump (xlator_t *this) @@ -1024,6 +1124,8 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("page-count", conf->page_count, options, uint32, out); + GF_OPTION_RECONF ("page-size", conf->page_size, options, size, out); + ret = 0; out: return ret; @@ -1056,6 +1158,8 @@ init (xlator_t *this) conf->page_size = this->ctx->page_size; + GF_OPTION_INIT ("page-size", conf->page_size, size, out); + GF_OPTION_INIT ("page-count", conf->page_count, uint32, out); GF_OPTION_INIT ("force-atime-update", conf->force_atime_update, bool, out); @@ -1119,6 +1223,8 @@ struct xlator_fops fops = { .truncate = ra_truncate, .ftruncate = ra_ftruncate, .fstat = ra_fstat, + .discard = ra_discard, + .zerofill = ra_zerofill, }; struct xlator_cbks cbks = { @@ -1142,5 +1248,12 @@ struct volume_options options[] = { .default_value = "4", .description = "Number of pages that will be pre-fetched" }, + { .key = {"page-size"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 4096, + .max = 1048576 * 64, + .default_value = "131072", + .description = "Page size with which read-ahead performs server I/O" + }, { .key = {NULL} }, }; diff --git a/xlators/performance/readdir-ahead/Makefile.am b/xlators/performance/readdir-ahead/Makefile.am new file mode 100644 index 000000000..a985f42a8 --- /dev/null +++ b/xlators/performance/readdir-ahead/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/performance/readdir-ahead/src/Makefile.am b/xlators/performance/readdir-ahead/src/Makefile.am new file mode 100644 index 000000000..cdabd1428 --- /dev/null +++ 
b/xlators/performance/readdir-ahead/src/Makefile.am @@ -0,0 +1,15 @@ +xlator_LTLIBRARIES = readdir-ahead.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance + +readdir_ahead_la_LDFLAGS = -module -avoidversion + +readdir_ahead_la_SOURCES = readdir-ahead.c +readdir_ahead_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = readdir-ahead.h readdir-ahead-mem-types.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h new file mode 100644 index 000000000..39e2c5369 --- /dev/null +++ b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h @@ -0,0 +1,24 @@ +/* + Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + + +#ifndef __RDA_MEM_TYPES_H__ +#define __RDA_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_rda_mem_types_ { + gf_rda_mt_rda_local = gf_common_mt_end + 1, + gf_rda_mt_rda_fd_ctx, + gf_rda_mt_rda_priv, + gf_rda_mt_end +}; + +#endif diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c new file mode 100644 index 000000000..53e6756f0 --- /dev/null +++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c @@ -0,0 +1,560 @@ +/* + Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +/* + * performance/readdir-ahead preloads a local buffer with directory entries + * on opendir. The optimization involves using maximum sized gluster rpc + * requests (128k) to minimize overhead of smaller client requests. + * + * For example, fuse currently supports a maximum readdir buffer of 4k + * (regardless of the filesystem client's buffer size). readdir-ahead should + * effectively convert these smaller requests into fewer, larger sized requests + * for simple, sequential workloads (i.e., ls). + * + * The translator is currently designed to handle the simple, sequential case + * only. If a non-sequential directory read occurs, readdir-ahead disables + * preloads on the directory. + */ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "xlator.h" +#include "call-stub.h" +#include "readdir-ahead.h" +#include "readdir-ahead-mem-types.h" +#include "defaults.h" + +static int rda_fill_fd(call_frame_t *, xlator_t *, fd_t *); + +/* + * Get (or create) the fd context for storing prepopulated directory + * entries. 
+ */ +static struct +rda_fd_ctx *get_rda_fd_ctx(fd_t *fd, xlator_t *this) +{ + uint64_t val; + struct rda_fd_ctx *ctx; + + LOCK(&fd->lock); + + if (__fd_ctx_get(fd, this, &val) < 0) { + ctx = GF_CALLOC(1, sizeof(struct rda_fd_ctx), + gf_rda_mt_rda_fd_ctx); + if (!ctx) + goto out; + + LOCK_INIT(&ctx->lock); + INIT_LIST_HEAD(&ctx->entries.list); + ctx->state = RDA_FD_NEW; + /* ctx offset values initialized to 0 */ + + if (__fd_ctx_set(fd, this, (uint64_t) ctx) < 0) { + GF_FREE(ctx); + ctx = NULL; + goto out; + } + } else { + ctx = (struct rda_fd_ctx *) val; + } +out: + UNLOCK(&fd->lock); + return ctx; +} + +/* + * Reset the tracking state of the context. + */ +static void +rda_reset_ctx(struct rda_fd_ctx *ctx) +{ + ctx->state = RDA_FD_NEW; + ctx->cur_offset = 0; + ctx->cur_size = 0; + ctx->next_offset = 0; + gf_dirent_free(&ctx->entries); +} + +/* + * Check whether we can handle a request. Offset verification is done by the + * caller, so we only check whether the preload buffer has completion status + * (including an error) or has some data to return. + */ +static gf_boolean_t +rda_can_serve_readdirp(struct rda_fd_ctx *ctx, size_t request_size) +{ + if ((ctx->state & RDA_FD_EOD) || + (ctx->state & RDA_FD_ERROR) || + (!(ctx->state & RDA_FD_PLUGGED) && (ctx->cur_size > 0))) + return _gf_true; + + return _gf_false; +} + +/* + * Serve a request from the fd dentry list based on the size of the request + * buffer. ctx must be locked. 
+ */ +static int32_t +__rda_serve_readdirp(xlator_t *this, gf_dirent_t *entries, size_t request_size, + struct rda_fd_ctx *ctx) +{ + gf_dirent_t *dirent, *tmp; + size_t dirent_size, size = 0; + int32_t count = 0; + struct rda_priv *priv = this->private; + + list_for_each_entry_safe(dirent, tmp, &ctx->entries.list, list) { + dirent_size = gf_dirent_size(dirent->d_name); + if (size + dirent_size > request_size) + break; + + size += dirent_size; + list_del_init(&dirent->list); + ctx->cur_size -= dirent_size; + + list_add_tail(&dirent->list, &entries->list); + ctx->cur_offset = dirent->d_off; + count++; + } + + if (ctx->cur_size <= priv->rda_low_wmark) + ctx->state |= RDA_FD_PLUGGED; + + return count; +} + +static int32_t +rda_readdirp_stub(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) +{ + gf_dirent_t entries; + int32_t ret; + struct rda_fd_ctx *ctx; + int op_errno = 0; + + ctx = get_rda_fd_ctx(fd, this); + INIT_LIST_HEAD(&entries.list); + ret = __rda_serve_readdirp(this, &entries, size, ctx); + + if (!ret && (ctx->state & RDA_FD_ERROR)) { + ret = -1; + op_errno = ctx->op_errno; + ctx->state &= ~RDA_FD_ERROR; + + /* + * the preload has stopped running in the event of an error, so + * pass all future requests along + */ + ctx->state |= RDA_FD_BYPASS; + } + + STACK_UNWIND_STRICT(readdirp, frame, ret, op_errno, &entries, xdata); + gf_dirent_free(&entries); + + return 0; +} + +static int32_t +rda_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + struct rda_fd_ctx *ctx; + call_stub_t *stub; + int fill = 0; + + ctx = get_rda_fd_ctx(fd, this); + if (!ctx) + goto err; + + if (ctx->state & RDA_FD_BYPASS) + goto bypass; + + LOCK(&ctx->lock); + + /* recheck now that we have the lock */ + if (ctx->state & RDA_FD_BYPASS) { + UNLOCK(&ctx->lock); + goto bypass; + } + + /* + * If a new read comes in at offset 0 and the buffer has been + * completed, reset the context and kickstart the 
filler again. + */ + if (!off && (ctx->state & RDA_FD_EOD) && (ctx->cur_size == 0)) { + rda_reset_ctx(ctx); + fill = 1; + } + + /* + * If a readdir occurs at an unexpected offset or we already have a + * request pending, admit defeat and just get out of the way. + */ + if (off != ctx->cur_offset || ctx->stub) { + ctx->state |= RDA_FD_BYPASS; + UNLOCK(&ctx->lock); + goto bypass; + } + + stub = fop_readdirp_stub(frame, rda_readdirp_stub, fd, size, off, xdata); + if (!stub) { + UNLOCK(&ctx->lock); + goto err; + } + + /* + * If we haven't bypassed the preload, this means we can either serve + * the request out of the preload or the request that enables us to do + * so is in flight... + */ + if (rda_can_serve_readdirp(ctx, size)) + call_resume(stub); + else + ctx->stub = stub; + + UNLOCK(&ctx->lock); + + if (fill) + rda_fill_fd(frame, this, fd); + + return 0; + +bypass: + STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata); + return 0; + +err: + STACK_UNWIND_STRICT(readdirp, frame, -1, ENOMEM, NULL, NULL); + return 0; +} + +static int32_t +rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + gf_dirent_t *dirent, *tmp; + struct rda_local *local = frame->local; + struct rda_fd_ctx *ctx = local->ctx; + struct rda_priv *priv = this->private; + int fill = 1; + + LOCK(&ctx->lock); + + /* Verify that the preload buffer is still pending on this data. 
*/ + if (ctx->next_offset != local->offset) { + gf_log(this->name, GF_LOG_ERROR, + "Out of sequence directory preload."); + ctx->state |= (RDA_FD_BYPASS|RDA_FD_ERROR); + ctx->op_errno = EUCLEAN; + + goto out; + } + + if (entries) { + list_for_each_entry_safe(dirent, tmp, &entries->list, list) { + list_del_init(&dirent->list); + /* must preserve entry order */ + list_add_tail(&dirent->list, &ctx->entries.list); + + ctx->cur_size += gf_dirent_size(dirent->d_name); + ctx->next_offset = dirent->d_off; + } + } + + if (ctx->cur_size >= priv->rda_high_wmark) + ctx->state &= ~RDA_FD_PLUGGED; + + if (!op_ret) { + /* we've hit eod */ + ctx->state &= ~RDA_FD_RUNNING; + ctx->state |= RDA_FD_EOD; + } else if (op_ret == -1) { + /* kill the preload and pend the error */ + ctx->state &= ~RDA_FD_RUNNING; + ctx->state |= RDA_FD_ERROR; + ctx->op_errno = op_errno; + } + + /* + * NOTE: The strict bypass logic in readdirp() means a pending request + * is always based on ctx->cur_offset. + */ + if (ctx->stub && + rda_can_serve_readdirp(ctx, ctx->stub->args.size)) { + call_resume(ctx->stub); + ctx->stub = NULL; + } + +out: + /* + * If we have been marked for bypass and have no pending stub, clear the + * run state so we stop preloading the context with entries. + */ + if ((ctx->state & RDA_FD_BYPASS) && !ctx->stub) + ctx->state &= ~RDA_FD_RUNNING; + + if (!(ctx->state & RDA_FD_RUNNING)) { + fill = 0; + STACK_DESTROY(ctx->fill_frame->root); + ctx->fill_frame = NULL; + } + + UNLOCK(&ctx->lock); + + if (fill) + rda_fill_fd(frame, this, local->fd); + + return 0; +} + +/* + * Start prepopulating the fd context with directory entries. 
+ */ +static int +rda_fill_fd(call_frame_t *frame, xlator_t *this, fd_t *fd) +{ + call_frame_t *nframe = NULL; + struct rda_local *local = NULL; + struct rda_fd_ctx *ctx; + off_t offset; + struct rda_priv *priv = this->private; + + ctx = get_rda_fd_ctx(fd, this); + if (!ctx) + goto err; + + LOCK(&ctx->lock); + + if (ctx->state & RDA_FD_NEW) { + ctx->state &= ~RDA_FD_NEW; + ctx->state |= RDA_FD_RUNNING; + if (priv->rda_low_wmark) + ctx->state |= RDA_FD_PLUGGED; + } + + offset = ctx->next_offset; + + if (!ctx->fill_frame) { + nframe = copy_frame(frame); + if (!nframe) { + UNLOCK(&ctx->lock); + goto err; + } + + local = mem_get0(this->local_pool); + if (!local) { + UNLOCK(&ctx->lock); + goto err; + } + + local->ctx = ctx; + local->fd = fd; + nframe->local = local; + + ctx->fill_frame = nframe; + } else { + nframe = ctx->fill_frame; + local = nframe->local; + } + + local->offset = offset; + + UNLOCK(&ctx->lock); + + STACK_WIND(nframe, rda_fill_fd_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, priv->rda_req_size, + offset, NULL); + + return 0; + +err: + if (nframe) + FRAME_DESTROY(nframe); + + return -1; +} + +static int32_t +rda_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +{ + if (!op_ret) + rda_fill_fd(frame, this, fd); + + STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, xdata); + return 0; +} + +static int32_t +rda_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) +{ + STACK_WIND(frame, rda_opendir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); + return 0; +} + +static int32_t +rda_releasedir(xlator_t *this, fd_t *fd) +{ + uint64_t val; + struct rda_fd_ctx *ctx; + + if (fd_ctx_del(fd, this, &val) < 0) + return -1; + + ctx = (struct rda_fd_ctx *) val; + if (!ctx) + return 0; + + rda_reset_ctx(ctx); + + if (ctx->fill_frame) + STACK_DESTROY(ctx->fill_frame->root); + + if (ctx->stub) + 
gf_log(this->name, GF_LOG_ERROR, + "released a directory with a pending stub"); + + GF_FREE(ctx); + return 0; +} + +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; + + if (!this) + goto out; + + ret = xlator_mem_acct_init(this, gf_rda_mt_end + 1); + + if (ret != 0) + gf_log(this->name, GF_LOG_ERROR, "Memory accounting init" + "failed"); + +out: + return ret; +} + +int +reconfigure(xlator_t *this, dict_t *options) +{ + struct rda_priv *priv = this->private; + + GF_OPTION_RECONF("rda-request-size", priv->rda_req_size, options, + uint32, err); + GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options, size, + err); + GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options, size, + err); + + return 0; +err: + return -1; +} + +int +init(xlator_t *this) +{ + struct rda_priv *priv = NULL; + + GF_VALIDATE_OR_GOTO("readdir-ahead", this, err); + + if (!this->children || this->children->next) { + gf_log(this->name, GF_LOG_ERROR, + "FATAL: readdir-ahead not configured with exactly one" + " child"); + goto err; + } + + if (!this->parents) { + gf_log(this->name, GF_LOG_WARNING, + "dangling volume. 
check volfile "); + } + + priv = GF_CALLOC(1, sizeof(struct rda_priv), gf_rda_mt_rda_priv); + if (!priv) + goto err; + this->private = priv; + + this->local_pool = mem_pool_new(struct rda_local, 32); + if (!this->local_pool) + goto err; + + GF_OPTION_INIT("rda-request-size", priv->rda_req_size, uint32, err); + GF_OPTION_INIT("rda-low-wmark", priv->rda_low_wmark, size, err); + GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size, err); + + return 0; + +err: + if (this->local_pool) + mem_pool_destroy(this->local_pool); + if (priv) + GF_FREE(priv); + + return -1; +} + + +void +fini(xlator_t *this) +{ + GF_VALIDATE_OR_GOTO ("readdir-ahead", this, out); + + GF_FREE(this->private); + +out: + return; +} + +struct xlator_fops fops = { + .opendir = rda_opendir, + .readdirp = rda_readdirp, +}; + +struct xlator_cbks cbks = { + .releasedir = rda_releasedir, +}; + +struct volume_options options[] = { + { .key = {"rda-request-size"}, + .type = GF_OPTION_TYPE_INT, + .min = 4096, + .max = 131072, + .default_value = "131072", + .description = "readdir-ahead request size", + }, + { .key = {"rda-low-wmark"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 0, + .max = 10 * GF_UNIT_MB, + .default_value = "4096", + .description = "the value under which we plug", + }, + { .key = {"rda-high-wmark"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 0, + .max = 100 * GF_UNIT_MB, + .default_value = "131072", + .description = "the value over which we unplug", + }, + { .key = {NULL} }, +}; + diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.h b/xlators/performance/readdir-ahead/src/readdir-ahead.h new file mode 100644 index 000000000..e48786dae --- /dev/null +++ b/xlators/performance/readdir-ahead/src/readdir-ahead.h @@ -0,0 +1,46 @@ +/* + Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __READDIR_AHEAD_H +#define __READDIR_AHEAD_H + +/* state flags */ +#define RDA_FD_NEW (1 << 0) +#define RDA_FD_RUNNING (1 << 1) +#define RDA_FD_EOD (1 << 2) +#define RDA_FD_ERROR (1 << 3) +#define RDA_FD_BYPASS (1 << 4) +#define RDA_FD_PLUGGED (1 << 5) + +struct rda_fd_ctx { + off_t cur_offset; /* current head of the ctx */ + size_t cur_size; /* current size of the preload */ + off_t next_offset; /* tail of the ctx */ + uint32_t state; + gf_lock_t lock; + gf_dirent_t entries; + call_frame_t *fill_frame; + call_stub_t *stub; + int op_errno; +}; + +struct rda_local { + struct rda_fd_ctx *ctx; + fd_t *fd; + off_t offset; +}; + +struct rda_priv { + uint32_t rda_req_size; + uint64_t rda_low_wmark; + uint64_t rda_high_wmark; +}; + +#endif /* __READDIR_AHEAD_H */ diff --git a/xlators/performance/symlink-cache/src/Makefile.am b/xlators/performance/symlink-cache/src/Makefile.am index c37d93e86..4091c3293 100644 --- a/xlators/performance/symlink-cache/src/Makefile.am +++ b/xlators/performance/symlink-cache/src/Makefile.am @@ -1,7 +1,7 @@ xlator_LTLIBRARIES = symlink-cache.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/performance -symlink_cache_la_LDFLAGS = -module -avoidversion +symlink_cache_la_LDFLAGS = -module -avoid-version symlink_cache_la_SOURCES = symlink-cache.c symlink_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la diff --git a/xlators/performance/write-behind/src/Makefile.am b/xlators/performance/write-behind/src/Makefile.am index 5ca0462ae..6c829d8ee 100644 --- a/xlators/performance/write-behind/src/Makefile.am +++ b/xlators/performance/write-behind/src/Makefile.am @@ -1,7 +1,7 @@ xlator_LTLIBRARIES = write-behind.la xlatordir = 
$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance -write_behind_la_LDFLAGS = -module -avoidversion +write_behind_la_LDFLAGS = -module -avoid-version write_behind_la_SOURCES = write-behind.c write_behind_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c index 232e6c0de..95c5921c6 100644 --- a/xlators/performance/write-behind/src/write-behind.c +++ b/xlators/performance/write-behind/src/write-behind.c @@ -43,13 +43,6 @@ typedef struct wb_inode { used for trickling_writes */ - int32_t op_ret; /* Last found op_ret and op_errno - while completing a liability - operation. Will be picked by - the next arriving writev/flush/fsync - */ - int32_t op_errno; - list_head_t all; /* All requests, from enqueue() till destroy(). Used only for resetting generation number when empty. @@ -89,6 +82,12 @@ typedef struct wb_inode { write-behind from this list, and therefore get "upgraded" to the "liability" list. */ + list_head_t wip; /* List of write calls in progress, SYNC or non-SYNC + which are currently STACK_WIND'ed towards the server. + This is for guaranteeing that no two overlapping + writes are in progress at the same time. Modules + like eager-lock in AFR depend on this behavior. + */ uint64_t gen; /* Liability generation number. Represents the current 'state' of liability. Every new addition to the liability list bumps @@ -120,10 +119,11 @@ typedef struct wb_request { list_head_t lie; /* either in @liability or @temptation */ list_head_t winds; list_head_t unwinds; + list_head_t wip; call_stub_t *stub; - size_t write_size; /* currently held size + ssize_t write_size; /* currently held size (after collapsing) */ size_t orig_size; /* size which arrived with the request. 
This is the size by which we grow @@ -205,6 +205,26 @@ out: } +gf_boolean_t +wb_fd_err (fd_t *fd, xlator_t *this, int32_t *op_errno) +{ + gf_boolean_t err = _gf_false; + uint64_t value = 0; + int32_t tmp = 0; + + if (fd_ctx_get (fd, this, &value) == 0) { + if (op_errno) { + tmp = value; + *op_errno = tmp; + } + + err = _gf_true; + } + + return err; +} + + /* Below is a succinct explanation of the code deciding whether two regions overlap, from Pavan <tcp@gluster.com>. @@ -302,6 +322,30 @@ wb_liability_has_conflict (wb_inode_t *wb_inode, wb_request_t *req) } +gf_boolean_t +wb_wip_has_conflict (wb_inode_t *wb_inode, wb_request_t *req) +{ + wb_request_t *each = NULL; + + if (req->stub->fop != GF_FOP_WRITE) + /* non-writes fundamentally never conflict with WIP requests */ + return _gf_false; + + list_for_each_entry (each, &wb_inode->wip, wip) { + if (each == req) + /* request never conflicts with itself, + though this condition should never occur. + */ + continue; + + if (wb_requests_overlap (each, req)) + return _gf_true; + } + + return _gf_false; +} + + static int __wb_request_unref (wb_request_t *req) { @@ -320,6 +364,7 @@ __wb_request_unref (wb_request_t *req) if (req->refcount == 0) { list_del_init (&req->todo); list_del_init (&req->lie); + list_del_init (&req->wip); list_del_init (&req->all); if (list_empty (&wb_inode->all)) { @@ -425,6 +470,7 @@ wb_enqueue_common (wb_inode_t *wb_inode, call_stub_t *stub, int tempted) INIT_LIST_HEAD (&req->lie); INIT_LIST_HEAD (&req->winds); INIT_LIST_HEAD (&req->unwinds); + INIT_LIST_HEAD (&req->wip); req->stub = stub; req->wb_inode = wb_inode; @@ -432,8 +478,8 @@ wb_enqueue_common (wb_inode_t *wb_inode, call_stub_t *stub, int tempted) req->ordering.tempted = tempted; if (stub->fop == GF_FOP_WRITE) { - req->write_size = iov_length (stub->args.writev.vector, - stub->args.writev.count); + req->write_size = iov_length (stub->args.vector, + stub->args.count); /* req->write_size can change as we collapse small writes. 
But the window needs to grow @@ -449,7 +495,7 @@ wb_enqueue_common (wb_inode_t *wb_inode, call_stub_t *stub, int tempted) req->op_ret = req->write_size; req->op_errno = 0; - if (stub->args.writev.fd->flags & O_APPEND) + if (stub->args.fd->flags & O_APPEND) req->ordering.append = 1; } @@ -457,28 +503,28 @@ wb_enqueue_common (wb_inode_t *wb_inode, call_stub_t *stub, int tempted) switch (stub->fop) { case GF_FOP_WRITE: - req->ordering.off = stub->args.writev.off; + req->ordering.off = stub->args.offset; req->ordering.size = req->write_size; - req->fd = fd_ref (stub->args.writev.fd); + req->fd = fd_ref (stub->args.fd); break; case GF_FOP_READ: - req->ordering.off = stub->args.readv.off; - req->ordering.size = stub->args.readv.size; + req->ordering.off = stub->args.offset; + req->ordering.size = stub->args.size; - req->fd = fd_ref (stub->args.readv.fd); + req->fd = fd_ref (stub->args.fd); break; case GF_FOP_TRUNCATE: - req->ordering.off = stub->args.truncate.off; + req->ordering.off = stub->args.offset; req->ordering.size = 0; /* till infinity */ break; case GF_FOP_FTRUNCATE: - req->ordering.off = stub->args.ftruncate.off; + req->ordering.off = stub->args.offset; req->ordering.size = 0; /* till infinity */ - req->fd = fd_ref (stub->args.ftruncate.fd); + req->fd = fd_ref (stub->args.fd); break; default: @@ -541,6 +587,7 @@ __wb_inode_create (xlator_t *this, inode_t *inode) INIT_LIST_HEAD (&wb_inode->todo); INIT_LIST_HEAD (&wb_inode->liability); INIT_LIST_HEAD (&wb_inode->temptation); + INIT_LIST_HEAD (&wb_inode->wip); wb_inode->this = this; @@ -629,12 +676,25 @@ wb_head_done (wb_request_t *head) void -wb_inode_err (wb_inode_t *wb_inode, int op_errno) +wb_fulfill_err (wb_request_t *head, int op_errno) { + wb_inode_t *wb_inode; + wb_request_t *req; + + wb_inode = head->wb_inode; + + /* for all future requests yet to arrive */ + fd_ctx_set (head->fd, THIS, op_errno); + LOCK (&wb_inode->lock); { - wb_inode->op_ret = -1; - wb_inode->op_errno = op_errno; + /* for all requests 
already arrived */ + list_for_each_entry (req, &wb_inode->all, all) { + if (req->fd != head->fd) + continue; + req->op_ret = -1; + req->op_errno = op_errno; + } } UNLOCK (&wb_inode->lock); } @@ -654,7 +714,7 @@ wb_fulfill_cbk (call_frame_t *frame, void *cookie, xlator_t *this, wb_inode = head->wb_inode; if (op_ret == -1) { - wb_inode_err (wb_inode, op_errno); + wb_fulfill_err (head, op_errno); } else if (op_ret < head->total_size) { /* * We've encountered a short write, for whatever reason. @@ -664,7 +724,7 @@ wb_fulfill_cbk (call_frame_t *frame, void *cookie, xlator_t *this, * TODO: Retry the write so we can potentially capture * a real error condition (i.e., ENOSPC). */ - wb_inode_err (wb_inode, EIO); + wb_fulfill_err (head, EIO); } wb_head_done (head); @@ -678,34 +738,47 @@ wb_fulfill_cbk (call_frame_t *frame, void *cookie, xlator_t *this, #define WB_IOV_LOAD(vec, cnt, req, head) do { \ - memcpy (&vec[cnt], req->stub->args.writev.vector, \ - (req->stub->args.writev.count * sizeof(vec[0]))); \ - cnt += req->stub->args.writev.count; \ + memcpy (&vec[cnt], req->stub->args.vector, \ + (req->stub->args.count * sizeof(vec[0]))); \ + cnt += req->stub->args.count; \ head->total_size += req->write_size; \ } while (0) -void +int wb_fulfill_head (wb_inode_t *wb_inode, wb_request_t *head) { - struct iovec vector[MAX_VECTOR_COUNT]; - int count = 0; - wb_request_t *req = NULL; - call_frame_t *frame = NULL; + struct iovec vector[MAX_VECTOR_COUNT]; + int count = 0; + wb_request_t *req = NULL; + call_frame_t *frame = NULL; + gf_boolean_t fderr = _gf_false; + xlator_t *this = NULL; - frame = create_frame (wb_inode->this, wb_inode->this->ctx->pool); - if (!frame) - goto enomem; + this = THIS; + + /* make sure head->total_size is updated before we run into any + * errors + */ WB_IOV_LOAD (vector, count, head, head); list_for_each_entry (req, &head->winds, winds) { WB_IOV_LOAD (vector, count, req, head); - iobref_merge (head->stub->args.writev.iobref, - 
req->stub->args.writev.iobref); + iobref_merge (head->stub->args.iobref, + req->stub->args.iobref); } + if (wb_fd_err (head->fd, this, NULL)) { + fderr = _gf_true; + goto err; + } + + frame = create_frame (wb_inode->this, wb_inode->this->ctx->pool); + if (!frame) + goto err; + frame->root->lk_owner = head->lk_owner; frame->local = head; @@ -718,32 +791,36 @@ wb_fulfill_head (wb_inode_t *wb_inode, wb_request_t *head) STACK_WIND (frame, wb_fulfill_cbk, FIRST_CHILD (frame->this), FIRST_CHILD (frame->this)->fops->writev, head->fd, vector, count, - head->stub->args.writev.off, - head->stub->args.writev.flags, - head->stub->args.writev.iobref, NULL); + head->stub->args.offset, + head->stub->args.flags, + head->stub->args.iobref, NULL); - return; -enomem: - wb_inode_err (wb_inode, ENOMEM); + return 0; +err: + if (!fderr) { + /* frame creation failure */ + fderr = ENOMEM; + wb_fulfill_err (head, fderr); + } wb_head_done (head); - return; + return fderr; } #define NEXT_HEAD(head, req) do { \ if (head) \ - wb_fulfill_head (wb_inode, head); \ + ret |= wb_fulfill_head (wb_inode, head); \ head = req; \ - expected_offset = req->stub->args.writev.off + \ + expected_offset = req->stub->args.offset + \ req->write_size; \ curr_aggregate = 0; \ vector_count = 0; \ } while (0) -void +int wb_fulfill (wb_inode_t *wb_inode, list_head_t *liabilities) { wb_request_t *req = NULL; @@ -753,6 +830,7 @@ wb_fulfill (wb_inode_t *wb_inode, list_head_t *liabilities) off_t expected_offset = 0; size_t curr_aggregate = 0; size_t vector_count = 0; + int ret = 0; conf = wb_inode->this->private; @@ -774,7 +852,7 @@ wb_fulfill (wb_inode_t *wb_inode, list_head_t *liabilities) continue; } - if (expected_offset != req->stub->args.writev.off) { + if (expected_offset != req->stub->args.offset) { NEXT_HEAD (head, req); continue; } @@ -784,7 +862,7 @@ wb_fulfill (wb_inode_t *wb_inode, list_head_t *liabilities) continue; } - if (vector_count + req->stub->args.writev.count > + if (vector_count + 
req->stub->args.count > MAX_VECTOR_COUNT) { NEXT_HEAD (head, req); continue; @@ -792,12 +870,13 @@ wb_fulfill (wb_inode_t *wb_inode, list_head_t *liabilities) list_add_tail (&req->winds, &head->winds); curr_aggregate += req->write_size; - vector_count += req->stub->args.writev.count; + vector_count += req->stub->args.count; } if (head) - wb_fulfill_head (wb_inode, head); - return; + ret |= wb_fulfill_head (wb_inode, head); + + return ret; } @@ -861,10 +940,20 @@ __wb_collapse_small_writes (wb_request_t *holder, wb_request_t *req) struct iobuf *iobuf = NULL; struct iobref *iobref = NULL; int ret = -1; + ssize_t required_size = 0; + size_t holder_len = 0; + size_t req_len = 0; if (!holder->iobref) { - /* TODO: check the required size */ - iobuf = iobuf_get (req->wb_inode->this->ctx->iobuf_pool); + holder_len = iov_length (holder->stub->args.vector, + holder->stub->args.count); + req_len = iov_length (req->stub->args.vector, + req->stub->args.count); + + required_size = max ((THIS->ctx->page_size), + (holder_len + req_len)); + iobuf = iobuf_get2 (req->wb_inode->this->ctx->iobuf_pool, + required_size); if (iobuf == NULL) { goto out; } @@ -885,25 +974,25 @@ __wb_collapse_small_writes (wb_request_t *holder, wb_request_t *req) goto out; } - iov_unload (iobuf->ptr, holder->stub->args.writev.vector, - holder->stub->args.writev.count); - holder->stub->args.writev.vector[0].iov_base = iobuf->ptr; - holder->stub->args.writev.count = 1; + iov_unload (iobuf->ptr, holder->stub->args.vector, + holder->stub->args.count); + holder->stub->args.vector[0].iov_base = iobuf->ptr; + holder->stub->args.count = 1; - iobref_unref (holder->stub->args.writev.iobref); - holder->stub->args.writev.iobref = iobref; + iobref_unref (holder->stub->args.iobref); + holder->stub->args.iobref = iobref; iobuf_unref (iobuf); holder->iobref = iobref_ref (iobref); } - ptr = holder->stub->args.writev.vector[0].iov_base + holder->write_size; + ptr = holder->stub->args.vector[0].iov_base + holder->write_size; - 
iov_unload (ptr, req->stub->args.writev.vector, - req->stub->args.writev.count); + iov_unload (ptr, req->stub->args.vector, + req->stub->args.count); - holder->stub->args.writev.vector[0].iov_len += req->write_size; + holder->stub->args.vector[0].iov_len += req->write_size; holder->write_size += req->write_size; holder->ordering.size += req->write_size; @@ -917,13 +1006,13 @@ void __wb_preprocess_winds (wb_inode_t *wb_inode) { off_t offset_expected = 0; - size_t space_left = 0; + ssize_t space_left = 0; wb_request_t *req = NULL; wb_request_t *tmp = NULL; wb_request_t *holder = NULL; wb_conf_t *conf = NULL; int ret = 0; - size_t page_size = 0; + ssize_t page_size = 0; /* With asynchronous IO from a VM guest (as a file), there can be two sequential writes happening in two regions @@ -953,10 +1042,10 @@ __wb_preprocess_winds (wb_inode_t *wb_inode) continue; } - offset_expected = holder->stub->args.writev.off + offset_expected = holder->stub->args.offset + holder->write_size; - if (req->stub->args.writev.off != offset_expected) { + if (req->stub->args.offset != offset_expected) { holder->ordering.go = 1; holder = req; continue; @@ -968,6 +1057,12 @@ __wb_preprocess_winds (wb_inode_t *wb_inode) continue; } + if (req->fd != holder->fd) { + holder->ordering.go = 1; + holder = req; + continue; + } + space_left = page_size - holder->write_size; if (space_left < req->write_size) { @@ -1022,6 +1117,18 @@ __wb_pick_winds (wb_inode_t *wb_inode, list_head_t *tasks, /* wait some more */ continue; + if (req->stub->fop == GF_FOP_WRITE) { + if (wb_wip_has_conflict (wb_inode, req)) + continue; + + list_add_tail (&req->wip, &wb_inode->wip); + + if (!req->ordering.tempted) + /* unrefed in wb_writev_cbk */ + req->stub->frame->local = + __wb_request_ref (req); + } + list_del_init (&req->todo); if (req->ordering.tempted) @@ -1054,38 +1161,69 @@ wb_process_queue (wb_inode_t *wb_inode) list_head_t tasks = {0, }; list_head_t lies = {0, }; list_head_t liabilities = {0, }; + int retry = 0; 
INIT_LIST_HEAD (&tasks); INIT_LIST_HEAD (&lies); INIT_LIST_HEAD (&liabilities); - LOCK (&wb_inode->lock); - { - __wb_preprocess_winds (wb_inode); + do { + LOCK (&wb_inode->lock); + { + __wb_preprocess_winds (wb_inode); - __wb_pick_winds (wb_inode, &tasks, &liabilities); + __wb_pick_winds (wb_inode, &tasks, &liabilities); - __wb_pick_unwinds (wb_inode, &lies); + __wb_pick_unwinds (wb_inode, &lies); - } - UNLOCK (&wb_inode->lock); + } + UNLOCK (&wb_inode->lock); - wb_do_unwinds (wb_inode, &lies); + wb_do_unwinds (wb_inode, &lies); - wb_do_winds (wb_inode, &tasks); + wb_do_winds (wb_inode, &tasks); - wb_fulfill (wb_inode, &liabilities); + /* fd might've been marked bad due to previous errors. + * Since, caller of wb_process_queue might be the last fop on + * inode, make sure we keep processing request queue, till there + * are no requests left. + */ + retry = wb_fulfill (wb_inode, &liabilities); + } while (retry); return; } int +wb_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) +{ + wb_request_t *req = NULL; + wb_inode_t *wb_inode; + + req = frame->local; + frame->local = NULL; + wb_inode = req->wb_inode; + + wb_request_unref (req); + + /* requests could be pending while this was in progress */ + wb_process_queue(wb_inode); + + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; +} + + +int wb_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata) { - STACK_WIND (frame, default_writev_cbk, + STACK_WIND (frame, wb_writev_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev, fd, vector, count, offset, flags, iobref, xdata); return 0; @@ -1102,10 +1240,15 @@ wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, gf_boolean_t wb_disabled = 0; call_stub_t *stub = NULL; int ret = -1; 
- int op_errno = EINVAL; + int32_t op_errno = EINVAL; int o_direct = O_DIRECT; conf = this->private; + + if (wb_fd_err (fd, this, &op_errno)) { + goto unwind; + } + wb_inode = wb_inode_create (this, fd->inode); if (!wb_inode) { op_errno = ENOMEM; @@ -1118,24 +1261,9 @@ wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, if (fd->flags & (O_SYNC|O_DSYNC|o_direct)) wb_disabled = 1; - if (flags & (O_SYNC|O_DSYNC|O_DIRECT)) - /* O_DIRECT flag in params of writev must _always_ be honored */ + if (flags & (O_SYNC|O_DSYNC|o_direct)) wb_disabled = 1; - op_errno = 0; - LOCK (&wb_inode->lock); - { - /* pick up a previous error in fulfillment */ - if (wb_inode->op_ret < 0) - op_errno = wb_inode->op_errno; - - wb_inode->op_ret = 0; - } - UNLOCK (&wb_inode->lock); - - if (op_errno) - goto unwind; - if (wb_disabled) stub = fop_writev_stub (frame, wb_writev_helper, fd, vector, count, offset, flags, iobref, xdata); @@ -1233,7 +1361,7 @@ wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) wb_conf_t *conf = NULL; wb_inode_t *wb_inode = NULL; call_frame_t *bg_frame = NULL; - int op_errno = 0; + int32_t op_errno = 0; int op_ret = 0; conf = this->private; @@ -1245,19 +1373,10 @@ wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) goto unwind; } - LOCK (&wb_inode->lock); - { - if (wb_inode->op_ret < 0) { - op_ret = -1; - op_errno = wb_inode->op_errno; - } - - wb_inode->op_ret = 0; - } - UNLOCK (&wb_inode->lock); - - if (op_errno) + if (wb_fd_err (fd, this, &op_errno)) { + op_ret = -1; goto unwind; + } if (conf->flush_behind) goto flushbehind; @@ -1301,7 +1420,7 @@ wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) if (!wb_enqueue (wb_inode, stub)) goto unwind; - wb_process_queue (wb_inode); + wb_process_queue (wb_inode); return 0; @@ -1334,6 +1453,10 @@ wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, { wb_inode_t *wb_inode = NULL; call_stub_t *stub = NULL; + 
int32_t op_errno = EINVAL; + + if (wb_fd_err (fd, this, &op_errno)) + goto unwind; wb_inode = wb_inode_ctx_get (this, fd->inode); if (!wb_inode) @@ -1351,7 +1474,7 @@ wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, return 0; unwind: - STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, NULL, NULL, NULL); + STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, NULL); return 0; @@ -1511,25 +1634,35 @@ wb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, { wb_inode_t *wb_inode = NULL; call_stub_t *stub = NULL; + int32_t op_errno = 0; wb_inode = wb_inode_create (this, fd->inode); - if (!wb_inode) + if (!wb_inode) { + op_errno = ENOMEM; + goto unwind; + } + + if (wb_fd_err (fd, this, &op_errno)) goto unwind; stub = fop_ftruncate_stub (frame, wb_ftruncate_helper, fd, offset, xdata); - if (!stub) + if (!stub) { + op_errno = ENOMEM; goto unwind; + } - if (!wb_enqueue (wb_inode, stub)) + if (!wb_enqueue (wb_inode, stub)) { + op_errno = ENOMEM; goto unwind; + } wb_process_queue (wb_inode); return 0; unwind: - STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL); + STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno, NULL, NULL, NULL); if (stub) call_stub_destroy (stub); @@ -1642,13 +1775,9 @@ wb_forget (xlator_t *this, inode_t *inode) if (!wb_inode) return 0; - LOCK (&wb_inode->lock); - { - GF_ASSERT (list_empty (&wb_inode->todo)); - GF_ASSERT (list_empty (&wb_inode->liability)); - GF_ASSERT (list_empty (&wb_inode->temptation)); - } - UNLOCK (&wb_inode->lock); + GF_ASSERT (list_empty (&wb_inode->todo)); + GF_ASSERT (list_empty (&wb_inode->liability)); + GF_ASSERT (list_empty (&wb_inode->temptation)); GF_FREE (wb_inode); @@ -1657,6 +1786,17 @@ wb_forget (xlator_t *this, inode_t *inode) int +wb_release (xlator_t *this, fd_t *fd) +{ + uint64_t tmp = 0; + + fd_ctx_del (fd, this, &tmp); + + return 0; +} + + +int wb_priv_dump (xlator_t *this) { wb_conf_t *conf = NULL; @@ -1711,7 +1851,7 @@ __wb_dump_requests 
(struct list_head *head, char *prefix) req->write_size); gf_proc_dump_write ("offset", "%"PRId64, - req->stub->args.writev.off); + req->stub->args.offset); flag = req->ordering.lied; gf_proc_dump_write ("lied", "%d", flag); @@ -1768,9 +1908,6 @@ wb_inode_dump (xlator_t *this, inode_t *inode) gf_proc_dump_write ("window_current", "%"GF_PRI_SIZET, wb_inode->window_current); - gf_proc_dump_write ("op_ret", "%d", wb_inode->op_ret); - - gf_proc_dump_write ("op_errno", "%d", wb_inode->op_errno); ret = TRY_LOCK (&wb_inode->lock); if (!ret) @@ -1943,6 +2080,7 @@ struct xlator_fops fops = { struct xlator_cbks cbks = { .forget = wb_forget, + .release = wb_release }; @@ -1977,6 +2115,8 @@ struct volume_options options[] = { { .key = {"strict-O_DIRECT"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "off", + .description = "This option when set to off, ignores the " + "O_DIRECT flag." }, { .key = {"strict-write-ordering"}, .type = GF_OPTION_TYPE_BOOL, |
