Diffstat (limited to 'xlators/performance')
-rw-r--r--  xlators/performance/Makefile.am                                  |   2
-rw-r--r--  xlators/performance/io-cache/src/io-cache.c                      |  53
-rw-r--r--  xlators/performance/io-threads/src/io-threads.c                  |  55
-rw-r--r--  xlators/performance/md-cache/src/md-cache.c                      | 223
-rw-r--r--  xlators/performance/open-behind/src/open-behind.c                |  25
-rw-r--r--  xlators/performance/read-ahead/src/read-ahead.c                  |  52
-rw-r--r--  xlators/performance/readdir-ahead/Makefile.am                    |   3
-rw-r--r--  xlators/performance/readdir-ahead/src/Makefile.am                |  15
-rw-r--r--  xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h  |  24
-rw-r--r--  xlators/performance/readdir-ahead/src/readdir-ahead.c            | 560
-rw-r--r--  xlators/performance/readdir-ahead/src/readdir-ahead.h            |  46
-rw-r--r--  xlators/performance/write-behind/src/write-behind.c              |  55
12 files changed, 1070 insertions, 43 deletions
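The first io-cache hunk below replaces inode_ctx_get(inode, this, (uint64_t *) &ioc_inode) with a read into a local uint64_t followed by a cast back to a pointer, and the new ioc_zerofill() uses the same read-into-uint64_t pattern. The standalone sketch below is not patch code; the names are made up, and the assumption is that the old form is being removed because it stores a 64-bit value through a pointer variable, which is narrower than 64 bits on 32-bit builds.

/*
 * Standalone illustration (not part of the patch): fetch the ctx value into
 * a uint64_t and cast afterwards, versus casting the address of a pointer
 * variable to (uint64_t *).
 */
#include <stdint.h>
#include <stdio.h>

struct obj {
        int weight;
};

/* stand-in for inode_ctx_get(): the stored context is always 64 bits wide */
static void
ctx_get (uint64_t *out, uint64_t stored)
{
        *out = stored;
}

int
main (void)
{
        struct obj  o    = { 42 };
        uint64_t    addr = 0;
        struct obj *p    = NULL;

        /* pattern the hunk switches to: fetch into a uint64_t, then cast */
        ctx_get (&addr, (uint64_t) (uintptr_t) &o);
        p = (struct obj *) (uintptr_t) addr;

        /*
         * pattern being removed:
         *     ctx_get ((uint64_t *) &p, ...);
         * which writes 8 bytes through 'p' even where pointers are 4 bytes.
         */
        printf ("weight = %d\n", p->weight);
        return 0;
}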
diff --git a/xlators/performance/Makefile.am b/xlators/performance/Makefile.am index f99e11829..a494190ba 100644 --- a/xlators/performance/Makefile.am +++ b/xlators/performance/Makefile.am @@ -1,3 +1,3 @@ -SUBDIRS = write-behind read-ahead io-threads io-cache symlink-cache quick-read md-cache open-behind +SUBDIRS = write-behind read-ahead readdir-ahead io-threads io-cache symlink-cache quick-read md-cache open-behind CLEANFILES = diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c index d0667e469..201777b38 100644 --- a/xlators/performance/io-cache/src/io-cache.c +++ b/xlators/performance/io-cache/src/io-cache.c @@ -316,9 +316,11 @@ ioc_forget (xlator_t *this, inode_t *inode) static int32_t ioc_invalidate(xlator_t *this, inode_t *inode) { + uint64_t ioc_addr = 0; ioc_inode_t *ioc_inode = NULL; - inode_ctx_get(inode, this, (uint64_t *) &ioc_inode); + inode_ctx_get(inode, this, (uint64_t *) &ioc_addr); + ioc_inode = (void *) ioc_addr; if (ioc_inode) ioc_inode_flush(ioc_inode); @@ -1447,6 +1449,31 @@ ioc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, return 0; } +static int32_t +ioc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + STACK_UNWIND_STRICT(zerofill, frame, op_ret, + op_errno, pre, post, xdata); + return 0; +} + +static int32_t +ioc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + uint64_t ioc_inode = 0; + + inode_ctx_get (fd->inode, this, &ioc_inode); + + if (ioc_inode) + ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + + STACK_WIND(frame, ioc_zerofill_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); + return 0; +} int32_t @@ -1912,11 +1939,11 @@ int ioc_inode_dump (xlator_t *this, inode_t *inode) { - char *path = NULL; + char *path = NULL; int ret = -1; char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; uint64_t tmp_ioc_inode = 0; - ioc_inode_t *ioc_inode = NULL; + ioc_inode_t *ioc_inode = NULL; gf_boolean_t section_added = _gf_false; char uuid_str[64] = {0,}; @@ -1930,9 +1957,6 @@ ioc_inode_dump (xlator_t *this, inode_t *inode) if (ioc_inode == NULL) goto out; - gf_proc_dump_add_section (key_prefix); - section_added = _gf_true; - /* Similar to ioc_page_dump function its better to use * pthread_mutex_trylock and not to use gf_log in statedump * to avoid deadlocks. @@ -1940,24 +1964,30 @@ ioc_inode_dump (xlator_t *this, inode_t *inode) ret = pthread_mutex_trylock (&ioc_inode->inode_lock); if (ret) goto out; - else + { - gf_proc_dump_write ("inode.weight", "%d", ioc_inode->weight); + if (uuid_is_null (ioc_inode->inode->gfid)) + goto unlock; + + gf_proc_dump_add_section (key_prefix); + section_added = _gf_true; - //inode_path takes blocking lock on the itable. 
__inode_path (ioc_inode->inode, NULL, &path); + gf_proc_dump_write ("inode.weight", "%d", ioc_inode->weight); + if (path) { gf_proc_dump_write ("path", "%s", path); GF_FREE (path); } + gf_proc_dump_write ("uuid", "%s", uuid_utoa_r (ioc_inode->inode->gfid, uuid_str)); __ioc_cache_dump (ioc_inode, key_prefix); __ioc_inode_waitq_dump (ioc_inode, key_prefix); - - pthread_mutex_unlock (&ioc_inode->inode_lock); } +unlock: + pthread_mutex_unlock (&ioc_inode->inode_lock); out: if (ret && ioc_inode) { @@ -2072,6 +2102,7 @@ struct xlator_fops fops = { .readdirp = ioc_readdirp, .discard = ioc_discard, + .zerofill = ioc_zerofill, }; diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c index c6bdc3754..bbcf4ed26 100644 --- a/xlators/performance/io-threads/src/io-threads.c +++ b/xlators/performance/io-threads/src/io-threads.c @@ -309,6 +309,7 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub) case GF_FOP_RCHECKSUM: case GF_FOP_FALLOCATE: case GF_FOP_DISCARD: + case GF_FOP_ZEROFILL: pri = IOT_PRI_LO; break; @@ -323,9 +324,9 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub) break; } out: - ret = do_iot_schedule (this->private, stub, pri); gf_log (this->name, GF_LOG_DEBUG, "%s scheduled as %s fop", gf_fop_list[stub->fop], iot_get_pri_meaning (pri)); + ret = do_iot_schedule (this->private, stub, pri); return ret; } @@ -2510,6 +2511,55 @@ out: } int +iot_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preop, struct iatt *postop, dict_t *xdata) +{ + STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, preop, postop, + xdata); + return 0; +} + +int +iot_zerofill_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, + off_t offset, size_t len, dict_t *xdata) +{ + STACK_WIND (frame, iot_zerofill_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->zerofill, fd, offset, len, xdata); + return 0; +} + +int +iot_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + call_stub_t *stub = NULL; + int ret = -1; + + stub = fop_zerofill_stub(frame, iot_zerofill_wrapper, fd, + offset, len, xdata); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "cannot create zerofill stub" + "(out of memory)"); + ret = -ENOMEM; + goto out; + } + + ret = iot_schedule (frame, this, stub); + +out: + if (ret < 0) { + STACK_UNWIND_STRICT (zerofill, frame, -1, -ret, NULL, NULL, + NULL); + if (stub != NULL) { + call_stub_destroy (stub); + } + } + return 0; +} + + +int __iot_workers_scale (iot_conf_t *conf) { int scale = 0; @@ -2534,7 +2584,7 @@ __iot_workers_scale (iot_conf_t *conf) while (diff) { diff --; - ret = pthread_create (&thread, &conf->w_attr, iot_worker, conf); + ret = gf_thread_create (&thread, &conf->w_attr, iot_worker, conf); if (ret == 0) { conf->curr_count++; gf_log (conf->this->name, GF_LOG_DEBUG, @@ -2840,6 +2890,7 @@ struct xlator_fops fops = { .rchecksum = iot_rchecksum, .fallocate = iot_fallocate, .discard = iot_discard, + .zerofill = iot_zerofill, }; struct xlator_cbks cbks; diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c index 2f52cbe58..84c363ad9 100644 --- a/xlators/performance/md-cache/src/md-cache.c +++ b/xlators/performance/md-cache/src/md-cache.c @@ -18,6 +18,7 @@ #include "dict.h" #include "xlator.h" #include "md-cache-mem-types.h" +#include "glusterfs-acl.h" #include <assert.h> #include <sys/time.h> @@ -42,12 +43,12 @@ static struct 
mdc_key { int check; } mdc_keys[] = { { - .name = "system.posix_acl_access", + .name = POSIX_ACL_ACCESS_XATTR, .load = 0, .check = 1, }, { - .name = "system.posix_acl_default", + .name = POSIX_ACL_DEFAULT_XATTR, .load = 0, .check = 1, }, @@ -132,6 +133,7 @@ struct mdc_local { loc_t loc2; fd_t *fd; char *linkname; + char *key; dict_t *xattr; }; @@ -174,7 +176,7 @@ __mdc_inode_ctx_set (xlator_t *this, inode_t *inode, struct md_cache *mdc) uint64_t mdc_int = 0; mdc_int = (long) mdc; - ret = __inode_ctx_set2 (inode, this, &mdc_int, 0); + ret = __inode_ctx_set (inode, this, &mdc_int); return ret; } @@ -229,6 +231,8 @@ mdc_local_wipe (xlator_t *this, mdc_local_t *local) GF_FREE (local->linkname); + GF_FREE (local->key); + if (local->xattr) dict_unref (local->xattr); @@ -585,6 +589,31 @@ out: int +mdc_inode_xatt_unset (xlator_t *this, inode_t *inode, char *name) +{ + int ret = -1; + struct md_cache *mdc = NULL; + + mdc = mdc_inode_prep (this, inode); + if (!mdc) + goto out; + + if (!name) + goto out; + + LOCK (&mdc->lock); + { + dict_del (mdc->xattr, name); + } + UNLOCK (&mdc->lock); + + ret = 0; +out: + return ret; +} + + +int mdc_inode_xatt_get (xlator_t *this, inode_t *inode, dict_t **dict) { int ret = -1; @@ -616,6 +645,46 @@ out: } +int +mdc_inode_iatt_invalidate (xlator_t *this, inode_t *inode) +{ + int ret = -1; + struct md_cache *mdc = NULL; + + if (mdc_inode_ctx_get (this, inode, &mdc) != 0) + goto out; + + LOCK (&mdc->lock); + { + mdc->ia_time = 0; + } + UNLOCK (&mdc->lock); + +out: + return ret; +} + + +int +mdc_inode_xatt_invalidate (xlator_t *this, inode_t *inode) +{ + int ret = -1; + struct md_cache *mdc = NULL; + + if (mdc_inode_ctx_get (this, inode, &mdc) != 0) + goto out; + + LOCK (&mdc->lock); + { + mdc->xa_time = 0; + } + UNLOCK (&mdc->lock); + +out: + return ret; +} + + void mdc_load_reqs (xlator_t *this, dict_t *dict) { @@ -731,6 +800,13 @@ mdc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, if (!local) goto uncached; + if (!loc->name) + /* A nameless discovery is dangerous to cache. 
We + perform nameless lookup with the intention of + re-establishing an inode "properly" + */ + goto uncached; + loc_copy (&local->loc, loc); ret = mdc_inode_iatt_get (this, loc->inode, &stbuf); @@ -1579,6 +1655,8 @@ mdc_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, mdc_inode_xatt_update (this, local->loc.inode, local->xattr); + mdc_inode_iatt_invalidate (this, local->loc.inode); + out: MDC_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata); @@ -1620,6 +1698,7 @@ mdc_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, mdc_inode_xatt_update (this, local->fd->inode, local->xattr); + mdc_inode_iatt_invalidate (this, local->fd->inode); out: MDC_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata); @@ -1689,7 +1768,7 @@ mdc_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, if (ret != 0) goto uncached; - if (!xattr || dict_get (xattr, (char *)key)) { + if (!xattr || !dict_get (xattr, (char *)key)) { ret = -1; op_errno = ENODATA; } @@ -1751,7 +1830,7 @@ mdc_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, if (ret != 0) goto uncached; - if (!xattr || dict_get (xattr, (char *)key)) { + if (!xattr || !dict_get (xattr, (char *)key)) { ret = -1; op_errno = ENODATA; } @@ -1767,6 +1846,97 @@ uncached: return 0; } +int +mdc_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + if (local->key) + mdc_inode_xatt_unset (this, local->loc.inode, local->key); + else + mdc_inode_xatt_invalidate (this, local->loc.inode); + + mdc_inode_iatt_invalidate (this, local->loc.inode); +out: + MDC_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata); + + return 0; +} + + +int +mdc_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + loc_copy (&local->loc, loc); + + local->key = gf_strdup (name); + + STACK_WIND (frame, mdc_removexattr_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->removexattr, + loc, name, xdata); + return 0; +} + + +int +mdc_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + if (local->key) + mdc_inode_xatt_unset (this, local->fd->inode, local->key); + else + mdc_inode_xatt_invalidate (this, local->fd->inode); + + mdc_inode_iatt_invalidate (this, local->fd->inode); +out: + MDC_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata); + + return 0; +} + + +int +mdc_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + local->fd = fd_ref (fd); + + local->key = gf_strdup (name); + + STACK_WIND (frame, mdc_fremovexattr_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->fremovexattr, + fd, name, xdata); + return 0; +} + int mdc_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, @@ -1928,6 +2098,46 @@ int mdc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, } int +mdc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = 
frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); + +out: + MDC_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; +} + +int mdc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + mdc_local_t *local; + + local = mdc_local_get(frame); + local->fd = fd_ref(fd); + + STACK_WIND(frame, mdc_zerofill_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, len, + xdata); + + return 0; +} + + +int mdc_forget (xlator_t *this, inode_t *inode) { mdc_inode_wipe (this, inode); @@ -2053,10 +2263,13 @@ struct xlator_fops fops = { .fsetxattr = mdc_fsetxattr, .getxattr = mdc_getxattr, .fgetxattr = mdc_fgetxattr, + .removexattr = mdc_removexattr, + .fremovexattr= mdc_fremovexattr, .readdirp = mdc_readdirp, .readdir = mdc_readdir, .fallocate = mdc_fallocate, .discard = mdc_discard, + .zerofill = mdc_zerofill, }; diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c index 26b684e88..7e5b57278 100644 --- a/xlators/performance/open-behind/src/open-behind.c +++ b/xlators/performance/open-behind/src/open-behind.c @@ -720,6 +720,26 @@ err: } int +ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + call_stub_t *stub; + + stub = fop_zerofill_stub(frame, default_zerofill_resume, fd, + offset, len, xdata); + if (!stub) + goto err; + + open_and_resume(this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; +} + + +int ob_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, dict_t *xdata) { @@ -734,6 +754,8 @@ ob_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, fd = fd_lookup (loc->inode, 0); open_and_resume (this, fd, stub); + if (fd) + fd_unref (fd); return 0; err: @@ -758,6 +780,8 @@ ob_rename (call_frame_t *frame, xlator_t *this, loc_t *src, loc_t *dst, fd = fd_lookup (dst->inode, 0); open_and_resume (this, fd, stub); + if (fd) + fd_unref (fd); return 0; err: @@ -942,6 +966,7 @@ struct xlator_fops fops = { .fsetattr = ob_fsetattr, .fallocate = ob_fallocate, .discard = ob_discard, + .zerofill = ob_zerofill, .unlink = ob_unlink, .rename = ob_rename, .lk = ob_lk, diff --git a/xlators/performance/read-ahead/src/read-ahead.c b/xlators/performance/read-ahead/src/read-ahead.c index 241fa477f..069ab1f1a 100644 --- a/xlators/performance/read-ahead/src/read-ahead.c +++ b/xlators/performance/read-ahead/src/read-ahead.c @@ -993,6 +993,57 @@ unwind: } int +ra_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + GF_ASSERT (frame); + + STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, prebuf, + postbuf, xdata); + return 0; +} + +static int +ra_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + ra_file_t *file = NULL; + fd_t *iter_fd = NULL; + inode_t *inode = NULL; + uint64_t tmp_file = 0; + int32_t op_errno = EINVAL; + + GF_ASSERT (frame); + GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); + GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); + + inode = fd->inode; + + LOCK (&inode->lock); + { + list_for_each_entry (iter_fd, &inode->fd_list, inode_list) { + fd_ctx_get (iter_fd, this, &tmp_file); + file = (ra_file_t *)(long)tmp_file; + if 
(!file) + continue; + + flush_region(frame, file, offset, len, 1); + } + } + UNLOCK (&inode->lock); + + STACK_WIND (frame, ra_zerofill_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->zerofill, fd, + offset, len, xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT (zerofill, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + +int ra_priv_dump (xlator_t *this) { ra_conf_t *conf = NULL; @@ -1173,6 +1224,7 @@ struct xlator_fops fops = { .ftruncate = ra_ftruncate, .fstat = ra_fstat, .discard = ra_discard, + .zerofill = ra_zerofill, }; struct xlator_cbks cbks = { diff --git a/xlators/performance/readdir-ahead/Makefile.am b/xlators/performance/readdir-ahead/Makefile.am new file mode 100644 index 000000000..a985f42a8 --- /dev/null +++ b/xlators/performance/readdir-ahead/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/performance/readdir-ahead/src/Makefile.am b/xlators/performance/readdir-ahead/src/Makefile.am new file mode 100644 index 000000000..cdabd1428 --- /dev/null +++ b/xlators/performance/readdir-ahead/src/Makefile.am @@ -0,0 +1,15 @@ +xlator_LTLIBRARIES = readdir-ahead.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance + +readdir_ahead_la_LDFLAGS = -module -avoidversion + +readdir_ahead_la_SOURCES = readdir-ahead.c +readdir_ahead_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = readdir-ahead.h readdir-ahead-mem-types.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h new file mode 100644 index 000000000..39e2c5369 --- /dev/null +++ b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h @@ -0,0 +1,24 @@ +/* + Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + + +#ifndef __RDA_MEM_TYPES_H__ +#define __RDA_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_rda_mem_types_ { + gf_rda_mt_rda_local = gf_common_mt_end + 1, + gf_rda_mt_rda_fd_ctx, + gf_rda_mt_rda_priv, + gf_rda_mt_end +}; + +#endif diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c new file mode 100644 index 000000000..53e6756f0 --- /dev/null +++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c @@ -0,0 +1,560 @@ +/* + Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +/* + * performance/readdir-ahead preloads a local buffer with directory entries + * on opendir. The optimization involves using maximum sized gluster rpc + * requests (128k) to minimize overhead of smaller client requests. + * + * For example, fuse currently supports a maximum readdir buffer of 4k + * (regardless of the filesystem client's buffer size). 
readdir-ahead should + * effectively convert these smaller requests into fewer, larger sized requests + * for simple, sequential workloads (i.e., ls). + * + * The translator is currently designed to handle the simple, sequential case + * only. If a non-sequential directory read occurs, readdir-ahead disables + * preloads on the directory. + */ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "xlator.h" +#include "call-stub.h" +#include "readdir-ahead.h" +#include "readdir-ahead-mem-types.h" +#include "defaults.h" + +static int rda_fill_fd(call_frame_t *, xlator_t *, fd_t *); + +/* + * Get (or create) the fd context for storing prepopulated directory + * entries. + */ +static struct +rda_fd_ctx *get_rda_fd_ctx(fd_t *fd, xlator_t *this) +{ + uint64_t val; + struct rda_fd_ctx *ctx; + + LOCK(&fd->lock); + + if (__fd_ctx_get(fd, this, &val) < 0) { + ctx = GF_CALLOC(1, sizeof(struct rda_fd_ctx), + gf_rda_mt_rda_fd_ctx); + if (!ctx) + goto out; + + LOCK_INIT(&ctx->lock); + INIT_LIST_HEAD(&ctx->entries.list); + ctx->state = RDA_FD_NEW; + /* ctx offset values initialized to 0 */ + + if (__fd_ctx_set(fd, this, (uint64_t) ctx) < 0) { + GF_FREE(ctx); + ctx = NULL; + goto out; + } + } else { + ctx = (struct rda_fd_ctx *) val; + } +out: + UNLOCK(&fd->lock); + return ctx; +} + +/* + * Reset the tracking state of the context. + */ +static void +rda_reset_ctx(struct rda_fd_ctx *ctx) +{ + ctx->state = RDA_FD_NEW; + ctx->cur_offset = 0; + ctx->cur_size = 0; + ctx->next_offset = 0; + gf_dirent_free(&ctx->entries); +} + +/* + * Check whether we can handle a request. Offset verification is done by the + * caller, so we only check whether the preload buffer has completion status + * (including an error) or has some data to return. + */ +static gf_boolean_t +rda_can_serve_readdirp(struct rda_fd_ctx *ctx, size_t request_size) +{ + if ((ctx->state & RDA_FD_EOD) || + (ctx->state & RDA_FD_ERROR) || + (!(ctx->state & RDA_FD_PLUGGED) && (ctx->cur_size > 0))) + return _gf_true; + + return _gf_false; +} + +/* + * Serve a request from the fd dentry list based on the size of the request + * buffer. ctx must be locked. 
+ */ +static int32_t +__rda_serve_readdirp(xlator_t *this, gf_dirent_t *entries, size_t request_size, + struct rda_fd_ctx *ctx) +{ + gf_dirent_t *dirent, *tmp; + size_t dirent_size, size = 0; + int32_t count = 0; + struct rda_priv *priv = this->private; + + list_for_each_entry_safe(dirent, tmp, &ctx->entries.list, list) { + dirent_size = gf_dirent_size(dirent->d_name); + if (size + dirent_size > request_size) + break; + + size += dirent_size; + list_del_init(&dirent->list); + ctx->cur_size -= dirent_size; + + list_add_tail(&dirent->list, &entries->list); + ctx->cur_offset = dirent->d_off; + count++; + } + + if (ctx->cur_size <= priv->rda_low_wmark) + ctx->state |= RDA_FD_PLUGGED; + + return count; +} + +static int32_t +rda_readdirp_stub(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) +{ + gf_dirent_t entries; + int32_t ret; + struct rda_fd_ctx *ctx; + int op_errno = 0; + + ctx = get_rda_fd_ctx(fd, this); + INIT_LIST_HEAD(&entries.list); + ret = __rda_serve_readdirp(this, &entries, size, ctx); + + if (!ret && (ctx->state & RDA_FD_ERROR)) { + ret = -1; + op_errno = ctx->op_errno; + ctx->state &= ~RDA_FD_ERROR; + + /* + * the preload has stopped running in the event of an error, so + * pass all future requests along + */ + ctx->state |= RDA_FD_BYPASS; + } + + STACK_UNWIND_STRICT(readdirp, frame, ret, op_errno, &entries, xdata); + gf_dirent_free(&entries); + + return 0; +} + +static int32_t +rda_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + struct rda_fd_ctx *ctx; + call_stub_t *stub; + int fill = 0; + + ctx = get_rda_fd_ctx(fd, this); + if (!ctx) + goto err; + + if (ctx->state & RDA_FD_BYPASS) + goto bypass; + + LOCK(&ctx->lock); + + /* recheck now that we have the lock */ + if (ctx->state & RDA_FD_BYPASS) { + UNLOCK(&ctx->lock); + goto bypass; + } + + /* + * If a new read comes in at offset 0 and the buffer has been + * completed, reset the context and kickstart the filler again. + */ + if (!off && (ctx->state & RDA_FD_EOD) && (ctx->cur_size == 0)) { + rda_reset_ctx(ctx); + fill = 1; + } + + /* + * If a readdir occurs at an unexpected offset or we already have a + * request pending, admit defeat and just get out of the way. + */ + if (off != ctx->cur_offset || ctx->stub) { + ctx->state |= RDA_FD_BYPASS; + UNLOCK(&ctx->lock); + goto bypass; + } + + stub = fop_readdirp_stub(frame, rda_readdirp_stub, fd, size, off, xdata); + if (!stub) { + UNLOCK(&ctx->lock); + goto err; + } + + /* + * If we haven't bypassed the preload, this means we can either serve + * the request out of the preload or the request that enables us to do + * so is in flight... + */ + if (rda_can_serve_readdirp(ctx, size)) + call_resume(stub); + else + ctx->stub = stub; + + UNLOCK(&ctx->lock); + + if (fill) + rda_fill_fd(frame, this, fd); + + return 0; + +bypass: + STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata); + return 0; + +err: + STACK_UNWIND_STRICT(readdirp, frame, -1, ENOMEM, NULL, NULL); + return 0; +} + +static int32_t +rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + gf_dirent_t *dirent, *tmp; + struct rda_local *local = frame->local; + struct rda_fd_ctx *ctx = local->ctx; + struct rda_priv *priv = this->private; + int fill = 1; + + LOCK(&ctx->lock); + + /* Verify that the preload buffer is still pending on this data. 
*/ + if (ctx->next_offset != local->offset) { + gf_log(this->name, GF_LOG_ERROR, + "Out of sequence directory preload."); + ctx->state |= (RDA_FD_BYPASS|RDA_FD_ERROR); + ctx->op_errno = EUCLEAN; + + goto out; + } + + if (entries) { + list_for_each_entry_safe(dirent, tmp, &entries->list, list) { + list_del_init(&dirent->list); + /* must preserve entry order */ + list_add_tail(&dirent->list, &ctx->entries.list); + + ctx->cur_size += gf_dirent_size(dirent->d_name); + ctx->next_offset = dirent->d_off; + } + } + + if (ctx->cur_size >= priv->rda_high_wmark) + ctx->state &= ~RDA_FD_PLUGGED; + + if (!op_ret) { + /* we've hit eod */ + ctx->state &= ~RDA_FD_RUNNING; + ctx->state |= RDA_FD_EOD; + } else if (op_ret == -1) { + /* kill the preload and pend the error */ + ctx->state &= ~RDA_FD_RUNNING; + ctx->state |= RDA_FD_ERROR; + ctx->op_errno = op_errno; + } + + /* + * NOTE: The strict bypass logic in readdirp() means a pending request + * is always based on ctx->cur_offset. + */ + if (ctx->stub && + rda_can_serve_readdirp(ctx, ctx->stub->args.size)) { + call_resume(ctx->stub); + ctx->stub = NULL; + } + +out: + /* + * If we have been marked for bypass and have no pending stub, clear the + * run state so we stop preloading the context with entries. + */ + if ((ctx->state & RDA_FD_BYPASS) && !ctx->stub) + ctx->state &= ~RDA_FD_RUNNING; + + if (!(ctx->state & RDA_FD_RUNNING)) { + fill = 0; + STACK_DESTROY(ctx->fill_frame->root); + ctx->fill_frame = NULL; + } + + UNLOCK(&ctx->lock); + + if (fill) + rda_fill_fd(frame, this, local->fd); + + return 0; +} + +/* + * Start prepopulating the fd context with directory entries. + */ +static int +rda_fill_fd(call_frame_t *frame, xlator_t *this, fd_t *fd) +{ + call_frame_t *nframe = NULL; + struct rda_local *local = NULL; + struct rda_fd_ctx *ctx; + off_t offset; + struct rda_priv *priv = this->private; + + ctx = get_rda_fd_ctx(fd, this); + if (!ctx) + goto err; + + LOCK(&ctx->lock); + + if (ctx->state & RDA_FD_NEW) { + ctx->state &= ~RDA_FD_NEW; + ctx->state |= RDA_FD_RUNNING; + if (priv->rda_low_wmark) + ctx->state |= RDA_FD_PLUGGED; + } + + offset = ctx->next_offset; + + if (!ctx->fill_frame) { + nframe = copy_frame(frame); + if (!nframe) { + UNLOCK(&ctx->lock); + goto err; + } + + local = mem_get0(this->local_pool); + if (!local) { + UNLOCK(&ctx->lock); + goto err; + } + + local->ctx = ctx; + local->fd = fd; + nframe->local = local; + + ctx->fill_frame = nframe; + } else { + nframe = ctx->fill_frame; + local = nframe->local; + } + + local->offset = offset; + + UNLOCK(&ctx->lock); + + STACK_WIND(nframe, rda_fill_fd_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, priv->rda_req_size, + offset, NULL); + + return 0; + +err: + if (nframe) + FRAME_DESTROY(nframe); + + return -1; +} + +static int32_t +rda_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) +{ + if (!op_ret) + rda_fill_fd(frame, this, fd); + + STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, xdata); + return 0; +} + +static int32_t +rda_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) +{ + STACK_WIND(frame, rda_opendir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->opendir, loc, fd, xdata); + return 0; +} + +static int32_t +rda_releasedir(xlator_t *this, fd_t *fd) +{ + uint64_t val; + struct rda_fd_ctx *ctx; + + if (fd_ctx_del(fd, this, &val) < 0) + return -1; + + ctx = (struct rda_fd_ctx *) val; + if (!ctx) + return 0; + + rda_reset_ctx(ctx); + + if (ctx->fill_frame) 
+ STACK_DESTROY(ctx->fill_frame->root); + + if (ctx->stub) + gf_log(this->name, GF_LOG_ERROR, + "released a directory with a pending stub"); + + GF_FREE(ctx); + return 0; +} + +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; + + if (!this) + goto out; + + ret = xlator_mem_acct_init(this, gf_rda_mt_end + 1); + + if (ret != 0) + gf_log(this->name, GF_LOG_ERROR, "Memory accounting init" + "failed"); + +out: + return ret; +} + +int +reconfigure(xlator_t *this, dict_t *options) +{ + struct rda_priv *priv = this->private; + + GF_OPTION_RECONF("rda-request-size", priv->rda_req_size, options, + uint32, err); + GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options, size, + err); + GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options, size, + err); + + return 0; +err: + return -1; +} + +int +init(xlator_t *this) +{ + struct rda_priv *priv = NULL; + + GF_VALIDATE_OR_GOTO("readdir-ahead", this, err); + + if (!this->children || this->children->next) { + gf_log(this->name, GF_LOG_ERROR, + "FATAL: readdir-ahead not configured with exactly one" + " child"); + goto err; + } + + if (!this->parents) { + gf_log(this->name, GF_LOG_WARNING, + "dangling volume. check volfile "); + } + + priv = GF_CALLOC(1, sizeof(struct rda_priv), gf_rda_mt_rda_priv); + if (!priv) + goto err; + this->private = priv; + + this->local_pool = mem_pool_new(struct rda_local, 32); + if (!this->local_pool) + goto err; + + GF_OPTION_INIT("rda-request-size", priv->rda_req_size, uint32, err); + GF_OPTION_INIT("rda-low-wmark", priv->rda_low_wmark, size, err); + GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size, err); + + return 0; + +err: + if (this->local_pool) + mem_pool_destroy(this->local_pool); + if (priv) + GF_FREE(priv); + + return -1; +} + + +void +fini(xlator_t *this) +{ + GF_VALIDATE_OR_GOTO ("readdir-ahead", this, out); + + GF_FREE(this->private); + +out: + return; +} + +struct xlator_fops fops = { + .opendir = rda_opendir, + .readdirp = rda_readdirp, +}; + +struct xlator_cbks cbks = { + .releasedir = rda_releasedir, +}; + +struct volume_options options[] = { + { .key = {"rda-request-size"}, + .type = GF_OPTION_TYPE_INT, + .min = 4096, + .max = 131072, + .default_value = "131072", + .description = "readdir-ahead request size", + }, + { .key = {"rda-low-wmark"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 0, + .max = 10 * GF_UNIT_MB, + .default_value = "4096", + .description = "the value under which we plug", + }, + { .key = {"rda-high-wmark"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 0, + .max = 100 * GF_UNIT_MB, + .default_value = "131072", + .description = "the value over which we unplug", + }, + { .key = {NULL} }, +}; + diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.h b/xlators/performance/readdir-ahead/src/readdir-ahead.h new file mode 100644 index 000000000..e48786dae --- /dev/null +++ b/xlators/performance/readdir-ahead/src/readdir-ahead.h @@ -0,0 +1,46 @@ +/* + Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef __READDIR_AHEAD_H +#define __READDIR_AHEAD_H + +/* state flags */ +#define RDA_FD_NEW (1 << 0) +#define RDA_FD_RUNNING (1 << 1) +#define RDA_FD_EOD (1 << 2) +#define RDA_FD_ERROR (1 << 3) +#define RDA_FD_BYPASS (1 << 4) +#define RDA_FD_PLUGGED (1 << 5) + +struct rda_fd_ctx { + off_t cur_offset; /* current head of the ctx */ + size_t cur_size; /* current size of the preload */ + off_t next_offset; /* tail of the ctx */ + uint32_t state; + gf_lock_t lock; + gf_dirent_t entries; + call_frame_t *fill_frame; + call_stub_t *stub; + int op_errno; +}; + +struct rda_local { + struct rda_fd_ctx *ctx; + fd_t *fd; + off_t offset; +}; + +struct rda_priv { + uint32_t rda_req_size; + uint64_t rda_low_wmark; + uint64_t rda_high_wmark; +}; + +#endif /* __READDIR_AHEAD_H */ diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c index 6a2fcc40f..95c5921c6 100644 --- a/xlators/performance/write-behind/src/write-behind.c +++ b/xlators/performance/write-behind/src/write-behind.c @@ -213,11 +213,7 @@ wb_fd_err (fd_t *fd, xlator_t *this, int32_t *op_errno) int32_t tmp = 0; if (fd_ctx_get (fd, this, &value) == 0) { - if (value != EBADF) { - fd_ctx_set (fd, this, EBADF); - } - - if (op_errno != NULL) { + if (op_errno) { tmp = value; *op_errno = tmp; } @@ -749,7 +745,7 @@ wb_fulfill_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } while (0) -void +int wb_fulfill_head (wb_inode_t *wb_inode, wb_request_t *head) { struct iovec vector[MAX_VECTOR_COUNT]; @@ -799,22 +795,23 @@ wb_fulfill_head (wb_inode_t *wb_inode, wb_request_t *head) head->stub->args.flags, head->stub->args.iobref, NULL); - return; + return 0; err: if (!fderr) { /* frame creation failure */ - wb_fulfill_err (head, ENOMEM); + fderr = ENOMEM; + wb_fulfill_err (head, fderr); } wb_head_done (head); - return; + return fderr; } #define NEXT_HEAD(head, req) do { \ if (head) \ - wb_fulfill_head (wb_inode, head); \ + ret |= wb_fulfill_head (wb_inode, head); \ head = req; \ expected_offset = req->stub->args.offset + \ req->write_size; \ @@ -823,7 +820,7 @@ err: } while (0) -void +int wb_fulfill (wb_inode_t *wb_inode, list_head_t *liabilities) { wb_request_t *req = NULL; @@ -833,6 +830,7 @@ wb_fulfill (wb_inode_t *wb_inode, list_head_t *liabilities) off_t expected_offset = 0; size_t curr_aggregate = 0; size_t vector_count = 0; + int ret = 0; conf = wb_inode->this->private; @@ -876,8 +874,9 @@ wb_fulfill (wb_inode_t *wb_inode, list_head_t *liabilities) } if (head) - wb_fulfill_head (wb_inode, head); - return; + ret |= wb_fulfill_head (wb_inode, head); + + return ret; } @@ -1162,27 +1161,35 @@ wb_process_queue (wb_inode_t *wb_inode) list_head_t tasks = {0, }; list_head_t lies = {0, }; list_head_t liabilities = {0, }; + int retry = 0; INIT_LIST_HEAD (&tasks); INIT_LIST_HEAD (&lies); INIT_LIST_HEAD (&liabilities); - LOCK (&wb_inode->lock); - { - __wb_preprocess_winds (wb_inode); + do { + LOCK (&wb_inode->lock); + { + __wb_preprocess_winds (wb_inode); - __wb_pick_winds (wb_inode, &tasks, &liabilities); + __wb_pick_winds (wb_inode, &tasks, &liabilities); - __wb_pick_unwinds (wb_inode, &lies); + __wb_pick_unwinds (wb_inode, &lies); - } - UNLOCK (&wb_inode->lock); + } + UNLOCK (&wb_inode->lock); - wb_do_unwinds (wb_inode, &lies); + wb_do_unwinds (wb_inode, &lies); - wb_do_winds (wb_inode, &tasks); + wb_do_winds (wb_inode, &tasks); - wb_fulfill (wb_inode, &liabilities); + /* fd might've been marked bad due to previous errors. 
+ * Since, caller of wb_process_queue might be the last fop on + * inode, make sure we keep processing request queue, till there + * are no requests left. + */ + retry = wb_fulfill (wb_inode, &liabilities); + } while (retry); return; } @@ -1413,7 +1420,7 @@ wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) if (!wb_enqueue (wb_inode, stub)) goto unwind; - wb_process_queue (wb_inode); + wb_process_queue (wb_inode); return 0;
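The preload plug/unplug behaviour of the new readdir-ahead translator above is driven by rda-low-wmark and rda-high-wmark (see __rda_serve_readdirp() and rda_fill_fd_cbk()). The following rough, self-contained simulation mirrors just that bookkeeping, using the option defaults from the patch; the sim_* names are made up and EOD/error handling is left out.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define RDA_FD_PLUGGED (1 << 5)

struct sim_ctx {
        size_t   cur_size;    /* bytes of preloaded dentries */
        uint32_t state;
        size_t   low_wmark;   /* plug (stop serving) at or below this */
        size_t   high_wmark;  /* unplug (start serving) at or above this */
};

/* mirrors the watermark check in rda_fill_fd_cbk() */
static void
sim_preload (struct sim_ctx *c, size_t bytes)
{
        c->cur_size += bytes;
        if (c->cur_size >= c->high_wmark)
                c->state &= ~RDA_FD_PLUGGED;
}

/* mirrors the watermark check at the end of __rda_serve_readdirp() */
static void
sim_serve (struct sim_ctx *c, size_t bytes)
{
        if (bytes > c->cur_size)
                bytes = c->cur_size;
        c->cur_size -= bytes;
        if (c->cur_size <= c->low_wmark)
                c->state |= RDA_FD_PLUGGED;
}

/* simplified rda_can_serve_readdirp(): EOD and error states ignored */
static bool
sim_can_serve (struct sim_ctx *c)
{
        return !(c->state & RDA_FD_PLUGGED) && c->cur_size > 0;
}

int
main (void)
{
        /* defaults from options[]: 4k low watermark, 128k high watermark */
        struct sim_ctx c = { 0, RDA_FD_PLUGGED, 4096, 131072 };

        sim_preload (&c, 131072);            /* one full 128k readdirp reply */
        printf ("after preload: size=%zu can_serve=%d\n",
                c.cur_size, (int) sim_can_serve (&c));

        while (sim_can_serve (&c))           /* drain with 4k FUSE-sized reads */
                sim_serve (&c, 4096);

        printf ("after drain:   size=%zu (plugged again, refill resumes)\n",
                c.cur_size);
        return 0;
}

Draining with 4k requests plugs the fd again once only a low-watermark's worth of entries remains, at which point the in-flight rda_fill_fd() preload tops the buffer back up, so small sequential FUSE readdir requests keep being served from the local buffer instead of each turning into a separate readdirp RPC.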
