diff options
Diffstat (limited to 'xlators/performance')
42 files changed, 7627 insertions, 10904 deletions
diff --git a/xlators/performance/Makefile.am b/xlators/performance/Makefile.am index e91d5f6ef..a494190ba 100644 --- a/xlators/performance/Makefile.am +++ b/xlators/performance/Makefile.am @@ -1,3 +1,3 @@ -SUBDIRS = write-behind read-ahead io-threads io-cache symlink-cache quick-read stat-prefetch +SUBDIRS = write-behind read-ahead readdir-ahead io-threads io-cache symlink-cache quick-read md-cache open-behind CLEANFILES = diff --git a/xlators/performance/io-cache/src/Makefile.am b/xlators/performance/io-cache/src/Makefile.am index 6dd270e8f..155be9988 100644 --- a/xlators/performance/io-cache/src/Makefile.am +++ b/xlators/performance/io-cache/src/Makefile.am @@ -1,14 +1,16 @@ xlator_LTLIBRARIES = io-cache.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance -io_cache_la_LDFLAGS = -module -avoidversion +io_cache_la_LDFLAGS = -module -avoid-version io_cache_la_SOURCES = io-cache.c page.c ioc-inode.c io_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la noinst_HEADERS = io-cache.h ioc-mem-types.h -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ - -I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/rbtree -shared -nostartfiles $(GF_CFLAGS) +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(CONTRIBDIR)/rbtree + +AM_CFLAGS = -Wall $(GF_CFLAGS) CLEANFILES = diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c index 90c14ea7d..201777b38 100644 --- a/xlators/performance/io-cache/src/io-cache.c +++ b/xlators/performance/io-cache/src/io-cache.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. 
- GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #ifndef _CONFIG_H @@ -37,13 +28,10 @@ int ioc_log2_page_size; uint32_t ioc_get_priority (ioc_table_t *table, const char *path); -uint32_t -ioc_get_priority (ioc_table_t *table, const char *path); - struct volume_options options[]; -inline uint32_t +static inline uint32_t ioc_hashfn (void *data, int len) { off_t offset; @@ -53,7 +41,7 @@ ioc_hashfn (void *data, int len) return (offset >> ioc_log2_page_size); } -inline ioc_inode_t * +static inline ioc_inode_t * ioc_inode_reupdate (ioc_inode_t *ioc_inode) { ioc_table_t *table = NULL; @@ -66,7 +54,7 @@ ioc_inode_reupdate (ioc_inode_t *ioc_inode) return ioc_inode; } -inline ioc_inode_t * +static inline ioc_inode_t * ioc_get_inode (dict_t *dict, char *name) { ioc_inode_t *ioc_inode = NULL; @@ -157,15 +145,16 @@ ioc_inode_flush (ioc_inode_t *ioc_inode) int32_t ioc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop) + struct iatt *preop, struct iatt *postop, dict_t *xdata) { - STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop); + 
STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop, + xdata); return 0; } int32_t ioc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid) + struct iatt *stbuf, int32_t valid, dict_t *xdata) { uint64_t ioc_inode = 0; @@ -177,7 +166,7 @@ ioc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); STACK_WIND (frame, ioc_setattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid); + FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata); return 0; } @@ -185,7 +174,7 @@ ioc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t ioc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *stbuf, dict_t *dict, struct iatt *postparent) + struct iatt *stbuf, dict_t *xdata, struct iatt *postparent) { ioc_inode_t *ioc_inode = NULL; ioc_table_t *table = NULL; @@ -264,19 +253,18 @@ out: } STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, stbuf, - dict, postparent); + xdata, postparent); return 0; } int32_t ioc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *xattr_req) + dict_t *xdata) { ioc_local_t *local = NULL; int32_t op_errno = -1, ret = -1; - local = GF_CALLOC (1, sizeof (*local), - gf_ioc_mt_ioc_local_t); + local = mem_get0 (this->local_pool); if (local == NULL) { op_errno = ENOMEM; gf_log (this->name, GF_LOG_ERROR, "out of memory"); @@ -293,7 +281,7 @@ ioc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, frame->local = local; STACK_WIND (frame, ioc_lookup_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->lookup, loc, xattr_req); + FIRST_CHILD (this)->fops->lookup, loc, xdata); return 0; @@ -325,6 +313,20 @@ ioc_forget (xlator_t *this, inode_t *inode) return 0; } +static int32_t +ioc_invalidate(xlator_t *this, inode_t *inode) +{ + uint64_t ioc_addr = 0; + ioc_inode_t *ioc_inode = NULL; + + 
inode_ctx_get(inode, this, (uint64_t *) &ioc_addr); + ioc_inode = (void *) ioc_addr; + + if (ioc_inode) + ioc_inode_flush(ioc_inode); + + return 0; +} /* * ioc_cache_validate_cbk - @@ -339,7 +341,8 @@ ioc_forget (xlator_t *this, inode_t *inode) */ int32_t ioc_cache_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *stbuf) + int32_t op_ret, int32_t op_errno, struct iatt *stbuf, + dict_t *xdata) { ioc_local_t *local = NULL; ioc_inode_t *ioc_inode = NULL; @@ -455,8 +458,7 @@ ioc_cache_validate (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, int32_t ret = 0; local = frame->local; - validate_local = GF_CALLOC (1, sizeof (ioc_local_t), - gf_ioc_mt_ioc_local_t); + validate_local = mem_get0 (THIS->local_pool); if (validate_local == NULL) { ret = -1; local->op_ret = -1; @@ -471,7 +473,7 @@ ioc_cache_validate (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, ret = -1; local->op_ret = -1; local->op_errno = ENOMEM; - GF_FREE (validate_local); + mem_put (validate_local); gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR, "out of memory"); goto out; @@ -483,13 +485,13 @@ ioc_cache_validate (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, STACK_WIND (validate_frame, ioc_cache_validate_cbk, FIRST_CHILD (frame->this), - FIRST_CHILD (frame->this)->fops->fstat, fd); + FIRST_CHILD (frame->this)->fops->fstat, fd, NULL); out: return ret; } -inline uint32_t +static inline uint32_t is_match (const char *path, const char *pattern) { int32_t ret = 0; @@ -530,7 +532,7 @@ ioc_get_priority (ioc_table_t *table, const char *path) */ int32_t ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, fd_t *fd) + int32_t op_errno, fd_t *fd, dict_t *xdata) { uint64_t tmp_ioc_inode = 0; ioc_local_t *local = NULL; @@ -551,6 +553,13 @@ ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, inode_ctx_get (fd->inode, this, &tmp_ioc_inode); ioc_inode = 
(ioc_inode_t *)(long)tmp_ioc_inode; + //TODO: see why inode context is NULL and handle it. + if (!ioc_inode) { + gf_log (this->name, GF_LOG_ERROR, "inode context is " + "NULL (%s)", uuid_utoa (fd->inode->gfid)); + goto out; + } + ioc_table_lock (ioc_inode->table); { list_move_tail (&ioc_inode->inode_lru, @@ -575,10 +584,6 @@ ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, */ fd_ctx_set (fd, this, 1); } - if ((local->wbflags & GF_OPEN_NOWB) != 0) { - /* disable caching as asked by NFS */ - fd_ctx_set (fd, this, 1); - } /* weight = 0, we disable caching on it */ if (weight == 0) { @@ -589,10 +594,10 @@ ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, } out: - GF_FREE (local); + mem_put (local); frame->local = NULL; - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd); + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); return 0; } @@ -614,7 +619,7 @@ int32_t ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { ioc_local_t *local = NULL; ioc_table_t *table = NULL; @@ -649,6 +654,10 @@ ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, || ((table->max_file_size > 0) && (table->max_file_size < ioc_inode->ia_size))) { ret = fd_ctx_set (fd, this, 1); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to set fd ctx", + local->file_loc.path); } } ioc_inode_unlock (ioc_inode); @@ -657,25 +666,35 @@ ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, (uint64_t)(long)ioc_inode); /* If O_DIRECT open, we disable caching on it */ - if (local->flags & O_DIRECT) + if (local->flags & O_DIRECT) { /* * O_DIRECT is only for one fd, not the inode * as a whole */ ret = fd_ctx_set (fd, this, 1); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to set fd ctx", + 
local->file_loc.path); + } /* if weight == 0, we disable caching on it */ - if (!weight) + if (!weight) { /* we allow a pattern-matched cache disable this way */ ret = fd_ctx_set (fd, this, 1); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to set fd ctx", + local->file_loc.path); + } } out: frame->local = NULL; - GF_FREE (local); + mem_put (local); STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, - preparent, postparent); + preparent, postparent, xdata); return 0; } @@ -685,7 +704,7 @@ int32_t ioc_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { ioc_local_t *local = NULL; ioc_table_t *table = NULL; @@ -725,23 +744,22 @@ out: frame->local = NULL; loc_wipe (&local->file_loc); - GF_FREE (local); + mem_put (local); STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf, - preparent, postparent); + preparent, postparent, xdata); return 0; } int ioc_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, dict_t *params) + dev_t rdev, mode_t umask, dict_t *xdata) { ioc_local_t *local = NULL; int32_t op_errno = -1, ret = -1; - local = GF_CALLOC (1, sizeof (*local), - gf_ioc_mt_ioc_local_t); + local = mem_get0 (this->local_pool); if (local == NULL) { op_errno = ENOMEM; gf_log (this->name, GF_LOG_ERROR, "out of memory"); @@ -760,17 +778,17 @@ ioc_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, STACK_WIND (frame, ioc_mknod_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, - loc, mode, rdev, params); + loc, mode, rdev, umask, xdata); return 0; unwind: if (local != NULL) { loc_wipe (&local->file_loc); - GF_FREE (local); + mem_put (local); } STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, NULL, - NULL, NULL); + NULL, NULL, NULL); return 0; } @@ -786,27 +804,27 @@ unwind: */ int32_t ioc_open 
(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, int32_t wbflags) + fd_t *fd, dict_t *xdata) { ioc_local_t *local = NULL; - local = GF_CALLOC (1, sizeof (ioc_local_t), gf_ioc_mt_ioc_local_t); + local = mem_get0 (this->local_pool); if (local == NULL) { gf_log (this->name, GF_LOG_ERROR, "out of memory"); - STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL); + STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL, NULL); return 0; } local->flags = flags; local->file_loc.path = loc->path; local->file_loc.inode = loc->inode; - local->wbflags = wbflags; frame->local = local; STACK_WIND (frame, ioc_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags); + FIRST_CHILD(this)->fops->open, loc, flags, fd, + xdata); return 0; } @@ -823,15 +841,15 @@ ioc_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, */ int32_t ioc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, fd_t *fd, dict_t *params) + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { ioc_local_t *local = NULL; - local = GF_CALLOC (1, sizeof (ioc_local_t), gf_ioc_mt_ioc_local_t); + local = mem_get0 (this->local_pool); if (local == NULL) { gf_log (this->name, GF_LOG_ERROR, "out of memory"); STACK_UNWIND_STRICT (create, frame, -1, ENOMEM, NULL, NULL, - NULL, NULL, NULL); + NULL, NULL, NULL, NULL); return 0; } @@ -841,7 +859,7 @@ ioc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, STACK_WIND (frame, ioc_create_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->create, loc, flags, mode, - fd, params); + umask, fd, xdata); return 0; } @@ -878,10 +896,10 @@ int32_t ioc_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iovec *vector, int32_t count, struct iatt *stbuf, - struct iobref *iobref) + struct iobref *iobref, dict_t *xdata) { STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count, - stbuf, iobref); + stbuf, 
iobref, xdata); return 0; } @@ -988,7 +1006,8 @@ ioc_dispatch_requests (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, "cache hit for trav_offset=%" PRId64"/local_offset=%"PRId64"", trav_offset, local_offset); - waitq = __ioc_page_wakeup (trav); + waitq = __ioc_page_wakeup (trav, + trav->op_errno); } else { /* if waitq already exists, fstat * revalidate is @@ -1005,7 +1024,8 @@ ioc_dispatch_requests (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, local->op_errno = -ret; need_validate = 0; - waitq = __ioc_page_wakeup (trav); + waitq = __ioc_page_wakeup (trav, + trav->op_errno); ioc_inode_unlock (ioc_inode); ioc_waitq_return (waitq); @@ -1031,13 +1051,14 @@ ioc_dispatch_requests (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, need_validate = 0; gf_log (frame->this->name, GF_LOG_TRACE, "sending validate request for " - "inode(%"PRId64") at offset=%"PRId64"", - fd->inode->ino, trav_offset); + "inode(%s) at offset=%"PRId64"", + uuid_utoa (fd->inode->gfid), trav_offset); ret = ioc_cache_validate (frame, ioc_inode, fd, trav); if (ret == -1) { ioc_inode_lock (ioc_inode); { - waitq = __ioc_page_wakeup (trav); + waitq = __ioc_page_wakeup (trav, + trav->op_errno); } ioc_inode_unlock (ioc_inode); @@ -1073,14 +1094,13 @@ out: */ int32_t ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, - size_t size, off_t offset) + size_t size, off_t offset, uint32_t flags, dict_t *xdata) { uint64_t tmp_ioc_inode = 0; ioc_inode_t *ioc_inode = NULL; ioc_local_t *local = NULL; uint32_t weight = 0; ioc_table_t *table = NULL; - uint32_t num_pages = 0; int32_t op_errno = -1; if (!this) { @@ -1094,7 +1114,7 @@ ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, STACK_WIND (frame, ioc_readv_disabled_cbk, FIRST_CHILD (frame->this), FIRST_CHILD (frame->this)->fops->readv, fd, size, - offset); + offset, flags, xdata); return 0; } @@ -1107,29 +1127,6 @@ ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, goto out; } - - ioc_table_lock (table); - { - if 
(!table->mem_pool) { - - num_pages = (table->cache_size / table->page_size) - + ((table->cache_size % table->page_size) - ? 1 : 0); - - table->mem_pool - = mem_pool_new (rbthash_entry_t, num_pages); - - if (!table->mem_pool) { - gf_log (this->name, GF_LOG_ERROR, - "Unable to allocate mem_pool"); - op_errno = ENOMEM; - ioc_table_unlock (table); - goto out; - } - } - } - ioc_table_unlock (table); - ioc_inode_lock (ioc_inode); { if (!ioc_inode->cache.page_table) { @@ -1153,12 +1150,11 @@ ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, STACK_WIND (frame, ioc_readv_disabled_cbk, FIRST_CHILD (frame->this), FIRST_CHILD (frame->this)->fops->readv, fd, size, - offset); + offset, flags, xdata); return 0; } - local = (ioc_local_t *) GF_CALLOC (1, sizeof (ioc_local_t), - gf_ioc_mt_ioc_local_t); + local = mem_get0 (this->local_pool); if (local == NULL) { gf_log (this->name, GF_LOG_ERROR, "out of memory"); op_errno = ENOMEM; @@ -1191,7 +1187,8 @@ ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, return 0; out: - STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL); + STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, + NULL); return 0; } @@ -1208,7 +1205,7 @@ out: int32_t ioc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { ioc_local_t *local = NULL; uint64_t ioc_inode = 0; @@ -1219,7 +1216,8 @@ ioc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (ioc_inode) ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf); + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); return 0; } @@ -1237,16 +1235,16 @@ ioc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t ioc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int32_t count, off_t 
offset, - struct iobref *iobref) + uint32_t flags, struct iobref *iobref, dict_t *xdata) { ioc_local_t *local = NULL; uint64_t ioc_inode = 0; - local = GF_CALLOC (1, sizeof (ioc_local_t), gf_ioc_mt_ioc_local_t); + local = mem_get0 (this->local_pool); if (local == NULL) { gf_log (this->name, GF_LOG_ERROR, "out of memory"); - STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL); + STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; } @@ -1260,7 +1258,7 @@ ioc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, STACK_WIND (frame, ioc_writev_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, - iobref); + flags, iobref, xdata); return 0; } @@ -1279,11 +1277,11 @@ ioc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t ioc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, - postbuf); + postbuf, xdata); return 0; } @@ -1302,11 +1300,11 @@ ioc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t ioc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, - postbuf); + postbuf, xdata); return 0; } @@ -1321,7 +1319,8 @@ ioc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, * */ int32_t -ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) +ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { uint64_t ioc_inode = 0; @@ -1331,7 +1330,7 @@ ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); STACK_WIND (frame, ioc_truncate_cbk, 
FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, offset); + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); return 0; } @@ -1345,7 +1344,8 @@ ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) * */ int32_t -ioc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) +ioc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { uint64_t ioc_inode = 0; @@ -1355,21 +1355,21 @@ ioc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); STACK_WIND (frame, ioc_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset); + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); return 0; } int32_t ioc_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct gf_flock *lock) + int32_t op_errno, struct gf_flock *lock, dict_t *xdata) { - STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock); + STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata); return 0; } int32_t ioc_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, - struct gf_flock *lock) + struct gf_flock *lock, dict_t *xdata) { ioc_inode_t *ioc_inode = NULL; uint64_t tmp_inode = 0; @@ -1379,7 +1379,7 @@ ioc_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, if (!ioc_inode) { gf_log (this->name, GF_LOG_DEBUG, "inode context is NULL: returning EBADFD"); - STACK_UNWIND_STRICT (lk, frame, -1, EBADFD, NULL); + STACK_UNWIND_STRICT (lk, frame, -1, EBADFD, NULL, NULL); return 0; } @@ -1390,11 +1390,92 @@ ioc_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, ioc_inode_unlock (ioc_inode); STACK_WIND (frame, ioc_lk_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->lk, fd, cmd, lock); + FIRST_CHILD (this)->fops->lk, fd, cmd, lock, xdata); + + return 0; +} + +int +ioc_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int 
op_errno, gf_dirent_t *entries, dict_t *xdata) +{ + gf_dirent_t *entry = NULL; + + if (op_ret <= 0) + goto unwind; + + list_for_each_entry (entry, &entries->list, list) { + /* TODO: fill things */ + } + +unwind: + STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata); + + return 0; +} +int +ioc_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *dict) +{ + STACK_WIND (frame, ioc_readdirp_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, + fd, size, offset, dict); + + return 0; +} +static int32_t +ioc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post, xdata); + return 0; +} + +static int32_t +ioc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + uint64_t ioc_inode = 0; + + inode_ctx_get (fd->inode, this, &ioc_inode); + + if (ioc_inode) + ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + + STACK_WIND(frame, ioc_discard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); + return 0; +} + +static int32_t +ioc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + STACK_UNWIND_STRICT(zerofill, frame, op_ret, + op_errno, pre, post, xdata); return 0; } +static int32_t +ioc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + uint64_t ioc_inode = 0; + + inode_ctx_get (fd->inode, this, &ioc_inode); + + if (ioc_inode) + ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + + STACK_WIND(frame, ioc_zerofill_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); + return 0; +} + + int32_t ioc_get_priority_list (const char *opt_str, struct list_head *first) { @@ -1475,13 
+1556,9 @@ ioc_get_priority_list (const char *opt_str, struct list_head *first) stripe_str = strtok_r (NULL, ",", &tmp_str); } out: - if (string != NULL) { - GF_FREE (string); - } + GF_FREE (string); - if (dup_str != NULL) { - GF_FREE (dup_str); - } + GF_FREE (dup_str); if (max_pri == -1) { list_for_each_entry_safe (curr, tmp, first, list) { @@ -1514,13 +1591,50 @@ mem_acct_init (xlator_t *this) } +static gf_boolean_t +check_cache_size_ok (xlator_t *this, uint64_t cache_size) +{ + gf_boolean_t ret = _gf_true; + uint64_t total_mem = 0; + uint64_t max_cache_size = 0; + volume_option_t *opt = NULL; + + GF_ASSERT (this); + opt = xlator_volume_option_get (this, "cache-size"); + if (!opt) { + ret = _gf_false; + gf_log (this->name, GF_LOG_ERROR, + "could not get cache-size option"); + goto out; + } + + total_mem = get_mem_size (); + if (-1 == total_mem) + max_cache_size = opt->max; + else + max_cache_size = total_mem; + + gf_log (this->name, GF_LOG_DEBUG, "Max cache size is %"PRIu64, + max_cache_size); + + if (cache_size > max_cache_size) { + ret = _gf_false; + gf_log (this->name, GF_LOG_ERROR, "Cache size %"PRIu64 + " is greater than the max size of %"PRIu64, + cache_size, max_cache_size); + goto out; + } +out: + return ret; +} + int reconfigure (xlator_t *this, dict_t *options) { data_t *data = NULL; ioc_table_t *table = NULL; int ret = -1; - + uint64_t cache_size_new = 0; if (!this || !this->private) goto out; @@ -1531,9 +1645,6 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("cache-timeout", table->cache_timeout, options, int32, unlock); - GF_OPTION_RECONF ("cache-size", table->cache_size, - options, size, unlock); - data = dict_get (options, "priority"); if (data) { char *option_list = data_to_str (data); @@ -1566,6 +1677,16 @@ reconfigure (xlator_t *this, dict_t *options) goto unlock; } + GF_OPTION_RECONF ("cache-size", cache_size_new, + options, size, unlock); + if (!check_cache_size_ok (this, cache_size_new)) { + ret = -1; + gf_log (this->name, 
GF_LOG_ERROR, + "Not reconfiguring cache-size"); + goto unlock; + } + table->cache_size = cache_size_new; + ret = 0; } unlock: @@ -1589,6 +1710,7 @@ init (xlator_t *this) int32_t ret = -1; glusterfs_ctx_t *ctx = NULL; data_t *data = 0; + uint32_t num_pages = 0; xl_options = this->options; @@ -1621,6 +1743,11 @@ init (xlator_t *this) GF_OPTION_INIT ("max-file-size", table->max_file_size, size, out); + if (!check_cache_size_ok (this, table->cache_size)) { + ret = -1; + goto out; + } + INIT_LIST_HEAD (&table->priority_list); table->max_pri = 1; data = dict_get (xl_options, "priority"); @@ -1659,8 +1786,28 @@ init (xlator_t *this) for (index = 0; index < (table->max_pri); index++) INIT_LIST_HEAD (&table->inode_lru[index]); + this->local_pool = mem_pool_new (ioc_local_t, 64); + if (!this->local_pool) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "failed to create local_t's memory pool"); + goto out; + } + pthread_mutex_init (&table->table_lock, NULL); this->private = table; + + num_pages = (table->cache_size / table->page_size) + + ((table->cache_size % table->page_size) + ? 
1 : 0); + + table->mem_pool = mem_pool_new (rbthash_entry_t, num_pages); + if (!table->mem_pool) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to allocate mem_pool"); + goto out; + } + ret = 0; ctx = this->ctx; @@ -1689,8 +1836,7 @@ ioc_page_waitq_dump (ioc_page_t *page, char *prefix) while (trav) { frame = trav->data; - gf_proc_dump_build_key (key, prefix, - "waitq.frame[%d]", i++); + sprintf (key, "waitq.frame[%d]", i++); gf_proc_dump_write (key, "%"PRId64, frame->root->unique); trav = trav->next; @@ -1709,9 +1855,8 @@ __ioc_inode_waitq_dump (ioc_inode_t *ioc_inode, char *prefix) while (trav) { page = trav->data; - gf_proc_dump_build_key (key, prefix, - "cache-validation-waitq.page[%d].offset", - i++); + + sprintf (key, "cache-validation-waitq.page[%d].offset", i++); gf_proc_dump_write (key, "%"PRId64, page->offset); trav = trav->next; @@ -1721,21 +1866,32 @@ __ioc_inode_waitq_dump (ioc_inode_t *ioc_inode, char *prefix) void __ioc_page_dump (ioc_page_t *page, char *prefix) { - char key[GF_DUMP_MAX_BUF_LEN] = {0, }; - ioc_page_lock (page); + int ret = -1; + + if (!page) + return; + /* ioc_page_lock can be used to hold the mutex. But in statedump + * its better to use trylock to avoid deadlocks. + */ + ret = pthread_mutex_trylock (&page->page_lock); + if (ret) + goto out; { - gf_proc_dump_build_key (key, prefix, "offset"); - gf_proc_dump_write (key, "%"PRId64, page->offset); - gf_proc_dump_build_key (key, prefix, "size"); - gf_proc_dump_write (key, "%"PRId64, page->size); - gf_proc_dump_build_key (key, prefix, "dirty"); - gf_proc_dump_write (key, "%s", page->dirty ? "yes" : "no"); - gf_proc_dump_build_key (key, prefix, "ready"); - gf_proc_dump_write (key, "%s", page->ready ? "yes" : "no"); + gf_proc_dump_write ("offset", "%"PRId64, page->offset); + gf_proc_dump_write ("size", "%"PRId64, page->size); + gf_proc_dump_write ("dirty", "%s", page->dirty ? "yes" : "no"); + gf_proc_dump_write ("ready", "%s", page->ready ? 
"yes" : "no"); ioc_page_waitq_dump (page, prefix); } - ioc_page_unlock (page); + pthread_mutex_unlock (&page->page_lock); + +out: + if (ret && page) + gf_proc_dump_write ("Unable to dump the page information", + "(Lock acquisition failed) %p", page); + + return; } void @@ -1745,7 +1901,6 @@ __ioc_cache_dump (ioc_inode_t *ioc_inode, char *prefix) ioc_table_t *table = NULL; ioc_page_t *page = NULL; int i = 0; - struct tm *tm = NULL; char key[GF_DUMP_MAX_BUF_LEN] = {0, }; char timestr[256] = {0, }; @@ -1755,13 +1910,15 @@ __ioc_cache_dump (ioc_inode_t *ioc_inode, char *prefix) table = ioc_inode->table; - tm = localtime (&ioc_inode->cache.tv.tv_sec); - strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm); - snprintf (timestr + strlen (timestr), 256 - strlen (timestr), - ".%"GF_PRI_SUSECONDS, ioc_inode->cache.tv.tv_usec); + if (ioc_inode->cache.tv.tv_sec) { + gf_time_fmt (timestr, sizeof timestr, + ioc_inode->cache.tv.tv_sec, gf_timefmt_FT); + snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr), + ".%"GF_PRI_SUSECONDS, ioc_inode->cache.tv.tv_usec); - gf_proc_dump_build_key (key, prefix, "last-cache-validation-time"); - gf_proc_dump_write (key, "%s", timestr); + gf_proc_dump_write ("last-cache-validation-time", "%s", + timestr); + } for (offset = 0; offset < ioc_inode->ia_size; offset += table->page_size) { @@ -1770,9 +1927,7 @@ __ioc_cache_dump (ioc_inode_t *ioc_inode, char *prefix) continue; } - gf_proc_dump_build_key (key, prefix, - "inode.cache.page[%d]", i++); - + sprintf (key, "inode.cache.page[%d]", i++); __ioc_page_dump (page, key); } out: @@ -1780,72 +1935,113 @@ out: } -void -ioc_inode_dump (ioc_inode_t *ioc_inode, char *prefix) +int +ioc_inode_dump (xlator_t *this, inode_t *inode) { - char key[GF_DUMP_MAX_BUF_LEN] = {0, }; - char uuidbuf[256] = {0, }; - if ((ioc_inode == NULL) || (prefix == NULL)) { + char *path = NULL; + int ret = -1; + char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; + uint64_t tmp_ioc_inode = 0; + ioc_inode_t *ioc_inode = NULL; 
+ gf_boolean_t section_added = _gf_false; + char uuid_str[64] = {0,}; + + if (this == NULL || inode == NULL) + goto out; + + gf_proc_dump_build_key (key_prefix, "io-cache", "inode"); + + inode_ctx_get (inode, this, &tmp_ioc_inode); + ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; + if (ioc_inode == NULL) + goto out; + + /* Similar to ioc_page_dump function its better to use + * pthread_mutex_trylock and not to use gf_log in statedump + * to avoid deadlocks. + */ + ret = pthread_mutex_trylock (&ioc_inode->inode_lock); + if (ret) goto out; - } - ioc_inode_lock (ioc_inode); { - gf_proc_dump_build_key (key, prefix, "\ninode.gfid"); - uuid_unparse (ioc_inode->inode->gfid, uuidbuf); - gf_proc_dump_write (key, "%s", uuidbuf); - gf_proc_dump_build_key (key, prefix, "inode.ino"); - gf_proc_dump_write (key, "%ld", ioc_inode->inode->ino); - gf_proc_dump_build_key (key, prefix, "inode.weight"); - gf_proc_dump_write (key, "%d", ioc_inode->weight); - __ioc_cache_dump (ioc_inode, prefix); - __ioc_inode_waitq_dump (ioc_inode, prefix); + if (uuid_is_null (ioc_inode->inode->gfid)) + goto unlock; + + gf_proc_dump_add_section (key_prefix); + section_added = _gf_true; + + __inode_path (ioc_inode->inode, NULL, &path); + + gf_proc_dump_write ("inode.weight", "%d", ioc_inode->weight); + + if (path) { + gf_proc_dump_write ("path", "%s", path); + GF_FREE (path); + } + + gf_proc_dump_write ("uuid", "%s", uuid_utoa_r + (ioc_inode->inode->gfid, uuid_str)); + __ioc_cache_dump (ioc_inode, key_prefix); + __ioc_inode_waitq_dump (ioc_inode, key_prefix); } - ioc_inode_unlock (ioc_inode); +unlock: + pthread_mutex_unlock (&ioc_inode->inode_lock); + out: - return; + if (ret && ioc_inode) { + if (section_added == _gf_false) + gf_proc_dump_add_section (key_prefix); + gf_proc_dump_write ("Unable to print the status of ioc_inode", + "(Lock acquisition failed) %s", + uuid_utoa (inode->gfid)); + } + return ret; } int ioc_priv_dump (xlator_t *this) { ioc_table_t *priv = NULL; - ioc_inode_t *ioc_inode = NULL; 
char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; - char key[GF_DUMP_MAX_BUF_LEN] = {0, }; + int ret = -1; + gf_boolean_t add_section = _gf_false; if (!this || !this->private) goto out; priv = this->private; - gf_proc_dump_build_key (key_prefix, "xlator.performance.io-cache", - "priv"); + + gf_proc_dump_build_key (key_prefix, "io-cache", "priv"); gf_proc_dump_add_section (key_prefix); + add_section = _gf_true; - ioc_table_lock (priv); + ret = pthread_mutex_trylock (&priv->table_lock); + if (ret) + goto out; { - gf_proc_dump_build_key (key, key_prefix, "page_size"); - gf_proc_dump_write (key, "%ld", priv->page_size); - gf_proc_dump_build_key (key, key_prefix, "cache_size"); - gf_proc_dump_write (key, "%ld", priv->cache_size); - gf_proc_dump_build_key (key, key_prefix, "cache_used"); - gf_proc_dump_write (key, "%ld", priv->cache_used); - gf_proc_dump_build_key (key, key_prefix, "inode_count"); - gf_proc_dump_write (key, "%u", priv->inode_count); - gf_proc_dump_build_key (key, key_prefix, "cache_timeout"); - gf_proc_dump_write (key, "%u", priv->cache_timeout); - gf_proc_dump_build_key (key, key_prefix, "min-file-size"); - gf_proc_dump_write (key, "%u", priv->min_file_size); - gf_proc_dump_build_key (key, key_prefix, "max-file-size"); - gf_proc_dump_write (key, "%u", priv->max_file_size); - - list_for_each_entry (ioc_inode, &priv->inodes, inode_list) { - ioc_inode_dump (ioc_inode, key_prefix); + gf_proc_dump_write ("page_size", "%ld", priv->page_size); + gf_proc_dump_write ("cache_size", "%ld", priv->cache_size); + gf_proc_dump_write ("cache_used", "%ld", priv->cache_used); + gf_proc_dump_write ("inode_count", "%u", priv->inode_count); + gf_proc_dump_write ("cache_timeout", "%u", priv->cache_timeout); + gf_proc_dump_write ("min-file-size", "%u", priv->min_file_size); + gf_proc_dump_write ("max-file-size", "%u", priv->max_file_size); + } + pthread_mutex_unlock (&priv->table_lock); +out: + if (ret && priv) { + if (!add_section) { + gf_proc_dump_build_key (key_prefix, 
"xlator." + "performance.io-cache", "priv"); + gf_proc_dump_add_section (key_prefix); } + gf_proc_dump_write ("Unable to dump the state of private " + "structure of io-cache xlator", "(Lock " + "acquisition failed) %s", this->name); } - ioc_table_unlock (priv); -out: + return 0; } @@ -1858,18 +2054,33 @@ out: void fini (xlator_t *this) { - ioc_table_t *table = NULL; + ioc_table_t *table = NULL; + struct ioc_priority *curr = NULL, *tmp = NULL; + int i = 0; table = this->private; if (table == NULL) return; + this->private = NULL; + if (table->mem_pool != NULL) { mem_pool_destroy (table->mem_pool); table->mem_pool = NULL; } + list_for_each_entry_safe (curr, tmp, &table->priority_list, list) { + list_del_init (&curr->list); + GF_FREE (curr->pattern); + GF_FREE (curr); + } + + for (i = 0; i < table->max_pri; i++) { + GF_ASSERT (list_empty (&table->inode_lru[i])); + } + + GF_ASSERT (list_empty (&table->inodes)); pthread_mutex_destroy (&table->table_lock); GF_FREE (table); @@ -1887,22 +2098,28 @@ struct xlator_fops fops = { .lookup = ioc_lookup, .lk = ioc_lk, .setattr = ioc_setattr, - .mknod = ioc_mknod + .mknod = ioc_mknod, + + .readdirp = ioc_readdirp, + .discard = ioc_discard, + .zerofill = ioc_zerofill, }; struct xlator_dumpops dumpops = { .priv = ioc_priv_dump, + .inodectx = ioc_inode_dump, }; struct xlator_cbks cbks = { .forget = ioc_forget, - .release = ioc_release + .release = ioc_release, + .invalidate = ioc_invalidate, }; struct volume_options options[] = { { .key = {"priority"}, - .type = GF_OPTION_TYPE_ANY, + .type = GF_OPTION_TYPE_PRIORITY_LIST, .default_value = "", .description = "Assigns priority to filenames with specific " "patterns so that when a page needs to be ejected " @@ -1921,7 +2138,7 @@ struct volume_options options[] = { { .key = {"cache-size"}, .type = GF_OPTION_TYPE_SIZET, .min = 4 * GF_UNIT_MB, - .max = 6 * GF_UNIT_GB, + .max = 32 * GF_UNIT_GB, .default_value = "32MB", .description = "Size of the read cache." 
}, diff --git a/xlators/performance/io-cache/src/io-cache.h b/xlators/performance/io-cache/src/io-cache.h index eec24f143..46d758a66 100644 --- a/xlators/performance/io-cache/src/io-cache.h +++ b/xlators/performance/io-cache/src/io-cache.h @@ -1,20 +1,11 @@ /* - Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ #ifndef __IO_CACHE_H @@ -84,7 +75,6 @@ struct ioc_fill { struct ioc_local { mode_t mode; int32_t flags; - int32_t wbflags; loc_t file_loc; off_t offset; size_t size; @@ -126,6 +116,8 @@ struct ioc_page { struct ioc_waitq *waitq; struct iobref *iobref; pthread_mutex_t page_lock; + int32_t op_errno; + char stale; }; struct ioc_cache { @@ -196,7 +188,7 @@ int32_t ioc_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iovec *vector, int32_t count, struct iatt *stbuf, - struct iobref *iobref); + struct iobref *iobref, dict_t *xdata); ioc_page_t * __ioc_page_get (ioc_inode_t *ioc_inode, off_t offset); @@ -212,7 +204,7 @@ __ioc_wait_on_page (ioc_page_t *page, call_frame_t *frame, off_t offset, size_t size); ioc_waitq_t * -__ioc_page_wakeup (ioc_page_t *page); +__ioc_page_wakeup (ioc_page_t *page, int32_t op_errno); void ioc_page_flush (ioc_page_t *page); @@ -228,7 +220,7 @@ ioc_waitq_return (ioc_waitq_t *waitq); int32_t ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset, - size_t size); + size_t size, int32_t op_errno); #define ioc_inode_lock(ioc_inode) \ do { \ @@ -337,6 +329,4 @@ ioc_prune (ioc_table_t *table); int32_t ioc_need_prune (ioc_table_t *table); -inline uint32_t -ioc_hashfn (void *data, int len); #endif /* __IO_CACHE_H */ diff --git a/xlators/performance/io-cache/src/ioc-inode.c b/xlators/performance/io-cache/src/ioc-inode.c index 4c48c41d1..86a54bb14 100644 --- a/xlators/performance/io-cache/src/ioc-inode.c +++ b/xlators/performance/io-cache/src/ioc-inode.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. 
- - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #ifndef _CONFIG_H @@ -123,7 +114,8 @@ ioc_inode_wakeup (call_frame_t *frame, ioc_inode_t *ioc_inode, ioc_inode_lock (ioc_inode); { page_waitq = - __ioc_page_wakeup (waiter_page); + __ioc_page_wakeup (waiter_page, + waiter_page->op_errno); } ioc_inode_unlock (ioc_inode); if (page_waitq) diff --git a/xlators/performance/io-cache/src/ioc-mem-types.h b/xlators/performance/io-cache/src/ioc-mem-types.h index 421485e26..9b68f9fce 100644 --- a/xlators/performance/io-cache/src/ioc-mem-types.h +++ b/xlators/performance/io-cache/src/ioc-mem-types.h @@ -1,20 +1,11 @@ /* - Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. 
+ This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #ifndef __IOC_MT_H__ @@ -26,7 +17,6 @@ enum gf_ioc_mem_types_ { gf_ioc_mt_iovec = gf_common_mt_end + 1, gf_ioc_mt_ioc_table_t, gf_ioc_mt_char, - gf_ioc_mt_ioc_local_t, gf_ioc_mt_ioc_waitq_t, gf_ioc_mt_ioc_priority, gf_ioc_mt_list_head, diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c index 9afaf3062..b2e20ba65 100644 --- a/xlators/performance/io-cache/src/page.c +++ b/xlators/performance/io-cache/src/page.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ #ifndef _CONFIG_H @@ -111,6 +102,7 @@ __ioc_page_destroy (ioc_page_t *page) if (page->waitq) { /* frames waiting on this page, do not destroy this page */ page_size = -1; + page->stale = 1; } else { rbthash_remove (page->inode->cache.page_table, &page->offset, sizeof (page->offset)); @@ -416,7 +408,8 @@ ioc_waitq_return (ioc_waitq_t *waitq) int ioc_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iovec *vector, - int32_t count, struct iatt *stbuf, struct iobref *iobref) + int32_t count, struct iatt *stbuf, struct iobref *iobref, + dict_t *xdata) { ioc_local_t *local = NULL; off_t offset = 0; @@ -517,6 +510,7 @@ ioc_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this, * byte replies */ page_size = iov_length(vector, count); page->size = page_size; + page->op_errno = op_errno; iobref_page_size = iobref_size (page->iobref); @@ -524,7 +518,8 @@ ioc_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this, /* wake up all the frames waiting on * this page, including * the frame which triggered fault */ - waitq = __ioc_page_wakeup (page); + waitq = __ioc_page_wakeup (page, + op_errno); } /* if(page->waitq) */ } /* if(!page)...else */ } /* if(op_ret < 0)...else */ @@ -602,8 +597,7 @@ ioc_page_fault (ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd, goto err; } - fault_local = GF_CALLOC (1, sizeof (ioc_local_t), - gf_ioc_mt_ioc_local_t); + fault_local = mem_get0 (THIS->local_pool); if (fault_local == NULL) { op_ret = -1; op_errno = ENOMEM; @@ -630,7 +624,7 @@ ioc_page_fault (ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd, STACK_WIND (fault_frame, ioc_fault_cbk, FIRST_CHILD(fault_frame->this), FIRST_CHILD(fault_frame->this)->fops->readv, fd, - table->page_size, offset); + table->page_size, offset, 0, NULL); return; err: @@ -651,7 +645,7 @@ err: int32_t __ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset, - size_t size) + size_t size, int32_t op_errno) { ioc_local_t *local = 
NULL; ioc_fill_t *fill = NULL; @@ -686,7 +680,13 @@ __ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset, /* immediately move this page to the end of the page_lru list */ list_move_tail (&page->page_lru, &ioc_inode->cache.page_lru); /* fill local->pending_size bytes from local->pending_offset */ - if (local->op_ret != -1 && page->size) { + if (local->op_ret != -1) { + local->op_errno = op_errno; + + if (page->size == 0) { + goto done; + } + if (offset > page->offset) /* offset is offset in file, convert it to offset in * page */ @@ -779,6 +779,7 @@ __ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset, local->op_ret += copy_size; } +done: ret = 0; out: return ret; @@ -803,7 +804,7 @@ ioc_frame_unwind (call_frame_t *frame) int32_t copied = 0; struct iobref *iobref = NULL; struct iatt stbuf = {0,}; - int32_t op_ret = 0; + int32_t op_ret = 0, op_errno = 0; GF_ASSERT (frame); @@ -812,16 +813,21 @@ ioc_frame_unwind (call_frame_t *frame) gf_log (frame->this->name, GF_LOG_WARNING, "local is NULL"); op_ret = -1; - local->op_errno = ENOMEM; + op_errno = ENOMEM; + goto unwind; + } + + if (local->op_ret < 0) { + op_ret = local->op_ret; + op_errno = local->op_errno; goto unwind; } // ioc_local_lock (local); - frame->local = NULL; iobref = iobref_new (); if (iobref == NULL) { op_ret = -1; - local->op_errno = ENOMEM; + op_errno = ENOMEM; } if (list_empty (&local->fill_list)) { @@ -838,7 +844,7 @@ ioc_frame_unwind (call_frame_t *frame) vector = GF_CALLOC (count, sizeof (*vector), gf_ioc_mt_iovec); if (vector == NULL) { op_ret = -1; - local->op_errno = ENOMEM; + op_errno = ENOMEM; } list_for_each_entry_safe (fill, next, &local->fill_list, list) { @@ -868,8 +874,9 @@ unwind: // ioc_local_unlock (local); - STACK_UNWIND_STRICT (readv, frame, op_ret, local->op_errno, vector, - count, &stbuf, iobref); + frame->local = NULL; + STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, + count, &stbuf, iobref, NULL); if (iobref != NULL) { iobref_unref 
(iobref); @@ -881,7 +888,8 @@ unwind: } pthread_mutex_destroy (&local->local_lock); - GF_FREE (local); + if (local) + mem_put (local); return; } @@ -923,7 +931,7 @@ ioc_frame_return (call_frame_t *frame) * to be called only when a frame is waiting on an in-transit page */ ioc_waitq_t * -__ioc_page_wakeup (ioc_page_t *page) +__ioc_page_wakeup (ioc_page_t *page, int32_t op_errno) { ioc_waitq_t *waitq = NULL, *trav = NULL; call_frame_t *frame = NULL; @@ -942,12 +950,16 @@ __ioc_page_wakeup (ioc_page_t *page) for (trav = waitq; trav; trav = trav->next) { frame = trav->data; ret = __ioc_frame_fill (page, frame, trav->pending_offset, - trav->pending_size); + trav->pending_size, op_errno); if (ret == -1) { break; } } + if (page->stale) { + __ioc_page_destroy (page); + } + out: return waitq; } diff --git a/xlators/performance/io-threads/src/Makefile.am b/xlators/performance/io-threads/src/Makefile.am index 72f9a8012..d63042e7c 100644 --- a/xlators/performance/io-threads/src/Makefile.am +++ b/xlators/performance/io-threads/src/Makefile.am @@ -1,14 +1,15 @@ xlator_LTLIBRARIES = io-threads.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance -io_threads_la_LDFLAGS = -module -avoidversion +io_threads_la_LDFLAGS = -module -avoid-version io_threads_la_SOURCES = io-threads.c io_threads_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la noinst_HEADERS = io-threads.h iot-mem-types.h -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ - -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) CLEANFILES = diff --git a/xlators/performance/io-threads/src/io-threads.c b/xlators/performance/io-threads/src/io-threads.c index d6d0ada23..bbcf4ed26 100644 --- a/xlators/performance/io-threads/src/io-threads.c +++ b/xlators/performance/io-threads/src/io-threads.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2006-2011 Gluster, Inc. 
<http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #ifndef _CONFIG_H @@ -39,16 +30,69 @@ int __iot_workers_scale (iot_conf_t *conf); struct volume_options options[]; call_stub_t * -__iot_dequeue (iot_conf_t *conf, int *pri) +__iot_dequeue (iot_conf_t *conf, int *pri, struct timespec *sleep) { call_stub_t *stub = NULL; int i = 0; + struct timeval curtv = {0,}, difftv = {0,}; *pri = -1; + sleep->tv_sec = 0; + sleep->tv_nsec = 0; for (i = 0; i < IOT_PRI_MAX; i++) { if (list_empty (&conf->reqs[i]) || (conf->ac_iot_count[i] >= conf->ac_iot_limit[i])) continue; + + if (i == IOT_PRI_LEAST) { + pthread_mutex_lock(&conf->throttle.lock); + if (!conf->throttle.sample_time.tv_sec) { + /* initialize */ + gettimeofday(&conf->throttle.sample_time, NULL); + } else { + /* + * Maintain a running count of least priority + * operations that are handled over a particular + * time interval. The count is provided via + * state dump and is used as a measure against + * least priority op throttling. 
+ */ + gettimeofday(&curtv, NULL); + timersub(&curtv, &conf->throttle.sample_time, + &difftv); + if (difftv.tv_sec >= IOT_LEAST_THROTTLE_DELAY) { + conf->throttle.cached_rate = + conf->throttle.sample_cnt; + conf->throttle.sample_cnt = 0; + conf->throttle.sample_time = curtv; + } + + /* + * If we're over the configured rate limit, + * provide an absolute time to the caller that + * represents the soonest we're allowed to + * return another least priority request. + */ + if (conf->throttle.rate_limit && + conf->throttle.sample_cnt >= + conf->throttle.rate_limit) { + struct timeval delay; + delay.tv_sec = IOT_LEAST_THROTTLE_DELAY; + delay.tv_usec = 0; + + timeradd(&conf->throttle.sample_time, + &delay, &curtv); + TIMEVAL_TO_TIMESPEC(&curtv, sleep); + + pthread_mutex_unlock( + &conf->throttle.lock); + break; + } + } + conf->throttle.sample_cnt++; + pthread_mutex_unlock(&conf->throttle.lock); + } + stub = list_entry (conf->reqs[i].next, call_stub_t, list); conf->ac_iot_count[i]++; *pri = i; @@ -59,6 +103,7 @@ __iot_dequeue (iot_conf_t *conf, int *pri) return NULL; conf->queue_size--; + conf->queue_sizes[*pri]--; list_del_init (&stub->list); return stub; @@ -74,6 +119,7 @@ __iot_enqueue (iot_conf_t *conf, call_stub_t *stub, int pri) list_add_tail (&stub->list, &conf->reqs[pri]); conf->queue_size++; + conf->queue_sizes[pri]++; return; } @@ -90,6 +136,7 @@ iot_worker (void *data) int pri = -1; char timeout = 0; char bye = 0; + struct timespec sleep = {0,}; conf = data; this = conf->this; @@ -130,7 +177,13 @@ iot_worker (void *data) } } - stub = __iot_dequeue (conf, &pri); + stub = __iot_dequeue (conf, &pri, &sleep); + if (!stub && (sleep.tv_sec || sleep.tv_nsec)) { + pthread_cond_timedwait(&conf->cond, + &conf->mutex, &sleep); + pthread_mutex_unlock(&conf->mutex); + continue; + } } pthread_mutex_unlock (&conf->mutex); @@ -199,8 +252,9 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub) { int ret = -1; iot_pri_t pri = IOT_PRI_MAX - 1; + iot_conf_t 
*conf = this->private; - if (frame->root->pid < 0) { + if ((frame->root->pid < GF_CLIENT_PID_MAX) && conf->least_priority) { pri = IOT_PRI_LEAST; goto out; } @@ -240,6 +294,7 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub) case GF_FOP_FGETXATTR: case GF_FOP_FSETXATTR: case GF_FOP_REMOVEXATTR: + case GF_FOP_FREMOVEXATTR: pri = IOT_PRI_NORMAL; break; @@ -251,11 +306,11 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub) case GF_FOP_FSYNCDIR: case GF_FOP_XATTROP: case GF_FOP_FXATTROP: - pri = IOT_PRI_LO; - break; - case GF_FOP_RCHECKSUM: - pri = IOT_PRI_LEAST; + case GF_FOP_FALLOCATE: + case GF_FOP_DISCARD: + case GF_FOP_ZEROFILL: + pri = IOT_PRI_LO; break; case GF_FOP_NULL: @@ -269,19 +324,19 @@ iot_schedule (call_frame_t *frame, xlator_t *this, call_stub_t *stub) break; } out: - ret = do_iot_schedule (this->private, stub, pri); gf_log (this->name, GF_LOG_DEBUG, "%s scheduled as %s fop", gf_fop_list[stub->fop], iot_get_pri_meaning (pri)); + ret = do_iot_schedule (this->private, stub, pri); return ret; } int iot_lookup_cbk (call_frame_t *frame, void * cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, - inode_t *inode, struct iatt *buf, dict_t *xattr, + inode_t *inode, struct iatt *buf, dict_t *xdata, struct iatt *postparent) { - STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, xattr, + STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, xdata, postparent); return 0; } @@ -289,23 +344,23 @@ iot_lookup_cbk (call_frame_t *frame, void * cookie, xlator_t *this, int iot_lookup_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *xattr_req) + dict_t *xdata) { STACK_WIND (frame, iot_lookup_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->lookup, - loc, xattr_req); + loc, xdata); return 0; } int -iot_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) +iot_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { call_stub_t *stub = 
NULL; int ret = -1; - stub = fop_lookup_stub (frame, iot_lookup_wrapper, loc, xattr_req); + stub = fop_lookup_stub (frame, iot_lookup_wrapper, loc, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create lookup stub (out of memory)"); @@ -331,33 +386,35 @@ out: int iot_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop) + struct iatt *preop, struct iatt *postop, dict_t *xdata) { - STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop); + STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop, + xdata); return 0; } int iot_setattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid) + struct iatt *stbuf, int32_t valid, dict_t *xdata) { STACK_WIND (frame, iot_setattr_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->setattr, - loc, stbuf, valid); + loc, stbuf, valid, xdata); return 0; } int iot_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid) + struct iatt *stbuf, int32_t valid, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_setattr_stub (frame, iot_setattr_wrapper, loc, stbuf, valid); + stub = fop_setattr_stub (frame, iot_setattr_wrapper, loc, stbuf, valid, + xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "Cannot create setattr stub" "(Out of memory)"); @@ -373,7 +430,7 @@ out: call_stub_destroy (stub); } - STACK_UNWIND_STRICT (setattr, frame, -1, -ret, NULL, NULL); + STACK_UNWIND_STRICT (setattr, frame, -1, -ret, NULL, NULL, NULL); } return 0; @@ -383,32 +440,34 @@ out: int iot_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop) + struct iatt *preop, struct iatt *postop, dict_t *xdata) { - STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, preop, postop); + STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, preop, 
postop, + xdata); return 0; } int iot_fsetattr_wrapper (call_frame_t *frame, xlator_t *this, - fd_t *fd, struct iatt *stbuf, int32_t valid) + fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata) { STACK_WIND (frame, iot_fsetattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid); + FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid, + xdata); return 0; } int iot_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid) + struct iatt *stbuf, int32_t valid, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; stub = fop_fsetattr_stub (frame, iot_fsetattr_wrapper, fd, stbuf, - valid); + valid, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create fsetattr stub" "(out of memory)"); @@ -420,7 +479,8 @@ iot_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, out: if (ret < 0) { - STACK_UNWIND_STRICT (fsetattr, frame, -1, -ret, NULL, NULL); + STACK_UNWIND_STRICT (fsetattr, frame, -1, -ret, NULL, NULL, + NULL); if (stub != NULL) { call_stub_destroy (stub); } @@ -431,30 +491,31 @@ out: int iot_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - STACK_UNWIND_STRICT (access, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata); return 0; } int iot_access_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, - int32_t mask) + int32_t mask, dict_t *xdata) { STACK_WIND (frame, iot_access_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->access, loc, mask); + FIRST_CHILD (this)->fops->access, loc, mask, xdata); return 0; } int -iot_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask) +iot_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, + dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_access_stub (frame, iot_access_wrapper, loc, mask); + stub = fop_access_stub (frame, 
iot_access_wrapper, loc, mask, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create access stub" "(out of memory)"); @@ -465,7 +526,7 @@ iot_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask) ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (access, frame, -1, -ret); + STACK_UNWIND_STRICT (access, frame, -1, -ret, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -478,32 +539,33 @@ out: int iot_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, const char *path, - struct iatt *stbuf) + struct iatt *stbuf, dict_t *xdata) { - STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, stbuf); + STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, stbuf, + xdata); return 0; } int iot_readlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, - size_t size) + size_t size, dict_t *xdata) { STACK_WIND (frame, iot_readlink_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->readlink, - loc, size); + loc, size, xdata); return 0; } int -iot_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size) +iot_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_readlink_stub (frame, iot_readlink_wrapper, loc, size); + stub = fop_readlink_stub (frame, iot_readlink_wrapper, loc, size, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create readlink stub" "(out of memory)"); @@ -515,7 +577,7 @@ iot_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size) out: if (ret < 0) { - STACK_UNWIND_STRICT (readlink, frame, -1, -ret, NULL, NULL); + STACK_UNWIND_STRICT (readlink, frame, -1, -ret, NULL, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -530,33 +592,34 @@ int iot_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, struct 
iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf, - preparent, postparent); + preparent, postparent, xdata); return 0; } int iot_mknod_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, dict_t *params) + dev_t rdev, mode_t umask, dict_t *xdata) { STACK_WIND (frame, iot_mknod_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->mknod, loc, mode, rdev, params); + FIRST_CHILD (this)->fops->mknod, loc, mode, rdev, umask, + xdata); return 0; } int iot_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, dict_t *params) + dev_t rdev, mode_t umask, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; stub = fop_mknod_stub (frame, iot_mknod_wrapper, loc, mode, rdev, - params); + umask, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create mknod stub" "(out of memory)"); @@ -569,7 +632,7 @@ iot_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, out: if (ret < 0) { STACK_UNWIND_STRICT (mknod, frame, -1, -ret, NULL, NULL, NULL, - NULL); + NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -583,32 +646,33 @@ int iot_mkdir_cbk (call_frame_t *frame, void * cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf, - preparent, postparent); + preparent, postparent, xdata); return 0; } int iot_mkdir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dict_t *params) + mode_t umask, dict_t *xdata) { STACK_WIND (frame, iot_mkdir_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->mkdir, loc, mode, params); + FIRST_CHILD (this)->fops->mkdir, loc, mode, umask, xdata); return 0; } int iot_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - 
dict_t *params) + mode_t umask, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_mkdir_stub (frame, iot_mkdir_wrapper, loc, mode, params); + stub = fop_mkdir_stub (frame, iot_mkdir_wrapper, loc, mode, umask, + xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create mkdir stub" "(out of memory)"); @@ -621,7 +685,7 @@ iot_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, out: if (ret < 0) { STACK_UNWIND_STRICT (mkdir, frame, -1, -ret, NULL, NULL, NULL, - NULL); + NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -634,30 +698,30 @@ out: int iot_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent, - postparent); + postparent, xdata); return 0; } int -iot_rmdir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags) +iot_rmdir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata) { STACK_WIND (frame, iot_rmdir_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->rmdir, loc, flags); + FIRST_CHILD (this)->fops->rmdir, loc, flags, xdata); return 0; } int -iot_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags) +iot_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_rmdir_stub (frame, iot_rmdir_wrapper, loc, flags); + stub = fop_rmdir_stub (frame, iot_rmdir_wrapper, loc, flags, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create rmdir stub" "(out of memory)"); @@ -668,7 +732,7 @@ iot_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags) ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (rmdir, frame, -1, -ret, NULL, NULL); + STACK_UNWIND_STRICT (rmdir, frame, -1, -ret, NULL, NULL, NULL); if (stub != NULL) { 
call_stub_destroy (stub); @@ -682,33 +746,34 @@ int iot_symlink_cbk (call_frame_t *frame, void * cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf, - preparent, postparent); + preparent, postparent, xdata); return 0; } int iot_symlink_wrapper (call_frame_t *frame, xlator_t *this, const char *linkname, - loc_t *loc, dict_t *params) + loc_t *loc, mode_t umask, dict_t *xdata) { STACK_WIND (frame, iot_symlink_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->symlink, linkname, loc, params); + FIRST_CHILD (this)->fops->symlink, linkname, loc, umask, + xdata); return 0; } int iot_symlink (call_frame_t *frame, xlator_t *this, const char *linkname, - loc_t *loc, dict_t *params) + loc_t *loc, mode_t umask, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; stub = fop_symlink_stub (frame, iot_symlink_wrapper, linkname, loc, - params); + umask, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create symlink stub" "(out of memory)"); @@ -721,7 +786,7 @@ iot_symlink (call_frame_t *frame, xlator_t *this, const char *linkname, out: if (ret < 0) { STACK_UNWIND_STRICT (symlink, frame, -1, -ret, NULL, NULL, NULL, - NULL); + NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); } @@ -735,31 +800,33 @@ int iot_rename_cbk (call_frame_t *frame, void * cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *buf, struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent) + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf, preoldparent, - postoldparent, prenewparent, postnewparent); + postoldparent, prenewparent, postnewparent, xdata); return 0; } int iot_rename_wrapper (call_frame_t *frame, xlator_t *this, loc_t 
*oldloc, - loc_t *newloc) + loc_t *newloc, dict_t *xdata) { STACK_WIND (frame, iot_rename_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->rename, oldloc, newloc); + FIRST_CHILD (this)->fops->rename, oldloc, newloc, xdata); return 0; } int -iot_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc) +iot_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_rename_stub (frame, iot_rename_wrapper, oldloc, newloc); + stub = fop_rename_stub (frame, iot_rename_wrapper, oldloc, newloc, xdata); if (!stub) { gf_log (this->name, GF_LOG_DEBUG, "cannot create rename stub" "(out of memory)"); @@ -772,7 +839,7 @@ iot_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc) out: if (ret < 0) { STACK_UNWIND_STRICT (rename, frame, -1, -ret, NULL, NULL, NULL, - NULL, NULL); + NULL, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); } @@ -784,31 +851,33 @@ out: int iot_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, fd_t *fd) + int32_t op_errno, fd_t *fd, dict_t *xdata) { - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd); + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); return 0; } int iot_open_wrapper (call_frame_t * frame, xlator_t * this, loc_t *loc, - int32_t flags, fd_t * fd, int32_t wbflags) + int32_t flags, fd_t * fd, dict_t *xdata) { STACK_WIND (frame, iot_open_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->open, loc, flags, fd, wbflags); + FIRST_CHILD (this)->fops->open, loc, flags, fd, + xdata); return 0; } int iot_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, int32_t wbflags) + fd_t *fd, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_open_stub (frame, iot_open_wrapper, loc, flags, fd, wbflags); + stub = fop_open_stub (frame, iot_open_wrapper, loc, flags, fd, + xdata); if (!stub) { gf_log 
(this->name, GF_LOG_ERROR, "cannot create open call stub" @@ -821,7 +890,7 @@ iot_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, out: if (ret < 0) { - STACK_UNWIND_STRICT (open, frame, -1, -ret, NULL); + STACK_UNWIND_STRICT (open, frame, -1, -ret, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -836,35 +905,36 @@ int iot_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, stbuf, - preparent, postparent); + preparent, postparent, xdata); return 0; } int iot_create_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, - int32_t flags, mode_t mode, fd_t *fd, dict_t *params) + int32_t flags, mode_t mode, mode_t umask, fd_t *fd, + dict_t *xdata) { STACK_WIND (frame, iot_create_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->create, - loc, flags, mode, fd, params); + loc, flags, mode, umask, fd, xdata); return 0; } int iot_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, fd_t *fd, dict_t *params) + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; stub = fop_create_stub (frame, iot_create_wrapper, loc, flags, mode, - fd, params); + umask, fd, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create \"create\" call stub" @@ -878,7 +948,7 @@ iot_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, out: if (ret < 0) { STACK_UNWIND_STRICT (create, frame, -1, -ret, NULL, NULL, NULL, - NULL, NULL); + NULL, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -892,10 +962,11 @@ out: int iot_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iovec *vector, - int32_t count, struct iatt *stbuf, struct iobref *iobref) + 
int32_t count, struct iatt *stbuf, struct iobref *iobref, + dict_t *xdata) { STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count, - stbuf, iobref); + stbuf, iobref, xdata); return 0; } @@ -903,24 +974,25 @@ iot_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int iot_readv_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset) + off_t offset, uint32_t flags, dict_t *xdata) { STACK_WIND (frame, iot_readv_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv, - fd, size, offset); + fd, size, offset, flags, xdata); return 0; } int iot_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset) + off_t offset, uint32_t flags, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_readv_stub (frame, iot_readv_wrapper, fd, size, offset); + stub = fop_readv_stub (frame, iot_readv_wrapper, fd, size, offset, + flags, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create readv call stub" @@ -934,7 +1006,7 @@ iot_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, out: if (ret < 0) { STACK_UNWIND_STRICT (readv, frame, -1, -ret, NULL, -1, NULL, - NULL); + NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); } @@ -945,31 +1017,31 @@ out: int iot_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata); return 0; } int -iot_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd) +iot_flush_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { STACK_WIND (frame, iot_flush_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->flush, - fd); + fd, xdata); return 0; } int -iot_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) +iot_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { call_stub_t *stub = 
NULL; int ret = -1; - stub = fop_flush_stub (frame, iot_flush_wrapper, fd); + stub = fop_flush_stub (frame, iot_flush_wrapper, fd, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create flush_cbk call stub" @@ -981,7 +1053,7 @@ iot_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (flush, frame, -1, -ret); + STACK_UNWIND_STRICT (flush, frame, -1, -ret, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -994,32 +1066,34 @@ out: int iot_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { - STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf); + STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, + xdata); return 0; } int iot_fsync_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, - int32_t datasync) + int32_t datasync, dict_t *xdata) { STACK_WIND (frame, iot_fsync_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->fsync, - fd, datasync); + fd, datasync, xdata); return 0; } int -iot_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync) +iot_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_fsync_stub (frame, iot_fsync_wrapper, fd, datasync); + stub = fop_fsync_stub (frame, iot_fsync_wrapper, fd, datasync, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create fsync_cbk call stub" @@ -1032,7 +1106,7 @@ iot_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync) out: if (ret < 0) { - STACK_UNWIND_STRICT (fsync, frame, -1, -ret, NULL, NULL); + STACK_UNWIND_STRICT (fsync, frame, -1, -ret, NULL, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1045,9 +1119,10 @@ out: int iot_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t 
op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf); + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); return 0; } @@ -1055,12 +1130,13 @@ iot_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int iot_writev_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int32_t count, - off_t offset, struct iobref *iobref) + off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) { STACK_WIND (frame, iot_writev_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, - fd, vector, count, offset, iobref); + fd, vector, count, offset, flags, iobref, xdata); return 0; } @@ -1068,13 +1144,13 @@ iot_writev_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, int iot_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int32_t count, off_t offset, - struct iobref *iobref) + uint32_t flags, struct iobref *iobref, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_writev_stub (frame, iot_writev_wrapper, - fd, vector, count, offset, iobref); + stub = fop_writev_stub (frame, iot_writev_wrapper, fd, vector, + count, offset, flags, iobref, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, @@ -1087,7 +1163,7 @@ iot_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (writev, frame, -1, -ret, NULL, NULL); + STACK_UNWIND_STRICT (writev, frame, -1, -ret, NULL, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1100,33 +1176,34 @@ out: int32_t iot_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct gf_flock *flock) + int32_t op_ret, int32_t op_errno, struct gf_flock *flock, + dict_t *xdata) { - STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, flock); + STACK_UNWIND_STRICT (lk, 
frame, op_ret, op_errno, flock, xdata); return 0; } int iot_lk_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, - int32_t cmd, struct gf_flock *flock) + int32_t cmd, struct gf_flock *flock, dict_t *xdata) { STACK_WIND (frame, iot_lk_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk, - fd, cmd, flock); + fd, cmd, flock, xdata); return 0; } int iot_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, - struct gf_flock *flock) + struct gf_flock *flock, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_lk_stub (frame, iot_lk_wrapper, fd, cmd, flock); + stub = fop_lk_stub (frame, iot_lk_wrapper, fd, cmd, flock, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, @@ -1139,7 +1216,7 @@ iot_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (lk, frame, -1, -ret, NULL); + STACK_UNWIND_STRICT (lk, frame, -1, -ret, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1151,31 +1228,31 @@ out: int iot_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) + int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) { - STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf); + STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata); return 0; } int -iot_stat_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc) +iot_stat_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { STACK_WIND (frame, iot_stat_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat, - loc); + loc, xdata); return 0; } int -iot_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) +iot_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_stat_stub (frame, iot_stat_wrapper, loc); + stub = fop_stat_stub (frame, iot_stat_wrapper, loc, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, 
"cannot create fop_stat call stub" @@ -1188,7 +1265,7 @@ iot_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) out: if (ret < 0) { - STACK_UNWIND_STRICT (stat, frame, -1, -ret, NULL); + STACK_UNWIND_STRICT (stat, frame, -1, -ret, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1200,31 +1277,31 @@ out: int iot_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) + int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) { - STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf); + STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata); return 0; } int -iot_fstat_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd) +iot_fstat_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { STACK_WIND (frame, iot_fstat_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat, - fd); + fd, xdata); return 0; } int -iot_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd) +iot_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_fstat_stub (frame, iot_fstat_wrapper, fd); + stub = fop_fstat_stub (frame, iot_fstat_wrapper, fd, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create fop_fstat call stub" @@ -1236,7 +1313,7 @@ iot_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd) ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (fstat, frame, -1, -ret, NULL); + STACK_UNWIND_STRICT (fstat, frame, -1, -ret, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1249,34 +1326,35 @@ out: int iot_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, - postbuf); + postbuf, xdata); return 0; } int iot_truncate_wrapper (call_frame_t *frame, xlator_t 
*this, loc_t *loc, - off_t offset) + off_t offset, dict_t *xdata) { STACK_WIND (frame, iot_truncate_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate, - loc, offset); + loc, offset, xdata); return 0; } int -iot_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) +iot_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { call_stub_t *stub; int ret = -1; - stub = fop_truncate_stub (frame, iot_truncate_wrapper, loc, offset); - + stub = fop_truncate_stub (frame, iot_truncate_wrapper, loc, offset, + xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create fop_stat call stub" @@ -1289,7 +1367,8 @@ iot_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) out: if (ret < 0) { - STACK_UNWIND_STRICT (truncate, frame, -1, -ret, NULL, NULL); + STACK_UNWIND_STRICT (truncate, frame, -1, -ret, NULL, NULL, + NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1303,33 +1382,35 @@ out: int iot_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, - postbuf); + postbuf, xdata); return 0; } int iot_ftruncate_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, - off_t offset) + off_t offset, dict_t *xdata) { STACK_WIND (frame, iot_ftruncate_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate, - fd, offset); + fd, offset, xdata); return 0; } int -iot_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) +iot_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_ftruncate_stub (frame, iot_ftruncate_wrapper, fd, offset); + stub = fop_ftruncate_stub (frame, iot_ftruncate_wrapper, fd, offset, + xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create fop_ftruncate 
call stub" @@ -1341,7 +1422,7 @@ iot_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (ftruncate, frame, -1, -ret, NULL, NULL); + STACK_UNWIND_STRICT (ftruncate, frame, -1, -ret, NULL, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1355,32 +1436,34 @@ out: int iot_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent, - postparent); + postparent, xdata); return 0; } int -iot_unlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc) +iot_unlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, + int32_t xflag, dict_t *xdata) { STACK_WIND (frame, iot_unlink_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, - loc); + loc, xflag, xdata); return 0; } int -iot_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) +iot_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag, + dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_unlink_stub (frame, iot_unlink_wrapper, loc); + stub = fop_unlink_stub (frame, iot_unlink_wrapper, loc, xflag, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create fop_unlink call stub" @@ -1393,7 +1476,7 @@ iot_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) out: if (ret < 0) { - STACK_UNWIND_STRICT (unlink, frame, -1, -ret, NULL, NULL); + STACK_UNWIND_STRICT (unlink, frame, -1, -ret, NULL, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1407,31 +1490,34 @@ out: int iot_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, struct iatt *postparent) + struct iatt *buf, struct iatt *preparent, struct iatt *postparent, + dict_t *xdata) { 
STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf, - preparent, postparent); + preparent, postparent, xdata); return 0; } int -iot_link_wrapper (call_frame_t *frame, xlator_t *this, loc_t *old, loc_t *new) +iot_link_wrapper (call_frame_t *frame, xlator_t *this, loc_t *old, loc_t *new, + dict_t *xdata) { STACK_WIND (frame, iot_link_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->link, old, new); + FIRST_CHILD (this)->fops->link, old, new, xdata); return 0; } int -iot_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc) +iot_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, + dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_link_stub (frame, iot_link_wrapper, oldloc, newloc); + stub = fop_link_stub (frame, iot_link_wrapper, oldloc, newloc, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create link stub" "(out of memory)"); @@ -1443,7 +1529,7 @@ iot_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc) out: if (ret < 0) { STACK_UNWIND_STRICT (link, frame, -1, -ret, NULL, NULL, NULL, - NULL); + NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1455,29 +1541,31 @@ out: int iot_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd) + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { - STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd); + STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata); return 0; } int -iot_opendir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) +iot_opendir_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) { STACK_WIND (frame, iot_opendir_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->opendir, loc, fd); + FIRST_CHILD (this)->fops->opendir, loc, fd, xdata); return 0; } int -iot_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) +iot_opendir (call_frame_t 
*frame, xlator_t *this, loc_t *loc, fd_t *fd, + dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_opendir_stub (frame, iot_opendir_wrapper, loc, fd); + stub = fop_opendir_stub (frame, iot_opendir_wrapper, loc, fd, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create opendir stub" "(out of memory)"); @@ -1488,7 +1576,7 @@ iot_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (opendir, frame, -1, -ret, NULL); + STACK_UNWIND_STRICT (opendir, frame, -1, -ret, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1500,30 +1588,32 @@ out: int iot_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata); return 0; } int iot_fsyncdir_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, - int datasync) + int datasync, dict_t *xdata) { STACK_WIND (frame, iot_fsyncdir_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsyncdir, fd, datasync); + FIRST_CHILD (this)->fops->fsyncdir, fd, datasync, xdata); return 0; } int -iot_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync) +iot_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, + dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_fsyncdir_stub (frame, iot_fsyncdir_wrapper, fd, datasync); + stub = fop_fsyncdir_stub (frame, iot_fsyncdir_wrapper, fd, datasync, + xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create fsyncdir stub" "(out of memory)"); @@ -1534,7 +1624,7 @@ iot_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync) ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (fsyncdir, frame, -1, -ret); + STACK_UNWIND_STRICT (fsyncdir, 
frame, -1, -ret, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1546,29 +1636,31 @@ out: int iot_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct statvfs *buf) + int32_t op_ret, int32_t op_errno, struct statvfs *buf, + dict_t *xdata) { - STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf); + STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata); return 0; } int -iot_statfs_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc) +iot_statfs_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xdata) { STACK_WIND (frame, iot_statfs_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->statfs, loc); + FIRST_CHILD (this)->fops->statfs, loc, xdata); return 0; } int -iot_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc) +iot_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_statfs_stub (frame, iot_statfs_wrapper, loc); + stub = fop_statfs_stub (frame, iot_statfs_wrapper, loc, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create statfs stub" "(out of memory)"); @@ -1579,7 +1671,7 @@ iot_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc) ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (statfs, frame, -1, -ret, NULL); + STACK_UNWIND_STRICT (statfs, frame, -1, -ret, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1591,32 +1683,32 @@ out: int iot_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata); return 0; } int iot_setxattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *dict, int32_t flags) + dict_t *dict, int32_t flags, dict_t *xdata) { STACK_WIND (frame, iot_setxattr_cbk, 
FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setxattr, loc, dict, flags); + FIRST_CHILD (this)->fops->setxattr, loc, dict, flags, xdata); return 0; } int iot_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags) + int32_t flags, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; stub = fop_setxattr_stub (frame, iot_setxattr_wrapper, loc, dict, - flags); + flags, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create setxattr stub" "(out of memory)"); @@ -1628,7 +1720,7 @@ iot_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, out: if (ret < 0) { - STACK_UNWIND_STRICT (setxattr, frame, -1, -ret); + STACK_UNWIND_STRICT (setxattr, frame, -1, -ret, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1640,31 +1732,31 @@ out: int iot_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) + int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata) { - STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict); + STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata); return 0; } int iot_getxattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name) + const char *name, dict_t *xdata) { STACK_WIND (frame, iot_getxattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->getxattr, loc, name); + FIRST_CHILD (this)->fops->getxattr, loc, name, xdata); return 0; } int iot_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name) + const char *name, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_getxattr_stub (frame, iot_getxattr_wrapper, loc, name); + stub = fop_getxattr_stub (frame, iot_getxattr_wrapper, loc, name, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create getxattr stub" "(out of memory)"); @@ -1676,7 +1768,7 @@ iot_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, out: if (ret < 0) { - STACK_UNWIND_STRICT 
(getxattr, frame, -1, -ret, NULL); + STACK_UNWIND_STRICT (getxattr, frame, -1, -ret, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1688,31 +1780,32 @@ out: int iot_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) + int32_t op_ret, int32_t op_errno, dict_t *dict, + dict_t *xdata) { - STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict); + STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata); return 0; } int iot_fgetxattr_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name) + const char *name, dict_t *xdata) { STACK_WIND (frame, iot_fgetxattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fgetxattr, fd, name); + FIRST_CHILD (this)->fops->fgetxattr, fd, name, xdata); return 0; } int iot_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name) + const char *name, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_fgetxattr_stub (frame, iot_fgetxattr_wrapper, fd, name); + stub = fop_fgetxattr_stub (frame, iot_fgetxattr_wrapper, fd, name, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create fgetxattr stub" "(out of memory)"); @@ -1723,7 +1816,7 @@ iot_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (fgetxattr, frame, -1, -ret, NULL); + STACK_UNWIND_STRICT (fgetxattr, frame, -1, -ret, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1735,32 +1828,33 @@ out: int iot_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata); return 0; } int iot_fsetxattr_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, - dict_t *dict, int32_t flags) + dict_t *dict, 
int32_t flags, dict_t *xdata) { STACK_WIND (frame, iot_fsetxattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags); + FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags, + xdata); return 0; } int iot_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags) + int32_t flags, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; stub = fop_fsetxattr_stub (frame, iot_fsetxattr_wrapper, fd, dict, - flags); + flags, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create fsetxattr stub" "(out of memory)"); @@ -1771,7 +1865,7 @@ iot_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (fsetxattr, frame, -1, -ret); + STACK_UNWIND_STRICT (fsetxattr, frame, -1, -ret, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1783,32 +1877,32 @@ out: int iot_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata); return 0; } int iot_removexattr_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name) + const char *name, dict_t *xdata) { STACK_WIND (frame, iot_removexattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->removexattr, loc, name); + FIRST_CHILD (this)->fops->removexattr, loc, name, xdata); return 0; } int iot_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name) + const char *name, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; stub = fop_removexattr_stub (frame, iot_removexattr_wrapper, loc, - name); + name, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR,"cannot get removexattr fop" "(out of memory)"); @@ -1819,7 +1913,54 @@ iot_removexattr (call_frame_t *frame, xlator_t *this, loc_t 
*loc, ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (removexattr, frame, -1, -ret); + STACK_UNWIND_STRICT (removexattr, frame, -1, -ret, NULL); + + if (stub != NULL) { + call_stub_destroy (stub); + } + } + return 0; +} + +int +iot_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata); + return 0; +} + + +int +iot_fremovexattr_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + STACK_WIND (frame, iot_fremovexattr_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->fremovexattr, fd, name, xdata); + return 0; +} + + +int +iot_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + call_stub_t *stub = NULL; + int ret = -1; + + stub = fop_fremovexattr_stub (frame, iot_fremovexattr_wrapper, fd, + name, xdata); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR,"cannot get fremovexattr fop" + "(out of memory)"); + ret = -ENOMEM; + goto out; + } + + ret = iot_schedule (frame, this, stub); +out: + if (ret < 0) { + STACK_UNWIND_STRICT (fremovexattr, frame, -1, -ret, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1831,32 +1972,33 @@ out: int iot_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *entries) + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) { - STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries); + STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata); return 0; } int iot_readdirp_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, - size_t size, off_t offset) + size_t size, off_t offset, dict_t *xdata) { STACK_WIND (frame, iot_readdirp_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->readdirp, fd, size, offset); + FIRST_CHILD (this)->fops->readdirp, fd, size, offset, 
xdata); return 0; } int iot_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset) + off_t offset, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; stub = fop_readdirp_stub (frame, iot_readdirp_wrapper, fd, size, - offset); + offset, xdata); if (!stub) { gf_log (this->private, GF_LOG_ERROR,"cannot get readdir stub" "(out of memory)"); @@ -1867,7 +2009,7 @@ iot_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (readdirp, frame, -1, -ret, NULL); + STACK_UNWIND_STRICT (readdirp, frame, -1, -ret, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1879,31 +2021,33 @@ out: int iot_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *entries) + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) { - STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries); + STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata); return 0; } int iot_readdir_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, - size_t size, off_t offset) + size_t size, off_t offset, dict_t *xdata) { STACK_WIND (frame, iot_readdir_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->readdir, fd, size, offset); + FIRST_CHILD (this)->fops->readdir, fd, size, offset, xdata); return 0; } int iot_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset) + off_t offset, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; - stub = fop_readdir_stub (frame, iot_readdir_wrapper, fd, size, offset); + stub = fop_readdir_stub (frame, iot_readdir_wrapper, fd, size, offset, + xdata); if (!stub) { gf_log (this->private, GF_LOG_ERROR,"cannot get readdir stub" "(out of memory)"); @@ -1914,7 +2058,7 @@ iot_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - 
STACK_UNWIND_STRICT (readdir, frame, -1, -ret, NULL); + STACK_UNWIND_STRICT (readdir, frame, -1, -ret, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1924,33 +2068,36 @@ out: } int -iot_inodelk_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, int32_t op_errno) +iot_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata); return 0; } int iot_inodelk_wrapper (call_frame_t *frame, xlator_t *this, const char *volume, - loc_t *loc, int32_t cmd, struct gf_flock *lock) + loc_t *loc, int32_t cmd, struct gf_flock *lock, + dict_t *xdata) { STACK_WIND (frame, iot_inodelk_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->inodelk, volume, loc, cmd, lock); + FIRST_CHILD (this)->fops->inodelk, volume, loc, cmd, lock, + xdata); return 0; } int iot_inodelk (call_frame_t *frame, xlator_t *this, - const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock) + const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock, + dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; stub = fop_inodelk_stub (frame, iot_inodelk_wrapper, - volume, loc, cmd, lock); + volume, loc, cmd, lock, xdata); if (!stub) { ret = -ENOMEM; goto out; @@ -1959,7 +2106,7 @@ iot_inodelk (call_frame_t *frame, xlator_t *this, ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (inodelk, frame, -1, -ret); + STACK_UNWIND_STRICT (inodelk, frame, -1, -ret, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -1970,9 +2117,9 @@ out: int iot_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata); return 0; } @@ -1980,23 +2127,25 @@ 
iot_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int iot_finodelk_wrapper (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, int32_t cmd, - struct gf_flock *lock) + struct gf_flock *lock, dict_t *xdata) { STACK_WIND (frame, iot_finodelk_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->finodelk, volume, fd, cmd, lock); + FIRST_CHILD (this)->fops->finodelk, volume, fd, cmd, lock, + xdata); return 0; } int iot_finodelk (call_frame_t *frame, xlator_t *this, - const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock) + const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock, + dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; stub = fop_finodelk_stub (frame, iot_finodelk_wrapper, - volume, fd, cmd, lock); + volume, fd, cmd, lock, xdata); if (!stub) { gf_log (this->private, GF_LOG_ERROR,"cannot get finodelk stub" "(out of memory)"); @@ -2007,7 +2156,7 @@ iot_finodelk (call_frame_t *frame, xlator_t *this, ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (finodelk, frame, -1, -ret); + STACK_UNWIND_STRICT (finodelk, frame, -1, -ret, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -2018,9 +2167,9 @@ out: int iot_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata); return 0; } @@ -2028,11 +2177,11 @@ iot_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int iot_entrylk_wrapper (call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type) + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { STACK_WIND (frame, iot_entrylk_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->entrylk, - volume, loc, basename, cmd, type); + volume, loc, basename, cmd, type, xdata); return 0; } 
@@ -2040,13 +2189,13 @@ iot_entrylk_wrapper (call_frame_t *frame, xlator_t *this, int iot_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, const char *basename, - entrylk_cmd cmd, entrylk_type type) + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; stub = fop_entrylk_stub (frame, iot_entrylk_wrapper, - volume, loc, basename, cmd, type); + volume, loc, basename, cmd, type, xdata); if (!stub) { gf_log (this->private, GF_LOG_ERROR,"cannot get entrylk stub" "(out of memory)"); @@ -2057,7 +2206,7 @@ iot_entrylk (call_frame_t *frame, xlator_t *this, ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (entrylk, frame, -1, -ret); + STACK_UNWIND_STRICT (entrylk, frame, -1, -ret, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -2068,9 +2217,9 @@ out: int iot_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, xdata); return 0; } @@ -2078,11 +2227,11 @@ iot_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int iot_fentrylk_wrapper (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, const char *basename, - entrylk_cmd cmd, entrylk_type type) + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { STACK_WIND (frame, iot_fentrylk_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->fentrylk, - volume, fd, basename, cmd, type); + volume, fd, basename, cmd, type, xdata); return 0; } @@ -2090,13 +2239,13 @@ iot_fentrylk_wrapper (call_frame_t *frame, xlator_t *this, int iot_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, const char *basename, - entrylk_cmd cmd, entrylk_type type) + entrylk_cmd cmd, entrylk_type type, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; stub = fop_fentrylk_stub 
(frame, iot_fentrylk_wrapper, - volume, fd, basename, cmd, type); + volume, fd, basename, cmd, type, xdata); if (!stub) { gf_log (this->private, GF_LOG_ERROR,"cannot get fentrylk stub" "(out of memory)"); @@ -2107,7 +2256,7 @@ iot_fentrylk (call_frame_t *frame, xlator_t *this, ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (fentrylk, frame, -1, -ret); + STACK_UNWIND_STRICT (fentrylk, frame, -1, -ret, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -2119,32 +2268,32 @@ out: int iot_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xattr) + int32_t op_ret, int32_t op_errno, dict_t *xattr, dict_t *xdata) { - STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr); + STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, xattr, xdata); return 0; } int iot_xattrop_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, - gf_xattrop_flags_t optype, dict_t *xattr) + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { STACK_WIND (frame, iot_xattrop_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->xattrop, loc, optype, xattr); + FIRST_CHILD (this)->fops->xattrop, loc, optype, xattr, xdata); return 0; } int iot_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, - gf_xattrop_flags_t optype, dict_t *xattr) + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; stub = fop_xattrop_stub (frame, iot_xattrop_wrapper, loc, optype, - xattr); + xattr, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create xattrop stub" "(out of memory)"); @@ -2155,7 +2304,7 @@ iot_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (xattrop, frame, -1, -ret, NULL); + STACK_UNWIND_STRICT (xattrop, frame, -1, -ret, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); @@ -2167,31 +2316,31 @@ out: int iot_fxattrop_cbk 
(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xattr) + int32_t op_ret, int32_t op_errno, dict_t *xattr, dict_t *xdata) { - STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, xattr); + STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, xattr, xdata); return 0; } int iot_fxattrop_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, - gf_xattrop_flags_t optype, dict_t *xattr) + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { STACK_WIND (frame, iot_fxattrop_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fxattrop, fd, optype, xattr); + FIRST_CHILD (this)->fops->fxattrop, fd, optype, xattr, xdata); return 0; } int iot_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd, - gf_xattrop_flags_t optype, dict_t *xattr) + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; stub = fop_fxattrop_stub (frame, iot_fxattrop_wrapper, fd, optype, - xattr); + xattr, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create fxattrop stub" "(out of memory)"); @@ -2202,7 +2351,7 @@ iot_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd, ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (fxattrop, frame, -1, -ret, NULL); + STACK_UNWIND_STRICT (fxattrop, frame, -1, -ret, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); } @@ -2214,33 +2363,33 @@ out: int32_t iot_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, uint32_t weak_checksum, - uint8_t *strong_checksum) + uint8_t *strong_checksum, dict_t *xdata) { STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, weak_checksum, - strong_checksum); + strong_checksum, xdata); return 0; } int32_t iot_rchecksum_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd, - off_t offset, int32_t len) + off_t offset, int32_t len, dict_t *xdata) { STACK_WIND (frame, iot_rchecksum_cbk, FIRST_CHILD(this), - 
FIRST_CHILD(this)->fops->rchecksum, fd, offset, len); + FIRST_CHILD(this)->fops->rchecksum, fd, offset, len, xdata); return 0; } int32_t iot_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, - int32_t len) + int32_t len, dict_t *xdata) { call_stub_t *stub = NULL; int ret = -1; stub = fop_rchecksum_stub (frame, iot_rchecksum_wrapper, fd, offset, - len); + len, xdata); if (!stub) { gf_log (this->name, GF_LOG_ERROR, "cannot create rchecksum stub" "(out of memory)"); @@ -2251,7 +2400,7 @@ iot_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, ret = iot_schedule (frame, this, stub); out: if (ret < 0) { - STACK_UNWIND_STRICT (rchecksum, frame, -1, -ret, -1, NULL); + STACK_UNWIND_STRICT (rchecksum, frame, -1, -ret, -1, NULL, NULL); if (stub != NULL) { call_stub_destroy (stub); } @@ -2260,24 +2409,172 @@ out: return 0; } +int +iot_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preop, struct iatt *postop, dict_t *xdata) +{ + STACK_UNWIND_STRICT (fallocate, frame, op_ret, op_errno, preop, postop, + xdata); + return 0; +} + + +int +iot_fallocate_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + STACK_WIND (frame, iot_fallocate_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->fallocate, fd, mode, offset, len, + xdata); + return 0; +} + + +int +iot_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + call_stub_t *stub = NULL; + int ret = -1; + + stub = fop_fallocate_stub(frame, iot_fallocate_wrapper, fd, mode, offset, + len, xdata); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "cannot create fallocate stub" + "(out of memory)"); + ret = -ENOMEM; + goto out; + } + + ret = iot_schedule (frame, this, stub); + +out: + if (ret < 0) { + STACK_UNWIND_STRICT (fallocate, frame, -1, -ret, NULL, NULL, + NULL); + if (stub != NULL) { + 
call_stub_destroy (stub); + } + } + return 0; +} + +int +iot_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preop, struct iatt *postop, dict_t *xdata) +{ + STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, preop, postop, + xdata); + return 0; +} + + +int +iot_discard_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + STACK_WIND (frame, iot_discard_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->discard, fd, offset, len, xdata); + return 0; +} + + +int +iot_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + call_stub_t *stub = NULL; + int ret = -1; + + stub = fop_discard_stub(frame, iot_discard_wrapper, fd, offset, len, + xdata); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "cannot create discard stub" + "(out of memory)"); + ret = -ENOMEM; + goto out; + } + + ret = iot_schedule (frame, this, stub); + +out: + if (ret < 0) { + STACK_UNWIND_STRICT (discard, frame, -1, -ret, NULL, NULL, + NULL); + if (stub != NULL) { + call_stub_destroy (stub); + } + } + return 0; +} + +int +iot_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preop, struct iatt *postop, dict_t *xdata) +{ + STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, preop, postop, + xdata); + return 0; +} + +int +iot_zerofill_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, + off_t offset, size_t len, dict_t *xdata) +{ + STACK_WIND (frame, iot_zerofill_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->zerofill, fd, offset, len, xdata); + return 0; +} + +int +iot_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + call_stub_t *stub = NULL; + int ret = -1; + + stub = fop_zerofill_stub(frame, iot_zerofill_wrapper, fd, + offset, len, xdata); + if (!stub) { + gf_log (this->name, GF_LOG_ERROR, "cannot 
create zerofill stub" + "(out of memory)"); + ret = -ENOMEM; + goto out; + } + + ret = iot_schedule (frame, this, stub); + +out: + if (ret < 0) { + STACK_UNWIND_STRICT (zerofill, frame, -1, -ret, NULL, NULL, + NULL); + if (stub != NULL) { + call_stub_destroy (stub); + } + } + return 0; +} + int __iot_workers_scale (iot_conf_t *conf) { - int log2 = 0; int scale = 0; int diff = 0; pthread_t thread; int ret = 0; + int i = 0; - log2 = log_base2 (conf->queue_size); - - scale = log2; + for (i = 0; i < IOT_PRI_MAX; i++) + scale += min (conf->queue_sizes[i], conf->ac_iot_limit[i]); - if (log2 < IOT_MIN_THREADS) + if (scale < IOT_MIN_THREADS) scale = IOT_MIN_THREADS; - if (log2 > conf->max_count) + if (scale > conf->max_count) scale = conf->max_count; if (conf->curr_count < scale) { @@ -2287,7 +2584,7 @@ __iot_workers_scale (iot_conf_t *conf) while (diff) { diff --; - ret = pthread_create (&thread, &conf->w_attr, iot_worker, conf); + ret = gf_thread_create (&thread, &conf->w_attr, iot_worker, conf); if (ret == 0) { conf->curr_count++; gf_log (conf->this->name, GF_LOG_DEBUG, @@ -2328,13 +2625,24 @@ set_stack_size (iot_conf_t *conf) { int err = 0; size_t stacksize = IOT_THREAD_STACK_SIZE; + xlator_t *this = NULL; + + this = THIS; pthread_attr_init (&conf->w_attr); err = pthread_attr_setstacksize (&conf->w_attr, stacksize); if (err == EINVAL) { - gf_log (conf->this->name, GF_LOG_WARNING, - "Using default thread stack size"); + err = pthread_attr_getstacksize (&conf->w_attr, &stacksize); + if (!err) + gf_log (this->name, GF_LOG_WARNING, + "Using default thread stack size %zd", + stacksize); + else + gf_log (this->name, GF_LOG_WARNING, + "Using default thread stack size"); } + + conf->stack_size = stacksize; } @@ -2357,6 +2665,44 @@ mem_acct_init (xlator_t *this) return ret; } +int +iot_priv_dump (xlator_t *this) +{ + iot_conf_t *conf = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + + if (!this) + return 0; + + conf = this->private; + if (!conf) + return 0; + + snprintf 
(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, + this->name); + + gf_proc_dump_add_section(key_prefix); + + gf_proc_dump_write("maximum_threads_count", "%d", conf->max_count); + gf_proc_dump_write("current_threads_count", "%d", conf->curr_count); + gf_proc_dump_write("sleep_count", "%d", conf->sleep_count); + gf_proc_dump_write("idle_time", "%d", conf->idle_time); + gf_proc_dump_write("stack_size", "%zd", conf->stack_size); + gf_proc_dump_write("high_priority_threads", "%d", + conf->ac_iot_limit[IOT_PRI_HI]); + gf_proc_dump_write("normal_priority_threads", "%d", + conf->ac_iot_limit[IOT_PRI_NORMAL]); + gf_proc_dump_write("low_priority_threads", "%d", + conf->ac_iot_limit[IOT_PRI_LO]); + gf_proc_dump_write("least_priority_threads", "%d", + conf->ac_iot_limit[IOT_PRI_LEAST]); + + gf_proc_dump_write("cached least rate", "%u", + conf->throttle.cached_rate); + gf_proc_dump_write("least rate limit", "%u", conf->throttle.rate_limit); + + return 0; +} int reconfigure (xlator_t *this, dict_t *options) @@ -2383,6 +2729,11 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("least-prio-threads", conf->ac_iot_limit[IOT_PRI_LEAST], options, int32, out); + GF_OPTION_RECONF ("enable-least-priority", conf->least_priority, + options, bool, out); + + GF_OPTION_RECONF("least-rate-limit", conf->throttle.rate_limit, options, + int32, out); ret = 0; out: @@ -2393,9 +2744,9 @@ out: int init (xlator_t *this) { - iot_conf_t *conf = NULL; - int ret = -1; - int i = 0; + iot_conf_t *conf = NULL; + int ret = -1; + int i = 0; if (!this->children || this->children->next) { gf_log ("io-threads", GF_LOG_ERROR, @@ -2445,6 +2796,16 @@ init (xlator_t *this) conf->ac_iot_limit[IOT_PRI_LEAST], int32, out); GF_OPTION_INIT ("idle-time", conf->idle_time, int32, out); + GF_OPTION_INIT ("enable-least-priority", conf->least_priority, + bool, out); + + GF_OPTION_INIT("least-rate-limit", conf->throttle.rate_limit, int32, + out); + if ((ret = pthread_mutex_init(&conf->throttle.lock, 
NULL)) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "pthread_mutex_init failed (%d)", ret); + goto out; + } conf->this = this; @@ -2457,13 +2818,15 @@ init (xlator_t *this) if (ret == -1) { gf_log (this->name, GF_LOG_ERROR, "cannot initialize worker threads, exiting init"); - GF_FREE (conf); goto out; } this->private = conf; ret = 0; out: + if (ret) + GF_FREE (conf); + return ret; } @@ -2479,6 +2842,9 @@ fini (xlator_t *this) return; } +struct xlator_dumpops dumpops = { + .priv = iot_priv_dump, +}; struct xlator_fops fops = { .open = iot_open, @@ -2512,6 +2878,7 @@ struct xlator_fops fops = { .fgetxattr = iot_fgetxattr, .fsetxattr = iot_fsetxattr, .removexattr = iot_removexattr, + .fremovexattr = iot_fremovexattr, .readdir = iot_readdir, .readdirp = iot_readdirp, .inodelk = iot_inodelk, @@ -2521,10 +2888,12 @@ struct xlator_fops fops = { .xattrop = iot_xattrop, .fxattrop = iot_fxattrop, .rchecksum = iot_rchecksum, + .fallocate = iot_fallocate, + .discard = iot_discard, + .zerofill = iot_zerofill, }; -struct xlator_cbks cbks = { -}; +struct xlator_cbks cbks; struct volume_options options[] = { { .key = {"thread-count"}, @@ -2571,12 +2940,25 @@ struct volume_options options[] = { .description = "Max number of threads in IO threads translator which " "perform least priority IO operations at a given time" }, + { .key = {"enable-least-priority"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "on", + .description = "Enable/Disable least priority" + }, {.key = {"idle-time"}, .type = GF_OPTION_TYPE_INT, .min = 1, .max = 0x7fffffff, .default_value = "120", }, + {.key = {"least-rate-limit"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = INT_MAX, + .default_value = "0", + .description = "Max number of least priority operations to handle " + "per-second" + }, { .key = {NULL}, }, }; diff --git a/xlators/performance/io-threads/src/io-threads.h b/xlators/performance/io-threads/src/io-threads.h index d09fec94d..1a9dee9ae 100644 --- 
a/xlators/performance/io-threads/src/io-threads.h +++ b/xlators/performance/io-threads/src/io-threads.h @@ -1,20 +1,11 @@ /* - Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ #ifndef __IOT_H @@ -37,6 +28,7 @@ #include "locking.h" #include "iot-mem-types.h" #include <semaphore.h> +#include "statedump.h" struct iot_conf; @@ -61,6 +53,14 @@ typedef enum { IOT_PRI_MAX, } iot_pri_t; +#define IOT_LEAST_THROTTLE_DELAY 1 /* sample interval in seconds */ +struct iot_least_throttle { + struct timeval sample_time; /* timestamp of current sample */ + uint32_t sample_cnt; /* sample count for active interval */ + uint32_t cached_rate; /* the most recently measured rate */ + int32_t rate_limit; /* user-specified rate limit */ + pthread_mutex_t lock; +}; struct iot_conf { pthread_mutex_t mutex; @@ -76,10 +76,15 @@ struct iot_conf { int32_t ac_iot_limit[IOT_PRI_MAX]; int32_t ac_iot_count[IOT_PRI_MAX]; + int queue_sizes[IOT_PRI_MAX]; int queue_size; pthread_attr_t w_attr; + gf_boolean_t least_priority; /*Enable/Disable least-priority */ xlator_t *this; + size_t stack_size; + + struct iot_least_throttle throttle; }; typedef struct iot_conf iot_conf_t; diff --git a/xlators/performance/io-threads/src/iot-mem-types.h b/xlators/performance/io-threads/src/iot-mem-types.h index 14400cd08..4fa8302d1 100644 --- a/xlators/performance/io-threads/src/iot-mem-types.h +++ b/xlators/performance/io-threads/src/iot-mem-types.h @@ -1,20 +1,11 @@ /* - Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ diff --git a/xlators/performance/stat-prefetch/Makefile.am b/xlators/performance/md-cache/Makefile.am index af437a64d..af437a64d 100644 --- a/xlators/performance/stat-prefetch/Makefile.am +++ b/xlators/performance/md-cache/Makefile.am diff --git a/xlators/performance/md-cache/src/Makefile.am b/xlators/performance/md-cache/src/Makefile.am new file mode 100644 index 000000000..8c9f5a858 --- /dev/null +++ b/xlators/performance/md-cache/src/Makefile.am @@ -0,0 +1,25 @@ +xlator_LTLIBRARIES = md-cache.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance + +md_cache_la_LDFLAGS = -module -avoid-version + +md_cache_la_SOURCES = md-cache.c +md_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = md-cache-mem-types.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(CONTRIBDIR)/rbtree + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = + + +stat-prefetch-compat: + mkdir -p $(DESTDIR)$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance + rm -rf $(DESTDIR)$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance/stat-prefetch.so + ln -s ./md-cache.so $(DESTDIR)$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance/stat-prefetch.so + + +install-exec-local: stat-prefetch-compat diff --git a/xlators/performance/md-cache/src/md-cache-mem-types.h b/xlators/performance/md-cache/src/md-cache-mem-types.h new file mode 100644 index 000000000..6634cf962 --- /dev/null +++ b/xlators/performance/md-cache/src/md-cache-mem-types.h @@ -0,0 +1,24 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. 
<http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + + +#ifndef __MDC_MEM_TYPES_H__ +#define __MDC_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_mdc_mem_types_ { + gf_mdc_mt_mdc_local_t = gf_common_mt_end + 1, + gf_mdc_mt_md_cache_t, + gf_mdc_mt_mdc_conf_t, + gf_mdc_mt_end +}; +#endif + diff --git a/xlators/performance/md-cache/src/md-cache.c b/xlators/performance/md-cache/src/md-cache.c new file mode 100644 index 000000000..84c363ad9 --- /dev/null +++ b/xlators/performance/md-cache/src/md-cache.c @@ -0,0 +1,2303 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "md-cache-mem-types.h" +#include "glusterfs-acl.h" +#include <assert.h> +#include <sys/time.h> + + +/* TODO: + - cache symlink() link names and nuke symlink-cache + - send proper postbuf in setattr_cbk even when op_ret = -1 +*/ + + +struct mdc_conf { + int timeout; + gf_boolean_t cache_posix_acl; + gf_boolean_t cache_selinux; + gf_boolean_t force_readdirp; +}; + + +static struct mdc_key { + const char *name; + int load; + int check; +} mdc_keys[] = { + { + .name = POSIX_ACL_ACCESS_XATTR, + .load = 0, + .check = 1, + }, + { + .name = POSIX_ACL_DEFAULT_XATTR, + .load = 0, + .check = 1, + }, + { + .name = GF_SELINUX_XATTR_KEY, + .load = 0, + .check = 1, + }, + { + .name = "security.capability", + .load = 0, + .check = 1, + }, + { + .name = "gfid-req", + .load = 0, + .check = 1, + }, + { + .name = NULL, + .load = 0, + .check = 0, + } +}; + + +static uint64_t +gfid_to_ino (uuid_t gfid) +{ + uint64_t ino = 0; + int i = 0, j = 0; + + for (i = 15; i > (15 - 8); i--) { + ino += (uint64_t)(gfid[i]) << j; + j += 8; + } + + return ino; +} + + +struct mdc_local; +typedef struct mdc_local mdc_local_t; + +#define MDC_STACK_UNWIND(fop, frame, params ...) 
do { \ + mdc_local_t *__local = NULL; \ + xlator_t *__xl = NULL; \ + if (frame) { \ + __xl = frame->this; \ + __local = frame->local; \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT (fop, frame, params); \ + mdc_local_wipe (__xl, __local); \ + } while (0) + + +struct md_cache { + ia_prot_t md_prot; + uint32_t md_nlink; + uint32_t md_uid; + uint32_t md_gid; + uint32_t md_atime; + uint32_t md_atime_nsec; + uint32_t md_mtime; + uint32_t md_mtime_nsec; + uint32_t md_ctime; + uint32_t md_ctime_nsec; + uint64_t md_rdev; + uint64_t md_size; + uint64_t md_blocks; + dict_t *xattr; + char *linkname; + time_t ia_time; + time_t xa_time; + gf_lock_t lock; +}; + + +struct mdc_local { + loc_t loc; + loc_t loc2; + fd_t *fd; + char *linkname; + char *key; + dict_t *xattr; +}; + + +int +__mdc_inode_ctx_get (xlator_t *this, inode_t *inode, struct md_cache **mdc_p) +{ + int ret = 0; + struct md_cache *mdc = NULL; + uint64_t mdc_int = 0; + + ret = __inode_ctx_get (inode, this, &mdc_int); + mdc = (void *) (long) (mdc_int); + if (ret == 0 && mdc_p) + *mdc_p = mdc; + + return ret; +} + + +int +mdc_inode_ctx_get (xlator_t *this, inode_t *inode, struct md_cache **mdc_p) +{ + int ret; + + LOCK(&inode->lock); + { + ret = __mdc_inode_ctx_get (this, inode, mdc_p); + } + UNLOCK(&inode->lock); + + return ret; +} + + +int +__mdc_inode_ctx_set (xlator_t *this, inode_t *inode, struct md_cache *mdc) +{ + int ret = 0; + uint64_t mdc_int = 0; + + mdc_int = (long) mdc; + ret = __inode_ctx_set (inode, this, &mdc_int); + + return ret; +} + + +int +mdc_inode_ctx_set (xlator_t *this, inode_t *inode, struct md_cache *mdc) +{ + int ret; + + LOCK(&inode->lock); + { + ret = __mdc_inode_ctx_set (this, inode, mdc); + } + UNLOCK(&inode->lock); + + return ret; +} + + +mdc_local_t * +mdc_local_get (call_frame_t *frame) +{ + mdc_local_t *local = NULL; + + local = frame->local; + if (local) + goto out; + + local = GF_CALLOC (sizeof (*local), 1, gf_mdc_mt_mdc_local_t); + if (!local) + goto out; + + frame->local 
= local; +out: + return local; +} + + +void +mdc_local_wipe (xlator_t *this, mdc_local_t *local) +{ + if (!local) + return; + + loc_wipe (&local->loc); + + loc_wipe (&local->loc2); + + if (local->fd) + fd_unref (local->fd); + + GF_FREE (local->linkname); + + GF_FREE (local->key); + + if (local->xattr) + dict_unref (local->xattr); + + GF_FREE (local); + return; +} + + +int +mdc_inode_wipe (xlator_t *this, inode_t *inode) +{ + int ret = 0; + uint64_t mdc_int = 0; + struct md_cache *mdc = NULL; + + ret = inode_ctx_del (inode, this, &mdc_int); + if (ret != 0) + goto out; + + mdc = (void *) (long) mdc_int; + + if (mdc->xattr) + dict_unref (mdc->xattr); + + GF_FREE (mdc->linkname); + + GF_FREE (mdc); + + ret = 0; +out: + return ret; +} + + +struct md_cache * +mdc_inode_prep (xlator_t *this, inode_t *inode) +{ + int ret = 0; + struct md_cache *mdc = NULL; + + LOCK (&inode->lock); + { + ret = __mdc_inode_ctx_get (this, inode, &mdc); + if (ret == 0) + goto unlock; + + mdc = GF_CALLOC (sizeof (*mdc), 1, gf_mdc_mt_md_cache_t); + if (!mdc) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + goto unlock; + } + + LOCK_INIT (&mdc->lock); + + ret = __mdc_inode_ctx_set (this, inode, mdc); + if (ret) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory :("); + GF_FREE (mdc); + mdc = NULL; + } + } +unlock: + UNLOCK (&inode->lock); + + return mdc; +} + + +static gf_boolean_t +is_md_cache_iatt_valid (xlator_t *this, struct md_cache *mdc) +{ + struct mdc_conf *conf = NULL; + time_t now = 0; + gf_boolean_t ret = _gf_true; + conf = this->private; + + time (&now); + + LOCK (&mdc->lock); + { + if (now >= (mdc->ia_time + conf->timeout)) + ret = _gf_false; + } + UNLOCK (&mdc->lock); + + return ret; +} + + +static gf_boolean_t +is_md_cache_xatt_valid (xlator_t *this, struct md_cache *mdc) +{ + struct mdc_conf *conf = NULL; + time_t now = 0; + gf_boolean_t ret = _gf_true; + + conf = this->private; + + time (&now); + + LOCK (&mdc->lock); + { + if (now >= (mdc->xa_time + 
conf->timeout)) + ret = _gf_false; + } + UNLOCK (&mdc->lock); + + return ret; +} + + +void +mdc_from_iatt (struct md_cache *mdc, struct iatt *iatt) +{ + mdc->md_prot = iatt->ia_prot; + mdc->md_nlink = iatt->ia_nlink; + mdc->md_uid = iatt->ia_uid; + mdc->md_gid = iatt->ia_gid; + mdc->md_atime = iatt->ia_atime; + mdc->md_atime_nsec = iatt->ia_atime_nsec; + mdc->md_mtime = iatt->ia_mtime; + mdc->md_mtime_nsec = iatt->ia_mtime_nsec; + mdc->md_ctime = iatt->ia_ctime; + mdc->md_ctime_nsec = iatt->ia_ctime_nsec; + mdc->md_rdev = iatt->ia_rdev; + mdc->md_size = iatt->ia_size; + mdc->md_blocks = iatt->ia_blocks; +} + + +void +mdc_to_iatt (struct md_cache *mdc, struct iatt *iatt) +{ + iatt->ia_prot = mdc->md_prot; + iatt->ia_nlink = mdc->md_nlink; + iatt->ia_uid = mdc->md_uid; + iatt->ia_gid = mdc->md_gid; + iatt->ia_atime = mdc->md_atime; + iatt->ia_atime_nsec = mdc->md_atime_nsec; + iatt->ia_mtime = mdc->md_mtime; + iatt->ia_mtime_nsec = mdc->md_mtime_nsec; + iatt->ia_ctime = mdc->md_ctime; + iatt->ia_ctime_nsec = mdc->md_ctime_nsec; + iatt->ia_rdev = mdc->md_rdev; + iatt->ia_size = mdc->md_size; + iatt->ia_blocks = mdc->md_blocks; +} + + +int +mdc_inode_iatt_set_validate(xlator_t *this, inode_t *inode, struct iatt *prebuf, + struct iatt *iatt) +{ + int ret = -1; + struct md_cache *mdc = NULL; + + mdc = mdc_inode_prep (this, inode); + if (!mdc) + goto out; + + LOCK (&mdc->lock); + { + if (!iatt || !iatt->ia_ctime) { + mdc->ia_time = 0; + goto unlock; + } + + /* + * Invalidate the inode if the mtime or ctime has changed + * and the prebuf doesn't match the value we have cached. + * TODO: writev returns with a NULL iatt due to + * performance/write-behind, causing invalidation on writes. 
+ */ + if (IA_ISREG(inode->ia_type) && + ((iatt->ia_mtime != mdc->md_mtime) || + (iatt->ia_ctime != mdc->md_ctime))) + if (!prebuf || (prebuf->ia_ctime != mdc->md_ctime) || + (prebuf->ia_mtime != mdc->md_mtime)) + inode_invalidate(inode); + + mdc_from_iatt (mdc, iatt); + + time (&mdc->ia_time); + } +unlock: + UNLOCK (&mdc->lock); + ret = 0; +out: + return ret; +} + +int mdc_inode_iatt_set(xlator_t *this, inode_t *inode, struct iatt *iatt) +{ + return mdc_inode_iatt_set_validate(this, inode, NULL, iatt); +} + +int +mdc_inode_iatt_get (xlator_t *this, inode_t *inode, struct iatt *iatt) +{ + int ret = -1; + struct md_cache *mdc = NULL; + + if (mdc_inode_ctx_get (this, inode, &mdc) != 0) + goto out; + + if (!is_md_cache_iatt_valid (this, mdc)) + goto out; + + LOCK (&mdc->lock); + { + mdc_to_iatt (mdc, iatt); + } + UNLOCK (&mdc->lock); + + uuid_copy (iatt->ia_gfid, inode->gfid); + iatt->ia_ino = gfid_to_ino (inode->gfid); + iatt->ia_dev = 42; + iatt->ia_type = inode->ia_type; + + ret = 0; +out: + return ret; +} + +struct updatedict { + dict_t *dict; + int ret; +}; + +static int +updatefn(dict_t *dict, char *key, data_t *value, void *data) +{ + struct updatedict *u = data; + const char *mdc_key; + int i = 0; + + for (mdc_key = mdc_keys[i].name; (mdc_key = mdc_keys[i].name); i++) { + if (!mdc_keys[i].check) + continue; + if (strcmp(mdc_key, key)) + continue; + + if (!u->dict) { + u->dict = dict_new(); + if (!u->dict) { + u->ret = -1; + return -1; + } + } + + if (dict_set(u->dict, key, value) < 0) { + u->ret = -1; + return -1; + } + + break; + } + return 0; +} + +static int +mdc_dict_update(dict_t **tgt, dict_t *src) +{ + struct updatedict u = { + .dict = *tgt, + .ret = 0, + }; + + dict_foreach(src, updatefn, &u); + + if (*tgt) + return u.ret; + + if ((u.ret < 0) && u.dict) { + dict_unref(u.dict); + return u.ret; + } + + *tgt = u.dict; + + return u.ret; +} + +int +mdc_inode_xatt_set (xlator_t *this, inode_t *inode, dict_t *dict) +{ + int ret = -1; + struct md_cache *mdc = 
NULL; + dict_t *newdict = NULL; + + mdc = mdc_inode_prep (this, inode); + if (!mdc) + goto out; + + if (!dict) + goto out; + + LOCK (&mdc->lock); + { + if (mdc->xattr) { + dict_unref (mdc->xattr); + mdc->xattr = NULL; + } + + ret = mdc_dict_update(&newdict, dict); + if (ret < 0) { + UNLOCK(&mdc->lock); + goto out; + } + + if (newdict) + mdc->xattr = newdict; + + time (&mdc->xa_time); + } + UNLOCK (&mdc->lock); + ret = 0; +out: + return ret; +} + + +int +mdc_inode_xatt_update (xlator_t *this, inode_t *inode, dict_t *dict) +{ + int ret = -1; + struct md_cache *mdc = NULL; + + mdc = mdc_inode_prep (this, inode); + if (!mdc) + goto out; + + if (!dict) + goto out; + + LOCK (&mdc->lock); + { + ret = mdc_dict_update(&mdc->xattr, dict); + if (ret < 0) { + UNLOCK(&mdc->lock); + goto out; + } + + time (&mdc->xa_time); + } + UNLOCK (&mdc->lock); + + ret = 0; +out: + return ret; +} + + +int +mdc_inode_xatt_unset (xlator_t *this, inode_t *inode, char *name) +{ + int ret = -1; + struct md_cache *mdc = NULL; + + mdc = mdc_inode_prep (this, inode); + if (!mdc) + goto out; + + if (!name) + goto out; + + LOCK (&mdc->lock); + { + dict_del (mdc->xattr, name); + } + UNLOCK (&mdc->lock); + + ret = 0; +out: + return ret; +} + + +int +mdc_inode_xatt_get (xlator_t *this, inode_t *inode, dict_t **dict) +{ + int ret = -1; + struct md_cache *mdc = NULL; + + if (mdc_inode_ctx_get (this, inode, &mdc) != 0) + goto out; + + if (!is_md_cache_xatt_valid (this, mdc)) + goto out; + + LOCK (&mdc->lock); + { + ret = 0; + /* Missing xattr only means no keys were there, i.e + a negative cache for the "loaded" keys + */ + if (!mdc->xattr) + goto unlock; + + if (dict) + *dict = dict_ref (mdc->xattr); + } +unlock: + UNLOCK (&mdc->lock); + +out: + return ret; +} + + +int +mdc_inode_iatt_invalidate (xlator_t *this, inode_t *inode) +{ + int ret = -1; + struct md_cache *mdc = NULL; + + if (mdc_inode_ctx_get (this, inode, &mdc) != 0) + goto out; + + LOCK (&mdc->lock); + { + mdc->ia_time = 0; + } + UNLOCK 
(&mdc->lock); + +out: + return ret; +} + + +int +mdc_inode_xatt_invalidate (xlator_t *this, inode_t *inode) +{ + int ret = -1; + struct md_cache *mdc = NULL; + + if (mdc_inode_ctx_get (this, inode, &mdc) != 0) + goto out; + + LOCK (&mdc->lock); + { + mdc->xa_time = 0; + } + UNLOCK (&mdc->lock); + +out: + return ret; +} + + +void +mdc_load_reqs (xlator_t *this, dict_t *dict) +{ + const char *mdc_key = NULL; + int i = 0; + int ret = 0; + + for (mdc_key = mdc_keys[i].name; (mdc_key = mdc_keys[i].name); i++) { + if (!mdc_keys[i].load) + continue; + ret = dict_set_int8 (dict, (char *)mdc_key, 0); + if (ret) + return; + } +} + + +struct checkpair { + int ret; + dict_t *rsp; +}; + + +static int +is_mdc_key_satisfied (const char *key) +{ + const char *mdc_key = NULL; + int i = 0; + + if (!key) + return 0; + + for (mdc_key = mdc_keys[i].name; (mdc_key = mdc_keys[i].name); i++) { + if (!mdc_keys[i].load) + continue; + if (strcmp (mdc_key, key) == 0) + return 1; + } + + return 0; +} + + +static int +checkfn (dict_t *this, char *key, data_t *value, void *data) +{ + struct checkpair *pair = data; + + if (!is_mdc_key_satisfied (key)) + pair->ret = 0; + + return 0; +} + + +int +mdc_xattr_satisfied (xlator_t *this, dict_t *req, dict_t *rsp) +{ + struct checkpair pair = { + .ret = 1, + .rsp = rsp, + }; + + dict_foreach (req, checkfn, &pair); + + return pair.ret; +} + + +int +mdc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *dict, struct iatt *postparent) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + if (local->loc.parent) { + mdc_inode_iatt_set (this, local->loc.parent, postparent); + } + + if (local->loc.inode) { + mdc_inode_iatt_set (this, local->loc.inode, stbuf); + mdc_inode_xatt_set (this, local->loc.inode, dict); + } +out: + MDC_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, + dict, 
postparent); + return 0; +} + + +int +mdc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xdata) +{ + int ret = 0; + struct iatt stbuf = {0, }; + struct iatt postparent = {0, }; + dict_t *xattr_rsp = NULL; + dict_t *xattr_alloc = NULL; + mdc_local_t *local = NULL; + + + local = mdc_local_get (frame); + if (!local) + goto uncached; + + if (!loc->name) + /* A nameless discovery is dangerous to cache. We + perform nameless lookup with the intention of + re-establishing an inode "properly" + */ + goto uncached; + + loc_copy (&local->loc, loc); + + ret = mdc_inode_iatt_get (this, loc->inode, &stbuf); + if (ret != 0) + goto uncached; + + if (xdata) { + ret = mdc_inode_xatt_get (this, loc->inode, &xattr_rsp); + if (ret != 0) + goto uncached; + + if (!mdc_xattr_satisfied (this, xdata, xattr_rsp)) + goto uncached; + } + + MDC_STACK_UNWIND (lookup, frame, 0, 0, loc->inode, &stbuf, + xattr_rsp, &postparent); + + if (xattr_rsp) + dict_unref (xattr_rsp); + + return 0; + +uncached: + if (!xdata) + xdata = xattr_alloc = dict_new (); + if (xdata) + mdc_load_reqs (this, xdata); + + STACK_WIND (frame, mdc_lookup_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->lookup, loc, xdata); + + if (xattr_rsp) + dict_unref (xattr_rsp); + if (xattr_alloc) + dict_unref (xattr_alloc); + return 0; +} + + +int +mdc_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + if (op_ret != 0) + goto out; + + local = frame->local; + if (!local) + goto out; + + mdc_inode_iatt_set (this, local->loc.inode, buf); + +out: + MDC_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata); + + return 0; +} + + +int +mdc_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + int ret; + struct iatt stbuf; + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + if (!local) + goto uncached; + + loc_copy (&local->loc, loc); + + ret = mdc_inode_iatt_get (this, 
loc->inode, &stbuf); + if (ret != 0) + goto uncached; + + MDC_STACK_UNWIND (stat, frame, 0, 0, &stbuf, xdata); + + return 0; + +uncached: + STACK_WIND (frame, mdc_stat_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat, + loc, xdata); + return 0; +} + + +int +mdc_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + dict_t *xdata) +{ + mdc_local_t *local = NULL; + + if (op_ret != 0) + goto out; + + local = frame->local; + if (!local) + goto out; + + mdc_inode_iatt_set (this, local->fd->inode, buf); + +out: + MDC_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata); + + return 0; +} + + +int +mdc_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + int ret; + struct iatt stbuf; + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + if (!local) + goto uncached; + + local->fd = fd_ref (fd); + + ret = mdc_inode_iatt_get (this, fd->inode, &stbuf); + if (ret != 0) + goto uncached; + + MDC_STACK_UNWIND (fstat, frame, 0, 0, &stbuf, xdata); + + return 0; + +uncached: + STACK_WIND (frame, mdc_fstat_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat, + fd, xdata); + return 0; +} + + +int +mdc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf); + +out: + MDC_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; +} + + +int +mdc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, + off_t offset, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + local->loc.inode = inode_ref (loc->inode); + + STACK_WIND (frame, mdc_truncate_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate, + loc, 
offset, xdata); + return 0; +} + + +int +mdc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); + +out: + MDC_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; +} + + +int +mdc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, + off_t offset, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + local->fd = fd_ref (fd); + + STACK_WIND (frame, mdc_ftruncate_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate, + fd, offset, xdata); + return 0; +} + + +int +mdc_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + if (local->loc.parent) { + mdc_inode_iatt_set (this, local->loc.parent, postparent); + } + + if (local->loc.inode) { + mdc_inode_iatt_set (this, local->loc.inode, buf); + mdc_inode_xatt_set (this, local->loc.inode, local->xattr); + } +out: + MDC_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + return 0; +} + + +int +mdc_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, + mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + loc_copy (&local->loc, loc); + local->xattr = dict_ref (xdata); + + STACK_WIND (frame, mdc_mknod_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, + loc, mode, rdev, umask, xdata); + return 0; +} + + +int +mdc_mkdir_cbk (call_frame_t *frame, void *cookie, 
xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + if (local->loc.parent) { + mdc_inode_iatt_set (this, local->loc.parent, postparent); + } + + if (local->loc.inode) { + mdc_inode_iatt_set (this, local->loc.inode, buf); + mdc_inode_xatt_set (this, local->loc.inode, local->xattr); + } +out: + MDC_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + return 0; +} + + +int +mdc_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, + mode_t mode, mode_t umask, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + loc_copy (&local->loc, loc); + local->xattr = dict_ref (xdata); + + STACK_WIND (frame, mdc_mkdir_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, + loc, mode, umask, xdata); + return 0; +} + + +int +mdc_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + if (local->loc.parent) { + mdc_inode_iatt_set (this, local->loc.parent, postparent); + } + + if (local->loc.inode) { + mdc_inode_iatt_set (this, local->loc.inode, NULL); + } + +out: + MDC_STACK_UNWIND (unlink, frame, op_ret, op_errno, + preparent, postparent, xdata); + return 0; +} + + +int +mdc_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag, + dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + loc_copy (&local->loc, loc); + + STACK_WIND (frame, mdc_unlink_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink, + loc, xflag, xdata); + return 0; +} + + +int +mdc_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t 
*this, + int32_t op_ret, int32_t op_errno, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + if (local->loc.parent) { + mdc_inode_iatt_set (this, local->loc.parent, postparent); + } + +out: + MDC_STACK_UNWIND (rmdir, frame, op_ret, op_errno, + preparent, postparent, xdata); + return 0; +} + + +int +mdc_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flag, + dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + loc_copy (&local->loc, loc); + + STACK_WIND (frame, mdc_rmdir_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir, + loc, flag, xdata); + return 0; +} + + +int +mdc_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + if (local->loc.parent) { + mdc_inode_iatt_set (this, local->loc.parent, postparent); + } + + if (local->loc.inode) { + mdc_inode_iatt_set (this, local->loc.inode, buf); + } +out: + MDC_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + return 0; +} + + +int +mdc_symlink (call_frame_t *frame, xlator_t *this, const char *linkname, + loc_t *loc, mode_t umask, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + loc_copy (&local->loc, loc); + + local->linkname = gf_strdup (linkname); + + STACK_WIND (frame, mdc_symlink_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink, + linkname, loc, umask, xdata); + return 0; +} + + +int +mdc_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt 
*prenewparent, struct iatt *postnewparent, + dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + if (local->loc.parent) { + mdc_inode_iatt_set (this, local->loc.parent, postoldparent); + } + + if (local->loc.inode) { + /* TODO: fix dht_rename() not to return linkfile + attributes before setting attributes here + */ + + mdc_inode_iatt_set (this, local->loc.inode, NULL); + } + + if (local->loc2.parent) { + mdc_inode_iatt_set (this, local->loc2.parent, postnewparent); + } +out: + MDC_STACK_UNWIND (rename, frame, op_ret, op_errno, buf, + preoldparent, postoldparent, prenewparent, + postnewparent, xdata); + return 0; +} + + +int +mdc_rename (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + loc_copy (&local->loc, oldloc); + loc_copy (&local->loc2, newloc); + + STACK_WIND (frame, mdc_rename_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename, + oldloc, newloc, xdata); + return 0; +} + + +int +mdc_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + if (local->loc.inode) { + mdc_inode_iatt_set (this, local->loc.inode, buf); + } + + if (local->loc2.parent) { + mdc_inode_iatt_set (this, local->loc2.parent, postparent); + } +out: + MDC_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + return 0; +} + + +int +mdc_link (call_frame_t *frame, xlator_t *this, + loc_t *oldloc, loc_t *newloc, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + loc_copy (&local->loc, oldloc); + loc_copy (&local->loc2, newloc); + + STACK_WIND (frame, mdc_link_cbk, + 
FIRST_CHILD(this), FIRST_CHILD(this)->fops->link, + oldloc, newloc, xdata); + return 0; +} + + +int +mdc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + if (local->loc.parent) { + mdc_inode_iatt_set (this, local->loc.parent, postparent); + } + + if (local->loc.inode) { + mdc_inode_iatt_set (this, inode, buf); + mdc_inode_xatt_set (this, local->loc.inode, local->xattr); + } +out: + MDC_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + return 0; +} + + +int +mdc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + loc_copy (&local->loc, loc); + local->xattr = dict_ref (xdata); + + STACK_WIND (frame, mdc_create_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->create, + loc, flags, mode, umask, fd, xdata); + return 0; +} + + +int +mdc_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iovec *vector, int32_t count, + struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + mdc_inode_iatt_set (this, local->fd->inode, stbuf); + +out: + MDC_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, + stbuf, iobref, xdata); + + return 0; +} + + +int +mdc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + local->fd = fd_ref (fd); + + STACK_WIND (frame, mdc_readv_cbk, + FIRST_CHILD(this), 
FIRST_CHILD(this)->fops->readv, + fd, size, offset, flags, xdata); + return 0; +} + + +int +mdc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret == -1) + goto out; + + if (!local) + goto out; + + mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); + +out: + MDC_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; +} + + +int +mdc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + local->fd = fd_ref (fd); + + STACK_WIND (frame, mdc_writev_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, + fd, vector, count, offset, flags, iobref, xdata); + return 0; +} + + +int +mdc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) { + mdc_inode_iatt_set (this, local->loc.inode, NULL); + goto out; + } + + if (!local) + goto out; + + mdc_inode_iatt_set_validate(this, local->loc.inode, prebuf, postbuf); + +out: + MDC_STACK_UNWIND (setattr, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; +} + + +int +mdc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int valid, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + loc_copy (&local->loc, loc); + + STACK_WIND (frame, mdc_setattr_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->setattr, + loc, stbuf, valid, xdata); + return 0; +} + + +int +mdc_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct 
iatt *prebuf, struct iatt *postbuf, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); + +out: + MDC_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; +} + + +int +mdc_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int valid, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + local->fd = fd_ref (fd); + + STACK_WIND (frame, mdc_fsetattr_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetattr, + fd, stbuf, valid, xdata); + return 0; +} + + +int +mdc_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); + +out: + MDC_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; +} + + +int +mdc_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, + dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + local->fd = fd_ref (fd); + + STACK_WIND (frame, mdc_fsync_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, + fd, datasync, xdata); + return 0; +} + + +int +mdc_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + mdc_inode_xatt_update (this, local->loc.inode, local->xattr); + + mdc_inode_iatt_invalidate (this, local->loc.inode); + +out: + MDC_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata); + + return 0; +} + + +int +mdc_setxattr (call_frame_t 
*frame, xlator_t *this, loc_t *loc, + dict_t *xattr, int flags, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + loc_copy (&local->loc, loc); + local->xattr = dict_ref (xattr); + + STACK_WIND (frame, mdc_setxattr_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, + loc, xattr, flags, xdata); + return 0; +} + + +int +mdc_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + mdc_inode_xatt_update (this, local->fd->inode, local->xattr); + + mdc_inode_iatt_invalidate (this, local->fd->inode); +out: + MDC_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata); + + return 0; +} + + +int +mdc_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + dict_t *xattr, int flags, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + local->fd = fd_ref (fd); + local->xattr = dict_ref (xattr); + + STACK_WIND (frame, mdc_fsetxattr_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetxattr, + fd, xattr, flags, xdata); + return 0; +} + +int +mdc_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, + dict_t *xdata) +{ + mdc_local_t *local = NULL; + + if (op_ret != 0) + goto out; + + local = frame->local; + if (!local) + goto out; + + mdc_inode_xatt_update (this, local->loc.inode, xattr); + +out: + MDC_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata); + + return 0; +} + + +int +mdc_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, + dict_t *xdata) +{ + int ret; + int op_errno = ENODATA; + mdc_local_t *local = NULL; + dict_t *xattr = NULL; + + local = mdc_local_get (frame); + if (!local) + goto uncached; + + loc_copy (&local->loc, loc); + + if (!is_mdc_key_satisfied (key)) + goto uncached; + + ret = mdc_inode_xatt_get 
(this, loc->inode, &xattr); + if (ret != 0) + goto uncached; + + if (!xattr || !dict_get (xattr, (char *)key)) { + ret = -1; + op_errno = ENODATA; + } + + MDC_STACK_UNWIND (getxattr, frame, ret, op_errno, xattr, xdata); + + return 0; + +uncached: + STACK_WIND (frame, mdc_getxattr_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->getxattr, + loc, key, xdata); + return 0; +} + + +int +mdc_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xattr, + dict_t *xdata) +{ + mdc_local_t *local = NULL; + + if (op_ret != 0) + goto out; + + local = frame->local; + if (!local) + goto out; + + mdc_inode_xatt_update (this, local->fd->inode, xattr); + +out: + MDC_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, xattr, xdata); + + return 0; +} + + +int +mdc_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key, + dict_t *xdata) +{ + int ret; + mdc_local_t *local = NULL; + dict_t *xattr = NULL; + int op_errno = ENODATA; + + local = mdc_local_get (frame); + if (!local) + goto uncached; + + local->fd = fd_ref (fd); + + if (!is_mdc_key_satisfied (key)) + goto uncached; + + ret = mdc_inode_xatt_get (this, fd->inode, &xattr); + if (ret != 0) + goto uncached; + + if (!xattr || !dict_get (xattr, (char *)key)) { + ret = -1; + op_errno = ENODATA; + } + + MDC_STACK_UNWIND (fgetxattr, frame, ret, op_errno, xattr, xdata); + + return 0; + +uncached: + STACK_WIND (frame, mdc_fgetxattr_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->fgetxattr, + fd, key, xdata); + return 0; +} + +int +mdc_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + if (local->key) + mdc_inode_xatt_unset (this, local->loc.inode, local->key); + else + mdc_inode_xatt_invalidate (this, local->loc.inode); + + mdc_inode_iatt_invalidate (this, local->loc.inode); 
+out: + MDC_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata); + + return 0; +} + + +int +mdc_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + loc_copy (&local->loc, loc); + + local->key = gf_strdup (name); + + STACK_WIND (frame, mdc_removexattr_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->removexattr, + loc, name, xdata); + return 0; +} + + +int +mdc_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + if (local->key) + mdc_inode_xatt_unset (this, local->fd->inode, local->key); + else + mdc_inode_xatt_invalidate (this, local->fd->inode); + + mdc_inode_iatt_invalidate (this, local->fd->inode); +out: + MDC_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata); + + return 0; +} + + +int +mdc_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = mdc_local_get (frame); + + local->fd = fd_ref (fd); + + local->key = gf_strdup (name); + + STACK_WIND (frame, mdc_fremovexattr_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->fremovexattr, + fd, name, xdata); + return 0; +} + + +int +mdc_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata) +{ + gf_dirent_t *entry = NULL; + + if (op_ret <= 0) + goto unwind; + + list_for_each_entry (entry, &entries->list, list) { + if (!entry->inode) + continue; + mdc_inode_iatt_set (this, entry->inode, &entry->d_stat); + mdc_inode_xatt_set (this, entry->inode, entry->dict); + } + +unwind: + STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata); + return 0; +} + + +int +mdc_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, + size_t 
size, off_t offset, dict_t *xdata) +{ + dict_t *xattr_alloc = NULL; + + if (!xdata) + xdata = xattr_alloc = dict_new (); + if (xdata) + mdc_load_reqs (this, xdata); + + STACK_WIND (frame, mdc_readdirp_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->readdirp, + fd, size, offset, xdata); + if (xattr_alloc) + dict_unref (xattr_alloc); + return 0; +} + +int +mdc_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, gf_dirent_t *entries, dict_t *xdata) +{ + STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata); + return 0; +} + +int +mdc_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, + size_t size, off_t offset, dict_t *xdata) +{ + int need_unref = 0; + struct mdc_conf *conf = this->private; + + if (!conf->force_readdirp) { + STACK_WIND(frame, mdc_readdir_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdir, fd, size, offset, + xdata); + return 0; + } + + if (!xdata) { + xdata = dict_new (); + need_unref = 1; + } + + if (xdata) + mdc_load_reqs (this, xdata); + + STACK_WIND(frame, mdc_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, + xdata); + + if (need_unref && xdata) + dict_unref (xdata); + + return 0; +} + +int +mdc_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); + +out: + MDC_STACK_UNWIND (fallocate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; +} + +int mdc_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + mdc_local_t *local; + + local = mdc_local_get(frame); + local->fd = fd_ref(fd); + + STACK_WIND(frame, mdc_fallocate_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len, + xdata); + + return 0; +} + +int +mdc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); + +out: + MDC_STACK_UNWIND(discard, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; +} + +int mdc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + mdc_local_t *local; + + local = mdc_local_get(frame); + local->fd = fd_ref(fd); + + STACK_WIND(frame, mdc_discard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, offset, len, + xdata); + + return 0; +} + +int +mdc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) +{ + mdc_local_t *local = NULL; + + local = frame->local; + + if (op_ret != 0) + goto out; + + if (!local) + goto out; + + mdc_inode_iatt_set_validate(this, local->fd->inode, prebuf, postbuf); + +out: + MDC_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + + return 0; +} + +int mdc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + mdc_local_t *local; + + local = mdc_local_get(frame); + local->fd = fd_ref(fd); + + STACK_WIND(frame, mdc_zerofill_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, len, + xdata); + + return 0; +} + + +int +mdc_forget (xlator_t *this, inode_t *inode) +{ + mdc_inode_wipe (this, inode); + + return 0; +} + + +int +is_strpfx (const char *str1, const char *str2) +{ + /* is one of the string a prefix of the other? 
*/ + int i; + + for (i = 0; str1[i] == str2[i]; i++) { + if (!str1[i] || !str2[i]) + break; + } + + return !(str1[i] && str2[i]); +} + + +int +mdc_key_load_set (struct mdc_key *keys, char *pattern, gf_boolean_t val) +{ + struct mdc_key *key = NULL; + + for (key = keys; key->name; key++) { + if (is_strpfx (key->name, pattern)) + key->load = val; + } + + return 0; +} + + +int +reconfigure (xlator_t *this, dict_t *options) +{ + struct mdc_conf *conf = NULL; + + conf = this->private; + + GF_OPTION_RECONF ("md-cache-timeout", conf->timeout, options, int32, out); + + GF_OPTION_RECONF ("cache-selinux", conf->cache_selinux, options, bool, out); + mdc_key_load_set (mdc_keys, "security.", conf->cache_selinux); + + GF_OPTION_RECONF ("cache-posix-acl", conf->cache_posix_acl, options, bool, out); + mdc_key_load_set (mdc_keys, "system.posix_acl_", conf->cache_posix_acl); + + GF_OPTION_RECONF("force-readdirp", conf->force_readdirp, options, bool, out); + +out: + return 0; +} + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + ret = xlator_mem_acct_init (this, gf_mdc_mt_end + 1); + return ret; +} + +int +init (xlator_t *this) +{ + struct mdc_conf *conf = NULL; + + conf = GF_CALLOC (sizeof (*conf), 1, gf_mdc_mt_mdc_conf_t); + if (!conf) { + gf_log (this->name, GF_LOG_ERROR, + "out of memory"); + return -1; + } + + GF_OPTION_INIT ("md-cache-timeout", conf->timeout, int32, out); + + GF_OPTION_INIT ("cache-selinux", conf->cache_selinux, bool, out); + mdc_key_load_set (mdc_keys, "security.", conf->cache_selinux); + + GF_OPTION_INIT ("cache-posix-acl", conf->cache_posix_acl, bool, out); + mdc_key_load_set (mdc_keys, "system.posix_acl_", conf->cache_posix_acl); + + GF_OPTION_INIT("force-readdirp", conf->force_readdirp, bool, out); +out: + this->private = conf; + + return 0; +} + + +void +fini (xlator_t *this) +{ + return; +} + + +struct xlator_fops fops = { + .lookup = mdc_lookup, + .stat = mdc_stat, + .fstat = mdc_fstat, + .truncate = mdc_truncate, + .ftruncate = 
mdc_ftruncate, + .mknod = mdc_mknod, + .mkdir = mdc_mkdir, + .unlink = mdc_unlink, + .rmdir = mdc_rmdir, + .symlink = mdc_symlink, + .rename = mdc_rename, + .link = mdc_link, + .create = mdc_create, + .readv = mdc_readv, + .writev = mdc_writev, + .setattr = mdc_setattr, + .fsetattr = mdc_fsetattr, + .fsync = mdc_fsync, + .setxattr = mdc_setxattr, + .fsetxattr = mdc_fsetxattr, + .getxattr = mdc_getxattr, + .fgetxattr = mdc_fgetxattr, + .removexattr = mdc_removexattr, + .fremovexattr= mdc_fremovexattr, + .readdirp = mdc_readdirp, + .readdir = mdc_readdir, + .fallocate = mdc_fallocate, + .discard = mdc_discard, + .zerofill = mdc_zerofill, +}; + + +struct xlator_cbks cbks = { + .forget = mdc_forget, +}; + +struct volume_options options[] = { + { .key = {"cache-selinux"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + }, + { .key = {"cache-posix-acl"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + }, + { .key = {"md-cache-timeout"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = 60, + .default_value = "1", + .description = "Time period after which cache has to be refreshed", + }, + { .key = {"force-readdirp"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "true", + .description = "Convert all readdir requests to readdirplus to " + "collect stat info on each entry.", + }, + { .key = {NULL} }, +}; diff --git a/xlators/performance/open-behind/Makefile.am b/xlators/performance/open-behind/Makefile.am new file mode 100644 index 000000000..af437a64d --- /dev/null +++ b/xlators/performance/open-behind/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = src diff --git a/xlators/performance/open-behind/src/Makefile.am b/xlators/performance/open-behind/src/Makefile.am new file mode 100644 index 000000000..125285707 --- /dev/null +++ b/xlators/performance/open-behind/src/Makefile.am @@ -0,0 +1,15 @@ +xlator_LTLIBRARIES = open-behind.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance + +open_behind_la_LDFLAGS = -module -avoid-version + 
+open_behind_la_SOURCES = open-behind.c +open_behind_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = open-behind-mem-types.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/performance/open-behind/src/open-behind-mem-types.h b/xlators/performance/open-behind/src/open-behind-mem-types.h new file mode 100644 index 000000000..1e94296f4 --- /dev/null +++ b/xlators/performance/open-behind/src/open-behind-mem-types.h @@ -0,0 +1,21 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __OB_MEM_TYPES_H__ +#define __OB_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_ob_mem_types_ { + gf_ob_mt_fd_t = gf_common_mt_end + 1, + gf_ob_mt_conf_t, + gf_ob_mt_end +}; +#endif diff --git a/xlators/performance/open-behind/src/open-behind.c b/xlators/performance/open-behind/src/open-behind.c new file mode 100644 index 000000000..7e5b57278 --- /dev/null +++ b/xlators/performance/open-behind/src/open-behind.c @@ -0,0 +1,1001 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#include "open-behind-mem-types.h" +#include "xlator.h" +#include "statedump.h" +#include "call-stub.h" +#include "defaults.h" + +typedef struct ob_conf { + gf_boolean_t use_anonymous_fd; /* use anonymous FDs wherever safe + e.g - fstat() readv() + + whereas for fops like writev(), lk(), + the fd is important for side effects + like mandatory locks + */ + gf_boolean_t lazy_open; /* delay backend open as much as possible */ +} ob_conf_t; + + +typedef struct ob_fd { + call_frame_t *open_frame; + loc_t loc; + dict_t *xdata; + int flags; + int op_errno; + struct list_head list; +} ob_fd_t; + + +ob_fd_t * +__ob_fd_ctx_get (xlator_t *this, fd_t *fd) +{ + uint64_t value = 0; + int ret = -1; + ob_fd_t *ob_fd = NULL; + + ret = __fd_ctx_get (fd, this, &value); + if (ret) + return NULL; + + ob_fd = (void *) ((long) value); + + return ob_fd; +} + + +ob_fd_t * +ob_fd_ctx_get (xlator_t *this, fd_t *fd) +{ + ob_fd_t *ob_fd = NULL; + + LOCK (&fd->lock); + { + ob_fd = __ob_fd_ctx_get (this, fd); + } + UNLOCK (&fd->lock); + + return ob_fd; +} + + +int +__ob_fd_ctx_set (xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) +{ + uint64_t value = 0; + int ret = -1; + + value = (long) ((void *) ob_fd); + + ret = __fd_ctx_set (fd, this, value); + + return ret; +} + + +int +ob_fd_ctx_set (xlator_t *this, fd_t *fd, ob_fd_t *ob_fd) +{ + int ret = -1; + + LOCK (&fd->lock); + { + ret = __ob_fd_ctx_set (this, fd, ob_fd); + } + UNLOCK (&fd->lock); + + return ret; +} + + +ob_fd_t * +ob_fd_new (void) +{ + ob_fd_t *ob_fd = NULL; + + ob_fd = GF_CALLOC (1, sizeof (*ob_fd), gf_ob_mt_fd_t); + + INIT_LIST_HEAD (&ob_fd->list); + + return ob_fd; +} + + +void +ob_fd_free (ob_fd_t *ob_fd) +{ + loc_wipe (&ob_fd->loc); + + if (ob_fd->xdata) + dict_unref (ob_fd->xdata); + + if (ob_fd->open_frame) + STACK_DESTROY (ob_fd->open_frame->root); + + GF_FREE (ob_fd); +} + + +int +ob_wake_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, fd_t *fd_ret, dict_t *xdata) +{ + fd_t *fd = NULL; + 
struct list_head list; + ob_fd_t *ob_fd = NULL; + call_stub_t *stub = NULL, *tmp = NULL; + + fd = frame->local; + frame->local = NULL; + + INIT_LIST_HEAD (&list); + + LOCK (&fd->lock); + { + ob_fd = __ob_fd_ctx_get (this, fd); + + list_splice_init (&ob_fd->list, &list); + + if (op_ret < 0) { + /* mark fd BAD for ever */ + ob_fd->op_errno = op_errno; + } else { + __fd_ctx_del (fd, this, NULL); + ob_fd_free (ob_fd); + } + } + UNLOCK (&fd->lock); + + list_for_each_entry_safe (stub, tmp, &list, list) { + list_del_init (&stub->list); + + if (op_ret < 0) + call_unwind_error (stub, -1, op_errno); + else + call_resume (stub); + } + + fd_unref (fd); + + STACK_DESTROY (frame->root); + + return 0; +} + + +int +ob_fd_wake (xlator_t *this, fd_t *fd) +{ + call_frame_t *frame = NULL; + ob_fd_t *ob_fd = NULL; + + LOCK (&fd->lock); + { + ob_fd = __ob_fd_ctx_get (this, fd); + if (!ob_fd) + goto unlock; + + frame = ob_fd->open_frame; + ob_fd->open_frame = NULL; + } +unlock: + UNLOCK (&fd->lock); + + if (frame) { + frame->local = fd_ref (fd); + + STACK_WIND (frame, ob_wake_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->open, + &ob_fd->loc, ob_fd->flags, fd, ob_fd->xdata); + } + + return 0; +} + + +int +open_and_resume (xlator_t *this, fd_t *fd, call_stub_t *stub) +{ + ob_fd_t *ob_fd = NULL; + int op_errno = 0; + + if (!fd) + goto nofd; + + LOCK (&fd->lock); + { + ob_fd = __ob_fd_ctx_get (this, fd); + if (!ob_fd) + goto unlock; + + if (ob_fd->op_errno) { + op_errno = ob_fd->op_errno; + goto unlock; + } + + list_add_tail (&stub->list, &ob_fd->list); + } +unlock: + UNLOCK (&fd->lock); + +nofd: + if (op_errno) + call_unwind_error (stub, -1, op_errno); + else if (ob_fd) + ob_fd_wake (this, fd); + else + call_resume (stub); + + return 0; +} + + +int +ob_open_behind (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + fd_t *fd, dict_t *xdata) +{ + ob_fd_t *ob_fd = NULL; + int ret = -1; + ob_conf_t *conf = NULL; + + + conf = this->private; + + if (flags & O_TRUNC) { + 
STACK_WIND (frame, default_open_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->open, + loc, flags, fd, xdata); + return 0; + } + + ob_fd = ob_fd_new (); + if (!ob_fd) + goto enomem; + + ob_fd->open_frame = copy_frame (frame); + if (!ob_fd->open_frame) + goto enomem; + ret = loc_copy (&ob_fd->loc, loc); + if (ret) + goto enomem; + + ob_fd->flags = flags; + if (xdata) + ob_fd->xdata = dict_ref (xdata); + + ret = ob_fd_ctx_set (this, fd, ob_fd); + if (ret) + goto enomem; + + fd_ref (fd); + + STACK_UNWIND_STRICT (open, frame, 0, 0, fd, xdata); + + if (!conf->lazy_open) + ob_fd_wake (this, fd); + + fd_unref (fd); + + return 0; +enomem: + if (ob_fd) { + if (ob_fd->open_frame) + STACK_DESTROY (ob_fd->open_frame->root); + loc_wipe (&ob_fd->loc); + if (ob_fd->xdata) + dict_unref (ob_fd->xdata); + GF_FREE (ob_fd); + } + + return -1; +} + + +int +ob_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + fd_t *fd, dict_t *xdata) +{ + fd_t *old_fd = NULL; + int ret = -1; + int op_errno = 0; + call_stub_t *stub = NULL; + + old_fd = fd_lookup (fd->inode, 0); + if (old_fd) { + /* open-behind only when this is the first FD */ + stub = fop_open_stub (frame, default_open_resume, + loc, flags, fd, xdata); + if (!stub) { + op_errno = ENOMEM; + fd_unref (old_fd); + goto err; + } + + open_and_resume (this, old_fd, stub); + + fd_unref (old_fd); + + return 0; + } + + ret = ob_open_behind (frame, this, loc, flags, fd, xdata); + if (ret) { + op_errno = ENOMEM; + goto err; + } + + return 0; +err: + gf_log (this->name, GF_LOG_ERROR, "%s: %s", loc->path, + strerror (op_errno)); + + STACK_UNWIND_STRICT (open, frame, -1, op_errno, 0, 0); + + return 0; +} + + +fd_t * +ob_get_wind_fd (xlator_t *this, fd_t *fd) +{ + ob_conf_t *conf = NULL; + ob_fd_t *ob_fd = NULL; + + conf = this->private; + + ob_fd = ob_fd_ctx_get (this, fd); + + if (ob_fd && conf->use_anonymous_fd) + return fd_anonymous (fd->inode); + + return fd_ref (fd); +} + + +int +ob_readv (call_frame_t *frame, xlator_t 
*this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + call_stub_t *stub = NULL; + fd_t *wind_fd = NULL; + + wind_fd = ob_get_wind_fd (this, fd); + + stub = fop_readv_stub (frame, default_readv_resume, wind_fd, + size, offset, flags, xdata); + fd_unref (wind_fd); + + if (!stub) + goto err; + + open_and_resume (this, wind_fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0); + + return 0; +} + + +int +ob_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov, + int count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_writev_stub (frame, default_writev_resume, fd, iov, count, + offset, flags, iobref, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, 0, 0, 0); + + return 0; +} + + +int +ob_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + call_stub_t *stub = NULL; + fd_t *wind_fd = NULL; + + wind_fd = ob_get_wind_fd (this, fd); + + stub = fop_fstat_stub (frame, default_fstat_resume, wind_fd, xdata); + + fd_unref (wind_fd); + + if (!stub) + goto err; + + open_and_resume (this, wind_fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, 0, 0); + + return 0; +} + + +int +ob_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + call_stub_t *stub = NULL; + ob_fd_t *ob_fd = NULL; + gf_boolean_t unwind = _gf_false; + + LOCK (&fd->lock); + { + ob_fd = __ob_fd_ctx_get (this, fd); + if (ob_fd && ob_fd->open_frame) + /* if open() was never wound to backend, + no need to wind flush() either. 
+ */ + unwind = _gf_true; + } + UNLOCK (&fd->lock); + + if (unwind) + goto unwind; + + stub = fop_flush_stub (frame, default_flush_resume, fd, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, 0); + + return 0; + +unwind: + STACK_UNWIND_STRICT (flush, frame, 0, 0, 0); + + return 0; +} + + +int +ob_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int flag, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_fsync_stub (frame, default_fsync_resume, fd, flag, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, 0, 0, 0); + + return 0; +} + + +int +ob_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd, + struct gf_flock *flock, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_lk_stub (frame, default_lk_resume, fd, cmd, flock, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (lk, frame, -1, ENOMEM, 0, 0); + + return 0; +} + +int +ob_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_ftruncate_stub (frame, default_ftruncate_resume, fd, offset, + xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, 0, 0, 0); + + return 0; +} + + +int +ob_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr, + int flags, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_fsetxattr_stub (frame, default_fsetxattr_resume, fd, xattr, + flags, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM, 0); + + return 0; +} + + +int +ob_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name, + dict_t *xdata) +{ + 
call_stub_t *stub = NULL; + + stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume, fd, name, + xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM, 0, 0); + + return 0; +} + + +int +ob_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_fremovexattr_stub (frame, default_fremovexattr_resume, fd, + name, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (fremovexattr, frame, -1, ENOMEM, 0); + + return 0; +} + + +int +ob_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + int cmd, struct gf_flock *flock, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_finodelk_stub (frame, default_finodelk_resume, volume, fd, + cmd, flock, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM, 0); + + return 0; +} + + +int +ob_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, + const char *basename, entrylk_cmd cmd, entrylk_type type, + dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_fentrylk_stub (frame, default_fentrylk_resume, volume, fd, + basename, cmd, type, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM, 0); + + return 0; +} + + +int +ob_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd, + gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_fxattrop_stub (frame, default_fxattrop_resume, fd, optype, + xattr, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOMEM, 0, 0); + + return 0; +} + + +int +ob_fsetattr (call_frame_t *frame, 
xlator_t *this, fd_t *fd, + struct iatt *iatt, int valid, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + stub = fop_fsetattr_stub (frame, default_fsetattr_resume, fd, + iatt, valid, xdata); + if (!stub) + goto err; + + open_and_resume (this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, 0, 0, 0); + + return 0; +} + +int +ob_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + call_stub_t *stub; + + stub = fop_fallocate_stub(frame, default_fallocate_resume, fd, mode, + offset, len, xdata); + if (!stub) + goto err; + + open_and_resume(this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; +} + +int +ob_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + call_stub_t *stub; + + stub = fop_discard_stub(frame, default_discard_resume, fd, offset, len, + xdata); + if (!stub) + goto err; + + open_and_resume(this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT(discard, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; +} + +int +ob_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + call_stub_t *stub; + + stub = fop_zerofill_stub(frame, default_zerofill_resume, fd, + offset, len, xdata); + if (!stub) + goto err; + + open_and_resume(this, fd, stub); + + return 0; +err: + STACK_UNWIND_STRICT(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; +} + + +int +ob_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags, + dict_t *xdata) +{ + fd_t *fd = NULL; + call_stub_t *stub = NULL; + + stub = fop_unlink_stub (frame, default_unlink_resume, loc, + xflags, xdata); + if (!stub) + goto err; + + fd = fd_lookup (loc->inode, 0); + + open_and_resume (this, fd, stub); + if (fd) + fd_unref (fd); + + return 0; +err: + STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, 0, 0, 0); + + return 0; +} + + 
+int +ob_rename (call_frame_t *frame, xlator_t *this, loc_t *src, loc_t *dst, + dict_t *xdata) +{ + fd_t *fd = NULL; + call_stub_t *stub = NULL; + + stub = fop_rename_stub (frame, default_rename_resume, src, dst, xdata); + if (!stub) + goto err; + + if (dst->inode) + fd = fd_lookup (dst->inode, 0); + + open_and_resume (this, fd, stub); + if (fd) + fd_unref (fd); + + return 0; +err: + STACK_UNWIND_STRICT (rename, frame, -1, ENOMEM, 0, 0, 0, 0, 0, 0); + + return 0; +} + + +int +ob_release (xlator_t *this, fd_t *fd) +{ + ob_fd_t *ob_fd = NULL; + + ob_fd = ob_fd_ctx_get (this, fd); + + ob_fd_free (ob_fd); + + return 0; +} + + +int +ob_priv_dump (xlator_t *this) +{ + ob_conf_t *conf = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + + conf = this->private; + + if (!conf) + return -1; + + gf_proc_dump_build_key (key_prefix, "xlator.performance.open-behind", + "priv"); + + gf_proc_dump_add_section (key_prefix); + + gf_proc_dump_write ("use_anonymous_fd", "%d", conf->use_anonymous_fd); + + gf_proc_dump_write ("lazy_open", "%d", conf->lazy_open); + + return 0; +} + + +int +ob_fdctx_dump (xlator_t *this, fd_t *fd) +{ + ob_fd_t *ob_fd = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; + int ret = 0; + + ret = TRY_LOCK (&fd->lock); + if (ret) + return 0; + + ob_fd = __ob_fd_ctx_get (this, fd); + if (!ob_fd) { + UNLOCK (&fd->lock); + return 0; + } + + gf_proc_dump_build_key (key_prefix, "xlator.performance.open-behind", + "file"); + gf_proc_dump_add_section (key_prefix); + + gf_proc_dump_write ("fd", "%p", fd); + + gf_proc_dump_write ("open_frame", "%p", ob_fd->open_frame); + + gf_proc_dump_write ("open_frame.root.unique", "%p", + ob_fd->open_frame->root->unique); + + gf_proc_dump_write ("loc.path", "%s", ob_fd->loc.path); + + gf_proc_dump_write ("loc.ino", "%s", uuid_utoa (ob_fd->loc.gfid)); + + gf_proc_dump_write ("flags", "%p", ob_fd->open_frame); + + UNLOCK (&fd->lock); + + return 0; +} + + +int +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + ret = 
xlator_mem_acct_init (this, gf_ob_mt_end + 1); + + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Memory accounting failed"); + + return ret; +} + + +int +reconfigure (xlator_t *this, dict_t *options) +{ + ob_conf_t *conf = NULL; + int ret = -1; + + conf = this->private; + + GF_OPTION_RECONF ("use-anonymous-fd", conf->use_anonymous_fd, options, + bool, out); + + GF_OPTION_RECONF ("lazy-open", conf->lazy_open, options, bool, out); + + ret = 0; +out: + return ret; +} + + +int +init (xlator_t *this) +{ + ob_conf_t *conf = NULL; + + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "FATAL: volume (%s) not configured with exactly one " + "child", this->name); + return -1; + } + + if (!this->parents) + gf_log (this->name, GF_LOG_WARNING, + "dangling volume. check volfile "); + + conf = GF_CALLOC (1, sizeof (*conf), gf_ob_mt_conf_t); + if (!conf) + goto err; + + GF_OPTION_INIT ("use-anonymous-fd", conf->use_anonymous_fd, bool, err); + + GF_OPTION_INIT ("lazy-open", conf->lazy_open, bool, err); + + this->private = conf; + + return 0; +err: + if (conf) + GF_FREE (conf); + + return -1; +} + + +void +fini (xlator_t *this) +{ + ob_conf_t *conf = NULL; + + conf = this->private; + + GF_FREE (conf); + + return; +} + + +struct xlator_fops fops = { + .open = ob_open, + .readv = ob_readv, + .writev = ob_writev, + .flush = ob_flush, + .fsync = ob_fsync, + .fstat = ob_fstat, + .ftruncate = ob_ftruncate, + .fsetxattr = ob_fsetxattr, + .fgetxattr = ob_fgetxattr, + .fremovexattr = ob_fremovexattr, + .finodelk = ob_finodelk, + .fentrylk = ob_fentrylk, + .fxattrop = ob_fxattrop, + .fsetattr = ob_fsetattr, + .fallocate = ob_fallocate, + .discard = ob_discard, + .zerofill = ob_zerofill, + .unlink = ob_unlink, + .rename = ob_rename, + .lk = ob_lk, +}; + +struct xlator_cbks cbks = { + .release = ob_release, +}; + +struct xlator_dumpops dumpops = { + .priv = ob_priv_dump, + .fdctx = ob_fdctx_dump, +}; + + +struct volume_options options[] = { + { .key = 
{"use-anonymous-fd"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "yes", + .description = "For read operations, use anonymous FD when " + "original FD is open-behind and not yet opened in the backend.", + }, + { .key = {"lazy-open"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "yes", + .description = "Perform open in the backend only when a necessary " + "FOP arrives (e.g writev on the FD, unlink of the file). When option " + "is disabled, perform backend open right after unwinding open().", + }, + { .key = {NULL} } + +}; diff --git a/xlators/performance/quick-read/src/Makefile.am b/xlators/performance/quick-read/src/Makefile.am index db917f897..4906f408a 100644 --- a/xlators/performance/quick-read/src/Makefile.am +++ b/xlators/performance/quick-read/src/Makefile.am @@ -1,14 +1,15 @@ xlator_LTLIBRARIES = quick-read.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance -quick_read_la_LDFLAGS = -module -avoidversion +quick_read_la_LDFLAGS = -module -avoid-version quick_read_la_SOURCES = quick-read.c quick_read_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la noinst_HEADERS = quick-read.h quick-read-mem-types.h -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ - -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) CLEANFILES = diff --git a/xlators/performance/quick-read/src/quick-read-mem-types.h b/xlators/performance/quick-read/src/quick-read-mem-types.h index b6a65e57c..78547f641 100644 --- a/xlators/performance/quick-read/src/quick-read-mem-types.h +++ b/xlators/performance/quick-read/src/quick-read-mem-types.h @@ -1,20 +1,11 @@ /* - Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. 
- GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #ifndef __QR_MEM_TYPES_H__ @@ -24,12 +15,13 @@ enum gf_qr_mem_types_ { gf_qr_mt_qr_inode_t = gf_common_mt_end + 1, + gf_qr_mt_content_t, gf_qr_mt_qr_fd_ctx_t, - gf_qr_mt_qr_local_t, gf_qr_mt_iovec, gf_qr_mt_qr_conf_t, gf_qr_mt_qr_priority_t, gf_qr_mt_qr_private_t, + gf_qr_mt_qr_unlink_ctx_t, gf_qr_mt_end }; #endif diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c index e8385089f..445ea8658 100644 --- a/xlators/performance/quick-read/src/quick-read.c +++ b/xlators/performance/quick-read/src/quick-read.c @@ -1,200 +1,112 @@ /* - Copyright (c) 2009-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. 
- - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #include "quick-read.h" #include "statedump.h" -#define QR_DEFAULT_CACHE_SIZE 134217728 +qr_inode_t *qr_inode_ctx_get (xlator_t *this, inode_t *inode); +void __qr_inode_prune (qr_inode_table_t *table, qr_inode_t *qr_inode); -struct volume_options options[]; -void -qr_local_free (qr_local_t *local) +int +__qr_inode_ctx_set (xlator_t *this, inode_t *inode, qr_inode_t *qr_inode) { - if (local == NULL) { - goto out; - } - - if (local->stub != NULL) { - call_stub_destroy (local->stub); - } + uint64_t value = 0; + int ret = -1; - if (local->path != NULL) { - GF_FREE (local->path); - } + value = (long) qr_inode; - GF_FREE (local); + ret = __inode_ctx_set (inode, this, &value); -out: - return; + return ret; } -int32_t -qr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset); - - -static void -qr_loc_wipe (loc_t *loc) +qr_inode_t * +__qr_inode_ctx_get (xlator_t *this, inode_t *inode) { - if (loc == NULL) { - goto out; - } - - if (loc->path) { - GF_FREE ((char *)loc->path); - loc->path = NULL; - } + qr_inode_t *qr_inode = NULL; + uint64_t value = 0; + int ret = -1; - if (loc->inode) { - inode_unref (loc->inode); - loc->inode = NULL; - } + ret = __inode_ctx_get (inode, this, &value); + if (ret) + return NULL; - if (loc->parent) { - inode_unref (loc->parent); - loc->parent = NULL; - } + qr_inode = (void *) ((long) 
value); -out: - return; + return qr_inode; } -static int32_t -qr_loc_fill (loc_t *loc, inode_t *inode, char *path) +qr_inode_t * +qr_inode_ctx_get (xlator_t *this, inode_t *inode) { - int32_t ret = -1; - char *parent = NULL; - char *path_copy = NULL; - - GF_VALIDATE_OR_GOTO_WITH_ERROR ("quick-read", loc, out, errno, EINVAL); - GF_VALIDATE_OR_GOTO_WITH_ERROR ("quick-read", inode, out, errno, - EINVAL); - GF_VALIDATE_OR_GOTO_WITH_ERROR ("quick-read", path, out, errno, EINVAL); - GF_VALIDATE_OR_GOTO_WITH_ERROR ("quick-read", inode->table, out, errno, - EINVAL); - - loc->inode = inode_ref (inode); - loc->path = gf_strdup (path); - loc->ino = inode->ino; - - path_copy = gf_strdup (path); - if (path_copy == NULL) { - ret = -1; - goto out; - } - - parent = dirname (path_copy); + qr_inode_t *qr_inode = NULL; - loc->parent = inode_from_path (inode->table, parent); - if (loc->parent == NULL) { - ret = -1; - errno = EINVAL; - gf_log ("quick-read", GF_LOG_WARNING, - "cannot search parent inode for path (%s)", path); - goto out; - } + LOCK (&inode->lock); + { + qr_inode = __qr_inode_ctx_get (this, inode); + } + UNLOCK (&inode->lock); - loc->name = strrchr (loc->path, '/'); - ret = 0; -out: - if (ret == -1) { - qr_loc_wipe (loc); - } - - if (path_copy) { - GF_FREE (path_copy); - } - - return ret; + return qr_inode; } -void -qr_resume_pending_ops (qr_fd_ctx_t *qr_fd_ctx, int32_t op_ret, int32_t op_errno) +qr_inode_t * +qr_inode_new (xlator_t *this, inode_t *inode) { - call_stub_t *stub = NULL, *tmp = NULL; - struct list_head waiting_ops = {0, }; - - GF_VALIDATE_OR_GOTO ("quick-read", qr_fd_ctx, out); - - INIT_LIST_HEAD (&waiting_ops); - - LOCK (&qr_fd_ctx->lock); - { - qr_fd_ctx->open_in_transit = 0; - list_splice_init (&qr_fd_ctx->waiting_ops, - &waiting_ops); - } - UNLOCK (&qr_fd_ctx->lock); - - if (!list_empty (&waiting_ops)) { - list_for_each_entry_safe (stub, tmp, &waiting_ops, list) { - list_del_init (&stub->list); - if (op_ret < 0) { - qr_local_t *local = NULL; - - local = 
stub->frame->local; - local->op_ret = op_ret; - local->op_errno = op_errno; - } - - call_resume (stub); - } - } - -out: - return; -} - + qr_inode_t *qr_inode = NULL; -static void -qr_fd_ctx_free (qr_fd_ctx_t *qr_fd_ctx) -{ - GF_VALIDATE_OR_GOTO ("quick-read", qr_fd_ctx, out); + qr_inode = GF_CALLOC (1, sizeof (*qr_inode), gf_qr_mt_qr_inode_t); + if (!qr_inode) + return NULL; - GF_ASSERT (list_empty (&qr_fd_ctx->waiting_ops)); + INIT_LIST_HEAD (&qr_inode->lru); - GF_FREE (qr_fd_ctx->path); - GF_FREE (qr_fd_ctx); + qr_inode->priority = 0; /* initial priority */ -out: - return; + return qr_inode; } -static inline uint32_t -is_match (const char *path, const char *pattern) -{ - int32_t ret = 0; - uint32_t match = 0; - - GF_VALIDATE_OR_GOTO ("quick-read", path, out); - GF_VALIDATE_OR_GOTO ("quick-read", pattern, out); - - ret = fnmatch (pattern, path, FNM_NOESCAPE); - match = (ret == 0); +qr_inode_t * +qr_inode_ctx_get_or_new (xlator_t *this, inode_t *inode) +{ + qr_inode_t *qr_inode = NULL; + int ret = -1; + qr_private_t *priv = NULL; + + priv = this->private; + + LOCK (&inode->lock); + { + qr_inode = __qr_inode_ctx_get (this, inode); + if (qr_inode) + goto unlock; + + qr_inode = qr_inode_new (this, inode); + if (!qr_inode) + goto unlock; + + ret = __qr_inode_ctx_set (this, inode, qr_inode); + if (ret) { + __qr_inode_prune (&priv->table, qr_inode); + GF_FREE (qr_inode); + } + } +unlock: + UNLOCK (&inode->lock); -out: - return match; + return qr_inode; } @@ -204,3001 +116,578 @@ qr_get_priority (qr_conf_t *conf, const char *path) uint32_t priority = 0; struct qr_priority *curr = NULL; - GF_VALIDATE_OR_GOTO ("quick-read", conf, out); - GF_VALIDATE_OR_GOTO ("quick-read", path, out); - list_for_each_entry (curr, &conf->priority_list, list) { - if (is_match (path, curr->pattern)) + if (fnmatch (curr->pattern, path, FNM_NOESCAPE) == 0) priority = curr->priority; } -out: return priority; } -/* To be called with this-priv->table.lock held */ -qr_inode_t * -__qr_inode_alloc 
(xlator_t *this, char *path, inode_t *inode) -{ - qr_inode_t *qr_inode = NULL; - qr_private_t *priv = NULL; - int priority = 0; - - GF_VALIDATE_OR_GOTO ("quick-read", this, out); - GF_VALIDATE_OR_GOTO (this->name, path, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); - - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - - qr_inode = GF_CALLOC (1, sizeof (*qr_inode), gf_qr_mt_qr_inode_t); - if (qr_inode == NULL) { - goto out; - } - - INIT_LIST_HEAD (&qr_inode->lru); - - priority = qr_get_priority (&priv->conf, path); - - list_add_tail (&qr_inode->lru, &priv->table.lru[priority]); - - qr_inode->inode = inode; - qr_inode->priority = priority; -out: - return qr_inode; -} - - -/* To be called with qr_inode->table->lock held */ void -__qr_inode_free (qr_inode_t *qr_inode) +__qr_inode_register (qr_inode_table_t *table, qr_inode_t *qr_inode) { - GF_VALIDATE_OR_GOTO ("quick-read", qr_inode, out); - - if (qr_inode->xattr) { - dict_unref (qr_inode->xattr); - } + if (!qr_inode->data) + return; - list_del (&qr_inode->lru); + if (list_empty (&qr_inode->lru)) + /* first time addition of this qr_inode into table */ + table->cache_used += qr_inode->size; + else + list_del_init (&qr_inode->lru); - GF_FREE (qr_inode); -out: - return; + list_add_tail (&qr_inode->lru, &table->lru[qr_inode->priority]); } -/* To be called with priv->table.lock held */ + void -__qr_cache_prune (xlator_t *this) +qr_inode_set_priority (xlator_t *this, inode_t *inode, const char *path) { - qr_private_t *priv = NULL; - qr_conf_t *conf = NULL; - qr_inode_table_t *table = NULL; - qr_inode_t *curr = NULL, *next = NULL; - int32_t index = 0; - uint64_t size_to_prune = 0; - uint64_t size_pruned = 0; - - GF_VALIDATE_OR_GOTO ("quick-read", this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - - table = &priv->table; - conf = &priv->conf; - - size_to_prune = table->cache_used - conf->cache_size; - - for (index=0; index < conf->max_pri; index++) { - 
list_for_each_entry_safe (curr, next, &table->lru[index], lru) { - size_pruned += curr->stbuf.ia_size; - inode_ctx_del (curr->inode, this, NULL); - __qr_inode_free (curr); - if (size_pruned >= size_to_prune) - goto done; - } - } + uint32_t priority = 0; + qr_inode_table_t *table = NULL; + qr_inode_t *qr_inode = NULL; + qr_private_t *priv = NULL; + qr_conf_t *conf = NULL; -done: - table->cache_used -= size_pruned; - -out: - return; -} + qr_inode = qr_inode_ctx_get (this, inode); + if (!qr_inode) + return; -/* To be called with table->lock held */ -inline char -__qr_need_cache_prune (qr_conf_t *conf, qr_inode_table_t *table) -{ - char need_prune = 0; + priv = this->private; + table = &priv->table; + conf = &priv->conf; - GF_VALIDATE_OR_GOTO ("quick-read", conf, out); - GF_VALIDATE_OR_GOTO ("quick-read", table, out); + if (path) + priority = qr_get_priority (conf, path); + else + /* retain existing priority, just bump LRU */ + priority = qr_inode->priority; - need_prune = (table->cache_used > conf->cache_size); + LOCK (&table->lock); + { + qr_inode->priority = priority; -out: - return need_prune; + __qr_inode_register (table, qr_inode); + } + UNLOCK (&table->lock); } -int32_t -qr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *dict, struct iatt *postparent) +/* To be called with priv->table.lock held */ +void +__qr_inode_prune (qr_inode_table_t *table, qr_inode_t *qr_inode) { - data_t *content = NULL; - qr_inode_t *qr_inode = NULL; - uint64_t value = 0; - int ret = -1; - qr_conf_t *conf = NULL; - qr_inode_table_t *table = NULL; - qr_private_t *priv = NULL; - qr_local_t *local = NULL; - - GF_ASSERT (frame); - - if ((op_ret == -1) || (dict == NULL)) { - goto out; - } - - if ((this == NULL) || (this->private == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? 
"xlator object (this) is NULL" - : "quick-read configuration is not found"); - op_ret = -1; - op_errno = EINVAL; - goto out; - } + GF_FREE (qr_inode->data); + qr_inode->data = NULL; - priv = this->private; - conf = &priv->conf; - table = &priv->table; - - local = frame->local; - - if (buf->ia_size > conf->max_file_size) { - goto out; - } - - if (IA_ISDIR (buf->ia_type)) { - goto out; - } - - if (inode == NULL) { - op_ret = -1; - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, - "lookup returned a NULL inode"); - goto out; - } - - content = dict_get (dict, GF_CONTENT_KEY); - if (content == NULL) { - goto out; - } + if (!list_empty (&qr_inode->lru)) { + table->cache_used -= qr_inode->size; + qr_inode->size = 0; - LOCK (&table->lock); - { - ret = inode_ctx_get (inode, this, &value); - if (ret == -1) { - qr_inode = __qr_inode_alloc (this, local->path, inode); - if (qr_inode == NULL) { - op_ret = -1; - op_errno = ENOMEM; - goto unlock; - } - - ret = inode_ctx_put (inode, this, - (uint64_t)(long)qr_inode); - if (ret == -1) { - __qr_inode_free (qr_inode); - qr_inode = NULL; - op_ret = -1; - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, - "cannot set quick-read context in " - "inode (ino:%"PRId64" gfid:%s)", - inode->ino, inode->gfid); - goto unlock; - } - } else { - qr_inode = (qr_inode_t *)(long)value; - if (qr_inode == NULL) { - op_ret = -1; - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, - "cannot find quick-read context in " - "inode (ino:%"PRId64" gfid:%s)", - inode->ino, uuid_utoa (inode->gfid)); - goto unlock; - } - } - - if (qr_inode->xattr) { - dict_unref (qr_inode->xattr); - qr_inode->xattr = NULL; - table->cache_used -= qr_inode->stbuf.ia_size; - } - - qr_inode->xattr = dict_ref (dict); - qr_inode->stbuf = *buf; - table->cache_used += buf->ia_size; - - gettimeofday (&qr_inode->tv, NULL); - if (__qr_need_cache_prune (conf, table)) { - __qr_cache_prune (this); - } - } -unlock: - UNLOCK (&table->lock); - -out: - /* - * FIXME: content 
size in dict can be greater than the size application - * requested for. Applications need to be careful till this is fixed. - */ - QR_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf, dict, - postparent); + list_del_init (&qr_inode->lru); + } - return 0; + memset (&qr_inode->buf, 0, sizeof (qr_inode->buf)); } -int32_t -qr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) +void +qr_inode_prune (xlator_t *this, inode_t *inode) { - qr_conf_t *conf = NULL; - dict_t *new_req_dict = NULL; - int32_t op_ret = -1, op_errno = EINVAL; - data_t *content = NULL; - uint64_t requested_size = 0, size = 0, value = 0; - char cached = 0; - qr_inode_t *qr_inode = NULL; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - qr_local_t *local = NULL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, loc, unwind); - - priv = this->private; - GF_VALIDATE_OR_GOTO (frame->this->name, priv, unwind); - - conf = &priv->conf; - if (conf == NULL) { - op_ret = -1; - op_errno = EINVAL; - goto unwind; - } - - table = &priv->table; - local = GF_CALLOC (1, sizeof (*local), gf_qr_mt_qr_local_t); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, unwind, op_errno, - ENOMEM); - - frame->local = local; - - local->path = gf_strdup (loc->path); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, unwind, op_errno, - ENOMEM); - LOCK (&table->lock); - { - op_ret = inode_ctx_get (loc->inode, this, &value); - if (op_ret == 0) { - qr_inode = (qr_inode_t *)(long)value; - if (qr_inode != NULL) { - if (qr_inode->xattr) { - cached = 1; - } - } - } - } - UNLOCK (&table->lock); - - if ((xattr_req == NULL) && (conf->max_file_size > 0)) { - new_req_dict = xattr_req = dict_new (); - if (xattr_req == NULL) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - } - - if (!cached) { - if (xattr_req) { - content = dict_get (xattr_req, GF_CONTENT_KEY); - if (content) { - requested_size = data_to_uint64 
(content); - } - } - - if ((conf->max_file_size > 0) - && (conf->max_file_size != requested_size)) { - size = (conf->max_file_size > requested_size) ? - conf->max_file_size : requested_size; - - op_ret = dict_set (xattr_req, GF_CONTENT_KEY, - data_from_uint64 (size)); - if (op_ret < 0) { - op_ret = -1; - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_WARNING, - "cannot set key in request dict to " - "request file " - "content during lookup cbk"); - goto unwind; - } - } - } - - STACK_WIND (frame, qr_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xattr_req); - - if (new_req_dict) { - dict_unref (new_req_dict); - } - - return 0; + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + qr_inode_t *qr_inode = NULL; -unwind: - QR_STACK_UNWIND (lookup, frame, op_ret, op_errno, NULL, NULL, NULL, - NULL); + qr_inode = qr_inode_ctx_get (this, inode); + if (!qr_inode) + return; - if (new_req_dict) { - dict_unref (new_req_dict); - } + priv = this->private; + table = &priv->table; - return 0; + LOCK (&table->lock); + { + __qr_inode_prune (table, qr_inode); + } + UNLOCK (&table->lock); } -int32_t -qr_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, fd_t *fd) +/* To be called with priv->table.lock held */ +void +__qr_cache_prune (qr_inode_table_t *table, qr_conf_t *conf) { - uint64_t value = 0; - int32_t ret = -1; - qr_local_t *local = NULL; - qr_inode_t *qr_inode = NULL; - qr_fd_ctx_t *qr_fd_ctx = NULL; - call_stub_t *stub = NULL, *tmp = NULL; - char is_open = 0; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - struct list_head waiting_ops; - - GF_ASSERT (frame); - - priv = this->private; - table = &priv->table; - - local = frame->local; - if (local != NULL) { - is_open = local->is_open; - } - - INIT_LIST_HEAD (&waiting_ops); - - ret = fd_ctx_get (fd, this, &value); - if ((ret == -1) && (op_ret != -1)) { - op_ret = -1; - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, - 
"cannot find quick-read context in fd (%p) opened on " - "inode (ino:%"PRId64" gfid: %s", fd, fd->inode->ino, - uuid_utoa (fd->inode->gfid)); - goto out; - } - - if (value) { - qr_fd_ctx = (qr_fd_ctx_t *) (long)value; - } + qr_inode_t *curr = NULL; + qr_inode_t *next = NULL; + int index = 0; + size_t size_pruned = 0; - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - qr_fd_ctx->open_in_transit = 0; - - if (op_ret == 0) { - qr_fd_ctx->opened = 1; - } - list_splice_init (&qr_fd_ctx->waiting_ops, - &waiting_ops); - } - UNLOCK (&qr_fd_ctx->lock); - - if (local && local->is_open - && ((local->open_flags & O_TRUNC) == O_TRUNC)) { - LOCK (&table->lock); - { - ret = inode_ctx_del (fd->inode, this, &value); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long) value; - - if (qr_inode != NULL) { - __qr_inode_free (qr_inode); - } - } - } - UNLOCK (&table->lock); - } + for (index = 0; index < conf->max_pri; index++) { + list_for_each_entry_safe (curr, next, &table->lru[index], lru) { - if (!list_empty (&waiting_ops)) { - list_for_each_entry_safe (stub, tmp, &waiting_ops, - list) { - list_del_init (&stub->list); - if (op_ret < 0) { - qr_local_t *local = NULL; + size_pruned += curr->size; - local = stub->frame->local; - local->op_ret = op_ret; - local->op_errno = op_errno; - } + __qr_inode_prune (table, curr); - call_resume (stub); - } + if (table->cache_used < conf->cache_size) + return; } } -out: - if (is_open) { - QR_STACK_UNWIND (open, frame, op_ret, op_errno, fd); - } else { - STACK_DESTROY (frame->root); - } - return 0; + return; } -int32_t -qr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, int32_t wbflags) +void +qr_cache_prune (xlator_t *this) { - qr_inode_t *qr_inode = NULL; - int32_t ret = -1; - uint64_t filep = 0; - char content_cached = 0; - qr_fd_ctx_t *qr_fd_ctx = NULL, *tmp_fd_ctx = NULL; - int32_t op_ret = -1, op_errno = EINVAL; - qr_local_t *local = NULL; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - - 
GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this->private, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); + qr_private_t *priv = NULL; + qr_conf_t *conf = NULL; + qr_inode_table_t *table = NULL; priv = this->private; table = &priv->table; + conf = &priv->conf; - tmp_fd_ctx = qr_fd_ctx = GF_CALLOC (1, sizeof (*qr_fd_ctx), - gf_qr_mt_qr_fd_ctx_t); - if (qr_fd_ctx == NULL) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - - LOCK_INIT (&qr_fd_ctx->lock); - INIT_LIST_HEAD (&qr_fd_ctx->waiting_ops); - - qr_fd_ctx->path = gf_strdup (loc->path); - if (qr_fd_ctx->path == NULL) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - - qr_fd_ctx->flags = flags; - qr_fd_ctx->wbflags = wbflags; - - ret = fd_ctx_set (fd, this, (uint64_t)(long)qr_fd_ctx); - if (ret == -1) { - op_ret = -1; - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, - "cannot set quick-read context in " - "fd (%p) opened on inode (ino:%"PRId64" gfid:%s)", fd, - fd->inode->ino, uuid_utoa (fd->inode->gfid)); - goto unwind; - } - - tmp_fd_ctx = NULL; - - local = GF_CALLOC (1, sizeof (*local), gf_qr_mt_qr_local_t); - if (local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - - local->is_open = 1; - local->open_flags = flags; - frame->local = local; - LOCK (&table->lock); - { - ret = inode_ctx_get (fd->inode, this, &filep); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long) filep; - if (qr_inode) { - if (qr_inode->xattr) { - content_cached = 1; - } - } - } - } - UNLOCK (&table->lock); - - if (content_cached && (flags & O_DIRECTORY)) { - op_ret = -1; - op_errno = ENOTDIR; - gf_log (this->name, GF_LOG_WARNING, - "open with O_DIRECTORY flag received on non-directory"); - goto unwind; - } - - if (!content_cached || ((flags & O_ACCMODE) == O_WRONLY) - || ((flags & O_TRUNC) == O_TRUNC) - || ((flags & O_DIRECT) == O_DIRECT) - || ((wbflags & GF_OPEN_NOWB) != 0)) { - LOCK (&qr_fd_ctx->lock); 
- { - /* - * we really need not set this flag, since open is - * not yet unwounded. - */ - - qr_fd_ctx->open_in_transit = 1; - if (((flags & O_DIRECT) == O_DIRECT) - || ((wbflags & GF_OPEN_NOWB)) != 0) { - qr_fd_ctx->disabled = 1; - } - } - UNLOCK (&qr_fd_ctx->lock); - goto wind; - } else { - op_ret = 0; - op_errno = 0; - goto unwind; - } - -unwind: - if (tmp_fd_ctx != NULL) { - qr_fd_ctx_free (tmp_fd_ctx); - } - - QR_STACK_UNWIND (open, frame, op_ret, op_errno, fd); - return 0; - -wind: - STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags); - return 0; -} - - -static inline time_t -qr_time_elapsed (struct timeval *now, struct timeval *then) -{ - time_t time_elapsed = 0; - - GF_VALIDATE_OR_GOTO ("quick-read", now, out); - GF_VALIDATE_OR_GOTO ("quick-read", then, out); - - time_elapsed = now->tv_sec - then->tv_sec; - -out: - return time_elapsed; + LOCK (&table->lock); + { + if (table->cache_used > conf->cache_size) + __qr_cache_prune (table, conf); + } + UNLOCK (&table->lock); } -static inline char -qr_need_validation (qr_conf_t *conf, qr_inode_t *qr_inode) +void * +qr_content_extract (dict_t *xdata) { - struct timeval now = {0, }; - char need_validation = 0; + data_t *data = NULL; + void *content = NULL; - GF_VALIDATE_OR_GOTO ("quick-read", conf, out); - GF_VALIDATE_OR_GOTO ("quick-read", qr_inode, out); + data = dict_get (xdata, GF_CONTENT_KEY); + if (!data) + return NULL; - gettimeofday (&now, NULL); + content = GF_CALLOC (1, data->len, gf_qr_mt_content_t); + if (!content) + return NULL; - if (qr_time_elapsed (&now, &qr_inode->tv) >= conf->cache_timeout) - need_validation = 1; + memcpy (content, data->data, data->len); -out: - return need_validation; + return content; } -static int32_t -qr_validate_cache_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) +void +qr_content_update (xlator_t *this, qr_inode_t *qr_inode, void *data, + struct iatt *buf) 
{ - qr_inode_t *qr_inode = NULL; - qr_local_t *local = NULL; - uint64_t value = 0; - int32_t ret = 0; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - call_stub_t *stub = NULL; - - GF_ASSERT (frame); - if (this == NULL) { - op_ret = -1; - op_errno = EINVAL; - gf_log (frame->this->name, GF_LOG_WARNING, - "xlator object (this) is NULL"); - goto unwind; - } - - local = frame->local; - if ((local == NULL) || ((local->fd) == NULL)) { - op_ret = -1; - op_errno = EINVAL; - gf_log (frame->this->name, GF_LOG_WARNING, - (local == NULL) ? "local is NULL" - : "fd is not stored in local"); - goto unwind; - } - - local->just_validated = 1; - - if (op_ret == -1) { - goto unwind; - } + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; priv = this->private; table = &priv->table; - LOCK (&table->lock); - { - ret = inode_ctx_get (local->fd->inode, this, &value); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long) value; - } - - if (qr_inode != NULL) { - gettimeofday (&qr_inode->tv, NULL); + LOCK (&table->lock); + { + __qr_inode_prune (table, qr_inode); - if ((qr_inode->stbuf.ia_mtime != buf->ia_mtime) - || (qr_inode->stbuf.ia_mtime_nsec - != buf->ia_mtime_nsec)) { - inode_ctx_del (local->fd->inode, this, NULL); - __qr_inode_free (qr_inode); - } - } - } - UNLOCK (&table->lock); + qr_inode->data = data; + qr_inode->size = buf->ia_size; - stub = local->stub; - local->stub = NULL; + qr_inode->ia_mtime = buf->ia_mtime; + qr_inode->ia_mtime_nsec = buf->ia_mtime_nsec; - call_resume (stub); + qr_inode->buf = *buf; - return 0; + gettimeofday (&qr_inode->last_refresh, NULL); -unwind: - /* this is actually unwind of readv */ - if ((local != NULL) && (local->stub != NULL)) { - call_stub_destroy (local->stub); - } + __qr_inode_register (table, qr_inode); + } + UNLOCK (&table->lock); - QR_STACK_UNWIND (readv, frame, op_ret, op_errno, NULL, -1, NULL, NULL); - return 0; + qr_cache_prune (this); } -int32_t -qr_validate_cache_helper (call_frame_t *frame, xlator_t *this, fd_t 
*fd) +gf_boolean_t +qr_size_fits (qr_conf_t *conf, struct iatt *buf) { - qr_local_t *local = NULL; - int32_t op_ret = -1, op_errno = -1; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this->name, this, out); - - local = frame->local; - if (local == NULL) { - op_ret = -1; - op_errno = EINVAL; - } else { - op_ret = local->op_ret; - op_errno = local->op_errno; - } - -out: - if (op_ret == -1) { - qr_validate_cache_cbk (frame, NULL, this, op_ret, op_errno, - NULL); - } else { - STACK_WIND (frame, qr_validate_cache_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fstat, fd); - } - - return 0; + return (buf->ia_size <= conf->max_file_size); } -int -qr_validate_cache (call_frame_t *frame, xlator_t *this, fd_t *fd, - call_stub_t *stub) +gf_boolean_t +qr_mtime_equal (qr_inode_t *qr_inode, struct iatt *buf) { - int ret = -1; - int flags = 0; - uint64_t value = 0; - loc_t loc = {0, }; - char *path = NULL; - qr_local_t *local = NULL; - qr_fd_ctx_t *qr_fd_ctx = NULL; - call_stub_t *validate_stub = NULL; - char need_open = 0, can_wind = 0, validate_cbk_called = 0; - call_frame_t *open_frame = NULL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this->name, this, out); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, out); - GF_VALIDATE_OR_GOTO (frame->this->name, stub, out); - - if (frame->local == NULL) { - local = GF_CALLOC (1, sizeof (*local), gf_qr_mt_qr_local_t); - if (local == NULL) { - goto out; - } - } else { - local = frame->local; - } - - local->fd = fd; - local->stub = stub; - frame->local = local; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - validate_stub = fop_fstat_stub (frame, - qr_validate_cache_helper, - fd); - 
if (validate_stub == NULL) { - ret = -1; - if (need_open) { - qr_fd_ctx->open_in_transit = 0; - } - goto unlock; - } - - list_add_tail (&validate_stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - - if (ret == -1) { - goto out; - } - } else { - can_wind = 1; - } - - if (need_open) { - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - validate_cbk_called = 1; - goto out; - } - - ret = qr_loc_fill (&loc, fd->inode, path); - if (ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - validate_cbk_called = 1; - STACK_DESTROY (open_frame->root); - goto out; - } - - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, - &loc, flags, fd, qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } else if (can_wind) { - STACK_WIND (frame, qr_validate_cache_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fstat, fd); - } - - ret = 0; -out: - if ((ret < 0) && !validate_cbk_called) { - if (frame->local == NULL) { - call_stub_destroy (stub); - } - - qr_validate_cache_cbk (frame, NULL, this, -1, errno, NULL); - } - return ret; -} - - -int32_t -qr_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iovec *vector, int32_t count, - struct iatt *stbuf, struct iobref *iobref) -{ - GF_ASSERT (frame); - - QR_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, - stbuf, iobref); - return 0; -} - - -int32_t -qr_readv_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset) -{ - qr_local_t *local = NULL; - int32_t op_errno = EINVAL, ret = 0; - uint64_t value = 0; - qr_fd_ctx_t *fdctx = NULL; - - GF_ASSERT (frame); - - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - op_errno = 
local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } - - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding read call", - fdctx ? fdctx->path : NULL, strerror (errno)); - goto unwind; - } - - STACK_WIND (frame, qr_readv_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->readv, fd, size, offset); - return 0; - -unwind: - QR_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL); - return 0; + return (qr_inode->ia_mtime == buf->ia_mtime && + qr_inode->ia_mtime_nsec == buf->ia_mtime_nsec); } -int32_t -qr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset) +void +__qr_content_refresh (xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf) { - qr_inode_t *qr_inode = NULL; - int32_t ret = -1, op_ret = -1, op_errno = -1; - uint64_t value = 0; - int count = -1, flags = 0, i = 0; - char content_cached = 0, need_validation = 0; - char need_open = 0, can_wind = 0, need_unwind = 0; - struct iobuf *iobuf = NULL; - struct iobref *iobref = NULL; - struct iatt stbuf = {0, }; - data_t *content = NULL; - qr_fd_ctx_t *qr_fd_ctx = NULL; - call_stub_t *stub = NULL; - loc_t loc = {0, }; - qr_conf_t *conf = NULL; - struct iovec *vector = NULL; - char *path = NULL; - off_t start = 0, end = 0; - size_t len = 0; - struct iobuf_pool *iobuf_pool = NULL; - qr_local_t *local = NULL; - char just_validated = 0; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - call_frame_t *open_frame = NULL; - - op_ret = 0; + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + qr_conf_t *conf = NULL; priv = this->private; - conf = &priv->conf; table = &priv->table; + conf = &priv->conf; - local = frame->local; - - if (local != NULL) { - just_validated = local->just_validated; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - if (qr_fd_ctx != NULL) { - if (qr_fd_ctx->disabled) { - goto out; - } - 
} - } - - iobuf_pool = this->ctx->iobuf_pool; - - LOCK (&table->lock); - { - ret = inode_ctx_get (fd->inode, this, &value); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long)value; - if (qr_inode) { - if (qr_inode->xattr){ - if (!just_validated - && qr_need_validation (conf, - qr_inode)) { - need_validation = 1; - goto unlock; - } - - content = dict_get (qr_inode->xattr, - GF_CONTENT_KEY); - - stbuf = qr_inode->stbuf; - content_cached = 1; - list_move_tail (&qr_inode->lru, - &table->lru[qr_inode->priority]); - - if (offset > content->len) { - op_ret = 0; - end = content->len; - } else { - if ((offset + size) - > content->len) { - op_ret = content->len - - offset; - end = content->len; - } else { - op_ret = size; - end = offset + size; - } - } - - count = (op_ret - / iobuf_pool->default_page_size); - if ((op_ret % iobuf_pool->default_page_size) - != 0) { - count++; - } - - if (count == 0) { - op_ret = 0; - goto unlock; - } - - vector = GF_CALLOC (count, - sizeof (*vector), - gf_qr_mt_iovec); - if (vector == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - goto unlock; - } - - iobref = iobref_new (); - if (iobref == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - goto unlock; - } - - for (i = 0; i < count; i++) { - iobuf = iobuf_get (iobuf_pool); - if (iobuf == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - goto unlock; - } - - start = offset - + - (iobuf_pool->default_page_size - * i); - - if (start > end) { - len = 0; - } else { - len = - (iobuf_pool->default_page_size - > (end - start)) - ? 
(end - start) - : - iobuf_pool->default_page_size; - - memcpy (iobuf->ptr, - content->data - + start, - len); - } - - iobref_add (iobref, iobuf); - iobuf_unref (iobuf); - - vector[i].iov_base = iobuf->ptr; - vector[i].iov_len = len; - } - } - } - } - } -unlock: - UNLOCK (&table->lock); - -out: - if (content_cached || need_unwind) { - QR_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, - count, &stbuf, iobref); - - } else if (need_validation) { - stub = fop_readv_stub (frame, qr_readv, fd, size, offset); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - qr_validate_cache (frame, this, fd, stub); - } else { - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - if (frame->local == NULL) { - frame->local = GF_CALLOC (1, - sizeof (qr_local_t), - gf_qr_mt_qr_local_t); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto fdctx_unlock; - } - } - - stub = fop_readv_stub (frame, - qr_readv_helper, - fd, size, - offset); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto fdctx_unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - fdctx_unlock: - UNLOCK (&qr_fd_ctx->lock); - - if (op_ret == -1) { - need_unwind = 1; - goto out; - } - } else { - can_wind = 1; - } - - if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } + if (qr_size_fits (conf, buf) && qr_mtime_equal (qr_inode, buf)) { + qr_inode->buf = *buf; - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - 
goto ret; - } + gettimeofday (&qr_inode->last_refresh, NULL); - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, - &loc, flags, fd, qr_fd_ctx->wbflags); + __qr_inode_register (table, qr_inode); + } else { + __qr_inode_prune (table, qr_inode); + } - qr_loc_wipe (&loc); - } else if (can_wind) { - STACK_WIND (frame, qr_readv_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->readv, fd, size, - offset); - } - - } - -ret: - if (vector) { - GF_FREE (vector); - } - - if (iobref) { - iobref_unref (iobref); - } - - return 0; + return; } -int32_t -qr_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) -{ - GF_ASSERT (frame); - QR_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf); - return 0; -} - - -int32_t -qr_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t off, - struct iobref *iobref) -{ - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; - - GF_ASSERT (frame); - - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } - - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding write call", - fdctx ? 
fdctx->path : NULL, strerror (errno)); - goto unwind; - } - - STACK_WIND (frame, qr_writev_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->writev, fd, vector, count, off, - iobref); - return 0; - -unwind: - QR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL); - return 0; -} - - -int32_t -qr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, - int32_t count, off_t off, struct iobref *iobref) +void +qr_content_refresh (xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf) { - uint64_t value = 0; - int flags = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_inode_t *qr_inode = NULL; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t op_ret = -1, op_errno = -1, ret = -1; - char can_wind = 0, need_unwind = 0, need_open = 0; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - call_frame_t *open_frame = NULL; + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; priv = this->private; table = &priv->table; - ret = fd_ctx_get (fd, this, &value); - - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - } - - LOCK (&table->lock); - { - ret = inode_ctx_get (fd->inode, this, &value); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long)value; - if (qr_inode != NULL) { - inode_ctx_del (fd->inode, this, NULL); - __qr_inode_free (qr_inode); - } - } - } - UNLOCK (&table->lock); - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - frame->local = GF_CALLOC (1, - sizeof (qr_local_t), - gf_qr_mt_qr_local_t); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_writev_stub (frame, qr_writev_helper, - fd, vector, count, off, - iobref); - if (stub == NULL) { - op_ret = -1; - 
op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } - - if (need_unwind) { - QR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_writev_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->writev, fd, vector, count, - off, iobref); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } - - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } - -ret: - return 0; -} - - -int32_t -qr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *buf) -{ - QR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf); - return 0; -} - - -int32_t -qr_fstat_helper (call_frame_t *frame, xlator_t *this, fd_t *fd) -{ - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; - - GF_ASSERT (frame); - - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } - - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding fstat call", - fdctx ? 
fdctx->path : NULL, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, qr_fstat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fstat, fd); - return 0; - -unwind: - QR_STACK_UNWIND (fstat, frame, -1, op_errno, NULL); - return 0; -} - - -int32_t -qr_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd) -{ - qr_fd_ctx_t *qr_fd_ctx = NULL; - char need_open = 0, can_wind = 0, need_unwind = 0; - uint64_t value = 0; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - call_stub_t *stub = NULL; - loc_t loc = {0, }; - char *path = NULL; - int flags = 0; - call_frame_t *open_frame = NULL; - - GF_ASSERT (frame); - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? "xlator object (this) is NULL" - : "fd is NULL"); - need_unwind = 1; - goto unwind; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - frame->local = GF_CALLOC (1, - sizeof (qr_local_t), - gf_qr_mt_qr_local_t); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_fstat_stub (frame, qr_fstat_helper, - fd); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } - -unwind: - if (need_unwind) { - QR_STACK_UNWIND (fstat, frame, op_ret, op_errno, NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_fstat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fstat, fd); - } else if (need_open) { - op_ret = 
qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } - - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } - -ret: - return 0; -} - - -int32_t -qr_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop) -{ - GF_ASSERT (frame); - QR_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, preop, postop); - return 0; -} - - -int32_t -qr_fsetattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid) -{ - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; - - GF_ASSERT (frame); - - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } - - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding fsetattr " - "call", - fdctx ? 
fdctx->path : NULL, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, qr_fsetattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, - valid); - return 0; - -unwind: - QR_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL); - return 0; + LOCK (&table->lock); + { + __qr_content_refresh (this, qr_inode, buf); + } + UNLOCK (&table->lock); } -int32_t -qr_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid) +gf_boolean_t +__qr_cache_is_fresh (xlator_t *this, qr_inode_t *qr_inode) { - uint64_t value = 0; - int flags = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - char need_open = 0, can_wind = 0, need_unwind = 0; - call_frame_t *open_frame = NULL; - - GF_ASSERT (frame); - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? "xlator object (this) is NULL" : - "fd is NULL"); - need_unwind = 1; - goto out; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - } + qr_conf_t *conf = NULL; + qr_private_t *priv = NULL; + struct timeval now; + struct timeval diff; - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } + priv = this->private; + conf = &priv->conf; - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - frame->local = GF_CALLOC (1, - sizeof (qr_local_t), - gf_qr_mt_qr_local_t); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_fsetattr_stub (frame, - qr_fsetattr_helper, - fd, stbuf, valid); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto 
unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } - -out: - if (need_unwind) { - QR_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, NULL, NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_fsetattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, - valid); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } - - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } - -ret: - return 0; -} - - -int32_t -qr_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) -{ - GF_ASSERT (frame); - QR_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno); - return 0; -} - - -int32_t -qr_fsetxattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - dict_t *dict, int32_t flags) -{ - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; - - GF_ASSERT (frame); - - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } - - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding fsetxattr " - "call", - fdctx ? 
fdctx->path : NULL, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, qr_fsetxattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags); - return 0; - -unwind: - QR_STACK_UNWIND (fsetxattr, frame, -1, op_errno); - return 0; -} - - -int32_t -qr_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags) -{ - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - int open_flags = 0; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - char need_open = 0, can_wind = 0, need_unwind = 0; - call_frame_t *open_frame = NULL; - - GF_ASSERT (frame); - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? "xlator object (this) " - "is NULL" : "fd is NULL"); - need_unwind = 1; - goto out; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - open_flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - frame->local = GF_CALLOC (1, - sizeof (qr_local_t), - gf_qr_mt_qr_local_t); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_fsetxattr_stub (frame, - qr_fsetxattr_helper, - fd, dict, flags); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } - -out: - if (need_unwind) { - QR_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno); - } else if (can_wind) { - STACK_WIND (frame, qr_fsetxattr_cbk, FIRST_CHILD 
(this), - FIRST_CHILD (this)->fops->fsetxattr, fd, dict, - flags); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } - - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, open_flags, - fd, qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } - -ret: - return 0; -} - - -int32_t -qr_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) -{ - GF_ASSERT (frame); - QR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict); - return 0; -} - - -int32_t -qr_fgetxattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name) -{ - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; - - GF_ASSERT (frame); - - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } - - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding fgetxattr " - "call", - fdctx ? 
fdctx->path : NULL, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, qr_fgetxattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fgetxattr, fd, name); - return 0; - -unwind: - QR_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL); - return 0; -} - - -int32_t -qr_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name) -{ - int flags = 0; - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - char need_open = 0, can_wind = 0, need_unwind = 0; - call_frame_t *open_frame = NULL; - - /* - * FIXME: Can quick-read use the extended attributes stored in the - * cache? this needs to be discussed. - */ - - GF_ASSERT (frame); - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? "xlator object (this) is NULL" : - "fd is NULL"); - need_unwind = 1; - goto out; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - frame->local = GF_CALLOC (1, - sizeof (qr_local_t), - gf_qr_mt_qr_local_t); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_fgetxattr_stub (frame, - qr_fgetxattr_helper, - fd, name); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } - -out: - if (need_unwind) { - QR_STACK_UNWIND (open, frame, op_ret, 
op_errno, NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_fgetxattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fgetxattr, fd, name); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } - - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } + gettimeofday (&now, NULL); -ret: - return 0; -} + timersub (&now, &qr_inode->last_refresh, &diff); + if (diff.tv_sec >= conf->cache_timeout) + return _gf_false; -int32_t -qr_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno) -{ - GF_ASSERT (frame); - QR_STACK_UNWIND (flush, frame, op_ret, op_errno); - return 0; + return _gf_true; } -int32_t -qr_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd) +int +qr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode_ret, + struct iatt *buf, dict_t *xdata, struct iatt *postparent) { - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; - - GF_ASSERT (frame); - - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } - - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding flush call", - fdctx ? 
fdctx->path : NULL, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, qr_flush_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->flush, fd); - return 0; - -unwind: - QR_STACK_UNWIND (flush, frame, -1, op_errno); - return 0; -} + void *content = NULL; + qr_inode_t *qr_inode = NULL; + inode_t *inode = NULL; + inode = frame->local; + frame->local = NULL; -int32_t -qr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) -{ - uint64_t value = 0; - call_stub_t *stub = NULL; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - char can_wind = 0, need_unwind = 0; - - GF_ASSERT (frame); - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? "xlator object (this) is NULL" - : "fd is NULL"); - need_unwind = 1; + if (op_ret == -1) { + qr_inode_prune (this, inode); goto out; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long)value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - if (qr_fd_ctx->opened) { - can_wind = 1; - } else if (qr_fd_ctx->open_in_transit) { - frame->local = GF_CALLOC (1, - sizeof (qr_local_t), - gf_qr_mt_qr_local_t); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_flush_stub (frame, qr_flush_helper, - fd); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } else { - op_ret = 0; - need_unwind = 1; - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } - + } + + if (dict_get (xdata, "sh-failed")) { + qr_inode_prune (this, inode); + goto out; + } + + content = qr_content_extract (xdata); + + if (content) { + /* new content came along, always replace old content */ + qr_inode = qr_inode_ctx_get_or_new (this, inode); + if (!qr_inode) + /* no 
harm done */ + goto out; + + qr_content_update (this, qr_inode, content, buf); + } else { + /* purge old content if necessary */ + qr_inode = qr_inode_ctx_get (this, inode); + if (!qr_inode) + /* usual path for large files */ + goto out; + + qr_content_refresh (this, qr_inode, buf); + } out: - if (need_unwind) { - QR_STACK_UNWIND (flush, frame, op_ret, op_errno); - } else if (can_wind) { - STACK_WIND (frame, qr_flush_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->flush, fd); - } - - return 0; -} - - -int32_t -qr_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) -{ - GF_ASSERT (frame); - QR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno); - return 0; -} - - -int32_t -qr_fentrylk_helper (call_frame_t *frame, xlator_t *this, const char *volume, - fd_t *fd, const char *basename, entrylk_cmd cmd, - entrylk_type type) -{ - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; + if (inode) + inode_unref (inode); - GF_ASSERT (frame); - - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } - - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding fentrylk " - "call", - fdctx ? 
fdctx->path : NULL, strerror (op_errno)); - goto unwind; - } - - STACK_WIND(frame, qr_fentrylk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fentrylk, volume, fd, basename, - cmd, type); - return 0; - -unwind: - QR_STACK_UNWIND (fentrylk, frame, -1, op_errno); + STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode_ret, + buf, xdata, postparent); return 0; } -int32_t -qr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, - const char *basename, entrylk_cmd cmd, entrylk_type type) +int +qr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - int flags = 0; - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - char need_open = 0, can_wind = 0, need_unwind = 0; - call_frame_t *open_frame = NULL; - - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? "xlator object (this) is NULL" - : "fd is NULL"); - need_unwind = 1; - goto out; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long)value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - frame->local = GF_CALLOC (1, - sizeof (qr_local_t), - gf_qr_mt_qr_local_t); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_fentrylk_stub (frame, - qr_fentrylk_helper, - volume, fd, basename, - cmd, type); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK 
(&qr_fd_ctx->lock); - } else { - can_wind = 1; - } - -out: - if (need_unwind) { - QR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno); - } else if (can_wind) { - STACK_WIND (frame, qr_fentrylk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fentrylk, volume, fd, - basename, cmd, type); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } - - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - qr_fd_ctx->wbflags); + qr_private_t *priv = NULL; + qr_conf_t *conf = NULL; + qr_inode_t *qr_inode = NULL; + int ret = -1; + dict_t *new_xdata = NULL; - qr_loc_wipe (&loc); - } + priv = this->private; + conf = &priv->conf; -ret: - return 0; -} + qr_inode = qr_inode_ctx_get (this, loc->inode); + if (qr_inode && qr_inode->data) + /* cached. 
only validate in qr_lookup_cbk */ + goto wind; + + if (!xdata) + xdata = new_xdata = dict_new (); + + if (!xdata) + goto wind; + + ret = 0; + if (conf->max_file_size) + ret = dict_set (xdata, GF_CONTENT_KEY, + data_from_uint64 (conf->max_file_size)); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "cannot set key in request dict (%s)", + loc->path); +wind: + frame->local = inode_ref (loc->inode); + STACK_WIND (frame, qr_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); -int32_t -qr_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + if (new_xdata) + dict_unref (new_xdata); -{ - GF_ASSERT (frame); - QR_STACK_UNWIND (finodelk, frame, op_ret, op_errno); return 0; } -int32_t -qr_finodelk_helper (call_frame_t *frame, xlator_t *this, const char *volume, - fd_t *fd, int32_t cmd, struct gf_flock *lock) +int +qr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata) { - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; - - GF_ASSERT (frame); + gf_dirent_t *entry = NULL; + qr_inode_t *qr_inode = NULL; - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); + if (op_ret <= 0) + goto unwind; - if (local->op_ret < 0) { - op_errno = local->op_errno; + list_for_each_entry (entry, &entries->list, list) { + if (!entry->inode) + continue; - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } + qr_inode = qr_inode_ctx_get (this, entry->inode); + if (!qr_inode) + /* no harm */ + continue; - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding finodelk " - "call", - fdctx ? 
fdctx->path : NULL, strerror (op_errno)); - goto unwind; + qr_content_refresh (this, qr_inode, &entry->d_stat); } - STACK_WIND (frame, qr_finodelk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->finodelk, volume, fd, cmd, lock); - return 0; - unwind: - QR_STACK_UNWIND (finodelk, frame, -1, op_errno); - return 0; -} - - -int32_t -qr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, - int32_t cmd, struct gf_flock *lock) -{ - int flags = 0; - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - char need_open = 0, can_wind = 0, need_unwind = 0; - call_frame_t *open_frame = NULL; - - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? "xlator object (this) is NULL" - : "fd is NULL"); - need_unwind = 1; - goto out; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long)value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - frame->local = GF_CALLOC (1, - sizeof (qr_local_t), - gf_qr_mt_qr_local_t); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_finodelk_stub (frame, - qr_finodelk_helper, - volume, fd, cmd, - lock); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } - -out: - if (need_unwind) { - QR_STACK_UNWIND (finodelk, frame, op_ret, op_errno); - } else if (can_wind) { - 
STACK_WIND (frame, qr_finodelk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->finodelk, volume, fd, - cmd, lock); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } - - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } - -ret: - return 0; -} - - -int32_t -qr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf) -{ - GF_ASSERT (frame); - QR_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf); - return 0; + STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata); + return 0; } -int32_t -qr_fsync_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags) +int +qr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, + size_t size, off_t offset, dict_t *xdata) { - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; - - GF_ASSERT (frame); - - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } - - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding fsync call", - fdctx ? 
fdctx->path : NULL, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, qr_fsync_cbk, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->fsync, fd, flags); - return 0; - -unwind: - QR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL); - return 0; + STACK_WIND (frame, qr_readdirp_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->readdirp, + fd, size, offset, xdata); + return 0; } -int32_t -qr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags) +int +qr_readv_cached (call_frame_t *frame, qr_inode_t *qr_inode, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - int open_flags = 0; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - char need_open = 0, can_wind = 0, need_unwind = 0; - call_frame_t *open_frame = NULL; - - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? "xlator object (this) is NULL" - : "fd is NULL"); - need_unwind = 1; - goto out; - } + xlator_t *this = NULL; + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + int op_ret = -1; + struct iobuf *iobuf = NULL; + struct iobref *iobref = NULL; + struct iovec iov = {0, }; + struct iatt buf = {0, }; - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long)value; - } + this = frame->this; + priv = this->private; + table = &priv->table; - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - open_flags = qr_fd_ctx->flags; + LOCK (&table->lock); + { + op_ret = -1; - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } + if (!qr_inode->data) + goto unlock; - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - frame->local = GF_CALLOC (1, - sizeof (qr_local_t), - gf_qr_mt_qr_local_t); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind 
= 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_fsync_stub (frame, qr_fsync_helper, - fd, flags); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } + if (offset >= qr_inode->size) + goto unlock; -out: - if (need_unwind) { - QR_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL, NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_fsync_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsync, fd, flags); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } + if (!__qr_cache_is_fresh (this, qr_inode)) + goto unlock; - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } + op_ret = min (size, (qr_inode->size - offset)); - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, open_flags, - fd, qr_fd_ctx->wbflags); + iobuf = iobuf_get2 (this->ctx->iobuf_pool, op_ret); + if (!iobuf) { + op_ret = -1; + goto unlock; + } - qr_loc_wipe (&loc); - } + iobref = iobref_new (); + if (!iobref) { + op_ret = -1; + iobuf_unref (iobuf); + goto unlock; + } -ret: - return 0; -} + iobref_add (iobref, iobuf); + memcpy (iobuf->ptr, qr_inode->data + offset, op_ret); -int32_t -qr_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) -{ - int32_t ret = 0; - uint64_t value = 0; - qr_inode_t *qr_inode = NULL; - qr_local_t *local = NULL; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - - GF_ASSERT (frame); + buf = qr_inode->buf; - if (op_ret == -1) { - goto out; - } + /* bump LRU */ + 
__qr_inode_register (table, qr_inode); + } +unlock: + UNLOCK (&table->lock); - local = frame->local; - if ((local == NULL) || (local->fd == NULL) - || (local->fd->inode == NULL)) { - op_ret = -1; - op_errno = EINVAL; - gf_log (frame->this->name, GF_LOG_WARNING, "cannot get inode"); - goto out; - } + if (op_ret > 0) { + iov.iov_base = iobuf->ptr; + iov.iov_len = op_ret; - if ((this == NULL) || (this->private == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? "xlator object (this) is NULL" - : "cannot get quick read configuration from xlator " - "object"); - op_ret = -1; - op_errno = EINVAL; - goto out; - } + STACK_UNWIND_STRICT (readv, frame, op_ret, 0, &iov, 1, + &buf, iobref, xdata); + } - priv = this->private; - table = &priv->table; + if (iobuf) + iobuf_unref (iobuf); - LOCK (&table->lock); - { - ret = inode_ctx_get (local->fd->inode, this, &value); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long) value; - - if (qr_inode) { - if (qr_inode->stbuf.ia_size != postbuf->ia_size) - { - inode_ctx_del (local->fd->inode, this, - NULL); - __qr_inode_free (qr_inode); - } - } - } - } - UNLOCK (&table->lock); + if (iobref) + iobref_unref (iobref); -out: - QR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, - postbuf); - return 0; + return op_ret; } -int32_t -qr_ftruncate_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - off_t offset) +int +qr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; + qr_inode_t *qr_inode = NULL; - GF_ASSERT (frame); + qr_inode = qr_inode_ctx_get (this, fd->inode); + if (!qr_inode) + goto wind; - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - 
op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } - - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding ftruncate " - "call", - fdctx ? fdctx->path : NULL, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, qr_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset); - return 0; + if (qr_readv_cached (frame, qr_inode, size, offset, flags, xdata) <= 0) + goto wind; -unwind: - QR_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL); - return 0; + return 0; +wind: + STACK_WIND (frame, default_readv_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv, + fd, size, offset, flags, xdata); + return 0; } -int32_t -qr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) +int +qr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov, + int count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) { - int flags = 0; - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_local_t *local = NULL; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - char need_open = 0, can_wind = 0, need_unwind = 0; - call_frame_t *open_frame = NULL; - - GF_ASSERT (frame); - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? 
"xlator object (this) is NULL" - : "fd is NULL"); - need_unwind = 1; - goto out; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long)value; - } - - local = GF_CALLOC (1, sizeof (*local), gf_qr_mt_qr_local_t); - if (local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - goto out; - } - - local->fd = fd; - frame->local = local; - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - stub = fop_ftruncate_stub (frame, - qr_ftruncate_helper, - fd, offset); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } - -out: - if (need_unwind) { - QR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, - NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } - - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } - -ret: - return 0; -} + qr_inode_prune (this, fd->inode); - -int32_t -qr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct gf_flock *lock) -{ - GF_ASSERT (frame); - QR_STACK_UNWIND (lk, frame, op_ret, 
op_errno, lock); - return 0; + STACK_WIND (frame, default_writev_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev, + fd, iov, count, offset, flags, iobref, xdata); + return 0; } -int32_t -qr_lk_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, - struct gf_flock *lock) +int +qr_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - qr_local_t *local = NULL; - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - int32_t op_errno = EINVAL; - - GF_ASSERT (frame); - - local = frame->local; - GF_VALIDATE_OR_GOTO (frame->this->name, local, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if (local->op_ret < 0) { - op_errno = local->op_errno; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - fdctx = (qr_fd_ctx_t *)(long) value; - } - - gf_log (this->name, GF_LOG_WARNING, - "open failed on path (%s) (%s), unwinding lk call", - fdctx ? 
fdctx->path : NULL, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, qr_lk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lk, fd, cmd, lock); - - return 0; + qr_inode_prune (this, loc->inode); -unwind: - QR_STACK_UNWIND (lk, frame, -1, op_errno, NULL); - return 0; + STACK_WIND (frame, default_truncate_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->truncate, + loc, offset, xdata); + return 0; } -int32_t -qr_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, - struct gf_flock *lock) +int +qr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - int flags = 0; - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - char need_open = 0, can_wind = 0, need_unwind = 0; - call_frame_t *open_frame = NULL; - - GF_ASSERT (frame); - if ((this == NULL) || (fd == NULL)) { - gf_log (frame->this->name, GF_LOG_WARNING, - (this == NULL) ? 
"xlator object (this) is NULL" - : "fd is NULL"); - need_unwind = 1; - goto out; - } - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long)value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - frame->local = GF_CALLOC (1, - sizeof (qr_local_t), - gf_qr_mt_qr_local_t); - if (frame->local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - stub = fop_lk_stub (frame, qr_lk_helper, fd, - cmd, lock); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } - -out: - if (need_unwind) { - QR_STACK_UNWIND (lk, frame, op_ret, op_errno, NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_lk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lk, fd, cmd, lock); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx, -1, errno); - goto ret; - } - - open_frame = create_frame (this, this->ctx->pool); - if (open_frame == NULL) { - qr_resume_pending_ops (qr_fd_ctx, -1, ENOMEM); - qr_loc_wipe (&loc); - goto ret; - } - - STACK_WIND (open_frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } + qr_inode_prune (this, fd->inode); -ret: - return 0; + STACK_WIND (frame, default_ftruncate_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->ftruncate, + fd, offset, xdata); + return 0; } -int32_t -qr_release (xlator_t *this, fd_t *fd) +int +qr_open (call_frame_t *frame, 
xlator_t *this, loc_t *loc, int flags, + fd_t *fd, dict_t *xdata) { - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = 0; - uint64_t value = 0; - - GF_VALIDATE_OR_GOTO ("quick-read", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - ret = fd_ctx_del (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - if (qr_fd_ctx) { - qr_fd_ctx_free (qr_fd_ctx); - } - } + qr_inode_set_priority (this, fd->inode, loc->path); -out: - return 0; + STACK_WIND (frame, default_open_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->open, + loc, flags, fd, xdata); + return 0; } - -int32_t +int qr_forget (xlator_t *this, inode_t *inode) { qr_inode_t *qr_inode = NULL; - uint64_t value = 0; - int32_t ret = -1; - qr_private_t *priv = NULL; - GF_VALIDATE_OR_GOTO ("quick-read", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); + qr_inode = qr_inode_ctx_get (this, inode); - priv = this->private; + if (!qr_inode) + return 0; - LOCK (&priv->table.lock); - { - ret = inode_ctx_del (inode, this, &value); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long) value; - __qr_inode_free (qr_inode); - } - } - UNLOCK (&priv->table.lock); + qr_inode_prune (this, inode); -out: - return 0; + GF_FREE (qr_inode); + + return 0; } @@ -3206,126 +695,35 @@ int32_t qr_inodectx_dump (xlator_t *this, inode_t *inode) { qr_inode_t *qr_inode = NULL; - uint64_t value = 0; int32_t ret = -1; - char key[GF_DUMP_MAX_BUF_LEN] = {0, }; char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; char buf[256] = {0, }; - struct tm *tm = NULL; - ret = inode_ctx_get (inode, this, &value); - if (ret != 0) { - goto out; - } - qr_inode = (qr_inode_t *)(long)value; - if (qr_inode == NULL) { + qr_inode = qr_inode_ctx_get (this, inode); + if (!qr_inode) goto out; - } gf_proc_dump_build_key (key_prefix, "xlator.performance.quick-read", "inodectx"); gf_proc_dump_add_section (key_prefix); - gf_proc_dump_build_key (key, key_prefix, "inode.gfid"); - 
uuid_unparse (inode->gfid, buf); - gf_proc_dump_write (key, "%s", buf); - - gf_proc_dump_build_key (key, key_prefix, "inode.ino"); - gf_proc_dump_write (key, "%ld", inode->ino); - - gf_proc_dump_build_key (key, key_prefix, "entire-file-cached"); - gf_proc_dump_write (key, "%s", qr_inode->xattr ? "yes" : "no"); - - tm = localtime (&qr_inode->tv.tv_sec); - strftime (buf, 256, "%Y-%m-%d %H:%M:%S", tm); - snprintf (buf + strlen (buf), 256 - strlen (buf), - ".%"GF_PRI_SUSECONDS, qr_inode->tv.tv_usec); - - gf_proc_dump_build_key (key, key_prefix, "last-cache-validation-time"); - gf_proc_dump_write (key, "%s", buf); - - ret = 0; -out: - return ret; -} + gf_proc_dump_write ("entire-file-cached", "%s", qr_inode->data ? "yes" : "no"); -int32_t -qr_fdctx_dump (xlator_t *this, fd_t *fd) -{ - qr_fd_ctx_t *fdctx = NULL; - uint64_t value = 0; - int32_t ret = 0, i = 0; - char uuidbuf[256] = {0, }; - char key[GF_DUMP_MAX_BUF_LEN] = {0, }; - char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; - call_stub_t *stub = NULL; - - ret = fd_ctx_get (fd, this, &value); - if (ret != 0) { - goto out; - } + if (qr_inode->last_refresh.tv_sec) { + gf_time_fmt (buf, sizeof buf, qr_inode->last_refresh.tv_sec, + gf_timefmt_FT); + snprintf (buf + strlen (buf), sizeof buf - strlen (buf), + ".%"GF_PRI_SUSECONDS, qr_inode->last_refresh.tv_usec); - fdctx = (qr_fd_ctx_t *)(long)value; - if (fdctx == NULL) { - goto out; + gf_proc_dump_write ("last-cache-validation-time", "%s", buf); } - gf_proc_dump_build_key (key_prefix, "xlator.performance.quick-read", - "fdctx"); - gf_proc_dump_add_section (key_prefix); - - gf_proc_dump_build_key (key, key_prefix, "fd"); - gf_proc_dump_write (key, "%p", fd); - - gf_proc_dump_build_key (key, key_prefix, "path"); - gf_proc_dump_write (key, "%s", fdctx->path); - - gf_proc_dump_build_key (key, key_prefix, "fd.inode.gfid"); - uuid_unparse (fd->inode->gfid, uuidbuf); - gf_proc_dump_write (key, "%s", uuidbuf); - - gf_proc_dump_build_key (key, key_prefix, "fd.inode.ino"); - 
gf_proc_dump_write (key, "%ld", fd->inode->ino); - - LOCK (&fdctx->lock); - { - gf_proc_dump_build_key (key, key_prefix, "opened"); - gf_proc_dump_write (key, "%s", fdctx->opened ? "yes" : "no"); - - gf_proc_dump_build_key (key, key_prefix, "open-in-progress"); - gf_proc_dump_write (key, "%s", fdctx->open_in_transit ? - "yes" : "no"); - - gf_proc_dump_build_key (key, key_prefix, - "caching-disabled (for this fd)"); - gf_proc_dump_write (key, "%s", fdctx->disabled ? "yes" : "no"); - - gf_proc_dump_build_key (key, key_prefix, "flags"); - gf_proc_dump_write (key, "%d", fdctx->flags); - - gf_proc_dump_build_key (key, key_prefix, "wbflags"); - gf_proc_dump_write (key, "%d", fdctx->wbflags); - - list_for_each_entry (stub, &fdctx->waiting_ops, list) { - gf_proc_dump_build_key (key, key_prefix, - "waiting-ops[%d].frame", i); - gf_proc_dump_write (key, "%"PRId64, - stub->frame->root->unique); - - gf_proc_dump_build_key (key, key_prefix, - "waiting-ops[%d].fop", i); - gf_proc_dump_write (key, "%s", gf_fop_list[stub->fop]); - - i++; - } - } - UNLOCK (&fdctx->lock); - ret = 0; out: return ret; } + int qr_priv_dump (xlator_t *this) { @@ -3336,7 +734,6 @@ qr_priv_dump (xlator_t *this) uint32_t i = 0; qr_inode_t *curr = NULL; uint64_t total_size = 0; - char key[GF_DUMP_MAX_BUF_LEN]; char key_prefix[GF_DUMP_MAX_BUF_LEN]; if (!this) { @@ -3346,40 +743,32 @@ qr_priv_dump (xlator_t *this) priv = this->private; conf = &priv->conf; - if (!conf) { - gf_log (this->name, GF_LOG_WARNING, "conf null in xlator"); + if (!conf) return -1; - } table = &priv->table; - gf_proc_dump_build_key (key_prefix, "xlator.performance.quick-read", "priv"); gf_proc_dump_add_section (key_prefix); - gf_proc_dump_build_key (key, key_prefix, "max_file_size"); - gf_proc_dump_write (key, "%d", conf->max_file_size); - gf_proc_dump_build_key (key, key_prefix, "cache_timeout"); - gf_proc_dump_write (key, "%d", conf->cache_timeout); + gf_proc_dump_write ("max_file_size", "%d", conf->max_file_size); + 
gf_proc_dump_write ("cache_timeout", "%d", conf->cache_timeout); if (!table) { - gf_log (this->name, GF_LOG_WARNING, "table is NULL"); goto out; } else { for (i = 0; i < conf->max_pri; i++) { list_for_each_entry (curr, &table->lru[i], lru) { file_count++; - total_size += curr->stbuf.ia_size; + total_size += curr->size; } } } - gf_proc_dump_build_key (key, key_prefix, "total_files_cached"); - gf_proc_dump_write (key, "%d", file_count); - gf_proc_dump_build_key (key, key_prefix, "total_cache_used"); - gf_proc_dump_write (key, "%d", total_size); + gf_proc_dump_write ("total_files_cached", "%d", file_count); + gf_proc_dump_write ("total_cache_used", "%d", total_size); out: return 0; @@ -3406,12 +795,49 @@ mem_acct_init (xlator_t *this) } +static gf_boolean_t +check_cache_size_ok (xlator_t *this, int64_t cache_size) +{ + int ret = _gf_true; + uint64_t total_mem = 0; + uint64_t max_cache_size = 0; + volume_option_t *opt = NULL; + + GF_ASSERT (this); + opt = xlator_volume_option_get (this, "cache-size"); + if (!opt) { + ret = _gf_false; + gf_log (this->name, GF_LOG_ERROR, + "could not get cache-size option"); + goto out; + } + + total_mem = get_mem_size (); + if (-1 == total_mem) + max_cache_size = opt->max; + else + max_cache_size = total_mem; + + gf_log (this->name, GF_LOG_DEBUG, "Max cache size is %"PRIu64, + max_cache_size); + if (cache_size > max_cache_size) { + ret = _gf_false; + gf_log (this->name, GF_LOG_ERROR, "Cache size %"PRIu64 + " is greater than the max size of %"PRIu64, + cache_size, max_cache_size); + goto out; + } +out: + return ret; +} + int reconfigure (xlator_t *this, dict_t *options) { - int32_t ret = -1; - qr_private_t *priv = NULL; - qr_conf_t *conf = NULL; + int32_t ret = -1; + qr_private_t *priv = NULL; + qr_conf_t *conf = NULL; + uint64_t cache_size_new = 0; GF_VALIDATE_OR_GOTO ("quick-read", this, out); GF_VALIDATE_OR_GOTO (this->name, this->private, out); @@ -3427,7 +853,14 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF 
("cache-timeout", conf->cache_timeout, options, int32, out); - GF_OPTION_RECONF ("cache-size", conf->cache_size, options, size, out); + GF_OPTION_RECONF ("cache-size", cache_size_new, options, size, out); + if (!check_cache_size_ok (this, cache_size_new)) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Not reconfiguring cache-size"); + goto out; + } + conf->cache_size = cache_size_new; ret = 0; out: @@ -3517,13 +950,9 @@ qr_get_priority_list (const char *opt_str, struct list_head *first) priority_str = strtok_r (NULL, ",", &tmp_str); } out: - if (string != NULL) { - GF_FREE (string); - } + GF_FREE (string); - if (dup_str != NULL) { - GF_FREE (dup_str); - } + GF_FREE (dup_str); if (max_pri == -1) { list_for_each_entry_safe (curr, tmp, first, list) { @@ -3570,6 +999,10 @@ init (xlator_t *this) GF_OPTION_INIT ("cache-timeout", conf->cache_timeout, int32, out); GF_OPTION_INIT ("cache-size", conf->cache_size, size, out); + if (!check_cache_size_ok (this, conf->cache_size)) { + ret = -1; + goto out; + } INIT_LIST_HEAD (&conf->priority_list); conf->max_pri = 1; @@ -3612,37 +1045,79 @@ out: void +qr_inode_table_destroy (qr_private_t *priv) +{ + int i = 0; + qr_conf_t *conf = NULL; + + conf = &priv->conf; + + for (i = 0; i < conf->max_pri; i++) { + GF_ASSERT (list_empty (&priv->table.lru[i])); + } + + LOCK_DESTROY (&priv->table.lock); + + return; +} + + +void +qr_conf_destroy (qr_conf_t *conf) +{ + struct qr_priority *curr = NULL, *tmp = NULL; + + list_for_each_entry_safe (curr, tmp, &conf->priority_list, list) { + list_del (&curr->list); + GF_FREE (curr->pattern); + GF_FREE (curr); + } + + return; +} + + +void fini (xlator_t *this) { + qr_private_t *priv = NULL; + + if (this == NULL) { + goto out; + } + + priv = this->private; + if (priv == NULL) { + goto out; + } + + qr_inode_table_destroy (priv); + qr_conf_destroy (&priv->conf); + + this->private = NULL; + + GF_FREE (priv); +out: return; } struct xlator_fops fops = { .lookup = qr_lookup, + .readdirp = qr_readdirp, 
.open = qr_open, .readv = qr_readv, - .writev = qr_writev, - .fstat = qr_fstat, - .fsetxattr = qr_fsetxattr, - .fgetxattr = qr_fgetxattr, - .flush = qr_flush, - .fentrylk = qr_fentrylk, - .finodelk = qr_finodelk, - .fsync = qr_fsync, - .ftruncate = qr_ftruncate, - .lk = qr_lk, - .fsetattr = qr_fsetattr, + .writev = qr_writev, + .truncate = qr_truncate, + .ftruncate = qr_ftruncate }; struct xlator_cbks cbks = { .forget = qr_forget, - .release = qr_release, }; struct xlator_dumpops dumpops = { .priv = qr_priv_dump, .inodectx = qr_inodectx_dump, - .fdctx = qr_fdctx_dump }; struct volume_options options[] = { @@ -3652,7 +1127,7 @@ struct volume_options options[] = { { .key = {"cache-size"}, .type = GF_OPTION_TYPE_SIZET, .min = 0, - .max = 6 * GF_UNIT_GB, + .max = 32 * GF_UNIT_GB, .default_value = "128MB", .description = "Size of the read cache." }, @@ -3668,4 +1143,5 @@ struct volume_options options[] = { .max = 1 * GF_UNIT_KB * 1000, .default_value = "64KB", }, + { .key = {NULL} } }; diff --git a/xlators/performance/quick-read/src/quick-read.h b/xlators/performance/quick-read/src/quick-read.h index 064151b63..6f0a05417 100644 --- a/xlators/performance/quick-read/src/quick-read.h +++ b/xlators/performance/quick-read/src/quick-read.h @@ -1,20 +1,11 @@ /* - Copyright (c) 2009-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #ifndef __QUICK_READ_H @@ -43,40 +34,20 @@ #include <fnmatch.h> #include "quick-read-mem-types.h" -struct qr_fd_ctx { - char opened; - char disabled; - char open_in_transit; - char *path; - int flags; - int wbflags; - struct list_head waiting_ops; - gf_lock_t lock; -}; -typedef struct qr_fd_ctx qr_fd_ctx_t; - -struct qr_local { - char is_open; - char *path; - char just_validated; - fd_t *fd; - int open_flags; - int32_t op_ret; - int32_t op_errno; - call_stub_t *stub; -}; -typedef struct qr_local qr_local_t; struct qr_inode { - dict_t *xattr; - inode_t *inode; + void *data; + size_t size; int priority; - struct iatt stbuf; - struct timeval tv; + uint32_t ia_mtime; + uint32_t ia_mtime_nsec; + struct iatt buf; + struct timeval last_refresh; struct list_head lru; }; typedef struct qr_inode qr_inode_t; + struct qr_priority { char *pattern; int32_t priority; @@ -106,13 +77,5 @@ struct qr_private { }; typedef struct qr_private qr_private_t; -void qr_local_free (qr_local_t *local); - -#define QR_STACK_UNWIND(op, frame, params ...) 
do { \ - qr_local_t *__local = frame->local; \ - frame->local = NULL; \ - STACK_UNWIND_STRICT (op, frame, params); \ - qr_local_free (__local); \ - } while (0) #endif /* #ifndef __QUICK_READ_H */ diff --git a/xlators/performance/read-ahead/src/Makefile.am b/xlators/performance/read-ahead/src/Makefile.am index b46020aac..be80ae7ac 100644 --- a/xlators/performance/read-ahead/src/Makefile.am +++ b/xlators/performance/read-ahead/src/Makefile.am @@ -1,14 +1,15 @@ xlator_LTLIBRARIES = read-ahead.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance -read_ahead_la_LDFLAGS = -module -avoidversion +read_ahead_la_LDFLAGS = -module -avoid-version read_ahead_la_SOURCES = read-ahead.c page.c read_ahead_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la noinst_HEADERS = read-ahead.h read-ahead-mem-types.h -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ - -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) CLEANFILES = diff --git a/xlators/performance/read-ahead/src/page.c b/xlators/performance/read-ahead/src/page.c index 9778ef542..e79e7ae78 100644 --- a/xlators/performance/read-ahead/src/page.c +++ b/xlators/performance/read-ahead/src/page.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #ifndef _CONFIG_H @@ -142,7 +133,8 @@ ra_waitq_return (ra_waitq_t *waitq) int ra_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iovec *vector, - int32_t count, struct iatt *stbuf, struct iobref *iobref) + int32_t count, struct iatt *stbuf, struct iobref *iobref, + dict_t *xdata) { ra_local_t *local = NULL; off_t pending_offset = 0; @@ -175,14 +167,8 @@ ra_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret >= 0) file->stbuf = *stbuf; - if (op_ret < 0) { - page = ra_page_get (file, pending_offset); - if (page) - waitq = ra_page_error (page, op_ret, op_errno); - goto unlock; - } - page = ra_page_get (file, pending_offset); + if (!page) { gf_log (this->name, GF_LOG_TRACE, "wasted copy: %"PRId64"[+%"PRId64"] file=%p", @@ -190,6 +176,29 @@ ra_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto unlock; } + /* + * "Dirty" means that the request was a pure read-ahead; it's + * set for requests we issue ourselves, and cleared when user + * requests are issued or put on the waitq. "Poisoned" means + * that we got a write while a read was still in flight, and we + * couldn't stop it so we marked it instead. If it's both + * dirty and poisoned by the time we get here, we cancel its + * effect so that a subsequent user read doesn't get data that + * we know is stale (because we made it stale ourselves). We + * can't use ESTALE because that has special significance. + * ECANCELED has no such special meaning, and is close to what + * we're trying to indicate. 
+ */ + if (page->dirty && page->poisoned) { + op_ret = -1; + op_errno = ECANCELED; + } + + if (op_ret < 0) { + waitq = ra_page_error (page, op_ret, op_errno); + goto unlock; + } + if (page->vector) { iobref_unref (page->iobref); GF_FREE (page->vector); @@ -216,7 +225,7 @@ unlock: fd_unref (local->fd); - GF_FREE (frame->local); + mem_put (frame->local); frame->local = NULL; out: @@ -244,7 +253,7 @@ ra_page_fault (ra_file_t *file, call_frame_t *frame, off_t offset) goto err; } - fault_local = GF_CALLOC (1, sizeof (ra_local_t), gf_ra_mt_ra_local_t); + fault_local = mem_get0 (THIS->local_pool); if (fault_local == NULL) { STACK_DESTROY (fault_frame->root); op_ret = -1; @@ -261,7 +270,7 @@ ra_page_fault (ra_file_t *file, call_frame_t *frame, off_t offset) STACK_WIND (fault_frame, ra_fault_cbk, FIRST_CHILD (fault_frame->this), FIRST_CHILD (fault_frame->this)->fops->readv, - file->fd, file->page_size, offset); + file->fd, file->page_size, offset, 0, NULL); return; @@ -430,11 +439,11 @@ ra_frame_unwind (call_frame_t *frame) file = (ra_file_t *)(long)tmp_file; STACK_UNWIND_STRICT (readv, frame, local->op_ret, local->op_errno, - vector, count, &file->stbuf, iobref); + vector, count, &file->stbuf, iobref, NULL); iobref_unref (iobref); pthread_mutex_destroy (&local->local_lock); - GF_FREE (local); + mem_put (local); GF_FREE (vector); out: @@ -491,6 +500,9 @@ ra_page_wakeup (ra_page_t *page) ra_frame_fill (page, frame); } + if (page->stale) { + ra_page_purge (page); + } out: return waitq; } diff --git a/xlators/performance/read-ahead/src/read-ahead-mem-types.h b/xlators/performance/read-ahead/src/read-ahead-mem-types.h index 7ca093696..219e29289 100644 --- a/xlators/performance/read-ahead/src/read-ahead-mem-types.h +++ b/xlators/performance/read-ahead/src/read-ahead-mem-types.h @@ -1,20 +1,11 @@ /* - Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. 
- GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ @@ -25,7 +16,6 @@ enum gf_ra_mem_types_ { gf_ra_mt_ra_file_t = gf_common_mt_end + 1, - gf_ra_mt_ra_local_t, gf_ra_mt_ra_conf_t, gf_ra_mt_ra_page_t, gf_ra_mt_ra_waitq_t, diff --git a/xlators/performance/read-ahead/src/read-ahead.c b/xlators/performance/read-ahead/src/read-ahead.c index 6e4e7c30b..069ab1f1a 100644 --- a/xlators/performance/read-ahead/src/read-ahead.c +++ b/xlators/performance/read-ahead/src/read-ahead.c @@ -1,27 +1,18 @@ /* - Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ /* TODO: - handle O_DIRECT - maintain offset, flush on lseek - - ensure efficient memory managment in case of random seek + - ensure efficient memory management in case of random seek */ #ifndef _CONFIG_H @@ -44,12 +35,11 @@ read_ahead (call_frame_t *frame, ra_file_t *file); int ra_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd) + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { ra_conf_t *conf = NULL; ra_file_t *file = NULL; int ret = 0; - long wbflags = 0; GF_ASSERT (frame); GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); @@ -60,8 +50,6 @@ ra_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto unwind; } - wbflags = (long)frame->local; - file = GF_CALLOC (1, sizeof (*file), gf_ra_mt_ra_file_t); if (!file) { op_ret = -1; @@ -74,10 +62,6 @@ ra_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if ((fd->flags & O_DIRECT) || ((fd->flags & O_ACCMODE) == O_WRONLY)) file->disabled = 1; - if (wbflags & GF_OPEN_NOWB) { - file->disabled = 1; - } - file->offset = (unsigned long long) 0; file->conf = conf; file->pages.next = &file->pages; @@ -116,7 +100,7 @@ ra_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, unwind: frame->local = NULL; - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd); + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); return 0; } @@ -126,7 +110,7 @@ int ra_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, struct iatt *buf, struct iatt *preparent, - 
struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { ra_conf_t *conf = NULL; ra_file_t *file = NULL; @@ -187,7 +171,7 @@ ra_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, unwind: STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, - preparent, postparent); + preparent, postparent, xdata); return 0; } @@ -195,17 +179,15 @@ unwind: int ra_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, int32_t wbflags) + fd_t *fd, dict_t *xdata) { GF_ASSERT (frame); GF_ASSERT (this); - frame->local = (void *)(long)wbflags; - STACK_WIND (frame, ra_open_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->open, - loc, flags, fd, wbflags); + loc, flags, fd, xdata); return 0; } @@ -213,7 +195,7 @@ ra_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, int ra_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, fd_t *fd, dict_t *params) + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { GF_ASSERT (frame); GF_ASSERT (this); @@ -221,7 +203,7 @@ ra_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, STACK_WIND (frame, ra_create_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->create, - loc, flags, mode, fd, params); + loc, flags, mode, umask, fd, xdata); return 0; } @@ -231,7 +213,8 @@ ra_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, */ static void -flush_region (call_frame_t *frame, ra_file_t *file, off_t offset, off_t size) +flush_region (call_frame_t *frame, ra_file_t *file, off_t offset, off_t size, + int for_write) { ra_page_t *trav = NULL; ra_page_t *next = NULL; @@ -243,8 +226,17 @@ flush_region (call_frame_t *frame, ra_file_t *file, off_t offset, off_t size) && trav->offset < (offset + size)) { next = trav->next; - if (trav->offset >= offset && !trav->waitq) { - ra_page_purge (trav); + if (trav->offset >= offset) { + if (!trav->waitq) { + ra_page_purge (trav); + } + else { + trav->stale = 1; + + if 
(for_write) { + trav->poisoned = 1; + } + } } trav = next; } @@ -352,7 +344,8 @@ out: int ra_need_atime_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iovec *vector, - int32_t count, struct iatt *stbuf, struct iobref *iobref) + int32_t count, struct iatt *stbuf, struct iobref *iobref, + dict_t *xdata) { GF_ASSERT (frame); STACK_DESTROY (frame->root); @@ -392,15 +385,15 @@ dispatch_requests (call_frame_t *frame, ra_file_t *file) trav = ra_page_get (file, trav_offset); if (!trav) { trav = ra_page_create (file, trav_offset); + if (!trav) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto unlock; + } fault = 1; need_atime_update = 0; } - - if (!trav) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto unlock; - } + trav->dirty = 0; if (trav->ready) { gf_log (frame->this->name, GF_LOG_TRACE, @@ -443,7 +436,7 @@ dispatch_requests (call_frame_t *frame, ra_file_t *file) STACK_WIND (ra_frame, ra_need_atime_cbk, FIRST_CHILD (frame->this), FIRST_CHILD (frame->this)->fops->readv, - file->fd, 1, 1); + file->fd, 1, 1, 0, NULL); } out: @@ -454,12 +447,13 @@ out: int ra_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iovec *vector, - int32_t count, struct iatt *stbuf, struct iobref *iobref) + int32_t count, struct iatt *stbuf, struct iobref *iobref, + dict_t *xdata) { GF_ASSERT (frame); STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count, - stbuf, iobref); + stbuf, iobref, xdata); return 0; } @@ -467,7 +461,7 @@ ra_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int ra_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset) + off_t offset, uint32_t flags, dict_t *xdata) { ra_file_t *file = NULL; ra_local_t *local = NULL; @@ -489,12 +483,8 @@ ra_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, fd_ctx_get (fd, this, &tmp_file); file = (ra_file_t *)(long)tmp_file; - if (file 
== NULL) { - op_errno = EBADF; - gf_log (this->name, GF_LOG_WARNING, - "readv received on fd (%p) with no" - " file set in its context", fd); - goto unwind; + if (!file || file->disabled) { + goto disabled; } if (file->offset != offset) { @@ -508,7 +498,7 @@ ra_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, "expected offset (%"PRId64") when page_count=%d", offset, file->page_count); - if (file->expected < (conf->page_size * conf->page_count)) { + if (file->expected < (file->page_size * conf->page_count)) { file->expected += size; file->page_count = min ((file->expected / file->page_size), @@ -517,18 +507,10 @@ ra_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, } if (!expected_offset) { - flush_region (frame, file, 0, file->pages.prev->offset + 1); + flush_region (frame, file, 0, file->pages.prev->offset + 1, 0); } - if (file->disabled) { - STACK_WIND (frame, ra_readv_disabled_cbk, - FIRST_CHILD (frame->this), - FIRST_CHILD (frame->this)->fops->readv, - file->fd, size, offset); - return 0; - } - - local = (void *) GF_CALLOC (1, sizeof (*local), gf_ra_mt_ra_local_t); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto unwind; @@ -548,7 +530,7 @@ ra_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, dispatch_requests (frame, file); - flush_region (frame, file, 0, floor (offset, file->page_size)); + flush_region (frame, file, 0, floor (offset, file->page_size), 0); read_ahead (frame, file); @@ -559,18 +541,26 @@ ra_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, return 0; unwind: - STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL); + STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, + NULL); + + return 0; +disabled: + STACK_WIND (frame, ra_readv_disabled_cbk, + FIRST_CHILD (frame->this), + FIRST_CHILD (frame->this)->fops->readv, + fd, size, offset, flags, xdata); return 0; } int ra_flush_cbk (call_frame_t *frame, void *cookie, 
xlator_t *this, int32_t op_ret, - int32_t op_errno) + int32_t op_errno, dict_t *xdata) { GF_ASSERT (frame); - STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno); + STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata); return 0; } @@ -578,16 +568,18 @@ ra_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int ra_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf) + int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) { GF_ASSERT (frame); - STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf); + STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, + xdata); return 0; } int -ra_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) +ra_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { ra_file_t *file = NULL; uint64_t tmp_file = 0; @@ -600,28 +592,23 @@ ra_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) fd_ctx_get (fd, this, &tmp_file); file = (ra_file_t *)(long)tmp_file; - if (file == NULL) { - op_errno = EBADF; - gf_log (this->name, GF_LOG_WARNING, - "flush received on fd (%p) with no" - " file set in its context", fd); - goto unwind; + if (file) { + flush_region (frame, file, 0, file->pages.prev->offset+1, 0); } - flush_region (frame, file, 0, file->pages.prev->offset+1); - STACK_WIND (frame, ra_flush_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->flush, fd); + FIRST_CHILD (this)->fops->flush, fd, xdata); return 0; unwind: - STACK_UNWIND_STRICT (flush, frame, -1, op_errno); + STACK_UNWIND_STRICT (flush, frame, -1, op_errno, NULL); return 0; } int -ra_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync) +ra_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + dict_t *xdata) { ra_file_t *file = NULL; uint64_t tmp_file = 0; @@ -634,22 +621,16 @@ ra_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t 
datasync) fd_ctx_get (fd, this, &tmp_file); file = (ra_file_t *)(long)tmp_file; - if (file == NULL) { - op_errno = EBADF; - gf_log (this->name, GF_LOG_WARNING, - "fsync received on fd (%p) with no" - " file set in its context", fd); - goto unwind; + if (file) { + flush_region (frame, file, 0, file->pages.prev->offset+1, 0); } - flush_region (frame, file, 0, file->pages.prev->offset+1); - STACK_WIND (frame, ra_fsync_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsync, fd, datasync); + FIRST_CHILD (this)->fops->fsync, fd, datasync, xdata); return 0; unwind: - STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL); + STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, NULL); return 0; } @@ -657,39 +638,29 @@ unwind: int ra_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { - fd_t *fd = NULL; ra_file_t *file = NULL; - uint64_t tmp_file = 0; GF_ASSERT (frame); - fd = frame->local; + file = frame->local; - fd_ctx_get (fd, this, &tmp_file); - file = (ra_file_t *)(long)tmp_file; - - if (file == NULL) { - gf_log (this->name, GF_LOG_WARNING, - "no read-ahead context set in fd (%p)", fd); - op_errno = EBADF; - op_ret = -1; - goto out; + if (file) { + flush_region (frame, file, 0, file->pages.prev->offset+1, 1); } - flush_region (frame, file, 0, file->pages.prev->offset+1); - -out: frame->local = NULL; - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf); + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); return 0; } int ra_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, - int32_t count, off_t offset, struct iobref *iobref) + int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) { ra_file_t *file = NULL; uint64_t tmp_file = 0; @@ -701,29 +672,22 @@ ra_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct 
iovec *vector, fd_ctx_get (fd, this, &tmp_file); file = (ra_file_t *)(long)tmp_file; - if (file == NULL) { - op_errno = EBADF; - gf_log (this->name, GF_LOG_WARNING, "writev received on fd with" - "no file set in its context"); - goto unwind; + if (file) { + flush_region (frame, file, 0, file->pages.prev->offset+1, 1); + frame->local = file; + /* reset the read-ahead counters too */ + file->expected = file->page_count = 0; } - flush_region (frame, file, 0, file->pages.prev->offset+1); - - /* reset the read-ahead counters too */ - file->expected = file->page_count = 0; - - frame->local = fd; - STACK_WIND (frame, ra_writev_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, - fd, vector, count, offset, iobref); + fd, vector, count, offset, flags, iobref, xdata); return 0; unwind: - STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL); + STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, NULL); return 0; } @@ -731,29 +695,30 @@ unwind: int ra_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { GF_ASSERT (frame); STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, - postbuf); + postbuf, xdata); return 0; } int ra_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) + int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) { GF_ASSERT (frame); - STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf); + STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata); return 0; } int -ra_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) +ra_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { ra_file_t *file = NULL; fd_t *iter_fd = NULL; @@ -775,8 +740,16 @@ ra_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) if (!file) continue; + /* + * 
Truncation invalidates reads just like writing does. + * TBD: this seems to flush more than it should. The + * only time we should flush at all is when we're + * shortening (not lengthening) the file, and then only + * from new EOF to old EOF. The same problem exists in + * ra_ftruncate. + */ flush_region (frame, file, 0, - file->pages.prev->offset + 1); + file->pages.prev->offset + 1, 1); } } UNLOCK (&inode->lock); @@ -784,17 +757,17 @@ ra_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) STACK_WIND (frame, ra_truncate_cbk, FIRST_CHILD (this), FIRST_CHILD (this)->fops->truncate, - loc, offset); + loc, offset, xdata); return 0; unwind: - STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL); + STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL, NULL); return 0; } void -ra_page_dump (struct ra_page *page, char *key_prefix) +ra_page_dump (struct ra_page *page) { int i = 0; call_frame_t *frame = NULL; @@ -805,22 +778,19 @@ ra_page_dump (struct ra_page *page, char *key_prefix) goto out; } - gf_proc_dump_build_key (key, key_prefix, "offset"); - gf_proc_dump_write (key, "%"PRId64, page->offset); + gf_proc_dump_write ("offset", "%"PRId64, page->offset); - gf_proc_dump_build_key (key, key_prefix, "size"); - gf_proc_dump_write (key, "%"PRId64, page->size); + gf_proc_dump_write ("size", "%"PRId64, page->size); - gf_proc_dump_build_key (key, key_prefix, "dirty"); - gf_proc_dump_write (key, "%s", page->dirty ? "yes" : "no"); + gf_proc_dump_write ("dirty", "%s", page->dirty ? "yes" : "no"); - gf_proc_dump_build_key (key, key_prefix, "ready"); - gf_proc_dump_write (key, "%s", page->ready ? "yes" : "no"); + gf_proc_dump_write ("poisoned", "%s", page->poisoned ? "yes" : "no"); + + gf_proc_dump_write ("ready", "%s", page->ready ? 
"yes" : "no"); for (trav = page->waitq; trav; trav = trav->next) { frame = trav->data; - gf_proc_dump_build_key (key, key_prefix, "waiting-frame[%d]", - i++); + sprintf (key, "waiting-frame[%d]", i++); gf_proc_dump_write (key, "%"PRId64, frame->root->unique); } @@ -855,36 +825,31 @@ ra_fdctx_dump (xlator_t *this, fd_t *fd) ret = __inode_path (fd->inode, NULL, &path); if (path != NULL) { - gf_proc_dump_build_key (key, key_prefix, "path"); - gf_proc_dump_write (key, "%s", path); + gf_proc_dump_write ("path", "%s", path); GF_FREE (path); } - gf_proc_dump_build_key (key, key_prefix, "fd"); - gf_proc_dump_write (key, "%p", fd); + gf_proc_dump_write ("fd", "%p", fd); - gf_proc_dump_build_key (key, key_prefix, "disabled"); - gf_proc_dump_write (key, "%s", file->disabled ? "yes" : "no"); + gf_proc_dump_write ("disabled", "%s", file->disabled ? "yes" : "no"); if (file->disabled) { ret = 0; goto out; } - gf_proc_dump_build_key (key, key_prefix, "page-size"); - gf_proc_dump_write (key, "%"PRId64, file->page_size); + gf_proc_dump_write ("page-size", "%"PRId64, file->page_size); - gf_proc_dump_build_key (key, key_prefix, "page-count"); - gf_proc_dump_write (key, "%u", file->page_count); + gf_proc_dump_write ("page-count", "%u", file->page_count); - gf_proc_dump_build_key (key, key_prefix, - "next-expected-offset-for-sequential-reads"); - gf_proc_dump_write (key, "%"PRId64, file->offset); + gf_proc_dump_write ("next-expected-offset-for-sequential-reads", + "%"PRId64, file->offset); for (page = file->pages.next; page != &file->pages; page = page->next) { - gf_proc_dump_build_key (key, key_prefix, "page[%d]", i++); - ra_page_dump (page, key_prefix); + sprintf (key, "page[%d]", i); + gf_proc_dump_write (key, "%p", page[i++]); + ra_page_dump (page); } ret = 0; @@ -893,7 +858,7 @@ out: } int -ra_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd) +ra_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { ra_file_t *file = NULL; fd_t *iter_fd = NULL; @@ -916,23 
+881,24 @@ ra_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd) if (!file) continue; flush_region (frame, file, 0, - file->pages.prev->offset + 1); + file->pages.prev->offset + 1, 0); } } UNLOCK (&inode->lock); STACK_WIND (frame, ra_attr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fstat, fd); + FIRST_CHILD (this)->fops->fstat, fd, xdata); return 0; unwind: - STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL); + STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL, NULL); return 0; } int -ra_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) +ra_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { ra_file_t *file = NULL; fd_t *iter_fd = NULL; @@ -953,29 +919,137 @@ ra_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) file = (ra_file_t *)(long)tmp_file; if (!file) continue; + /* + * Truncation invalidates reads just like writing does. + * TBD: this seems to flush more than it should. The + * only time we should flush at all is when we're + * shortening (not lengthening) the file, and then only + * from new EOF to old EOF. The same problem exists in + * ra_truncate. 
+ */ flush_region (frame, file, 0, - file->pages.prev->offset + 1); + file->pages.prev->offset + 1, 1); } } UNLOCK (&inode->lock); STACK_WIND (frame, ra_truncate_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->ftruncate, fd, offset); + FIRST_CHILD (this)->fops->ftruncate, fd, offset, xdata); return 0; unwind: - STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL); + STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL, NULL); return 0; } +int +ra_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + GF_ASSERT (frame); + + STACK_UNWIND_STRICT (discard, frame, op_ret, op_errno, prebuf, + postbuf, xdata); + return 0; +} + +static int +ra_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + ra_file_t *file = NULL; + fd_t *iter_fd = NULL; + inode_t *inode = NULL; + uint64_t tmp_file = 0; + int32_t op_errno = EINVAL; + + GF_ASSERT (frame); + GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); + GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); + + inode = fd->inode; + + LOCK (&inode->lock); + { + list_for_each_entry (iter_fd, &inode->fd_list, inode_list) { + fd_ctx_get (iter_fd, this, &tmp_file); + file = (ra_file_t *)(long)tmp_file; + if (!file) + continue; + + flush_region(frame, file, offset, len, 1); + } + } + UNLOCK (&inode->lock); + + STACK_WIND (frame, ra_discard_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->discard, fd, offset, len, xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT (discard, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + +int +ra_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + GF_ASSERT (frame); + + STACK_UNWIND_STRICT (zerofill, frame, op_ret, op_errno, prebuf, + postbuf, xdata); + return 0; +} + +static int 
+ra_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + ra_file_t *file = NULL; + fd_t *iter_fd = NULL; + inode_t *inode = NULL; + uint64_t tmp_file = 0; + int32_t op_errno = EINVAL; + + GF_ASSERT (frame); + GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); + GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); + + inode = fd->inode; + + LOCK (&inode->lock); + { + list_for_each_entry (iter_fd, &inode->fd_list, inode_list) { + fd_ctx_get (iter_fd, this, &tmp_file); + file = (ra_file_t *)(long)tmp_file; + if (!file) + continue; + + flush_region(frame, file, offset, len, 1); + } + } + UNLOCK (&inode->lock); + + STACK_WIND (frame, ra_zerofill_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->zerofill, fd, + offset, len, xdata); + return 0; + +unwind: + STACK_UNWIND_STRICT (zerofill, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} int ra_priv_dump (xlator_t *this) { ra_conf_t *conf = NULL; int ret = -1; - char key[GF_DUMP_MAX_BUF_LEN] = {0, }; char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; + gf_boolean_t add_section = _gf_false; if (!this) { goto out; @@ -987,29 +1061,32 @@ ra_priv_dump (xlator_t *this) goto out; } - ret = pthread_mutex_trylock (&conf->conf_lock); - if (ret) { - gf_log (this->name, GF_LOG_WARNING, "Unable to lock client %s " - "(%s)", this->name, strerror (ret)); - ret = -1; - goto out; - } - gf_proc_dump_build_key (key_prefix, "xlator.performance.read-ahead", "priv"); gf_proc_dump_add_section (key_prefix); - gf_proc_dump_build_key (key, key_prefix, "page_size"); - gf_proc_dump_write (key, "%d", conf->page_size); - gf_proc_dump_build_key (key, key_prefix, "page_count"); - gf_proc_dump_write (key, "%d", conf->page_count); - gf_proc_dump_build_key (key, key_prefix, "force_atime_update"); - gf_proc_dump_write (key, "%d", conf->force_atime_update); + add_section = _gf_true; + ret = pthread_mutex_trylock (&conf->conf_lock); + if (ret) + goto out; + { + gf_proc_dump_write ("page_size", "%d", 
conf->page_size); + gf_proc_dump_write ("page_count", "%d", conf->page_count); + gf_proc_dump_write ("force_atime_update", "%d", + conf->force_atime_update); + } pthread_mutex_unlock (&conf->conf_lock); ret = 0; out: + if (ret && conf) { + if (add_section == _gf_false) + gf_proc_dump_add_section (key_prefix); + + gf_proc_dump_write ("Unable to dump priv", + "(Lock acquisition failed) %s", this->name); + } return ret; } @@ -1035,16 +1112,33 @@ out: } int +reconfigure (xlator_t *this, dict_t *options) +{ + ra_conf_t *conf = NULL; + int ret = -1; + + GF_VALIDATE_OR_GOTO ("read-ahead", this, out); + GF_VALIDATE_OR_GOTO ("read-ahead", this->private, out); + + conf = this->private; + + GF_OPTION_RECONF ("page-count", conf->page_count, options, uint32, out); + + GF_OPTION_RECONF ("page-size", conf->page_size, options, size, out); + + ret = 0; + out: + return ret; +} + +int init (xlator_t *this) { ra_conf_t *conf = NULL; - dict_t *options = NULL; - char *page_count_string = NULL; int32_t ret = -1; GF_VALIDATE_OR_GOTO ("read-ahead", this, out); - options = this->options; if (!this->children || this->children->next) { gf_log (this->name, GF_LOG_ERROR, "FATAL: read-ahead not configured with exactly one" @@ -1063,60 +1157,32 @@ init (xlator_t *this) } conf->page_size = this->ctx->page_size; - conf->page_count = 4; - - if (dict_get (options, "page-count")) { - page_count_string = data_to_str (dict_get (options, - "page-count")); - } - - if (page_count_string) { - if (gf_string2uint_base10 (page_count_string, &conf->page_count) - != 0) { - gf_log ("read-ahead", GF_LOG_ERROR, - "invalid number format \"%s\" of \"option " - "page-count\"", - page_count_string); - goto out; - } - gf_log (this->name, GF_LOG_WARNING, - "Using conf->page_count = %u", conf->page_count); - } + GF_OPTION_INIT ("page-size", conf->page_size, size, out); - if (dict_get (options, "force-atime-update")) { - char *force_atime_update_str = NULL; + GF_OPTION_INIT ("page-count", conf->page_count, uint32, out); - 
force_atime_update_str - = data_to_str (dict_get (options, - "force-atime-update")); - - if (gf_string2boolean (force_atime_update_str, - &conf->force_atime_update) == -1) { - gf_log (this->name, GF_LOG_ERROR, - "'force-atime-update' takes only boolean " - "options"); - goto out; - } - - if (conf->force_atime_update) { - gf_log (this->name, GF_LOG_WARNING, "Forcing atime " - "updates on cache hit"); - } - } + GF_OPTION_INIT ("force-atime-update", conf->force_atime_update, bool, out); conf->files.next = &conf->files; conf->files.prev = &conf->files; pthread_mutex_init (&conf->conf_lock, NULL); + + this->local_pool = mem_pool_new (ra_local_t, 64); + if (!this->local_pool) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "failed to create local_t's memory pool"); + goto out; + } + this->private = conf; ret = 0; out: if (ret == -1) { - if (conf != NULL) { - GF_FREE (conf); - } + GF_FREE (conf); } return ret; @@ -1135,11 +1201,14 @@ fini (xlator_t *this) goto out; } + this->private = NULL; + + GF_ASSERT ((conf->files.next == &conf->files) + && (conf->files.prev == &conf->files)); + pthread_mutex_destroy (&conf->conf_lock); GF_FREE (conf); - this->private = NULL; - out: return; } @@ -1154,6 +1223,8 @@ struct xlator_fops fops = { .truncate = ra_truncate, .ftruncate = ra_ftruncate, .fstat = ra_fstat, + .discard = ra_discard, + .zerofill = ra_zerofill, }; struct xlator_cbks cbks = { @@ -1167,12 +1238,22 @@ struct xlator_dumpops dumpops = { struct volume_options options[] = { { .key = {"force-atime-update"}, - .type = GF_OPTION_TYPE_BOOL + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false" }, { .key = {"page-count"}, .type = GF_OPTION_TYPE_INT, .min = 1, - .max = 16 + .max = 16, + .default_value = "4", + .description = "Number of pages that will be pre-fetched" }, + { .key = {"page-size"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 4096, + .max = 1048576 * 64, + .default_value = "131072", + .description = "Page size with which read-ahead performs server I/O" + }, { 
.key = {NULL} }, }; diff --git a/xlators/performance/read-ahead/src/read-ahead.h b/xlators/performance/read-ahead/src/read-ahead.h index d0bbcde81..d1d768c34 100644 --- a/xlators/performance/read-ahead/src/read-ahead.h +++ b/xlators/performance/read-ahead/src/read-ahead.h @@ -1,20 +1,11 @@ /* - Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #ifndef __READ_AHEAD_H @@ -76,7 +67,8 @@ struct ra_page { struct ra_page *next; struct ra_page *prev; struct ra_file *file; - char dirty; + char dirty; /* Internal request, not from user. */ + char poisoned; /* Pending read invalidated by write. 
*/ char ready; struct iovec *vector; int32_t count; @@ -84,6 +76,7 @@ struct ra_page { size_t size; struct ra_waitq *waitq; struct iobref *iobref; + char stale; }; diff --git a/xlators/performance/readdir-ahead/Makefile.am b/xlators/performance/readdir-ahead/Makefile.am new file mode 100644 index 000000000..a985f42a8 --- /dev/null +++ b/xlators/performance/readdir-ahead/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/performance/readdir-ahead/src/Makefile.am b/xlators/performance/readdir-ahead/src/Makefile.am new file mode 100644 index 000000000..cdabd1428 --- /dev/null +++ b/xlators/performance/readdir-ahead/src/Makefile.am @@ -0,0 +1,15 @@ +xlator_LTLIBRARIES = readdir-ahead.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance + +readdir_ahead_la_LDFLAGS = -module -avoidversion + +readdir_ahead_la_SOURCES = readdir-ahead.c +readdir_ahead_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = readdir-ahead.h readdir-ahead-mem-types.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h new file mode 100644 index 000000000..39e2c5369 --- /dev/null +++ b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h @@ -0,0 +1,24 @@ +/* + Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + + +#ifndef __RDA_MEM_TYPES_H__ +#define __RDA_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_rda_mem_types_ { + gf_rda_mt_rda_local = gf_common_mt_end + 1, + gf_rda_mt_rda_fd_ctx, + gf_rda_mt_rda_priv, + gf_rda_mt_end +}; + +#endif diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c new file mode 100644 index 000000000..53e6756f0 --- /dev/null +++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c @@ -0,0 +1,560 @@ +/* + Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +/* + * performance/readdir-ahead preloads a local buffer with directory entries + * on opendir. The optimization involves using maximum sized gluster rpc + * requests (128k) to minimize overhead of smaller client requests. + * + * For example, fuse currently supports a maximum readdir buffer of 4k + * (regardless of the filesystem client's buffer size). readdir-ahead should + * effectively convert these smaller requests into fewer, larger sized requests + * for simple, sequential workloads (i.e., ls). + * + * The translator is currently designed to handle the simple, sequential case + * only. If a non-sequential directory read occurs, readdir-ahead disables + * preloads on the directory. + */ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "xlator.h" +#include "call-stub.h" +#include "readdir-ahead.h" +#include "readdir-ahead-mem-types.h" +#include "defaults.h" + +static int rda_fill_fd(call_frame_t *, xlator_t *, fd_t *); + +/* + * Get (or create) the fd context for storing prepopulated directory + * entries. 
+ */ +static struct +rda_fd_ctx *get_rda_fd_ctx(fd_t *fd, xlator_t *this) +{ + uint64_t val; + struct rda_fd_ctx *ctx; + + LOCK(&fd->lock); + + if (__fd_ctx_get(fd, this, &val) < 0) { + ctx = GF_CALLOC(1, sizeof(struct rda_fd_ctx), + gf_rda_mt_rda_fd_ctx); + if (!ctx) + goto out; + + LOCK_INIT(&ctx->lock); + INIT_LIST_HEAD(&ctx->entries.list); + ctx->state = RDA_FD_NEW; + /* ctx offset values initialized to 0 */ + + if (__fd_ctx_set(fd, this, (uint64_t) ctx) < 0) { + GF_FREE(ctx); + ctx = NULL; + goto out; + } + } else { + ctx = (struct rda_fd_ctx *) val; + } +out: + UNLOCK(&fd->lock); + return ctx; +} + +/* + * Reset the tracking state of the context. + */ +static void +rda_reset_ctx(struct rda_fd_ctx *ctx) +{ + ctx->state = RDA_FD_NEW; + ctx->cur_offset = 0; + ctx->cur_size = 0; + ctx->next_offset = 0; + gf_dirent_free(&ctx->entries); +} + +/* + * Check whether we can handle a request. Offset verification is done by the + * caller, so we only check whether the preload buffer has completion status + * (including an error) or has some data to return. + */ +static gf_boolean_t +rda_can_serve_readdirp(struct rda_fd_ctx *ctx, size_t request_size) +{ + if ((ctx->state & RDA_FD_EOD) || + (ctx->state & RDA_FD_ERROR) || + (!(ctx->state & RDA_FD_PLUGGED) && (ctx->cur_size > 0))) + return _gf_true; + + return _gf_false; +} + +/* + * Serve a request from the fd dentry list based on the size of the request + * buffer. ctx must be locked. 
+ */
+static int32_t
+__rda_serve_readdirp(xlator_t *this, gf_dirent_t *entries, size_t request_size,
+                     struct rda_fd_ctx *ctx)
+{
+        gf_dirent_t *dirent, *tmp;
+        size_t dirent_size, size = 0;
+        int32_t count = 0;
+        struct rda_priv *priv = this->private;
+
+        /*
+         * Move entries from the head of the preload list to the caller's
+         * list until the next one would overflow request_size.
+         */
+        list_for_each_entry_safe(dirent, tmp, &ctx->entries.list, list) {
+                dirent_size = gf_dirent_size(dirent->d_name);
+                if (size + dirent_size > request_size)
+                        break;
+
+                size += dirent_size;
+                list_del_init(&dirent->list);
+                ctx->cur_size -= dirent_size;
+
+                list_add_tail(&dirent->list, &entries->list);
+                /* the next request is expected at the last offset handed out */
+                ctx->cur_offset = dirent->d_off;
+                count++;
+        }
+
+        /* buffer drained to the low watermark: hold requests until refilled */
+        if (ctx->cur_size <= priv->rda_low_wmark)
+                ctx->state |= RDA_FD_PLUGGED;
+
+        /* number of entries moved, which may be 0 */
+        return count;
+}
+
+/*
+ * Resume function of the readdirp call stub: answer the request from the
+ * preload buffer and unwind. Both call sites in this file resume the stub
+ * with ctx->lock held. The offset parameter is unused here; rda_readdirp()
+ * has already checked it against ctx->cur_offset.
+ *
+ * NOTE(review): the result of get_rda_fd_ctx() is used without a NULL check.
+ * Presumably it cannot fail here because rda_readdirp() obtained the context
+ * before creating the stub -- confirm.
+ */
+static int32_t
+rda_readdirp_stub(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+                  off_t offset, dict_t *xdata)
+{
+        gf_dirent_t entries;
+        int32_t ret;
+        struct rda_fd_ctx *ctx;
+        int op_errno = 0;
+
+        ctx = get_rda_fd_ctx(fd, this);
+        INIT_LIST_HEAD(&entries.list);
+        ret = __rda_serve_readdirp(this, &entries, size, ctx);
+
+        /* a pended error is only reported once the buffer has nothing left */
+        if (!ret && (ctx->state & RDA_FD_ERROR)) {
+                ret = -1;
+                op_errno = ctx->op_errno;
+                ctx->state &= ~RDA_FD_ERROR;
+
+                /*
+                 * the preload has stopped running in the event of an error, so
+                 * pass all future requests along
+                 */
+                ctx->state |= RDA_FD_BYPASS;
+        }
+
+        STACK_UNWIND_STRICT(readdirp, frame, ret, op_errno, &entries, xdata);
+        gf_dirent_free(&entries);
+
+        return 0;
+}
+
+/*
+ * readdirp fop. Depending on the state of the fd context the request is
+ *  - wound straight to the child (bypass),
+ *  - answered from the preload buffer right away, or
+ *  - parked in ctx->stub until rda_fill_fd_cbk() can answer it.
+ * Failure to get a context or to create the stub unwinds with ENOMEM.
+ */
+static int32_t
+rda_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+             off_t off, dict_t *xdata)
+{
+        struct rda_fd_ctx *ctx;
+        call_stub_t *stub;
+        int fill = 0;
+
+        ctx = get_rda_fd_ctx(fd, this);
+        if (!ctx)
+                goto err;
+
+        /* unlocked peek; repeated below once the lock is held */
+        if (ctx->state & RDA_FD_BYPASS)
+                goto bypass;
+
+        LOCK(&ctx->lock);
+
+        /* recheck now that we have the lock */
+        if (ctx->state & RDA_FD_BYPASS) {
+                UNLOCK(&ctx->lock);
+                goto bypass;
+        }
+
+        /*
+         * If a new read comes in at offset 0 and the buffer has been
+         * completed, reset the context and kickstart the filler again.
+         */
+        if (!off && (ctx->state & RDA_FD_EOD) && (ctx->cur_size == 0)) {
+                rda_reset_ctx(ctx);
+                fill = 1;
+        }
+
+        /*
+         * If a readdir occurs at an unexpected offset or we already have a
+         * request pending, admit defeat and just get out of the way.
+         */
+        if (off != ctx->cur_offset || ctx->stub) {
+                ctx->state |= RDA_FD_BYPASS;
+                UNLOCK(&ctx->lock);
+                goto bypass;
+        }
+
+        stub = fop_readdirp_stub(frame, rda_readdirp_stub, fd, size, off, xdata);
+        if (!stub) {
+                UNLOCK(&ctx->lock);
+                goto err;
+        }
+
+        /*
+         * If we haven't bypassed the preload, this means we can either serve
+         * the request out of the preload or the request that enables us to do
+         * so is in flight...
+         */
+        if (rda_can_serve_readdirp(ctx, size))
+                call_resume(stub);
+        else
+                ctx->stub = stub;
+
+        UNLOCK(&ctx->lock);
+
+        /* restart the preload outside the lock; rda_fill_fd() takes it itself */
+        if (fill)
+                rda_fill_fd(frame, this, fd);
+
+        return 0;
+
+bypass:
+        STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+        return 0;
+
+err:
+        STACK_UNWIND_STRICT(readdirp, frame, -1, ENOMEM, NULL, NULL);
+        return 0;
+}
+
+/*
+ * Callback of the preload readdirp wound by rda_fill_fd(). Appends the
+ * returned entries to the context, updates the state flags from op_ret,
+ * resumes a parked request if it can now be answered, and either issues the
+ * next preload request or tears the fill frame down.
+ */
+static int32_t
+rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+                dict_t *xdata)
+{
+        gf_dirent_t *dirent, *tmp;
+        struct rda_local *local = frame->local;
+        struct rda_fd_ctx *ctx = local->ctx;
+        struct rda_priv *priv = this->private;
+        int fill = 1;
+
+        LOCK(&ctx->lock);
+
+        /* Verify that the preload buffer is still pending on this data. */
+        if (ctx->next_offset != local->offset) {
+                gf_log(this->name, GF_LOG_ERROR,
+                        "Out of sequence directory preload.");
+                ctx->state |= (RDA_FD_BYPASS|RDA_FD_ERROR);
+                ctx->op_errno = EUCLEAN;
+
+                goto out;
+        }
+
+        if (entries) {
+                list_for_each_entry_safe(dirent, tmp, &entries->list, list) {
+                        list_del_init(&dirent->list);
+                        /* must preserve entry order */
+                        list_add_tail(&dirent->list, &ctx->entries.list);
+
+                        ctx->cur_size += gf_dirent_size(dirent->d_name);
+                        /* the next preload request continues after this entry */
+                        ctx->next_offset = dirent->d_off;
+                }
+        }
+
+        /* enough data buffered: let readers in again */
+        if (ctx->cur_size >= priv->rda_high_wmark)
+                ctx->state &= ~RDA_FD_PLUGGED;
+
+        if (!op_ret) {
+                /* we've hit eod */
+                ctx->state &= ~RDA_FD_RUNNING;
+                ctx->state |= RDA_FD_EOD;
+        } else if (op_ret == -1) {
+                /* kill the preload and pend the error */
+                ctx->state &= ~RDA_FD_RUNNING;
+                ctx->state |= RDA_FD_ERROR;
+                ctx->op_errno = op_errno;
+        }
+
+        /*
+         * NOTE: The strict bypass logic in readdirp() means a pending request
+         * is always based on ctx->cur_offset.
+         */
+        if (ctx->stub &&
+            rda_can_serve_readdirp(ctx, ctx->stub->args.size)) {
+                call_resume(ctx->stub);
+                ctx->stub = NULL;
+        }
+
+out:
+        /*
+         * If we have been marked for bypass and have no pending stub, clear the
+         * run state so we stop preloading the context with entries.
+         */
+        if ((ctx->state & RDA_FD_BYPASS) && !ctx->stub)
+                ctx->state &= ~RDA_FD_RUNNING;
+
+        /*
+         * Preload finished: release the fill frame.
+         * NOTE(review): frame is presumably ctx->fill_frame, so local must not
+         * be used after this point; it is only read below when fill != 0.
+         */
+        if (!(ctx->state & RDA_FD_RUNNING)) {
+                fill = 0;
+                STACK_DESTROY(ctx->fill_frame->root);
+                ctx->fill_frame = NULL;
+        }
+
+        UNLOCK(&ctx->lock);
+
+        if (fill)
+                rda_fill_fd(frame, this, local->fd);
+
+        return 0;
+}
+
+/*
+ * Start prepopulating the fd context with directory entries.
+ */
+static int
+rda_fill_fd(call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+        call_frame_t *nframe = NULL;
+        struct rda_local *local = NULL;
+        struct rda_fd_ctx *ctx;
+        off_t offset;
+        struct rda_priv *priv = this->private;
+
+        /*
+         * Winds one readdirp of rda_req_size bytes at ctx->next_offset to the
+         * child; rda_fill_fd_cbk() stores the result and calls back in here
+         * for the next chunk. Returns 0 once the request is wound, -1 if the
+         * preload could not be started.
+         */
+        ctx = get_rda_fd_ctx(fd, this);
+        if (!ctx)
+                goto err;
+
+        LOCK(&ctx->lock);
+
+        /* first fill after creation or reset: switch the context to running */
+        if (ctx->state & RDA_FD_NEW) {
+                ctx->state &= ~RDA_FD_NEW;
+                ctx->state |= RDA_FD_RUNNING;
+                /* with a low watermark configured, readers wait for data */
+                if (priv->rda_low_wmark)
+                        ctx->state |= RDA_FD_PLUGGED;
+        }
+
+        offset = ctx->next_offset;
+
+        if (!ctx->fill_frame) {
+                /* the preload runs on its own frame, reused for every chunk */
+                nframe = copy_frame(frame);
+                if (!nframe)
+                        goto err_unlock;
+
+                local = mem_get0(this->local_pool);
+                if (!local)
+                        goto err_unlock;
+
+                local->ctx = ctx;
+                local->fd = fd;
+                nframe->local = local;
+
+                ctx->fill_frame = nframe;
+        } else {
+                nframe = ctx->fill_frame;
+                local = nframe->local;
+        }
+
+        /* remembered so the callback can detect an out-of-sequence reply */
+        local->offset = offset;
+
+        UNLOCK(&ctx->lock);
+
+        STACK_WIND(nframe, rda_fill_fd_cbk, FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->readdirp, fd, priv->rda_req_size,
+                   offset, NULL);
+
+        return 0;
+
+err_unlock:
+        /*
+         * The preload never started, so nothing will ever fill this context.
+         * Stop tracking the fd (future requests are passed to the child) and
+         * fail a request that is already parked, otherwise it would wait
+         * forever. The stub is resumed with ctx->lock held, like the other
+         * call_resume() sites in this file.
+         */
+        ctx->state &= ~RDA_FD_RUNNING;
+        ctx->state |= RDA_FD_BYPASS;
+        if (ctx->stub) {
+                ctx->state |= RDA_FD_ERROR;
+                ctx->op_errno = ENOMEM;
+                call_resume(ctx->stub);
+                ctx->stub = NULL;
+        }
+        UNLOCK(&ctx->lock);
+
+err:
+        /*
+         * NOTE(review): nframe comes from copy_frame(); confirm FRAME_DESTROY
+         * (rather than STACK_DESTROY on its root) fully releases it.
+         */
+        if (nframe)
+                FRAME_DESTROY(nframe);
+
+        return -1;
+}
+
+/*
+ * opendir callback: on success start preloading the new fd before the
+ * opendir reply is unwound. A failed rda_fill_fd() is not fatal for opendir.
+ */
+static int32_t
+rda_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+                int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+        if (!op_ret)
+                rda_fill_fd(frame, this, fd);
+
+        STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, xdata);
+        return 0;
+}
+
+/*
+ * opendir fop: pass the call down and hook rda_opendir_cbk() in.
+ */
+static int32_t
+rda_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+            dict_t *xdata)
+{
+        STACK_WIND(frame, rda_opendir_cbk, FIRST_CHILD(this),
+                   FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+        return 0;
+}
+
+/*
+ * releasedir callback: detach the context from the fd and free everything it
+ * owns (buffered entries, fill frame, lock). Returns -1 if no context was
+ * attached, 0 otherwise.
+ */
+static int32_t
+rda_releasedir(xlator_t *this, fd_t *fd)
+{
+        uint64_t val;
+        struct rda_fd_ctx *ctx;
+
+        if (fd_ctx_del(fd, this, &val) < 0)
+                return -1;
+
+        ctx = (struct rda_fd_ctx *) val;
+        if (!ctx)
+                return 0;
+
+        rda_reset_ctx(ctx);
+
+        if (ctx->fill_frame)
+                STACK_DESTROY(ctx->fill_frame->root);
+
+        if (ctx->stub)
+                gf_log(this->name, GF_LOG_ERROR,
+                        "released a directory with a pending stub");
+
+        /* pairs with LOCK_INIT() in get_rda_fd_ctx() */
+        LOCK_DESTROY(&ctx->lock);
+        GF_FREE(ctx);
+        return 0;
+}
+
+/*
+ * Register this translator's memory types for accounting.
+ * Returns the result of xlator_mem_acct_init(), or -1 if this is NULL.
+ */
+int32_t
+mem_acct_init(xlator_t *this)
+{
+        int ret = -1;
+
+        if (!this)
+                goto out;
+
+        ret = xlator_mem_acct_init(this, gf_rda_mt_end + 1);
+
+        if (ret != 0)
+                gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
+                       " failed");
+
+out:
+        return ret;
+}
+
+/*
+ * Re-read the three tunables from a changed option dictionary.
+ * Returns 0 on success, -1 if an option fails to parse.
+ */
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+        struct rda_priv *priv = this->private;
+
+        GF_OPTION_RECONF("rda-request-size", priv->rda_req_size, options,
+                         uint32, err);
+        GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options, size,
+                         err);
+        GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options, size,
+                         err);
+
+        return 0;
+err:
+        return -1;
+}
+
+/*
+ * Translator constructor: requires exactly one child, allocates the private
+ * structure and the pool for struct rda_local, and reads the options.
+ * Returns 0 on success, -1 on failure with nothing left allocated.
+ */
+int
+init(xlator_t *this)
+{
+        struct rda_priv *priv = NULL;
+
+        GF_VALIDATE_OR_GOTO("readdir-ahead", this, err);
+
+        if (!this->children || this->children->next) {
+                gf_log(this->name, GF_LOG_ERROR,
+                        "FATAL: readdir-ahead not configured with exactly one"
+                        " child");
+                goto err;
+        }
+
+        if (!this->parents) {
+                gf_log(this->name, GF_LOG_WARNING,
+                        "dangling volume. check volfile ");
+        }
+
+        priv = GF_CALLOC(1, sizeof(struct rda_priv), gf_rda_mt_rda_priv);
+        if (!priv)
+                goto err;
+        this->private = priv;
+
+        this->local_pool = mem_pool_new(struct rda_local, 32);
+        if (!this->local_pool)
+                goto err;
+
+        GF_OPTION_INIT("rda-request-size", priv->rda_req_size, uint32, err);
+        GF_OPTION_INIT("rda-low-wmark", priv->rda_low_wmark, size, err);
+        GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size, err);
+
+        return 0;
+
+err:
+        /* this is NULL when the validation at the top jumped here */
+        if (this) {
+                if (this->local_pool) {
+                        mem_pool_destroy(this->local_pool);
+                        this->local_pool = NULL;
+                }
+                if (priv) {
+                        GF_FREE(priv);
+                        /* do not leave a dangling pointer behind */
+                        this->private = NULL;
+                }
+        }
+
+        return -1;
+}
+
+
+/*
+ * Translator destructor: release what init() allocated.
+ */
+void
+fini(xlator_t *this)
+{
+        GF_VALIDATE_OR_GOTO ("readdir-ahead", this, out);
+
+        if (this->local_pool) {
+                mem_pool_destroy(this->local_pool);
+                this->local_pool = NULL;
+        }
+
+        GF_FREE(this->private);
+        this->private = NULL;
+
+out:
+        return;
+}
+
+/* fops intercepted by this translator */
+struct xlator_fops fops = {
+        .opendir        = rda_opendir,
+        .readdirp       = rda_readdirp,
+};
+
+struct xlator_cbks cbks = {
+        .releasedir     = rda_releasedir,
+};
+
+/* tunables read by init() and reconfigure() */
+struct volume_options options[] = {
+        { .key = {"rda-request-size"},
+          .type = GF_OPTION_TYPE_INT,
+          .min = 4096,
+          .max = 131072,
+          .default_value = "131072",
+          .description = "readdir-ahead request size",
+        },
+        { .key = {"rda-low-wmark"},
+          .type = GF_OPTION_TYPE_SIZET,
+          .min = 0,
+          .max = 10 * GF_UNIT_MB,
+          .default_value = "4096",
+          .description = "the value under which we plug",
+        },
+        { .key = {"rda-high-wmark"},
+          .type = GF_OPTION_TYPE_SIZET,
+          .min = 0,
+          .max = 100 * GF_UNIT_MB,
+          .default_value = "131072",
+          .description = "the value over which we unplug",
+        },
+        { .key = {NULL} },
+};
+
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.h b/xlators/performance/readdir-ahead/src/readdir-ahead.h
new file mode 100644
index 000000000..e48786dae
--- /dev/null
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead.h
@@ -0,0 +1,46 @@
+/*
+   Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+   This file is part of GlusterFS.
+
+   This file is licensed to you under your choice of the GNU Lesser
+   General Public License, version 3 or any later version (LGPLv3 or
+   later), or the GNU General Public License, version 2 (GPLv2), in all
+   cases as published by the Free Software Foundation.
+*/
+
+#ifndef __READDIR_AHEAD_H
+#define __READDIR_AHEAD_H
+
+/*
+ * state flags (bits of rda_fd_ctx.state)
+ *
+ * NEW      context was just created or reset; no preload started yet
+ * RUNNING  a preload is in progress and keeps requesting more entries
+ * EOD      the child returned 0 to a preload request (end of directory)
+ * ERROR    the preload failed; rda_fd_ctx.op_errno holds the pended error
+ * BYPASS   readdirp requests on this fd are wound straight to the child
+ * PLUGGED  buffer is at or below the low watermark; requests are held
+ *          until the high watermark, EOD or an error is reached
+ */
+#define RDA_FD_NEW      (1 << 0)
+#define RDA_FD_RUNNING  (1 << 1)
+#define RDA_FD_EOD      (1 << 2)
+#define RDA_FD_ERROR    (1 << 3)
+#define RDA_FD_BYPASS   (1 << 4)
+#define RDA_FD_PLUGGED  (1 << 5)
+
+/* per-fd preload state, attached to the fd as its translator context */
+struct rda_fd_ctx {
+        off_t cur_offset;         /* current head of the ctx: d_off of the
+                                     last entry handed to a reader */
+        size_t cur_size;          /* current size of the preload */
+        off_t next_offset;        /* tail of the ctx: offset of the next
+                                     preload request */
+        uint32_t state;           /* RDA_FD_* flags */
+        gf_lock_t lock;           /* taken around updates of this struct */
+        gf_dirent_t entries;      /* list head of the buffered entries */
+        call_frame_t *fill_frame; /* frame the preload runs on, NULL if none */
+        call_stub_t *stub;        /* parked readdirp request, NULL if none */
+        int op_errno;             /* errno to report while RDA_FD_ERROR is set */
+};
+
+/* frame-local data of the preload (fill) frame */
+struct rda_local {
+        struct rda_fd_ctx *ctx;   /* context being filled */
+        fd_t *fd;                 /* directory fd being preloaded */
+        off_t offset;             /* offset of the request in flight */
+};
+
+/* translator-wide settings, filled from the volume options */
+struct rda_priv {
+        uint32_t rda_req_size;    /* "rda-request-size" */
+        uint64_t rda_low_wmark;   /* "rda-low-wmark" */
+        uint64_t rda_high_wmark;  /* "rda-high-wmark" */
+};
+
+#endif /* __READDIR_AHEAD_H */
diff --git a/xlators/performance/stat-prefetch/src/Makefile.am b/xlators/performance/stat-prefetch/src/Makefile.am deleted file mode 100644 index cfb130714..000000000 --- a/xlators/performance/stat-prefetch/src/Makefile.am +++ /dev/null @@ -1,14 +0,0 @@ -xlator_LTLIBRARIES = stat-prefetch.la -xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance - -stat_prefetch_la_LDFLAGS = -module -avoidversion -stat_prefetch_la_SOURCES = stat-prefetch.c -noinst_HEADERS = stat-prefetch.h stat-prefetch-mem-types.h - -stat_prefetch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la - -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ -I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/rbtree -shared -nostartfiles $(GF_CFLAGS) - -CLEANFILES = - diff --git a/xlators/performance/stat-prefetch/src/stat-prefetch-mem-types.h b/xlators/performance/stat-prefetch/src/stat-prefetch-mem-types.h deleted file mode 
100644 index 156b3472b..000000000 --- a/xlators/performance/stat-prefetch/src/stat-prefetch-mem-types.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ - - -#ifndef __SP_MEM_TYPES_H__ -#define __SP_MEM_TYPES_H__ - -#include "mem-types.h" - -enum gf_sp_mem_types_ { - gf_sp_mt_sp_cache_t = gf_common_mt_end + 1, - gf_sp_mt_sp_fd_ctx_t, - gf_sp_mt_stat, - gf_sp_mt_sp_local_t, - gf_sp_mt_sp_inode_ctx_t, - gf_sp_mt_sp_private_t, - gf_sp_mt_fd_wrapper_t, - gf_sp_mt_end -}; -#endif diff --git a/xlators/performance/stat-prefetch/src/stat-prefetch.c b/xlators/performance/stat-prefetch/src/stat-prefetch.c deleted file mode 100644 index f8b2bcd79..000000000 --- a/xlators/performance/stat-prefetch/src/stat-prefetch.c +++ /dev/null @@ -1,4315 +0,0 @@ -/* - Copyright (c) 2009-2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. 
- - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ - -#include "stat-prefetch.h" -#include "statedump.h" - -#define GF_SP_CACHE_BUCKETS 1 -#define GF_SP_CACHE_ENTRIES_EXPECTED (128 * 1024) //1048576 - -typedef enum { - SP_EXPECT, - SP_DONT_EXPECT, - SP_DONT_CARE -} sp_expect_t; - - -void -sp_inode_ctx_free (xlator_t *this, sp_inode_ctx_t *ctx) -{ - call_stub_t *stub = NULL, *tmp = NULL; - - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, out); - GF_VALIDATE_OR_GOTO (this->name, ctx, out); - - LOCK (&ctx->lock); - { - if (!list_empty (&ctx->waiting_ops)) { - gf_log (this->name, GF_LOG_WARNING, "inode ctx is " - "being freed even when there are file " - "operations waiting for lookup-behind to " - "complete. 
The operations in the waiting list " - "are:"); - list_for_each_entry_safe (stub, tmp, &ctx->waiting_ops, - list) { - gf_log (this->name, GF_LOG_WARNING, - "OP (%s)", gf_fop_list[stub->fop]); - - list_del_init (&stub->list); - call_stub_destroy (stub); - } - } - } - UNLOCK (&ctx->lock); - - LOCK_DESTROY (&ctx->lock); - GF_FREE (ctx); - -out: - return; -} - - -sp_inode_ctx_t * -sp_inode_ctx_init () -{ - sp_inode_ctx_t *inode_ctx = NULL; - - inode_ctx = GF_CALLOC (1, sizeof (*inode_ctx), gf_sp_mt_sp_inode_ctx_t); - if (inode_ctx == NULL) { - goto out; - } - - LOCK_INIT (&inode_ctx->lock); - INIT_LIST_HEAD (&inode_ctx->waiting_ops); - -out: - return inode_ctx; -} - - -int -sp_update_inode_ctx (xlator_t *this, inode_t *inode, int32_t *op_ret, - int32_t *op_errno, char *lookup_in_progress, - char *looked_up, struct iatt *stbuf, - struct list_head *waiting_ops, int32_t *error) -{ - int32_t ret = -1; - sp_inode_ctx_t *inode_ctx = NULL; - uint64_t value = 0; - - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); - - ret = inode_ctx_get (inode, this, &value); - if (ret == 0) { - inode_ctx = (sp_inode_ctx_t *)(long)value; - } - - if (inode_ctx == NULL) { - ret = -1; - if (error != NULL) { - *error = EINVAL; - } - - gf_log (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", inode->ino, - uuid_utoa (inode->gfid)); - goto out; - } - - LOCK (&inode_ctx->lock); - { - if (op_ret != NULL) { - inode_ctx->op_ret = *op_ret; - } - - if (op_errno != NULL) { - inode_ctx->op_errno = *op_errno; - } - - if (looked_up != NULL) { - inode_ctx->looked_up = *looked_up; - } - - if (lookup_in_progress != NULL) { - inode_ctx->lookup_in_progress = *lookup_in_progress; - } - - if ((op_ret != NULL ) && (*op_ret == 0) && (stbuf != NULL) - && IA_ISDIR (stbuf->ia_type)) { - memcpy (&inode_ctx->stbuf, stbuf, sizeof (*stbuf)); - } - - if (waiting_ops != NULL) { - list_splice_init (&inode_ctx->waiting_ops, 
waiting_ops); - } - } - UNLOCK (&inode_ctx->lock); - - ret = 0; - -out: - return ret; -} - - -sp_inode_ctx_t * -sp_check_and_create_inode_ctx (xlator_t *this, inode_t *inode, - sp_expect_t expect) -{ - uint64_t value = 0; - sp_inode_ctx_t *inode_ctx = NULL; - int32_t ret = 0; - - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); - - LOCK (&inode->lock); - { - ret = __inode_ctx_get (inode, this, &value); - if (ret == 0) { - inode_ctx = (sp_inode_ctx_t *)(long)value; - - if ((expect == SP_DONT_EXPECT) && (inode_ctx != NULL)) { - gf_log_callingfn (this->name, GF_LOG_WARNING, - "stat-prefetch context is " - "present in inode " - "(ino:%"PRId64" gfid:%s) " - "when it is supposed to be " - "not present", inode->ino, - uuid_utoa (inode->gfid)); - } - } else { - if (expect == SP_EXPECT) { - gf_log_callingfn (this->name, GF_LOG_WARNING, - "stat-prefetch context is " - "not present in inode " - "(ino:%"PRId64" gfid:%s)" - " when it is supposed to be " - "present", inode->ino, - uuid_utoa (inode->gfid)); - } - - inode_ctx = sp_inode_ctx_init (); - if (inode_ctx != NULL) { - ret = __inode_ctx_put (inode, this, - (long)inode_ctx); - if (ret == -1) { - sp_inode_ctx_free (this, inode_ctx); - inode_ctx = NULL; - } - } - } - } - UNLOCK (&inode->lock); - -out: - return inode_ctx; -} - - -sp_cache_t * -sp_cache_ref (sp_cache_t *cache) -{ - if (cache == NULL) { - goto out; - } - - LOCK (&cache->lock); - { - cache->ref++; - } - UNLOCK (&cache->lock); - -out: - return cache;; -} - - -void -sp_cache_unref (sp_cache_t *cache) -{ - int refcount = 0; - - if (cache == NULL) { - goto out; - } - - LOCK (&cache->lock); - { - refcount = --cache->ref; - } - UNLOCK (&cache->lock); - - if (refcount == 0) { - rbthash_table_destroy (cache->table); - GF_FREE (cache); - } - -out: - return; -} - - -int32_t -sp_process_inode_ctx (call_frame_t *frame, xlator_t *this, loc_t *loc, - call_stub_t *stub, char *need_unwind, char *need_lookup, - char *can_wind, 
int32_t *error) -{ - int32_t ret = -1, op_errno = EINVAL; - sp_local_t *local = NULL; - sp_inode_ctx_t *inode_ctx = NULL; - uint64_t value = 0; - - if (need_unwind != NULL) { - *need_unwind = 1; - } - - GF_VALIDATE_OR_GOTO ("stat-prefetch", frame, out); - GF_VALIDATE_OR_GOTO (frame->this->name, this, out); - GF_VALIDATE_OR_GOTO (frame->this->name, loc, out); - GF_VALIDATE_OR_GOTO (frame->this->name, loc->inode, out); - GF_VALIDATE_OR_GOTO (frame->this->name, need_unwind, out); - GF_VALIDATE_OR_GOTO (frame->this->name, need_lookup, out); - GF_VALIDATE_OR_GOTO (frame->this->name, can_wind, out); - - inode_ctx_get (loc->inode, this, &value); - - inode_ctx = (sp_inode_ctx_t *)(long) value; - if (inode_ctx == NULL) { - gf_log_callingfn (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", loc->inode->ino, - uuid_utoa (loc->inode->gfid)); - *can_wind = 1; - *need_unwind = 0; - op_errno = 0; - ret = 0; - goto out; - } - - LOCK (&inode_ctx->lock); - { - if (!(inode_ctx->looked_up || inode_ctx->lookup_in_progress)) { - if (frame->local == NULL) { - local = GF_CALLOC (1, sizeof (*local), - gf_sp_mt_sp_local_t); - if (local == NULL) { - op_errno = ENOMEM; - goto unlock; - } - - frame->local = local; - - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "loc_copy failed (%s)", - strerror (op_errno)); - goto unlock; - } - } - - *need_lookup = 1; - inode_ctx->lookup_in_progress = 1; - } - - if (inode_ctx->looked_up) { - *can_wind = 1; - } else { - list_add_tail (&stub->list, &inode_ctx->waiting_ops); - stub = NULL; - } - - *need_unwind = 0; - ret = 0; - } -unlock: - UNLOCK (&inode_ctx->lock); - -out: - if (stub != NULL) { - call_stub_destroy (stub); - } - - if (error != NULL) { - *error = op_errno; - } - - return ret; -} - - -inline uint32_t -sp_hashfn (void *data, int len) -{ - return gf_dm_hashfn ((const char *)data, len); -} - - -sp_cache_t * -sp_cache_init 
(xlator_t *this) -{ - sp_cache_t *cache = NULL; - sp_private_t *priv = NULL; - - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, out); - - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - GF_VALIDATE_OR_GOTO (this->name, priv->mem_pool, out); - - cache = GF_CALLOC (1, sizeof (*cache), gf_sp_mt_sp_cache_t); - if (cache) { - cache->table = - rbthash_table_init (GF_SP_CACHE_BUCKETS, - sp_hashfn, __gf_free, - 0, priv->mem_pool); - if (cache->table == NULL) { - gf_log (this->name, GF_LOG_WARNING, - "cannot init a new rbthash table to hold " - "cache"); - GF_FREE (cache); - cache = NULL; - goto out; - } - - LOCK_INIT (&cache->lock); - cache->this = this; - } - -out: - return cache; -} - - -void -sp_local_free (sp_local_t *local) -{ - if (local) { - loc_wipe (&local->loc); - GF_FREE (local); - } -} - - -int32_t -sp_cache_remove_entry (sp_cache_t *cache, char *name, char remove_all) -{ - int32_t ret = -1; - rbthash_table_t *table = NULL; - xlator_t *this = NULL; - sp_private_t *priv = NULL; - void *data = NULL; - - GF_VALIDATE_OR_GOTO ("stat-prefetch", cache, out); - if ((name == NULL) && !remove_all) { - gf_log ((cache->this ? 
cache->this->name : "stat-prefetch"), - GF_LOG_WARNING, - "request to remove a single entry from cache and is no " - "name passed to identify it"); - goto out; - } - - this = cache->this; - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - - priv = this->private; - - LOCK (&cache->lock); - { - if (remove_all) { - table = cache->table; - cache->table = rbthash_table_init (GF_SP_CACHE_BUCKETS, - sp_hashfn, __gf_free, - 0, priv->mem_pool); - if (cache->table == NULL) { - cache->table = table; - } else { - rbthash_table_destroy (table); - ret = 0; - if (priv) { - LOCK (&priv->lock); - { - priv->entries = 0; - } - UNLOCK (&priv->lock); - } - } - } else { - data = rbthash_remove (cache->table, name, - strlen (name)); - GF_FREE (data); - ret = 0; - if (priv) { - LOCK (&priv->lock); - { - priv->entries--; - } - UNLOCK (&priv->lock); - } - } - } - UNLOCK (&cache->lock); - -out: - return ret; -} - - -int32_t -sp_cache_get_entry (sp_cache_t *cache, char *name, gf_dirent_t **entry) -{ - int32_t ret = -1; - gf_dirent_t *tmp = NULL, *new = NULL; - - GF_VALIDATE_OR_GOTO ("stat-prefetch", cache, out); - GF_VALIDATE_OR_GOTO ("stat-prefetch", cache->this, out); - GF_VALIDATE_OR_GOTO (cache->this->name, name, out); - GF_VALIDATE_OR_GOTO (cache->this->name, entry, out); - - LOCK (&cache->lock); - { - tmp = rbthash_get (cache->table, name, strlen (name)); - if (tmp != NULL) { - new = gf_dirent_for_name (tmp->d_name); - if (new == NULL) { - gf_log (cache->this->name, GF_LOG_WARNING, - "cannot create a new dentry to copy " - "from cache"); - goto unlock; - } - - new->d_ino = tmp->d_ino; - new->d_off = tmp->d_off; - new->d_len = tmp->d_len; - new->d_type = tmp->d_type; - new->d_stat = tmp->d_stat; - - *entry = new; - ret = 0; - } - } -unlock: - UNLOCK (&cache->lock); - -out: - return ret; -} - - -void -sp_cache_free (sp_cache_t *cache) -{ - sp_cache_remove_entry (cache, NULL, 1); - sp_cache_unref (cache); -} - - -sp_cache_t * 
-__sp_get_cache_fd (xlator_t *this, fd_t *fd) -{ - int32_t ret = -1; - sp_cache_t *cache = NULL; - uint64_t value = 0; - sp_fd_ctx_t *fd_ctx = NULL; - - ret = __fd_ctx_get (fd, this, &value); - if (ret == -1) { - goto out; - } - - fd_ctx = (void *)(long) value; - - cache = fd_ctx->cache; - -out: - return cache; -} - - -sp_cache_t * -sp_get_cache_fd (xlator_t *this, fd_t *fd) -{ - sp_cache_t *cache = NULL; - - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - LOCK (&fd->lock); - { - cache = __sp_get_cache_fd (this, fd); - if (cache != NULL) { - sp_cache_ref (cache); - } - } - UNLOCK (&fd->lock); - -out: - return cache; -} - - -void -sp_fd_ctx_free (sp_fd_ctx_t *fd_ctx) -{ - if (fd_ctx == NULL) { - goto out; - } - - if (fd_ctx->parent_inode) { - inode_unref (fd_ctx->parent_inode); - fd_ctx->parent_inode = NULL; - } - - if (fd_ctx->name) { - GF_FREE (fd_ctx->name); - fd_ctx->name = NULL; - } - - if (fd_ctx->cache) { - sp_cache_free (fd_ctx->cache); - } - - GF_FREE (fd_ctx); -out: - return; -} - - -inline sp_fd_ctx_t * -sp_fd_ctx_init (void) -{ - sp_fd_ctx_t *fd_ctx = NULL; - - fd_ctx = GF_CALLOC (1, sizeof (*fd_ctx), gf_sp_mt_sp_fd_ctx_t); - - return fd_ctx; -} - - -sp_fd_ctx_t * -sp_fd_ctx_new (xlator_t *this, inode_t *parent, char *name, sp_cache_t *cache) -{ - sp_fd_ctx_t *fd_ctx = NULL; - - fd_ctx = sp_fd_ctx_init (); - if (fd_ctx == NULL) { - goto out; - } - - if (parent) { - fd_ctx->parent_inode = inode_ref (parent); - } - - if (name) { - fd_ctx->name = gf_strdup (name); - if (fd_ctx->name == NULL) { - sp_fd_ctx_free (fd_ctx); - fd_ctx = NULL; - goto out; - } - } - - fd_ctx->cache = cache; - -out: - return fd_ctx; -} - - -sp_cache_t * -sp_del_cache_fd (xlator_t *this, fd_t *fd) -{ - sp_cache_t *cache = NULL; - uint64_t value = 0; - int32_t ret = -1; - sp_fd_ctx_t *fd_ctx = NULL; - - if (fd == NULL) { - goto out; - } - - LOCK (&fd->lock); - { - ret = __fd_ctx_get (fd, this, &value); - if (ret == 0) { - fd_ctx = 
(void *)(long) value; - cache = fd_ctx->cache; - fd_ctx->cache = NULL; - } - } - UNLOCK (&fd->lock); - -out: - return cache; -} - - -sp_cache_t * -sp_get_cache_inode (xlator_t *this, inode_t *inode, int32_t pid) -{ - fd_t *fd = NULL; - sp_cache_t *cache = NULL; - - if (inode == NULL) { - goto out; - } - - fd = fd_lookup (inode, pid); - if (fd == NULL) { - goto out; - } - - cache = sp_get_cache_fd (this, fd); - - fd_unref (fd); -out: - return cache; -} - - -fd_t * -_fd_ref (fd_t *fd); - -void -sp_remove_caches_from_all_fds_opened (xlator_t *this, inode_t *inode, - char *name) -{ - fd_t *fd = NULL; - sp_cache_t *cache = NULL; - struct fd_wrapper { - fd_t *fd; - struct list_head list; - }; - - struct fd_wrapper *wrapper = NULL, *tmp = NULL; - struct list_head head = {0, }; - char remove_all = 0; - - wrapper = NULL; - - INIT_LIST_HEAD (&head); - - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); - - remove_all = (name == NULL); - - LOCK (&inode->lock); - { - list_for_each_entry (fd, &inode->fd_list, inode_list) { - wrapper = GF_CALLOC (1, sizeof (*wrapper), - gf_sp_mt_fd_wrapper_t); - if (wrapper == NULL) { - goto unlock; - } - - INIT_LIST_HEAD (&wrapper->list); - - wrapper->fd = _fd_ref (fd); - list_add_tail (&wrapper->list, &head); - } - } -unlock: - UNLOCK (&inode->lock); - - list_for_each_entry_safe (wrapper, tmp, &head, list) { - cache = sp_get_cache_fd (this, wrapper->fd); - if (cache) { - sp_cache_remove_entry (cache, name, remove_all); - sp_cache_unref (cache); - } - - list_del (&wrapper->list); - fd_unref (wrapper->fd); - GF_FREE (wrapper); - } - -out: - return; -} - - -inline int32_t -__sp_put_cache (xlator_t *this, fd_t *fd, sp_cache_t *cache) -{ - sp_fd_ctx_t *fd_ctx = NULL; - int32_t ret = -1; - uint64_t value = 0; - - ret = __fd_ctx_get (fd, this, &value); - if (!ret) { - fd_ctx = (void *)(long)value; - } else { - fd_ctx = sp_fd_ctx_init (); - if (fd_ctx == NULL) { - ret = -1; - goto out; - } - - ret = 
__fd_ctx_set (fd, this, (long)(void *)fd_ctx); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "cannot set stat-prefetch context in fd (%p) " - "opened on inode (ino:%"PRId64" gfid:%s)", - fd, fd->inode->ino, - uuid_utoa (fd->inode->gfid)); - sp_fd_ctx_free (fd_ctx); - goto out; - } - } - - if (fd_ctx->cache) { - sp_cache_free (fd_ctx->cache); - } - - fd_ctx->cache = cache; - -out: - return ret; -} - - -inline int32_t -sp_put_cache (xlator_t *this, fd_t *fd, sp_cache_t *cache) -{ - int32_t ret = -1; - - if (fd != NULL) { - LOCK (&fd->lock); - { - ret = __sp_put_cache (this, fd, cache); - } - UNLOCK (&fd->lock); - } - - return ret; -} - - -int32_t -sp_cache_add_entries (sp_cache_t *cache, gf_dirent_t *entries) -{ - gf_dirent_t *entry = NULL, *new = NULL; - int32_t ret = -1; - uint64_t expected_offset = 0; - xlator_t *this = NULL; - sp_private_t *priv = NULL; - - GF_VALIDATE_OR_GOTO ("stat-prefetch", cache, out); - - this = cache->this; - if (this && this->private) { - priv = this->private; - } - - LOCK (&cache->lock); - { - list_for_each_entry (entry, &entries->list, list) { - if (IA_ISDIR (entry->d_stat.ia_type)) { - continue; - } - - if (uuid_is_null (entry->d_stat.ia_gfid)) - continue; - - new = gf_dirent_for_name (entry->d_name); - if (new == NULL) { - gf_log (cache->this->name, GF_LOG_WARNING, - "cannot create a new dentry to store " - "in cache"); - goto unlock; - } - - new->d_ino = entry->d_ino; - new->d_off = entry->d_off; - new->d_len = entry->d_len; - new->d_type = entry->d_type; - new->d_stat = entry->d_stat; - - ret = rbthash_insert (cache->table, new, new->d_name, - strlen (new->d_name)); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, "cannot " - "insert dentry (name:%s) into cache", - new->d_name); - - GF_FREE (new); - continue; - } - - expected_offset = new->d_off; - if (priv) { - LOCK (&priv->lock); - { - priv->entries++; - } - UNLOCK (&priv->lock); - } - } - - cache->expected_offset = expected_offset; - - ret = 0; - } -unlock: - 
UNLOCK (&cache->lock); - -out: - return ret; -} - - -int32_t -sp_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *dict, struct iatt *postparent) -{ - struct list_head waiting_ops = {0, }; - call_stub_t *stub = NULL, *tmp = NULL; - sp_local_t *local = NULL; - int need_unwind = 0; - char looked_up = 0, lookup_in_progress = 0; - - GF_ASSERT (frame); - - INIT_LIST_HEAD (&waiting_ops); - - local = frame->local; - if (local == NULL) { - op_ret = -1; - op_errno = EINVAL; - gf_log (this->name, GF_LOG_WARNING, "local is NULL, but it is " - "needed to find and resume operations waiting on " - "lookup"); - goto out; - } - - if (this == NULL) { - op_ret = -1; - op_errno = EINVAL; - gf_log (frame->this ? frame->this->name : "stat-prefetch", - GF_LOG_WARNING, "xlator object (this) is NULL"); - goto out; - } - - /* For '/' Entry is never cached, don't try to remove it */ - if ((op_ret == -1) && local->loc.parent) { - sp_remove_caches_from_all_fds_opened (this, local->loc.parent, - (char *)local->loc.name); - } - - if (local->is_lookup) - need_unwind = 1; - - lookup_in_progress = 0; - looked_up = 1; - sp_update_inode_ctx (this, local->loc.inode, &op_ret, &op_errno, - &lookup_in_progress, &looked_up, buf, - &waiting_ops, &op_errno); - - list_for_each_entry_safe (stub, tmp, &waiting_ops, list) { - list_del_init (&stub->list); - call_resume (stub); - } - -out: - if (need_unwind) { - SP_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf, - dict, postparent); - } - - return 0; -} - - -int32_t -sp_get_ancestors (char *path, char **parent, char **grand_parent) -{ - int32_t ret = -1, i = 0; - char *cpy = NULL, *tmp = NULL; - - if (!path || !parent || !grand_parent) { - ret = 0; - goto out; - } - - for (i = 0; i < 2; i++) { - if (!strcmp (path, "/")) { - break; - } - - tmp = cpy; - - cpy = gf_strdup (path); - - if (tmp != NULL) { - GF_FREE (tmp); - } - - if (cpy == NULL) { - ret = -errno; - 
goto out; - } - - path = dirname (cpy); - switch (i) - { - case 0: - *parent = gf_strdup (path); - if (*parent == NULL) - goto out; - break; - case 1: - *grand_parent = gf_strdup (path); - if (*grand_parent == NULL) - goto out; - break; - } - } - - ret = 0; -out: - if (cpy != NULL) - GF_FREE(cpy); - return ret; -} - - -int32_t -sp_cache_remove_parent_entry (call_frame_t *frame, xlator_t *this, - inode_table_t *itable, char *path) -{ - char *parent = NULL, *grand_parent = NULL, *cpy = NULL; - inode_t *inode_gp = NULL; - int32_t ret = -1; - - ret = sp_get_ancestors (path, &parent, &grand_parent); - if (ret < 0) { - goto out; - } - - if (grand_parent && strcmp (grand_parent, "/")) { - inode_gp = inode_from_path (itable, grand_parent); - if (inode_gp) { - cpy = gf_strdup (parent); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, - cpy, out, ret, - -ENOMEM); - path = basename (cpy); - sp_remove_caches_from_all_fds_opened (this, inode_gp, - path); - GF_FREE (cpy); - - inode_unref (inode_gp); - } - } - - ret = 0; -out: - if (parent) { - GF_FREE (parent); - } - - if (grand_parent) { - GF_FREE (grand_parent); - } - - return ret; -} - - -void -sp_is_empty (dict_t *this, char *key, data_t *value, void *data) -{ - char *ptr = data; - - if (strcmp (key, "gfid-req") == 0) - return; - - if (ptr && *ptr) { - *ptr = 0; - } -} - - -int32_t -sp_lookup_helper (call_frame_t *frame,xlator_t *this, loc_t *loc, - dict_t *xattr_req) -{ - uint64_t value = 0; - sp_inode_ctx_t *inode_ctx = NULL; - int32_t ret = 0, op_ret = -1, op_errno = EINVAL; - call_stub_t *stub = NULL; - char can_wind = 0; - - ret = inode_ctx_get (loc->inode, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", loc->inode->ino, - uuid_utoa (loc->inode->gfid)); - op_errno = EINVAL; - goto unwind; - } - - inode_ctx = (sp_inode_ctx_t *)(long) value; - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno, - EINVAL); - - 
stub = fop_lookup_stub (frame, sp_lookup_helper, loc, xattr_req); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, stub, unwind, - op_errno, ENOMEM); - - LOCK (&inode_ctx->lock); - { - op_ret = inode_ctx->op_ret; - op_errno = inode_ctx->op_errno; - if (op_ret == 0) { - if (!inode_ctx->lookup_in_progress) { - inode_ctx->lookup_in_progress = 1; - can_wind = 1; - } else { - list_add_tail (&stub->list, - &inode_ctx->waiting_ops); - stub = NULL; - } - } - } - UNLOCK (&inode_ctx->lock); - - if (op_ret == -1) { - goto unwind; - } - - if (can_wind) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, - xattr_req); - } - - if (stub != NULL) { - call_stub_destroy (stub); - } - - return 0; - -unwind: - SP_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); - if (stub != NULL) { - call_stub_destroy (stub); - } - - return 0; -} - - -/* - * TODO: implement sending lookups for every fop done on this path. As of now - * lookup on the path is sent only for the first fop on this path. 
- */ -int32_t -sp_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) -{ - gf_dirent_t *dirent = NULL; - char entry_cached = 0; - uint64_t value = 0; - char xattr_req_empty = 1, can_wind = 0; - sp_cache_t *cache = NULL; - struct iatt postparent = {0, }, buf = {0, }; - int32_t ret = -1, op_ret = -1, op_errno = EINVAL; - sp_inode_ctx_t *inode_ctx = NULL, *parent_inode_ctx = NULL; - sp_local_t *local = NULL; - call_stub_t *stub = NULL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, loc, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, loc->inode, unwind); - - inode_ctx = sp_check_and_create_inode_ctx (this, loc->inode, - SP_DONT_CARE); - if (inode_ctx == NULL) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_WARNING, - "cannot create stat-prefetch context in inode (ino:%" - PRId64", gfid:%s)(%s)", loc->inode->ino, - loc->inode->gfid, strerror (op_errno)); - goto unwind; - } - - if ((loc->parent == NULL) || (loc->name == NULL)) { - goto wind; - } - - if (xattr_req != NULL) { - dict_foreach (xattr_req, sp_is_empty, &xattr_req_empty); - } - - if (!xattr_req_empty) { - goto wind; - } - - cache = sp_get_cache_inode (this, loc->parent, frame->root->pid); - if (cache) { - ret = sp_cache_get_entry (cache, (char *)loc->name, &dirent); - if (ret == 0) { - if (!uuid_is_null (loc->inode->gfid) - && (uuid_compare (loc->inode->gfid, - dirent->d_stat.ia_gfid)) - != 0) { - op_ret = -1; - op_errno = ESTALE; - goto unwind; - } - - ret = inode_ctx_get (loc->parent, this, &value); - if ((ret == 0) && (value != 0)) { - parent_inode_ctx = (void *)(long)value; - postparent = parent_inode_ctx->stbuf; - buf = dirent->d_stat; - op_ret = 0; - op_errno = 0; - entry_cached = 1; - } - - GF_FREE (dirent); - } - } else if (IA_ISDIR (loc->inode->ia_type)) { - cache = sp_get_cache_inode (this, loc->inode, frame->root->pid); - if (cache) { - ret = sp_cache_get_entry (cache, ".", &dirent); - if (ret 
== 0) { - ret = inode_ctx_get (loc->parent, this, &value); - if ((ret == 0) && (value != 0)) { - parent_inode_ctx = (void *)(long)value; - postparent = parent_inode_ctx->stbuf; - buf = dirent->d_stat; - op_ret = 0; - op_errno = 0; - entry_cached = 1; - } - - GF_FREE (dirent); - } - } - } - -wind: - if (entry_cached) { - if (cache) { - cache->hits++; - sp_cache_unref (cache); - } - } else { - if (cache) { - cache->miss++; - sp_cache_unref (cache); - } - - stub = fop_lookup_stub (frame, sp_lookup_helper, loc, - xattr_req); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, stub, unwind, - op_errno, ENOMEM); - - local = GF_CALLOC (1, sizeof (*local), gf_sp_mt_sp_local_t); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, unwind, - op_errno, ENOMEM); - - frame->local = local; - - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "loc_copy failed (%s)", strerror (op_errno)); - goto unwind; - } - - local->is_lookup = 1; - - LOCK (&inode_ctx->lock); - { - if (inode_ctx->lookup_in_progress) { - list_add_tail (&stub->list, - &inode_ctx->waiting_ops); - stub = NULL; - } else { - can_wind = 1; - inode_ctx->lookup_in_progress = 1; - } - } - UNLOCK (&inode_ctx->lock); - - if (can_wind) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, - xattr_req); - } - - if (stub != NULL) { - call_stub_destroy (stub); - } - - return 0; - } - -unwind: - SP_STACK_UNWIND (lookup, frame, op_ret, op_errno, (loc)?loc->inode:NULL, - &buf, NULL, &postparent); - - return 0; -} - - -int32_t -sp_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, gf_dirent_t *entries) -{ - sp_local_t *local = NULL; - sp_cache_t *cache = NULL; - fd_t *fd = NULL; - int32_t ret = 0; - char was_present = 1; - sp_private_t *priv = NULL; - - GF_ASSERT (frame); - if (op_ret == -1) { - goto out; - } - - if ((this == NULL) || (this->private == NULL)) { - gf_log 
(frame->this->name, GF_LOG_WARNING, - (this == NULL) ? "xlator object (this) is NULL" - : "stat-prefetch configuration (this->private) is " - "NULL"); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - local = frame->local; - if (local == NULL) { - gf_log (frame->this->name, GF_LOG_WARNING, "local is NULL"); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - fd = local->fd; - - priv = this->private; - - LOCK (&priv->lock); - { - if (!priv->mem_pool) - priv->mem_pool = mem_pool_new (rbthash_entry_t, - GF_SP_CACHE_ENTRIES_EXPECTED); - } - UNLOCK (&priv->lock); - - if (!priv->mem_pool) - goto out; - - LOCK (&fd->lock); - { - cache = __sp_get_cache_fd (this, fd); - if (cache == NULL) { - was_present = 0; - cache = sp_cache_init (this); - if (cache == NULL) { - gf_log (this->name, GF_LOG_WARNING, - "creation of stat-prefetch cache " - "for fd (%p) opened on inode " - "(ino:%"PRId64", gfid:%s) failed", fd, - fd->inode->ino, - uuid_utoa (fd->inode->gfid)); - goto unlock; - } - - ret = __sp_put_cache (this, fd, cache); - if (ret == -1) { - sp_cache_free (cache); - gf_log (this->name, GF_LOG_WARNING, - "cannot store cache in fd (%p) opened " - "on inode (ino:%"PRId64", gfid:%s)", fd, - fd->inode->ino, - uuid_utoa (fd->inode->gfid)); - goto unlock; - } - } - - sp_cache_ref (cache); - } -unlock: - UNLOCK (&fd->lock); - - if (cache != NULL) { - sp_cache_add_entries (cache, entries); - if (was_present) { - sp_cache_unref (cache); - } - } - -out: - SP_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries); - return 0; -} - - -int32_t -sp_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t off) -{ - sp_cache_t *cache = NULL; - sp_local_t *local = NULL; - char *path = NULL; - int32_t ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd, unwind); - - cache = sp_get_cache_fd (this, fd); - if (cache) { - if (off != cache->expected_offset) { - 
sp_cache_remove_entry (cache, NULL, 1); - } - - sp_cache_unref (cache); - } - - ret = inode_path (fd->inode, NULL, &path); - if (ret < 0) { - op_errno = -ret; - gf_log (this->name, GF_LOG_WARNING, "cannot construct path on " - "which fd (%p) is opened (fd.inode.ino = %"PRId64", " - "fd.inode.gfid = %s) (%s)", fd, fd->inode->ino, - uuid_utoa (fd->inode->gfid), strerror (op_errno)); - goto unwind; - } - - ret = sp_cache_remove_parent_entry (frame, this, fd->inode->table, - path); - - if (ret < 0) { - op_errno = -ret; - gf_log (this->name, GF_LOG_WARNING, - "cannot remove parent entry from grand-parent's cache" - " for path %s", path); - goto unwind; - } - - GF_FREE (path); - - local = GF_CALLOC (1, sizeof (*local), gf_sp_mt_sp_local_t); - if (local) { - local->fd = fd; - frame->local = local; - } - - STACK_WIND (frame, sp_readdir_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readdirp, fd, size, off); - - return 0; - -unwind: - if (path != NULL) { - GF_FREE (path); - } - - SP_STACK_UNWIND (readdir, frame, -1, op_errno, NULL); - return 0; -} - - -int32_t -sp_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) -{ - GF_ASSERT (frame); - - SP_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, postbuf); - return 0; -} - - - -int32_t -sp_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf, - struct iatt *preoldparent, struct iatt *postoldparent, - struct iatt *prenewparent, struct iatt *postnewparent) -{ - GF_ASSERT (frame); - SP_STACK_UNWIND (rename, frame, op_ret, op_errno, buf, preoldparent, - postoldparent, prenewparent, postnewparent); - return 0; -} - - -int32_t -sp_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, fd_t *fd) -{ - sp_local_t *local = NULL; - sp_fd_ctx_t *fd_ctx = NULL; - - GF_ASSERT (frame); - - if (op_ret == -1) { - goto out; - } - - if 
(this == NULL) { - gf_log (frame->this ? frame->this->name : "stat-prefetch", - GF_LOG_WARNING, "xlator object (this) is NULL"); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - local = frame->local; - if (local == NULL) { - gf_log (this->name, GF_LOG_WARNING, "local is NULL"); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - if (fd == NULL) { - gf_log (this->name, GF_LOG_WARNING, "fd is NULL"); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - fd_ctx = sp_fd_ctx_new (this, local->loc.parent, - (char *)local->loc.name, NULL); - if (fd_ctx == NULL) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - op_ret = fd_ctx_set (fd, this, (long)(void *)fd_ctx); - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "cannot set stat-prefetch context in fd (%p) opened on " - "inode (ino:%"PRId64", gfid:%s)", fd, fd->inode->ino, - uuid_utoa (fd->inode->gfid)); - sp_fd_ctx_free (fd_ctx); - op_errno = ENOMEM; - } - -out: - SP_STACK_UNWIND (open, frame, op_ret, op_errno, fd); - return 0; -} - - -int32_t -sp_open_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, int32_t wbflags) -{ - uint64_t value = 0; - sp_inode_ctx_t *inode_ctx = NULL; - int32_t ret = 0, op_ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, loc, unwind); - - ret = inode_ctx_get (loc->inode, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", loc->inode->ino, - uuid_utoa (loc->inode->gfid)); - op_errno = EINVAL; - goto unwind; - } - - inode_ctx = (sp_inode_ctx_t *)(long) value; - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno, - EINVAL); - - LOCK (&inode_ctx->lock); - { - op_ret = inode_ctx->op_ret; - op_errno = inode_ctx->op_errno; - } - UNLOCK (&inode_ctx->lock); - - if ((op_ret == -1) && ((op_errno != ENOENT) - || !((op_errno == 
ENOENT) - && (flags & O_CREAT)))) { - gf_log (this->name, GF_LOG_WARNING, "lookup-behind has failed " - "for path (%s)(%s), unwinding open call waiting on " - "it", loc->path, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, sp_fd_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags); - - return 0; - -unwind: - SP_STACK_UNWIND (open, frame, -1, op_errno, fd); - return 0; -} - - -int32_t -sp_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, int wbflags) -{ - call_stub_t *stub = NULL; - sp_local_t *local = NULL; - int32_t op_errno = EINVAL, ret = -1; - char can_wind = 0, need_lookup = 0, need_unwind = 1; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this->name, this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - local = GF_CALLOC (1, sizeof (*local), gf_sp_mt_sp_local_t); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno, - ENOMEM); - - frame->local = local; - - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, "loc_copy failed (%s)", - strerror (op_errno)); - goto out; - } - - stub = fop_open_stub (frame, sp_open_helper, loc, flags, fd, wbflags); - if (stub == NULL) { - op_errno = ENOMEM; - goto out; - } - - sp_process_inode_ctx (frame, this, loc, stub, &need_unwind, - &need_lookup, &can_wind, &op_errno); -out: - if (need_unwind) { - SP_STACK_UNWIND (open, frame, -1, op_errno, fd); - } else if (need_lookup) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, NULL); - } else if (can_wind) { - STACK_WIND (frame, sp_fd_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, - wbflags); - } - - return 0; - -} - -static int32_t -sp_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, - struct iatt *buf, struct iatt 
*preparent, - struct iatt *postparent) -{ - sp_local_t *local = NULL; - sp_fd_ctx_t *fd_ctx = NULL; - char lookup_in_progress = 0, looked_up = 0; - - GF_ASSERT (frame); - - if (op_ret == -1) { - goto out; - } - - if (this == NULL) { - gf_log (frame->this ? frame->this->name : "stat-prefetch", - GF_LOG_WARNING, "xlator object (this) is NULL"); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - local = frame->local; - if (local == NULL) { - gf_log (this->name, GF_LOG_WARNING, "local is NULL"); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - looked_up = 1; - op_ret = sp_update_inode_ctx (this, local->loc.inode, &op_ret, - &op_errno, &lookup_in_progress, - &looked_up, buf, NULL, &op_errno); - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "updating stat-prefetch context in inode (ino:%" - PRId64", gfid:%s) (path: %s) failed (%s)", - local->loc.inode->ino, - uuid_utoa (local->loc.inode->gfid), local->loc.path, - strerror (op_errno)); - goto out; - } - - op_ret = sp_update_inode_ctx (this, local->loc.parent, NULL, NULL, NULL, - NULL, postparent, NULL, &op_errno); - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "updating stat-prefetch context in parent inode failed " - "for path (%s)(%s)", local->loc.path, - strerror (op_errno)); - goto out; - } - - fd_ctx = sp_fd_ctx_new (this, local->loc.parent, - (char *)local->loc.name, NULL); - if (fd_ctx == NULL) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - op_ret = fd_ctx_set (fd, this, (long)(void *)fd_ctx); - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "cannot set stat-prefetch context in fd (%p) opened on " - "inode (ino:%"PRId64", gfid:%s)", fd, fd->inode->ino, - uuid_utoa (fd->inode->gfid)); - sp_fd_ctx_free (fd_ctx); - op_errno = ENOMEM; - } - -out: - SP_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf, - preparent, postparent); - return 0; -} - - -int32_t -sp_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, 
fd_t *fd, dict_t *params) -{ - sp_local_t *local = NULL; - int32_t op_errno = -1, ret = -1; - char need_unwind = 1; - sp_inode_ctx_t *inode_ctx = NULL; - - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc, out, op_errno, - EINVAL); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->path, out, op_errno, - EINVAL); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->name, out, op_errno, - EINVAL); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, loc->inode, out, - op_errno, EINVAL); - - ret = sp_cache_remove_parent_entry (frame, this, loc->inode->table, - (char *)loc->path); - if (ret == -1) { - op_errno = ENOMEM; - goto out; - } - - local = GF_CALLOC (1, sizeof (*local), gf_sp_mt_sp_local_t); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno, - ENOMEM); - - frame->local = local; - - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, - "loc_copy failed (%s)", strerror (op_errno)); - goto out; - } - - inode_ctx = sp_check_and_create_inode_ctx (this, loc->inode, - SP_DONT_EXPECT); - if (inode_ctx == NULL) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_WARNING, - "cannot create stat-prefetch context in inode (ino:%" - PRId64", gfid:%s)(%s)", loc->inode->ino, - loc->inode->gfid, strerror (op_errno)); - goto out; - } - - need_unwind = 0; -out: - if (need_unwind) { - SP_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL); - } else { - STACK_WIND (frame, sp_create_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->create, loc, flags, - mode, fd, params); - } - return 0; -} - - -int32_t -sp_opendir_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) -{ - uint64_t value = 0; - sp_inode_ctx_t *inode_ctx = NULL; - int32_t ret = 0, op_ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this ? 
frame->this->name : "stat-prefetch", - this, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - - ret = inode_ctx_get (loc->inode, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", loc->inode->ino, - uuid_utoa (loc->inode->gfid)); - goto unwind; - } - - inode_ctx = (sp_inode_ctx_t *)(long) value; - GF_VALIDATE_OR_GOTO (this->name, inode_ctx, unwind); - - LOCK (&inode_ctx->lock); - { - op_ret = inode_ctx->op_ret; - op_errno = inode_ctx->op_errno; - } - UNLOCK (&inode_ctx->lock); - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, "lookup-behind has failed " - "for path (%s)(%s), unwinding opendir call waiting " - "on it", loc->path, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, sp_fd_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->opendir, loc, fd); - - return 0; - -unwind: - SP_STACK_UNWIND (opendir, frame, -1, op_errno, NULL); - return 0; -} - - -int32_t -sp_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) -{ - sp_local_t *local = NULL; - call_stub_t *stub = NULL; - int32_t op_errno = EINVAL, ret = -1; - char can_wind = 0, need_lookup = 0, need_unwind = 1; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this ? 
frame->this->name : "stat-prefetch", - this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - local = GF_CALLOC (1, sizeof (*local), gf_sp_mt_sp_local_t); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno, - ENOMEM); - - frame->local = local; - - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, "loc_copy failed (%s)", - strerror (op_errno)); - goto out; - } - - stub = fop_opendir_stub (frame, sp_opendir_helper, loc, fd); - if (stub == NULL) { - op_errno = ENOMEM; - goto out; - } - - sp_process_inode_ctx (frame, this, loc, stub, &need_unwind, - &need_lookup, &can_wind, &op_errno); - -out: - if (need_unwind) { - SP_STACK_UNWIND (opendir, frame, -1, op_errno, NULL); - } else if (need_lookup) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, NULL); - } else if (can_wind) { - STACK_WIND (frame, sp_fd_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->opendir, loc, fd); - } - - return 0; -} - - -int32_t -sp_new_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) -{ - sp_local_t *local = NULL; - char lookup_in_progress = 0, looked_up = 0; - - GF_ASSERT (frame); - - if (op_ret == -1) { - goto out; - } - - local = frame->local; - if (local == NULL) { - gf_log (frame->this->name, GF_LOG_WARNING, "local is NULL"); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - if (this == NULL) { - gf_log (frame->this->name, GF_LOG_WARNING, - "xlator object (this) is NULL"); - op_ret = -1; - op_errno = EINVAL; - goto out; - } - - looked_up = 1; - op_ret = sp_update_inode_ctx (this, local->loc.inode, &op_ret, - &op_errno, &lookup_in_progress, - &looked_up, buf, NULL, &op_errno); - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "updating stat-prefetch context 
in inode (ino:%" - PRId64", gfid:%s) (path: %s) failed (%s)", - local->loc.inode->ino, - uuid_utoa (local->loc.inode->gfid), local->loc.path, - strerror (op_errno)); - goto out; - } - - op_ret = sp_update_inode_ctx (this, local->loc.parent, NULL, NULL, NULL, - NULL, postparent, NULL, &op_errno); - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "updating stat-prefetch context in parent inode failed " - "for path (%s)(%s)", local->loc.path, - strerror (op_errno)); - } - -out: - SP_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode, buf, preparent, - postparent); - return 0; -} - - -int -sp_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dict_t *params) -{ - int32_t ret = -1, op_errno = EINVAL; - char need_unwind = 1; - sp_inode_ctx_t *inode_ctx = NULL; - sp_local_t *local = NULL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this->name, this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->path, out); - GF_VALIDATE_OR_GOTO (this->name, loc->name, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - ret = sp_cache_remove_parent_entry (frame, this, loc->inode->table, - (char *)loc->path); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "cannot remove parent entry from grand-parent's cache " - "for path (%s)", loc->path); - op_errno = ENOMEM; - goto out; - } - - local = GF_CALLOC (1, sizeof (*local), gf_sp_mt_sp_local_t); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno, - ENOMEM); - - frame->local = local; - - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, "loc_copy failed (%s)", - strerror (op_errno)); - goto out; - } - - inode_ctx = sp_check_and_create_inode_ctx (this, loc->inode, - SP_DONT_EXPECT); - if (inode_ctx == NULL) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_WARNING, - "cannot create stat-prefetch context in inode (ino:%" - PRId64", gfid:%s)(%s)", 
loc->inode->ino, - loc->inode->gfid, strerror (op_errno)); - goto out; - } - - need_unwind = 0; -out: - if (need_unwind) { - SP_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, - NULL); - } else { - STACK_WIND (frame, sp_new_entry_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mkdir, loc, mode, params); - } - - return 0; -} - - -int -sp_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, dict_t *params) -{ - int32_t op_errno = EINVAL, ret = -1; - char need_unwind = 1; - sp_inode_ctx_t *inode_ctx = NULL; - sp_local_t *local = NULL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this ? frame->this->name : "stat-prefetch", - this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->path, out); - GF_VALIDATE_OR_GOTO (this->name, loc->name, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - ret = sp_cache_remove_parent_entry (frame, this, loc->inode->table, - (char *)loc->path); - if (ret == -1) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_WARNING, - "cannot remove parent entry from grand-parent's cache " - "for path (%s)", loc->path); - goto out; - } - - local = GF_CALLOC (1, sizeof (*local), gf_sp_mt_sp_local_t); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno, - ENOMEM); - - frame->local = local; - - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, "loc_copy failed (%s)", - strerror (op_errno)); - goto out; - } - - inode_ctx = sp_check_and_create_inode_ctx (this, loc->inode, - SP_DONT_EXPECT); - if (inode_ctx == NULL) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_WARNING, - "cannot create stat-prefetch context in inode (ino:%" - PRId64", gfid:%s)(%s)", loc->inode->ino, - loc->inode->gfid, strerror (op_errno)); - goto out; - } - - need_unwind = 0; -out: - if (need_unwind) { - SP_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, - NULL); - } else { - STACK_WIND 
(frame, sp_new_entry_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->mknod, loc, mode, - rdev, params); - } - - return 0; -} - - -int -sp_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath, - loc_t *loc, dict_t *params) -{ - int32_t ret = -1, op_errno = EINVAL; - char need_unwind = 1; - sp_inode_ctx_t *inode_ctx = NULL; - sp_local_t *local = NULL; - - GF_ASSERT (frame); - - GF_VALIDATE_OR_GOTO ((frame->this ? frame->this->name - : "stat-prefetch"), - this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->path, out); - GF_VALIDATE_OR_GOTO (this->name, loc->name, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - ret = sp_cache_remove_parent_entry (frame, this, loc->inode->table, - (char *)loc->path); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "cannot remove parent entry from grand-parent's cache " - "for path (%s)", loc->path); - op_errno = ENOMEM; - goto out; - } - - local = GF_CALLOC (1, sizeof (*local), gf_sp_mt_sp_local_t); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno, - ENOMEM); - - frame->local = local; - - ret = loc_copy (&local->loc, loc); - if (ret == -1) { - op_errno = errno; - gf_log (this->name, GF_LOG_WARNING, "loc_copy failed (%s)", - strerror (op_errno)); - goto out; - } - - inode_ctx = sp_check_and_create_inode_ctx (this, loc->inode, - SP_DONT_EXPECT); - if (inode_ctx == NULL) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_WARNING, - "cannot create stat-prefetch context in inode (ino:%" - PRId64", gfid:%s)(%s)", loc->inode->ino, - loc->inode->gfid, strerror (op_errno)); - goto out; - } - - need_unwind = 0; -out: - if (need_unwind) { - SP_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL, NULL, - NULL); - } else { - STACK_WIND (frame, sp_new_entry_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->symlink, linkpath, loc, - params); - } - - return 0; -} - - -int32_t -sp_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - 
int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) -{ - GF_ASSERT (frame); - SP_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf, preparent, - postparent); - return 0; -} - - -int32_t -sp_link_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc) -{ - uint64_t value = 0; - sp_inode_ctx_t *inode_ctx = NULL; - int32_t ret = 0, op_ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this ? frame->this->name : "stat-prefetch", - this, unwind); - GF_VALIDATE_OR_GOTO (this->name, oldloc, unwind); - GF_VALIDATE_OR_GOTO (this->name, newloc, unwind); - - ret = inode_ctx_get (oldloc->inode, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", oldloc->inode->ino, - uuid_utoa (oldloc->inode->gfid)); - goto unwind; - } - - inode_ctx = (sp_inode_ctx_t *)(long) value; - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno, - EINVAL); - - LOCK (&inode_ctx->lock); - { - op_ret = inode_ctx->op_ret; - op_errno = inode_ctx->op_errno; - } - UNLOCK (&inode_ctx->lock); - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, "lookup-behind has failed " - "for path (%s)(%s), unwinding link call waiting on " - "it", oldloc->path, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, sp_link_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, oldloc, newloc); - - return 0; - -unwind: - SP_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL); - return 0; -} - - -int32_t -sp_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc) -{ - call_stub_t *stub = NULL; - int32_t ret = 0, op_errno = EINVAL; - char can_wind = 0, need_lookup = 0, need_unwind = 1; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this ? 
frame->this->name : "stat-prefetch", - this, out); - GF_VALIDATE_OR_GOTO (this->name, newloc, out); - GF_VALIDATE_OR_GOTO (this->name, newloc->path, out); - GF_VALIDATE_OR_GOTO (this->name, newloc->name, out); - GF_VALIDATE_OR_GOTO (this->name, newloc->inode, out); - GF_VALIDATE_OR_GOTO (this->name, oldloc->name, out); - - ret = sp_cache_remove_parent_entry (frame, this, newloc->parent->table, - (char *)newloc->path); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "cannot remove parent entry from grand-parent's cache " - "for path (%s)", newloc->path); - op_errno = ENOMEM; - goto out; - } - - sp_remove_caches_from_all_fds_opened (this, oldloc->parent, - (char *)oldloc->name); - - stub = fop_link_stub (frame, sp_link_helper, oldloc, newloc); - if (stub == NULL) { - op_errno = ENOMEM; - goto out; - } - - sp_process_inode_ctx (frame, this, oldloc, stub, &need_unwind, - &need_lookup, &can_wind, &op_errno); - -out: - if (need_unwind) { - SP_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, - NULL); - } else if (need_lookup) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, oldloc, NULL); - } else if (can_wind) { - STACK_WIND (frame, sp_link_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->link, oldloc, newloc); - } - - return 0; -} - - -int32_t -sp_truncate_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, - off_t offset) -{ - uint64_t value = 0; - sp_inode_ctx_t *inode_ctx = NULL; - int32_t ret = 0, op_ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this ? 
frame->this->name : "stat-prefetch", - this, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - - ret = inode_ctx_get (loc->inode, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", loc->inode->ino, - uuid_utoa (loc->inode->gfid)); - goto unwind; - } - - inode_ctx = (sp_inode_ctx_t *)(long) value; - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, inode_ctx, unwind, op_errno, - EINVAL); - - LOCK (&inode_ctx->lock); - { - op_ret = inode_ctx->op_ret; - op_errno = inode_ctx->op_errno; - } - UNLOCK (&inode_ctx->lock); - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, "lookup-behind has failed " - "for path (%s)(%s), unwinding truncate call " - "waiting on it", loc->path, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, sp_truncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, offset); - - return 0; - -unwind: - SP_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL); - return 0; -} - - -int32_t -sp_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) -{ - int32_t op_errno = EINVAL; - call_stub_t *stub = NULL; - char can_wind = 0, need_lookup = 0, need_unwind = 1; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this ? 
frame->this->name : "stat-prefetch", - this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->name, out); - - sp_remove_caches_from_all_fds_opened (this, loc->parent, - (char *)loc->name); - - stub = fop_truncate_stub (frame, sp_truncate_helper, loc, offset); - if (stub == NULL) { - op_errno = ENOMEM; - goto out; - } - - sp_process_inode_ctx (frame, this, loc, stub, &need_unwind, - &need_lookup, &can_wind, &op_errno); - -out: - if (need_unwind) { - SP_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL); - } else if (need_lookup) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, NULL); - } else if (can_wind) { - STACK_WIND (frame, sp_truncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, offset); - } - - return 0; -} - - -int32_t -sp_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) -{ - sp_fd_ctx_t *fd_ctx = NULL; - uint64_t value = 0; - int32_t ret = 0, op_errno = EINVAL; - inode_t *parent = NULL; - char *name = NULL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this ? 
frame->this->name : "stat-prefetch", - this, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd, unwind); - - ret = fd_ctx_get (fd, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, "stat-prefetch context not " - "set in fd (%p) opened on inode (ino:%"PRId64", " - "gfid:%s", fd, fd->inode->ino, - uuid_utoa (fd->inode->gfid)); - goto unwind; - } - - fd_ctx = (void *)(long)value; - name = fd_ctx->name; - parent = fd_ctx->parent_inode; - - sp_remove_caches_from_all_fds_opened (this, parent, (char *)name); - - STACK_WIND (frame, sp_truncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset); - return 0; - -unwind: - SP_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL); - return 0; -} - - -int32_t -sp_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *prestat, struct iatt *poststat) -{ - GF_ASSERT (frame); - SP_STACK_UNWIND (setattr, frame, op_ret, op_errno, prestat, poststat); - return 0; -} - - -int -sp_setattr_helper (call_frame_t *frame, xlator_t *this, - loc_t *loc, struct iatt *buf, int32_t valid) -{ - uint64_t value = 0; - sp_inode_ctx_t *inode_ctx = NULL; - int32_t ret = 0, op_ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this ? 
frame->this->name : "stat-prefetch", - this, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - - ret = inode_ctx_get (loc->inode, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", loc->inode->ino, - uuid_utoa (loc->inode->gfid)); - op_errno = EINVAL; - goto unwind; - } - - inode_ctx = (sp_inode_ctx_t *)(long) value; - GF_VALIDATE_OR_GOTO (this->name, inode_ctx, unwind); - - LOCK (&inode_ctx->lock); - { - op_ret = inode_ctx->op_ret; - op_errno = inode_ctx->op_errno; - } - UNLOCK (&inode_ctx->lock); - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, "lookup-behind has failed " - "for path (%s)(%s), unwinding setattr call " - "waiting on it", loc->path, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, sp_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, loc, buf, valid); - - return 0; - -unwind: - SP_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL); - return 0; -} - - -int -sp_setattr (call_frame_t *frame, xlator_t *this, - loc_t *loc, struct iatt *buf, int32_t valid) -{ - int32_t op_errno = EINVAL; - call_stub_t *stub = NULL; - char can_wind = 0, need_lookup = 0, need_unwind = 1; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this ? 
frame->this->name : "stat-prefetch", - this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->name, out); - - sp_remove_caches_from_all_fds_opened (this, loc->parent, - (char *)loc->name); - - stub = fop_setattr_stub (frame, sp_setattr_helper, loc, buf, valid); - if (stub == NULL) { - op_errno = ENOMEM; - goto out; - } - - sp_process_inode_ctx (frame, this, loc, stub, &need_unwind, - &need_lookup, &can_wind, &op_errno); - -out: - if (need_unwind) { - SP_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL); - } else if (need_lookup) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, NULL); - } else if (can_wind) { - STACK_WIND (frame, sp_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, loc, buf, valid); - } - - return 0; -} - - -int32_t -sp_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, const char *path, - struct iatt *buf) -{ - GF_ASSERT (frame); - SP_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, buf); - return 0; -} - - -int32_t -sp_readlink_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, - size_t size) -{ - uint64_t value = 0; - sp_inode_ctx_t *inode_ctx = NULL; - int32_t ret = 0, op_ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this ? 
frame->this->name : "stat-prefetch", - this, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - - ret = inode_ctx_get (loc->inode, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", loc->inode->ino, - uuid_utoa (loc->inode->gfid)); - goto unwind; - } - - inode_ctx = (sp_inode_ctx_t *)(long) value; - GF_VALIDATE_OR_GOTO (this->name, inode_ctx, unwind); - - LOCK (&inode_ctx->lock); - { - op_ret = inode_ctx->op_ret; - op_errno = inode_ctx->op_errno; - } - UNLOCK (&inode_ctx->lock); - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, "lookup-behind has failed " - "for path (%s)(%s), unwinding readlink call " - "waiting on it", loc->path, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, sp_readlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readlink, loc, size); - - return 0; - -unwind: - SP_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL); - return 0; -} - - -int32_t -sp_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size) -{ - int32_t op_errno = EINVAL; - call_stub_t *stub = NULL; - char can_wind = 0, need_lookup = 0, need_unwind = 1; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->name, out); - - sp_remove_caches_from_all_fds_opened (this, loc->parent, - (char *)loc->name); - - stub = fop_readlink_stub (frame, sp_readlink_helper, loc, size); - if (stub == NULL) { - op_errno = ENOMEM; - goto out; - } - - sp_process_inode_ctx (frame, this, loc, stub, &need_unwind, - &need_lookup, &can_wind, &op_errno); - -out: - if (need_unwind) { - SP_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL); - } else if (need_lookup) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, NULL); - } else if (can_wind) { - STACK_WIND (frame, sp_readlink_cbk, 
FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readlink, loc, size); - } - - return 0; -} - - -int32_t -sp_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *preparent, - struct iatt *postparent) -{ - GF_ASSERT (frame); - SP_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent, - postparent); - return 0; -} - - - -int32_t -sp_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) -{ - GF_ASSERT (frame); - SP_STACK_UNWIND (setxattr, frame, op_ret, op_errno); - return 0; -} - - -int32_t -sp_unlink_helper (call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - uint64_t value = 0; - sp_inode_ctx_t *inode_ctx = NULL; - int32_t ret = 0, op_ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - - ret = inode_ctx_get (loc->inode, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", loc->inode->ino, - uuid_utoa (loc->inode->gfid)); - goto unwind; - } - - inode_ctx = (sp_inode_ctx_t *)(long) value; - GF_VALIDATE_OR_GOTO (this->name, inode_ctx, unwind); - - LOCK (&inode_ctx->lock); - { - op_ret = inode_ctx->op_ret; - op_errno = inode_ctx->op_errno; - } - UNLOCK (&inode_ctx->lock); - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, "lookup-behind has failed " - "for path (%s)(%s), unwinding unlink call " - "waiting on it", loc->path, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, sp_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc); - - return 0; - -unwind: - SP_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL); - return 0; -} - - -int32_t -sp_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - int32_t ret = -1, op_errno = EINVAL; - call_stub_t *stub = NULL; - char can_wind = 0, need_lookup = 0, need_unwind = 1; - - 
GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->name, out); - - sp_remove_caches_from_all_fds_opened (this, loc->parent, - (char *)loc->name); - - ret = sp_cache_remove_parent_entry (frame, this, loc->parent->table, - (char *)loc->path); - if (ret == -1) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_WARNING, - "cannot remove parent entry from grand-parent's cache " - "for path (%s)", loc->path); - goto out; - } - - stub = fop_unlink_stub (frame, sp_unlink_helper, loc); - if (stub == NULL) { - op_errno = ENOMEM; - goto out; - } - - sp_process_inode_ctx (frame, this, loc, stub, &need_unwind, - &need_lookup, &can_wind, &op_errno); - -out: - if (need_unwind) { - SP_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL); - } else if (need_lookup) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, NULL); - } else if (can_wind) { - STACK_WIND (frame, sp_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->unlink, loc); - } - - return 0; -} - - -int -sp_rmdir_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags) -{ - uint64_t value = 0; - sp_inode_ctx_t *inode_ctx = NULL; - int32_t ret = 0, op_ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - - ret = inode_ctx_get (loc->inode, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", loc->inode->ino, - uuid_utoa (loc->inode->gfid)); - goto unwind; - } - - inode_ctx = (sp_inode_ctx_t *)(long) value; - GF_VALIDATE_OR_GOTO (this->name, inode_ctx, unwind); - - LOCK (&inode_ctx->lock); - { - op_ret = inode_ctx->op_ret; - op_errno = inode_ctx->op_errno; - } - UNLOCK (&inode_ctx->lock); - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, "lookup-behind has failed " - "for path (%s)(%s), unwinding rmdir call " - "waiting on 
it", loc->path, strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, sp_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rmdir, loc, flags); - - return 0; - -unwind: - SP_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL); - return 0; -} - - -int -sp_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags) -{ - int32_t ret = -1, op_errno = EINVAL; - call_stub_t *stub = NULL; - char can_wind = 0, need_lookup = 0, need_unwind = 1; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->name, out); - GF_VALIDATE_OR_GOTO (this->name, loc->path, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - sp_remove_caches_from_all_fds_opened (this, loc->inode, NULL); - - ret = sp_cache_remove_parent_entry (frame, this, loc->inode->table, - (char *)loc->path); - if (ret == -1) { - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_WARNING, - "cannot remove parent entry from grand-parent's cache " - "for path (%s)", loc->path); - goto out; - } - - stub = fop_rmdir_stub (frame, sp_rmdir_helper, loc, flags); - if (stub == NULL) { - op_errno = ENOMEM; - goto out; - } - - sp_process_inode_ctx (frame, this, loc, stub, &need_unwind, - &need_lookup, &can_wind, &op_errno); - -out: - if (need_unwind) { - SP_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL); - } else if (need_lookup) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, NULL); - } else if (can_wind) { - STACK_WIND (frame, sp_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rmdir, loc, flags); - } - - return 0; -} - - -int32_t -sp_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iovec *vector, int32_t count, - struct iatt *stbuf, struct iobref *iobref) -{ - GF_ASSERT (frame); - SP_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf, - iobref); - return 0; 
-} - - -int32_t -sp_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset) -{ - sp_fd_ctx_t *fd_ctx = NULL; - uint64_t value = 0; - int32_t ret = 0, op_errno = EINVAL; - inode_t *parent = NULL; - char *name = NULL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd, unwind); - - ret = fd_ctx_get (fd, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, "stat-prefetch context not " - "set in fd (%p) opened on inode (ino:%"PRId64", " - "gfid:%s", fd, fd->inode->ino, - uuid_utoa (fd->inode->gfid)); - goto unwind; - } - - fd_ctx = (void *)(long)value; - name = fd_ctx->name; - parent = fd_ctx->parent_inode; - - sp_remove_caches_from_all_fds_opened (this, parent, (char *)name); - - STACK_WIND (frame, sp_readv_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, fd, size, offset); - return 0; - -unwind: - SP_STACK_UNWIND (readv, frame, -1, op_errno, NULL, -1, NULL, NULL); - return 0; -} - - -int32_t -sp_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, - int32_t count, off_t off, struct iobref *iobref) -{ - sp_fd_ctx_t *fd_ctx = NULL; - uint64_t value = 0; - int32_t ret = 0, op_errno = EINVAL; - inode_t *parent = NULL; - char *name = NULL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd, unwind); - - ret = fd_ctx_get (fd, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, "stat-prefetch context not " - "set in fd (%p) opened on inode (ino:%"PRId64", " - "gfid:%s", fd, fd->inode->ino, - uuid_utoa (fd->inode->gfid)); - goto unwind; - } - - fd_ctx = (void *)(long)value; - name = fd_ctx->name; - parent = fd_ctx->parent_inode; - - sp_remove_caches_from_all_fds_opened (this, parent, (char *)name); - - STACK_WIND (frame, sp_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, fd, vector, count, off, - iobref); - return 0; - -unwind: - 
SP_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL); - return 0; -} - - -int32_t -sp_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags) -{ - sp_fd_ctx_t *fd_ctx = NULL; - uint64_t value = 0; - int32_t ret = 0, op_errno = EINVAL; - inode_t *parent = NULL; - char *name = NULL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd, unwind); - - ret = fd_ctx_get (fd, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, "stat-prefetch context not " - "set in fd (%p) opened on inode (ino:%"PRId64", " - "gfid:%s", fd, fd->inode->ino, - uuid_utoa (fd->inode->gfid)); - goto unwind; - } - - fd_ctx = (void *)(long)value; - name = fd_ctx->name; - parent = fd_ctx->parent_inode; - - sp_remove_caches_from_all_fds_opened (this, parent, (char *)name); - - STACK_WIND (frame, sp_unlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsync, fd, flags); - return 0; - -unwind: - SP_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL); - return 0; -} - - -int32_t -sp_rename_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc) -{ - uint64_t value = 0; - char need_unwind = 0; - char can_wind = 0; - int32_t ret = 0, op_errno = EINVAL; - int32_t old_op_ret = -1, old_op_errno = -1; - int32_t new_op_ret = -1, new_op_errno = -1; - char old_inode_looked_up = 0, new_inode_looked_up = 0; - sp_inode_ctx_t *old_inode_ctx = NULL, *new_inode_ctx = NULL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this ? 
frame->this->name : "stat-prefetch", - this, unwind); - GF_VALIDATE_OR_GOTO (this->name, oldloc, unwind); - GF_VALIDATE_OR_GOTO (this->name, newloc, unwind); - - ret = inode_ctx_get (oldloc->inode, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", oldloc->inode->ino, - uuid_utoa (oldloc->inode->gfid)); - goto unwind; - } - - old_inode_ctx = (sp_inode_ctx_t *)(long) value; - GF_VALIDATE_OR_GOTO (this->name, old_inode_ctx, unwind); - - LOCK (&old_inode_ctx->lock); - { - old_inode_looked_up = old_inode_ctx->looked_up; - old_op_ret = old_inode_ctx->op_ret; - old_op_errno = old_inode_ctx->op_errno; - need_unwind = old_inode_ctx->need_unwind; - } - UNLOCK (&old_inode_ctx->lock); - - if (need_unwind) { - /* there was an error while queuing up lookup stub for newloc */ - gf_log (this->name, GF_LOG_WARNING, - "could not queue lookup stub for path (%s)", - newloc->path); - goto unwind; - } - - if (newloc->inode != NULL) { - ret = inode_ctx_get (newloc->inode, this, &value); - if (ret == 0) { - new_inode_ctx = (sp_inode_ctx_t *)(long)value; - if (new_inode_ctx != NULL) { - LOCK (&new_inode_ctx->lock); - { - new_inode_looked_up - = new_inode_ctx->looked_up; - new_op_ret = new_inode_ctx->op_ret; - new_op_errno = new_inode_ctx->op_errno; - } - UNLOCK (&new_inode_ctx->lock); - } - } - } - - if (new_inode_ctx == NULL) { - if (old_op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "lookup-behind has failed " - "for path (%s)(%s), unwinding rename call " - "waiting on it", oldloc->path, - strerror (old_op_errno)); - - op_errno = old_op_errno; - goto unwind; - } else { - can_wind = 1; - } - } else { - if (new_inode_looked_up && old_inode_looked_up) { - if ((old_op_ret == -1) - || ((new_op_ret == -1) - && (new_op_errno != ENOENT))) { - if (old_op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "lookup-behind has failed " - "for path (%s)(%s), unwinding " - "rename call waiting on 
it", - oldloc->path, - strerror (old_op_errno)); - op_errno = old_op_errno; - } else { - gf_log (this->name, GF_LOG_WARNING, - "lookup-behind has failed " - "for path (%s)(%s), unwinding " - "rename call waiting on it", - newloc->path, - strerror (new_op_errno)); - op_errno = new_op_errno; - } - - goto unwind; - } else { - can_wind = 1; - } - } - } - - if (can_wind) { - STACK_WIND (frame, sp_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, oldloc, newloc); - } - - return 0; - -unwind: - SP_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, - NULL); - return 0; -} - - -int32_t -sp_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,loc_t *newloc) -{ - char need_unwind = 1; - uint64_t value = 0; - call_stub_t *stub = NULL; - sp_inode_ctx_t *inode_ctx = NULL; - int32_t ret = -1, op_errno = EINVAL; - char old_inode_can_wind = 0, new_inode_can_wind = 0; - char old_inode_need_lookup = 0, new_inode_need_lookup = 0; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this ? 
frame->this->name : "stat-prefetch", - this, out); - GF_VALIDATE_OR_GOTO (this->name, oldloc, out); - GF_VALIDATE_OR_GOTO (this->name, oldloc->path, out); - GF_VALIDATE_OR_GOTO (this->name, oldloc->name, out); - GF_VALIDATE_OR_GOTO (this->name, oldloc->inode, out); - - GF_VALIDATE_OR_GOTO (this->name, newloc, out); - GF_VALIDATE_OR_GOTO (this->name, newloc->path, out); - - sp_remove_caches_from_all_fds_opened (this, oldloc->parent, - (char *)oldloc->name); - - sp_remove_caches_from_all_fds_opened (this, newloc->parent, - (char *)newloc->name); - - ret = sp_cache_remove_parent_entry (frame, this, oldloc->parent->table, - (char *)oldloc->path); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "cannot remove parent entry from grand-parent's cache " - "for path (%s)", oldloc->path); - goto out; - } - - ret = sp_cache_remove_parent_entry (frame, this, newloc->parent->table, - (char *)newloc->path); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "cannot remove parent entry from grand-parent's cache " - "for path (%s)", newloc->path); - goto out; - } - - if (IA_ISDIR (oldloc->inode->ia_type)) { - sp_remove_caches_from_all_fds_opened (this, oldloc->inode, - NULL); - } - - stub = fop_rename_stub (frame, sp_rename_helper, oldloc, newloc); - if (stub == NULL) { - op_errno = ENOMEM; - goto out; - } - - ret = sp_process_inode_ctx (frame, this, oldloc, stub, &need_unwind, - &old_inode_need_lookup, &old_inode_can_wind, - &op_errno); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, "processing stat-prefetch " - "context in inode (ino:%"PRId64", gfid:%s) (path:%s) " - "failed (%s)", oldloc->inode->ino, - uuid_utoa (oldloc->inode->gfid), oldloc->path, - strerror (op_errno)); - goto out; - } - - if (newloc->inode != NULL) { - stub = fop_rename_stub (frame, sp_rename_helper, oldloc, - newloc); - if (stub == NULL) { - op_errno = ENOMEM; - goto out; - } - - ret = sp_process_inode_ctx (frame, this, newloc, stub, - &need_unwind, - &new_inode_need_lookup, - 
&new_inode_can_wind, &op_errno); - if (ret == -1) { - ret = inode_ctx_get (oldloc->inode, this, &value); - - inode_ctx = (sp_inode_ctx_t *)(long)value; - if (inode_ctx == NULL) { - gf_log (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode" - " (ino:%"PRId64", gfid:%s) (path:%s)", - oldloc->inode->ino, - uuid_utoa (oldloc->inode->gfid), - oldloc->path); - goto out; - } - - LOCK (&inode_ctx->lock); - { - if (!inode_ctx->looked_up) { - /* unwind in sp_rename_helper */ - need_unwind = 0; - inode_ctx->need_unwind = 1; - } - } - UNLOCK (&inode_ctx->lock); - } - - } else { - new_inode_can_wind = 1; - } - -out: - if (need_unwind) { - SP_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL); - } else if (old_inode_need_lookup || new_inode_need_lookup) { - if (old_inode_need_lookup) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, oldloc, - NULL); - } - - if (new_inode_need_lookup) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, newloc, - NULL); - } - } else if (old_inode_can_wind && new_inode_can_wind) { - STACK_WIND (frame, sp_rename_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->rename, oldloc, newloc); - } - - return 0; -} - - -int32_t -sp_setxattr_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *dict, int32_t flags) -{ - uint64_t value = 0; - sp_inode_ctx_t *inode_ctx = NULL; - int32_t ret = 0, op_ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - - ret = inode_ctx_get (loc->inode, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", loc->inode->ino, - uuid_utoa (loc->inode->gfid)); - goto unwind; - } - - inode_ctx = (sp_inode_ctx_t *)(long) value; - GF_VALIDATE_OR_GOTO (this->name, inode_ctx, unwind); - - LOCK 
(&inode_ctx->lock); - { - op_ret = inode_ctx->op_ret; - op_errno = inode_ctx->op_errno; - } - UNLOCK (&inode_ctx->lock); - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "lookup-behind has failed for path (%s)(%s), " - "unwinding setxattr call waiting on it", loc->path, - strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, loc, dict, - flags); - - return 0; - -unwind: - SP_STACK_UNWIND (setxattr, frame, -1, op_errno); - return 0; -} - - -int32_t -sp_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict, - int32_t flags) -{ - int32_t op_errno = EINVAL; - call_stub_t *stub = NULL; - char can_wind = 0, need_lookup = 0, need_unwind = 1; - - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->name, out); - - sp_remove_caches_from_all_fds_opened (this, loc->parent, - (char *)loc->name); - - stub = fop_setxattr_stub (frame, sp_setxattr_helper, loc, dict, flags); - if (stub == NULL) { - op_errno = ENOMEM; - goto out; - } - - sp_process_inode_ctx (frame, this, loc, stub, &need_unwind, - &need_lookup, &can_wind, &op_errno); - -out: - if (need_unwind) { - SP_STACK_UNWIND (setxattr, frame, -1, op_errno); - } else if (need_lookup) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, NULL); - } else if (can_wind) { - STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, loc, dict, - flags); - } - - return 0; -} - - -int32_t -sp_removexattr_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name) -{ - uint64_t value = 0; - sp_inode_ctx_t *inode_ctx = NULL; - int32_t ret = 0, op_ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - - ret = inode_ctx_get (loc->inode, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - 
"stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", loc->inode->ino, - uuid_utoa (loc->inode->gfid)); - goto unwind; - } - - inode_ctx = (sp_inode_ctx_t *)(long) value; - GF_VALIDATE_OR_GOTO (this->name, inode_ctx, unwind); - - LOCK (&inode_ctx->lock); - { - op_ret = inode_ctx->op_ret; - op_errno = inode_ctx->op_errno; - } - UNLOCK (&inode_ctx->lock); - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "lookup-behind has failed for path (%s)(%s), " - "unwinding setxattr call waiting on it", loc->path, - strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, loc, name); - - return 0; - -unwind: - SP_STACK_UNWIND (removexattr, frame, -1, op_errno); - return 0; -} - - -int32_t -sp_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name) -{ - int32_t op_errno = EINVAL; - call_stub_t *stub = NULL; - char can_wind = 0, need_lookup = 0, need_unwind = 1; - - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->name, out); - - sp_remove_caches_from_all_fds_opened (this, loc->parent, - (char *)loc->name); - - stub = fop_removexattr_stub (frame, sp_removexattr_helper, loc, name); - if (stub == NULL) { - op_errno = ENOMEM; - goto out; - } - - sp_process_inode_ctx (frame, this, loc, stub, &need_unwind, - &need_lookup, &can_wind, &op_errno); - -out: - if (need_unwind) { - SP_STACK_UNWIND (removexattr, frame, -1, op_errno); - } else if (need_lookup) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, NULL); - } else if (can_wind) { - STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, loc, name); - } - - return 0; -} - - -int32_t -sp_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) -{ - GF_ASSERT (frame); - SP_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict); - 
return 0; -} - - -int32_t -sp_getxattr_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name) -{ - uint64_t value = 0; - sp_inode_ctx_t *inode_ctx = NULL; - int32_t ret = 0, op_ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - - ret = inode_ctx_get (loc->inode, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", loc->inode->ino, - uuid_utoa (loc->inode->gfid)); - goto unwind; - } - - inode_ctx = (sp_inode_ctx_t *)(long) value; - GF_VALIDATE_OR_GOTO (this->name, inode_ctx, unwind); - - LOCK (&inode_ctx->lock); - { - op_ret = inode_ctx->op_ret; - op_errno = inode_ctx->op_errno; - } - UNLOCK (&inode_ctx->lock); - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "lookup-behind has failed for path (%s)(%s), " - "unwinding getxattr call waiting on it", loc->path, - strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, sp_getxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, loc, name); - - return 0; - -unwind: - SP_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL); - return 0; -} - - -int32_t -sp_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name) -{ - int32_t op_errno = EINVAL; - call_stub_t *stub = NULL; - char can_wind = 0, need_lookup = 0, need_unwind = 1; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - stub = fop_getxattr_stub (frame, sp_getxattr_helper, loc, name); - if (stub == NULL) { - op_errno = ENOMEM; - goto out; - } - - sp_process_inode_ctx (frame, this, loc, stub, &need_unwind, - &need_lookup, &can_wind, &op_errno); - -out: - if (need_unwind) { - SP_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL); - } else if (need_lookup) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - 
FIRST_CHILD(this)->fops->lookup, loc, NULL); - } else if (can_wind) { - STACK_WIND (frame, sp_getxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, loc, name); - } - - return 0; -} - - -int32_t -sp_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) -{ - GF_ASSERT (frame); - SP_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict); - return 0; -} - - -int32_t -sp_xattrop_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, - gf_xattrop_flags_t flags, dict_t *dict) -{ - uint64_t value = 0; - sp_inode_ctx_t *inode_ctx = NULL; - int32_t ret = 0, op_ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - - ret = inode_ctx_get (loc->inode, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", loc->inode->ino, - uuid_utoa (loc->inode->gfid)); - goto unwind; - } - - inode_ctx = (sp_inode_ctx_t *)(long) value; - GF_VALIDATE_OR_GOTO (this->name, inode_ctx, unwind); - - LOCK (&inode_ctx->lock); - { - op_ret = inode_ctx->op_ret; - op_errno = inode_ctx->op_errno; - } - UNLOCK (&inode_ctx->lock); - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "lookup-behind has failed for path (%s)(%s), " - "unwinding xattrop call waiting on it", loc->path, - strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, sp_xattrop_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->xattrop, loc, flags, dict); - - return 0; - -unwind: - SP_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL); - return 0; -} - - -int32_t -sp_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc, - gf_xattrop_flags_t flags, dict_t *dict) -{ - int32_t op_errno = EINVAL; - call_stub_t *stub = NULL; - char can_wind = 0, need_lookup = 0, need_unwind = 1; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - 
GF_VALIDATE_OR_GOTO (this->name, loc->name, out); - - sp_remove_caches_from_all_fds_opened (this, loc->parent, - (char *)loc->name); - - stub = fop_xattrop_stub (frame, sp_xattrop_helper, loc, flags, dict); - if (stub == NULL) { - op_errno = ENOMEM; - goto out; - } - - sp_process_inode_ctx (frame, this, loc, stub, &need_unwind, - &need_lookup, &can_wind, &op_errno); - -out: - if (need_unwind) { - SP_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL); - } else if (need_lookup) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, NULL); - } else if (can_wind) { - STACK_WIND (frame, sp_xattrop_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->xattrop, loc, flags, dict); - } - - return 0; -} - - -int32_t -sp_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd, - gf_xattrop_flags_t flags, dict_t *dict) -{ - sp_fd_ctx_t *fd_ctx = NULL; - uint64_t value = 0; - int32_t ret = 0, op_errno = EINVAL; - inode_t *parent = NULL; - char *name = NULL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, fd, unwind); - - ret = fd_ctx_get (fd, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, "stat-prefetch context not " - "set in fd (%p) opened on inode (ino:%"PRId64", " - "gfid:%s", fd, fd->inode->ino, - uuid_utoa (fd->inode->gfid)); - goto unwind; - } - - fd_ctx = (void *)(long)value; - name = fd_ctx->name; - parent = fd_ctx->parent_inode; - - sp_remove_caches_from_all_fds_opened (this, parent, name); - - STACK_WIND (frame, sp_xattrop_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict); - return 0; - -unwind: - SP_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL); - return 0; -} - -int32_t -sp_stbuf_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *buf) -{ - GF_ASSERT (frame); - SP_STACK_UNWIND (stat, frame, op_ret, op_errno, buf); - return 0; -} - - -int32_t 
-sp_stat_helper (call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - uint64_t value = 0; - sp_inode_ctx_t *inode_ctx = NULL; - int32_t ret = 0, op_ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - - ret = inode_ctx_get (loc->inode, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", loc->inode->ino, - uuid_utoa (loc->inode->gfid)); - goto unwind; - } - - inode_ctx = (sp_inode_ctx_t *)(long) value; - GF_VALIDATE_OR_GOTO (this->name, inode_ctx, unwind); - - LOCK (&inode_ctx->lock); - { - op_ret = inode_ctx->op_ret; - op_errno = inode_ctx->op_errno; - } - UNLOCK (&inode_ctx->lock); - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "lookup-behind has failed for path (%s)(%s), " - "unwinding stat call waiting on it", loc->path, - strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, sp_stbuf_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc); - - return 0; - -unwind: - SP_STACK_UNWIND (stat, frame, -1, op_errno, NULL); - return 0; -} - - -int32_t -sp_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - int32_t op_errno = -1; - call_stub_t *stub = NULL; - char can_wind = 0, need_lookup = 0, need_unwind = 1; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - stub = fop_stat_stub (frame, sp_stat_helper, loc); - if (stub == NULL) { - op_errno = ENOMEM; - goto out; - } - - sp_process_inode_ctx (frame, this, loc, stub, &need_unwind, - &need_lookup, &can_wind, &op_errno); - -out: - if (need_unwind) { - SP_STACK_UNWIND (stat, frame, -1, op_errno, NULL); - } else if (need_lookup) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, NULL); - } else if (can_wind) { - 
STACK_WIND (frame, sp_stbuf_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc); - } - - return 0; -} - - -int32_t -sp_access_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask) -{ - uint64_t value = 0; - sp_inode_ctx_t *inode_ctx = NULL; - int32_t ret = 0, op_ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - - ret = inode_ctx_get (loc->inode, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", loc->inode->ino, - uuid_utoa (loc->inode->gfid)); - goto unwind; - } - - inode_ctx = (sp_inode_ctx_t *)(long) value; - GF_VALIDATE_OR_GOTO (this->name, inode_ctx, unwind); - - LOCK (&inode_ctx->lock); - { - op_ret = inode_ctx->op_ret; - op_errno = inode_ctx->op_errno; - } - UNLOCK (&inode_ctx->lock); - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "lookup-behind has failed for path (%s)(%s), " - "unwinding access call waiting on it", loc->path, - strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->access, loc, mask); - - return 0; - -unwind: - SP_STACK_UNWIND (access, frame, -1, op_errno); - return 0; -} - - -int32_t -sp_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask) -{ - int32_t op_errno = -1; - call_stub_t *stub = NULL; - char can_wind = 0, need_lookup = 0, need_unwind = 1; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - stub = fop_access_stub (frame, sp_access_helper, loc, mask); - if (stub == NULL) { - op_errno = ENOMEM; - goto out; - } - - sp_process_inode_ctx (frame, this, loc, stub, &need_unwind, - &need_lookup, &can_wind, &op_errno); - -out: - if (need_unwind) { - SP_STACK_UNWIND (access, frame, -1, 
op_errno); - } else if (need_lookup) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, NULL); - } else if (can_wind) { - STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->access, loc, mask); - } - - return 0; -} - - -int32_t -sp_inodelk_helper (call_frame_t *frame, xlator_t *this, const char *volume, - loc_t *loc, int32_t cmd, struct gf_flock *lock) -{ - uint64_t value = 0; - sp_inode_ctx_t *inode_ctx = NULL; - int32_t ret = 0, op_ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - - ret = inode_ctx_get (loc->inode, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", loc->inode->ino, - uuid_utoa (loc->inode->gfid)); - goto unwind; - } - - inode_ctx = (sp_inode_ctx_t *)(long) value; - GF_VALIDATE_OR_GOTO (this->name, inode_ctx, unwind); - - LOCK (&inode_ctx->lock); - { - op_ret = inode_ctx->op_ret; - op_errno = inode_ctx->op_errno; - } - UNLOCK (&inode_ctx->lock); - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "lookup-behind has failed for path (%s)(%s), " - "unwinding inodelk call waiting on it", loc->path, - strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, volume, loc, cmd, lock); - - return 0; - -unwind: - SP_STACK_UNWIND (inodelk, frame, -1, op_errno); - return 0; -} - - -int32_t -sp_inodelk (call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc, - int32_t cmd, struct gf_flock *lock) -{ - int32_t op_errno = EINVAL; - call_stub_t *stub = NULL; - char can_wind = 0, need_lookup = 0, need_unwind = 1; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - stub = 
fop_inodelk_stub (frame, sp_inodelk_helper, volume, loc, cmd, - lock); - if (stub == NULL) { - op_errno = ENOMEM; - goto out; - } - - sp_process_inode_ctx (frame, this, loc, stub, &need_unwind, - &need_lookup, &can_wind, &op_errno); - -out: - if (need_unwind) { - SP_STACK_UNWIND (inodelk, frame, -1, op_errno); - } else if (need_lookup) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, NULL); - } else if (can_wind) { - STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->inodelk, volume, loc, cmd, - lock); - } - - return 0; -} - - -int32_t -sp_entrylk_helper (call_frame_t *frame, xlator_t *this, const char *volume, - loc_t *loc, const char *basename, entrylk_cmd cmd, - entrylk_type type) -{ - uint64_t value = 0; - sp_inode_ctx_t *inode_ctx = NULL; - int32_t ret = 0, op_ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, unwind); - GF_VALIDATE_OR_GOTO (this->name, loc, unwind); - - ret = inode_ctx_get (loc->inode, this, &value); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "stat-prefetch context not set in inode " - "(ino:%"PRId64" gfid:%s)", loc->inode->ino, - uuid_utoa (loc->inode->gfid)); - goto unwind; - } - - inode_ctx = (sp_inode_ctx_t *)(long) value; - GF_VALIDATE_OR_GOTO (this->name, inode_ctx, unwind); - - LOCK (&inode_ctx->lock); - { - op_ret = inode_ctx->op_ret; - op_errno = inode_ctx->op_errno; - } - UNLOCK (&inode_ctx->lock); - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "lookup-behind has failed for path (%s)(%s), " - "unwinding entrylk call waiting on it", loc->path, - strerror (op_errno)); - goto unwind; - } - - STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->entrylk, volume, loc, basename, - cmd, type); - - return 0; - -unwind: - SP_STACK_UNWIND (entrylk, frame, -1, op_errno); - return 0; -} - - -int32_t -sp_entrylk (call_frame_t *frame, xlator_t *this, const char *volume, 
loc_t *loc, - const char *basename, entrylk_cmd cmd, entrylk_type type) -{ - int32_t op_errno = EINVAL; - call_stub_t *stub = NULL; - char can_wind = 0, need_lookup = 0, need_unwind = 1; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, out); - GF_VALIDATE_OR_GOTO (this->name, loc, out); - GF_VALIDATE_OR_GOTO (this->name, loc->inode, out); - - stub = fop_entrylk_stub (frame, sp_entrylk_helper, volume, loc, - basename, cmd, type); - if (stub == NULL) { - op_errno = ENOMEM; - goto out; - } - - sp_process_inode_ctx (frame, this, loc, stub, &need_unwind, - &need_lookup, &can_wind, &op_errno); - -out: - if (need_unwind) { - SP_STACK_UNWIND (entrylk, frame, -1, op_errno); - } else if (need_lookup) { - STACK_WIND (frame, sp_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, NULL); - } else if (can_wind) { - STACK_WIND (frame, sp_err_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->entrylk, volume, loc, - basename, cmd, type); - } - - return 0; -} - - -int32_t -sp_forget (xlator_t *this, inode_t *inode) -{ - struct iatt *buf = NULL; - uint64_t value = 0; - - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, out); - GF_VALIDATE_OR_GOTO (this->name, inode, out); - - inode_ctx_del (inode, this, &value); - - if (value) { - buf = (void *)(long)value; - GF_FREE (buf); - } - -out: - return 0; -} - - -int32_t -sp_release (xlator_t *this, fd_t *fd) -{ - sp_fd_ctx_t *fd_ctx = NULL; - uint64_t value = 0; - int32_t ret = 0; - sp_cache_t *cache = NULL; - - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - ret = fd_ctx_del (fd, this, &value); - if (!ret) { - fd_ctx = (void *)(long) value; - cache = fd_ctx->cache; - if (cache) { - gf_log (this->name, GF_LOG_TRACE, "cache hits: %lu, " - "cache miss: %lu", cache->hits, cache->miss); - } - - sp_fd_ctx_free (fd_ctx); - } - -out: - return 0; -} - - -struct sp_cache_dump { - int i; - char *key_prefix; -}; -typedef struct sp_cache_dump sp_cache_dump_t; - -void 
-sp_cache_traverse (void *data, void *mydata) -{ - gf_dirent_t *dirent = NULL; - char key[GF_DUMP_MAX_BUF_LEN] = {0, }; - char uuidbuf[256] = {0, }; - sp_cache_dump_t *dump = NULL; - - if ((data == NULL) || (mydata == NULL)) { - goto out; - } - - dirent = data; - dump = mydata; - - gf_proc_dump_build_key (key, dump->key_prefix, "entry[%d].name", - dump->i); - gf_proc_dump_write (key, "%s", dirent->d_name); - - uuid_unparse (dirent->d_stat.ia_gfid, uuidbuf); - gf_proc_dump_build_key (key, dump->key_prefix, "entry[%d].inode.gfid", - dump->i); - gf_proc_dump_write (key, "%s", uuidbuf); - - gf_proc_dump_build_key(key, dump->key_prefix, "entry[%d].inode.ino", - dump->i); - gf_proc_dump_write(key, "%ld", dirent->d_stat.ia_ino); - - dump->i++; -out: - return; -} - - -int32_t -sp_fdctx_dump (xlator_t *this, fd_t *fd) -{ - char key[GF_DUMP_MAX_BUF_LEN] = {0, }; - char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; - char uuidbuf[256] = {0, }; - sp_cache_t *cache = NULL; - int32_t ret = -1; - sp_cache_dump_t *dump = NULL; - char *parent = NULL; - - cache = sp_get_cache_fd (this, fd); - if (cache == NULL) { - ret = 0; - goto out; - } - - dump = GF_CALLOC(1, sizeof (*dump), gf_common_mt_char); - if (dump == NULL) { - goto out; - } - - gf_proc_dump_build_key (key_prefix, - "xlator.performance.stat-prefetch", - "fdctx"); - gf_proc_dump_add_section (key_prefix); - - gf_proc_dump_build_key (key, key_prefix, "fd"); - gf_proc_dump_write (key, "%p", fd); - - ret = __inode_path (fd->inode, NULL, &parent); - if (parent != NULL) { - gf_proc_dump_build_key (key, key_prefix, "name"); - gf_proc_dump_write (key, "%s", parent); - GF_FREE (parent); - } - - uuid_unparse (fd->inode->gfid, uuidbuf); - gf_proc_dump_build_key (key, key_prefix, "fd.inode.gfid"); - gf_proc_dump_write (key, "%s", uuidbuf); - - gf_proc_dump_build_key (key, key_prefix, "fd.inode.ino"); - gf_proc_dump_write (key, "%ld", fd->inode->ino); - - gf_proc_dump_build_key (key, key_prefix, "miss"); - gf_proc_dump_write (key, "%lu", 
cache->miss); - - gf_proc_dump_build_key (key, key_prefix, "hits"); - gf_proc_dump_write (key, "%lu", cache->hits); - - gf_proc_dump_build_key (key, key_prefix, "cache"); - dump->key_prefix = key; - - rbthash_table_traverse (cache->table, sp_cache_traverse, dump); - - GF_FREE (dump); - ret = 0; -out: - return ret; -} - -int32_t -sp_inodectx_dump (xlator_t *this, inode_t *inode) -{ - char key[GF_DUMP_MAX_BUF_LEN] = {0, }; - char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; - char uuidbuf[256] = {0, }; - sp_inode_ctx_t *inode_ctx = NULL; - call_stub_t *stub = NULL; - uint64_t value = 0; - int32_t ret = -1, i = 0; - - if ((this == NULL) || (inode == NULL)) { - goto out; - } - - ret = inode_ctx_get (inode, this, &value); - if (ret == 0) { - inode_ctx = (sp_inode_ctx_t *)(long)value; - } - - if (inode_ctx == NULL) { - goto out; - } - - gf_proc_dump_build_key (key_prefix, - "xlator.performance.stat-prefetch", - "inodectx"); - gf_proc_dump_add_section (key_prefix); - - uuid_unparse (inode->gfid, uuidbuf); - gf_proc_dump_build_key (key, key_prefix, "inode.gfid"); - gf_proc_dump_write (key, "%s", uuidbuf); - - gf_proc_dump_build_key (key, key_prefix, "inode.ino"); - gf_proc_dump_write (key, "%ld", inode->ino); - - LOCK (&inode_ctx->lock); - { - gf_proc_dump_build_key (key, key_prefix, "looked_up"); - gf_proc_dump_write (key, "%s", - inode_ctx->looked_up ? "yes" : "no"); - - gf_proc_dump_build_key (key, key_prefix, "lookup_in_progress"); - gf_proc_dump_write (key, "%s", - inode_ctx->lookup_in_progress ? - "yes" : "no"); - - gf_proc_dump_build_key (key, key_prefix, "need_unwind"); - gf_proc_dump_write (key, "%s", inode_ctx->need_unwind ? 
- "yes" : "no"); - - gf_proc_dump_build_key (key, key_prefix, "op_ret"); - gf_proc_dump_write (key, "%d", inode_ctx->op_ret); - - gf_proc_dump_build_key (key, key_prefix, "op_errno"); - gf_proc_dump_write (key, "%d", inode_ctx->op_errno); - - list_for_each_entry (stub, &inode_ctx->waiting_ops, list) { - gf_proc_dump_build_key (key, key_prefix, - "waiting-ops[%d].frame", i); - gf_proc_dump_write (key, "%"PRId64, - stub->frame->root->unique); - - gf_proc_dump_build_key (key, key_prefix, - "waiting-ops[%d].fop", i); - gf_proc_dump_write (key, "%s", gf_fop_list[stub->fop]); - - i++; - } - } - UNLOCK (&inode_ctx->lock); -out: - return ret; -} - -int -sp_priv_dump (xlator_t *this) -{ - sp_private_t *priv = NULL; - uint32_t total_entries = 0; - uint32_t ret = -1; - char key[GF_DUMP_MAX_BUF_LEN]; - char key_prefix[GF_DUMP_MAX_BUF_LEN]; - - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, out); - GF_VALIDATE_OR_GOTO (this->name, this->private, out); - - priv = this->private; - - total_entries = priv->entries; - - gf_proc_dump_build_key (key_prefix, "xlator.performance.stat-prefetch", - "priv"); - gf_proc_dump_add_section (key_prefix); - - gf_proc_dump_build_key (key, key_prefix, "max_allowed_entries"); - gf_proc_dump_write (key, "%lu", GF_SP_CACHE_ENTRIES_EXPECTED); - gf_proc_dump_build_key (key, key_prefix, "num_entries_cached"); - gf_proc_dump_write (key, "%lu",(unsigned long)total_entries); - ret = 0; - -out: - return ret; -} - - -int32_t -mem_acct_init (xlator_t *this) -{ - int ret = -1; - - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, out); - - ret = xlator_mem_acct_init (this, gf_sp_mt_end + 1); - - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); - goto out; - } - -out: - return ret; -} - - -int32_t -init (xlator_t *this) -{ - int32_t ret = -1; - sp_private_t *priv = NULL; - - GF_VALIDATE_OR_GOTO ("stat-prefetch", this, out); - - if (!this->children || this->children->next) { - gf_log ("stat-prefetch", - GF_LOG_ERROR, - "FATAL: 
translator %s does not have exactly one child " - "node", this->name); - goto out; - } - - priv = GF_CALLOC (1, sizeof(sp_private_t), gf_sp_mt_sp_private_t); - LOCK_INIT (&priv->lock); - - this->private = priv; - - ret = 0; -out: - return ret; -} - -void -fini (xlator_t *this) -{ - sp_private_t *priv = NULL; - - if (!this) - goto out; - else { - priv = this->private; - if (priv) { - if (priv->mem_pool) - mem_pool_destroy (priv->mem_pool); - LOCK_DESTROY (&priv->lock); - GF_FREE (priv); - this->private = NULL; - } - } -out: - return; -} - - -struct xlator_fops fops = { - .lookup = sp_lookup, - .readdir = sp_readdir, - .readdirp = sp_readdir, - .open = sp_open, - .create = sp_create, - .opendir = sp_opendir, - .mkdir = sp_mkdir, - .mknod = sp_mknod, - .symlink = sp_symlink, - .link = sp_link, - .truncate = sp_truncate, - .ftruncate = sp_ftruncate, - .readlink = sp_readlink, - .unlink = sp_unlink, - .rmdir = sp_rmdir, - .readv = sp_readv, - .writev = sp_writev, - .fsync = sp_fsync, - .rename = sp_rename, - .setxattr = sp_setxattr, - .removexattr = sp_removexattr, - .xattrop = sp_xattrop, - .fxattrop = sp_fxattrop, - .setattr = sp_setattr, - .stat = sp_stat, - .access = sp_access, - .getxattr = sp_getxattr, - .inodelk = sp_inodelk, - .entrylk = sp_entrylk, -}; - -struct xlator_cbks cbks = { - .forget = sp_forget, - .release = sp_release, - .releasedir = sp_release -}; - -struct xlator_dumpops dumpops = { - .priv = sp_priv_dump, - .inodectx = sp_inodectx_dump, - .fdctx = sp_fdctx_dump -}; diff --git a/xlators/performance/stat-prefetch/src/stat-prefetch.h b/xlators/performance/stat-prefetch/src/stat-prefetch.h deleted file mode 100644 index ed84719e4..000000000 --- a/xlators/performance/stat-prefetch/src/stat-prefetch.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - Copyright (c) 2009-2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. 
- - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ - -#ifndef _STAT_PREFETCH_H -#define _STAT_PREFETCH_H - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "locking.h" -#include "inode.h" -#include "glusterfs.h" -#include "dict.h" -#include "xlator.h" -#include "rbthash.h" -#include "hashfn.h" -#include "call-stub.h" -#include "stat-prefetch-mem-types.h" -#include <libgen.h> - -struct sp_cache { - rbthash_table_t *table; - xlator_t *this; - uint64_t expected_offset; /* Offset where the next read will - * happen. 
- */ - gf_lock_t lock; - unsigned long miss; - unsigned long hits; - uint32_t ref; -}; -typedef struct sp_cache sp_cache_t; - -struct sp_fd_ctx { - sp_cache_t *cache; - inode_t *parent_inode; /* - * inode corresponding to dirname (path) - */ - char *name; /* - * basename of path on which this fd is - * opened - */ -}; -typedef struct sp_fd_ctx sp_fd_ctx_t; - -struct sp_local { - loc_t loc; - fd_t *fd; - char is_lookup; -}; -typedef struct sp_local sp_local_t; - -struct sp_inode_ctx { - char looked_up; - char lookup_in_progress; - char need_unwind; - int32_t op_ret; - int32_t op_errno; - struct iatt stbuf; - gf_lock_t lock; - struct list_head waiting_ops; -}; -typedef struct sp_inode_ctx sp_inode_ctx_t; - -struct sp_private { - struct mem_pool *mem_pool; - uint32_t entries; - gf_lock_t lock; -}; -typedef struct sp_private sp_private_t; - -void sp_local_free (sp_local_t *local); - -#define SP_STACK_UNWIND(op, frame, params ...) do { \ - sp_local_t *__local = frame->local; \ - frame->local = NULL; \ - STACK_UNWIND_STRICT (op, frame, params); \ - sp_local_free (__local); \ - } while (0) - -#define SP_STACK_DESTROY(frame) do { \ - sp_local_t *__local = frame->local; \ - frame->local = NULL; \ - STACK_DESTROY (frame->root); \ - sp_local_free (__local); \ - } while (0) - -#endif /* #ifndef _STAT_PREFETCH_H */ diff --git a/xlators/performance/symlink-cache/src/Makefile.am b/xlators/performance/symlink-cache/src/Makefile.am index 06e85fc92..4091c3293 100644 --- a/xlators/performance/symlink-cache/src/Makefile.am +++ b/xlators/performance/symlink-cache/src/Makefile.am @@ -1,12 +1,13 @@ xlator_LTLIBRARIES = symlink-cache.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/performance -symlink_cache_la_LDFLAGS = -module -avoidversion +symlink_cache_la_LDFLAGS = -module -avoid-version symlink_cache_la_SOURCES = symlink-cache.c symlink_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall 
-D$(GF_HOST_OS)\ - -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) CLEANFILES = diff --git a/xlators/performance/symlink-cache/src/symlink-cache.c b/xlators/performance/symlink-cache/src/symlink-cache.c index a82786cce..3b5fbc252 100644 --- a/xlators/performance/symlink-cache/src/symlink-cache.c +++ b/xlators/performance/symlink-cache/src/symlink-cache.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ @@ -146,8 +137,7 @@ sc_cache_set (xlator_t *this, inode_t *inode, struct iatt *buf, err: if (sc) { - if (sc->readlink) - FREE (sc->readlink); + FREE (sc->readlink); sc->readlink = NULL; FREE (sc); } @@ -242,7 +232,7 @@ sc_cache_get (xlator_t *this, inode_t *inode, char **link) int sc_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, - const char *link, struct iatt *sbuf) + const char *link, struct iatt *sbuf, dict_t *xdata) { if (op_ret > 0) sc_cache_update (this, frame->local, link); @@ -250,14 +240,15 @@ sc_readlink_cbk (call_frame_t *frame, void *cookie, inode_unref (frame->local); frame->local = NULL; - STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, link, sbuf); + STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, link, sbuf, + xdata); return 0; } int sc_readlink (call_frame_t *frame, xlator_t *this, - loc_t *loc, size_t size) + loc_t *loc, size_t size, dict_t *xdata) { char *link = NULL; struct iatt buf = {0, }; @@ -275,7 +266,8 @@ sc_readlink (call_frame_t *frame, xlator_t *this, using buf in readlink_cbk should be aware that @buf is 0 filled */ - STACK_UNWIND_STRICT (readlink, frame, strlen (link), 0, link, &buf); + STACK_UNWIND_STRICT (readlink, frame, strlen (link), 0, link, + &buf, NULL); FREE (link); return 0; } @@ -285,7 +277,7 @@ sc_readlink (call_frame_t *frame, xlator_t *this, STACK_WIND (frame, sc_readlink_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readlink, - loc, size); + loc, size, xdata); return 0; } @@ -295,7 +287,7 @@ int sc_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { if (op_ret == 0) { if (frame->local) { @@ -303,22 +295,22 @@ sc_symlink_cbk (call_frame_t *frame, void *cookie, } } - STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf, preparent, - postparent); + STACK_UNWIND_STRICT (symlink, 
frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); return 0; } int sc_symlink (call_frame_t *frame, xlator_t *this, - const char *dst, loc_t *src, dict_t *params) + const char *dst, loc_t *src, mode_t umask, dict_t *xdata) { frame->local = strdup (dst); STACK_WIND (frame, sc_symlink_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink, - dst, src, params); + dst, src, umask, xdata); return 0; } @@ -327,7 +319,7 @@ sc_symlink (call_frame_t *frame, xlator_t *this, int sc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, - inode_t *inode, struct iatt *buf, dict_t *xattr, + inode_t *inode, struct iatt *buf, dict_t *xdata, struct iatt *postparent) { if (op_ret == 0) @@ -335,19 +327,20 @@ sc_lookup_cbk (call_frame_t *frame, void *cookie, else sc_cache_flush (this, inode); - STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, xattr, postparent); + STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, + xdata, postparent); return 0; } int sc_lookup (call_frame_t *frame, xlator_t *this, - loc_t *loc, dict_t *xattr_req) + loc_t *loc, dict_t *xdata) { STACK_WIND (frame, sc_lookup_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, - loc, xattr_req); + loc, xdata); return 0; } @@ -363,10 +356,9 @@ sc_forget (xlator_t *this, } -int32_t +int32_t init (xlator_t *this) { - if (!this->children || this->children->next) { gf_log (this->name, GF_LOG_ERROR, diff --git a/xlators/performance/write-behind/src/Makefile.am b/xlators/performance/write-behind/src/Makefile.am index a5ebc90bd..6c829d8ee 100644 --- a/xlators/performance/write-behind/src/Makefile.am +++ b/xlators/performance/write-behind/src/Makefile.am @@ -1,14 +1,15 @@ xlator_LTLIBRARIES = write-behind.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance -write_behind_la_LDFLAGS = -module -avoidversion +write_behind_la_LDFLAGS = -module -avoid-version write_behind_la_SOURCES = write-behind.c write_behind_la_LIBADD = 
$(top_builddir)/libglusterfs/src/libglusterfs.la noinst_HEADERS = write-behind-mem-types.h -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\ - -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) CLEANFILES = diff --git a/xlators/performance/write-behind/src/write-behind-mem-types.h b/xlators/performance/write-behind/src/write-behind-mem-types.h index 5a3ee4aed..f64f429ce 100644 --- a/xlators/performance/write-behind/src/write-behind-mem-types.h +++ b/xlators/performance/write-behind/src/write-behind-mem-types.h @@ -1,20 +1,11 @@ /* - Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com> - This file is part of GlusterFS. + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ @@ -25,10 +16,10 @@ enum gf_wb_mem_types_ { gf_wb_mt_wb_file_t = gf_common_mt_end + 1, - gf_wb_mt_wb_local_t, gf_wb_mt_wb_request_t, gf_wb_mt_iovec, gf_wb_mt_wb_conf_t, + gf_wb_mt_wb_inode_t, gf_wb_mt_end }; #endif diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c index 4d47eb54e..95c5921c6 100644 --- a/xlators/performance/write-behind/src/write-behind.c +++ b/xlators/performance/write-behind/src/write-behind.c @@ -1,24 +1,13 @@ /* - Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
*/ -/*TODO: check for non null wb_file_data before getting wb_file */ - #ifndef _CONFIG_H #define _CONFIG_H @@ -35,1581 +24,977 @@ #include "common-utils.h" #include "call-stub.h" #include "statedump.h" +#include "defaults.h" #include "write-behind-mem-types.h" -#define MAX_VECTOR_COUNT 8 -#define WB_AGGREGATE_SIZE 131072 /* 128 KB */ -#define WB_WINDOW_SIZE 1048576 /* 1MB */ +#define MAX_VECTOR_COUNT 8 +#define WB_AGGREGATE_SIZE 131072 /* 128 KB */ +#define WB_WINDOW_SIZE 1048576 /* 1MB */ typedef struct list_head list_head_t; struct wb_conf; -struct wb_page; -struct wb_file; - -typedef struct wb_file { - int disabled; - uint64_t disable_till; - size_t window_conf; - size_t window_current; - int32_t flags; - size_t aggregate_current; - int32_t refcount; - int32_t op_ret; - int32_t op_errno; - list_head_t request; - list_head_t passive_requests; - fd_t *fd; +struct wb_inode; + +typedef struct wb_inode { + ssize_t window_conf; + ssize_t window_current; + ssize_t transit; /* size of data stack_wound, and yet + to be fulfilled (wb_fulfill_cbk). + used for trickling_writes + */ + + list_head_t all; /* All requests, from enqueue() till destroy(). + Used only for resetting generation + number when empty. + */ + list_head_t todo; /* Work to do (i.e, STACK_WIND to server). + Once we STACK_WIND, the entry is taken + off the list. If it is non-sync write, + then we continue to track it via @liability + or @temptation depending on the status + of its writeback. + */ + list_head_t liability; /* Non-sync writes which are lied + (STACK_UNWIND'ed to caller) but ack + from server not yet complete. This + is the "liability" which we hold, and + must guarantee that dependent operations + which arrive later (which overlap, etc.) + are issued only after their dependencies + in this list are "fulfilled". + + Server acks for entries in this list + shrinks the window. + + The sum total of all req->write_size + of entries in this list must be kept less + than the permitted window size. 
+ */ + list_head_t temptation; /* Operations for which we are tempted + to 'lie' (write-behind), but temporarily + holding off (because of insufficient + window capacity, etc.) + + This is the list to look at to grow + the window (in __wb_pick_unwinds()). + + Entries typically get chosen from + write-behind from this list, and therefore + get "upgraded" to the "liability" list. + */ + list_head_t wip; /* List of write calls in progress, SYNC or non-SYNC + which are currently STACK_WIND'ed towards the server. + This is for guaranteeing that no two overlapping + writes are in progress at the same time. Modules + like eager-lock in AFR depend on this behavior. + */ + uint64_t gen; /* Liability generation number. Represents + the current 'state' of liability. Every + new addition to the liability list bumps + the generation number. + + a newly arrived request is only required + to perform causal checks against the entries + in the liability list which were present + at the time of its addition. the generation + number at the time of its addition is stored + in the request and used during checks. + + the liability list can grow while the request + waits in the todo list waiting for its + dependent operations to complete. 
however + it is not of the request's concern to depend + itself on those new entries which arrived + after it arrived (i.e, those that have a + liability generation higher than itself) + */ gf_lock_t lock; xlator_t *this; -}wb_file_t; +} wb_inode_t; + typedef struct wb_request { - list_head_t list; - list_head_t winds; - list_head_t unwinds; - list_head_t other_requests; - call_stub_t *stub; - size_t write_size; - int32_t refcount; - wb_file_t *file; - glusterfs_fop_t fop; - union { - struct { - char write_behind; - char stack_wound; - char got_reply; - char virgin; - char flush_all; /* while trying to sync to back-end, - * don't wait till a data of size - * equal to configured aggregate-size - * is accumulated, instead sync - * whatever data currently present in - * request queue. - */ - - }write_request; - - struct { - char marked_for_resume; - }other_requests; - }flags; + list_head_t all; + list_head_t todo; + list_head_t lie; /* either in @liability or @temptation */ + list_head_t winds; + list_head_t unwinds; + list_head_t wip; + + call_stub_t *stub; + + ssize_t write_size; /* currently held size + (after collapsing) */ + size_t orig_size; /* size which arrived with the request. + This is the size by which we grow + the window when unwinding the frame. + */ + size_t total_size; /* valid only in @head in wb_fulfill(). + This is the size with which we perform + STACK_WIND to server and therefore the + amount by which we shrink the window. + */ + + int op_ret; + int op_errno; + + int32_t refcount; + wb_inode_t *wb_inode; + glusterfs_fop_t fop; + gf_lkowner_t lk_owner; + struct iobref *iobref; + uint64_t gen; /* inode liability state at the time of + request arrival */ + + fd_t *fd; + struct { + size_t size; /* 0 size == till infinity */ + off_t off; + int append:1; /* offset is invalid. 
only one + outstanding append at a time */ + int tempted:1; /* true only for non-sync writes */ + int lied:1; /* sin committed */ + int fulfilled:1; /* got server acknowledgement */ + int go:1; /* enough aggregating, good to go */ + } ordering; } wb_request_t; -struct wb_conf { - uint64_t aggregate_size; - uint64_t window_size; - uint64_t disable_till; - gf_boolean_t enable_O_SYNC; - gf_boolean_t flush_behind; - gf_boolean_t enable_trickling_writes; -}; - -typedef struct wb_local { - list_head_t winds; - int32_t flags; - int32_t wbflags; - struct wb_file *file; - wb_request_t *request; - int op_ret; - int op_errno; - call_frame_t *frame; - int32_t reply_count; -} wb_local_t; - -typedef struct wb_conf wb_conf_t; -typedef struct wb_page wb_page_t; -int32_t -wb_process_queue (call_frame_t *frame, wb_file_t *file); +typedef struct wb_conf { + uint64_t aggregate_size; + uint64_t window_size; + gf_boolean_t flush_behind; + gf_boolean_t trickling_writes; + gf_boolean_t strict_write_ordering; + gf_boolean_t strict_O_DIRECT; +} wb_conf_t; -ssize_t -wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds); -ssize_t -__wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_size, - char enable_trickling_writes); +void +wb_process_queue (wb_inode_t *wb_inode); -static int -__wb_request_unref (wb_request_t *this) +wb_inode_t * +__wb_inode_ctx_get (xlator_t *this, inode_t *inode) { - int ret = -1; + uint64_t value = 0; + wb_inode_t *wb_inode = NULL; - GF_VALIDATE_OR_GOTO ("write-behind", this, out); + __inode_ctx_get (inode, this, &value); + wb_inode = (wb_inode_t *)(unsigned long) value; - if (this->refcount <= 0) { - gf_log ("wb-request", GF_LOG_WARNING, - "refcount(%d) is <= 0", this->refcount); - goto out; - } - - ret = --this->refcount; - if (this->refcount == 0) { - list_del_init (&this->list); - if (this->stub && this->stub->fop == GF_FOP_WRITE) { - call_stub_destroy (this->stub); - } - - GF_FREE (this); - } - -out: - return ret; + return 
wb_inode; } -static int -wb_request_unref (wb_request_t *this) +wb_inode_t * +wb_inode_ctx_get (xlator_t *this, inode_t *inode) { - wb_file_t *file = NULL; - int ret = -1; + wb_inode_t *wb_inode = NULL; GF_VALIDATE_OR_GOTO ("write-behind", this, out); + GF_VALIDATE_OR_GOTO (this->name, inode, out); - file = this->file; - - LOCK (&file->lock); + LOCK (&inode->lock); { - ret = __wb_request_unref (this); + wb_inode = __wb_inode_ctx_get (this, inode); } - UNLOCK (&file->lock); - + UNLOCK (&inode->lock); out: - return ret; + return wb_inode; } -static wb_request_t * -__wb_request_ref (wb_request_t *this) +gf_boolean_t +wb_fd_err (fd_t *fd, xlator_t *this, int32_t *op_errno) { - GF_VALIDATE_OR_GOTO ("write-behind", this, out); + gf_boolean_t err = _gf_false; + uint64_t value = 0; + int32_t tmp = 0; - if (this->refcount < 0) { - gf_log ("wb-request", GF_LOG_WARNING, - "refcount(%d) is < 0", this->refcount); - this = NULL; - goto out; - } + if (fd_ctx_get (fd, this, &value) == 0) { + if (op_errno) { + tmp = value; + *op_errno = tmp; + } - this->refcount++; + err = _gf_true; + } -out: - return this; + return err; } -wb_request_t * -wb_request_ref (wb_request_t *this) -{ - wb_file_t *file = NULL; +/* + Below is a succinct explanation of the code deciding whether two regions + overlap, from Pavan <tcp@gluster.com>. - GF_VALIDATE_OR_GOTO ("write-behind", this, out); + For any two ranges to be non-overlapping, either the end of the first + range is lesser than the start of the second, or vice versa. Example - - file = this->file; - LOCK (&file->lock); - { - this = __wb_request_ref (this); - } - UNLOCK (&file->lock); + <---------> <--------------> + p q x y -out: - return this; -} + ( q < x ) or (y < p) = > No overlap. 
+ To check for *overlap*, we can negate this (using de morgan's laws), and + it becomes - -wb_request_t * -wb_enqueue (wb_file_t *file, call_stub_t *stub) -{ - wb_request_t *request = NULL, *tmp = NULL; - call_frame_t *frame = NULL; - wb_local_t *local = NULL; - struct iovec *vector = NULL; - int32_t count = 0; + (q >= x ) and (y >= p) - GF_VALIDATE_OR_GOTO ("write-behind", file, out); - GF_VALIDATE_OR_GOTO (file->this->name, stub, out); + Either that, or you write the negation using - - request = GF_CALLOC (1, sizeof (*request), gf_wb_mt_wb_request_t); - if (request == NULL) { - goto out; - } + if (! ((q < x) or (y < p)) ) { + "Overlap" + } +*/ - INIT_LIST_HEAD (&request->list); - INIT_LIST_HEAD (&request->winds); - INIT_LIST_HEAD (&request->unwinds); - INIT_LIST_HEAD (&request->other_requests); +gf_boolean_t +wb_requests_overlap (wb_request_t *req1, wb_request_t *req2) +{ + uint64_t r1_start = 0; + uint64_t r1_end = 0; + uint64_t r2_start = 0; + uint64_t r2_end = 0; + enum _gf_boolean do_overlap = 0; + + r1_start = req1->ordering.off; + if (req1->ordering.size) + r1_end = r1_start + req1->ordering.size - 1; + else + r1_end = ULLONG_MAX; + + r2_start = req2->ordering.off; + if (req2->ordering.size) + r2_end = r2_start + req2->ordering.size - 1; + else + r2_end = ULLONG_MAX; + + do_overlap = ((r1_end >= r2_start) && (r2_end >= r1_start)); + + return do_overlap; +} - request->stub = stub; - request->file = file; - request->fop = stub->fop; - frame = stub->frame; - local = frame->local; - if (local) { - local->request = request; - } +gf_boolean_t +wb_requests_conflict (wb_request_t *lie, wb_request_t *req) +{ + wb_conf_t *conf = NULL; - if (stub->fop == GF_FOP_WRITE) { - vector = stub->args.writev.vector; - count = stub->args.writev.count; + conf = req->wb_inode->this->private; - request->write_size = iov_length (vector, count); - if (local) { - local->op_ret = request->write_size; - local->op_errno = 0; - } + if (lie == req) + /* request cannot conflict with itself 
*/ + return _gf_false; - request->flags.write_request.virgin = 1; - } + if (lie->gen >= req->gen) + /* this liability entry was behind + us in the todo list */ + return _gf_false; - LOCK (&file->lock); - { - list_add_tail (&request->list, &file->request); - if (stub->fop == GF_FOP_WRITE) { - /* reference for stack winding */ - __wb_request_ref (request); - - /* reference for stack unwinding */ - __wb_request_ref (request); - - file->aggregate_current += request->write_size; - } else { - list_for_each_entry (tmp, &file->request, list) { - if (tmp->stub && tmp->stub->fop - == GF_FOP_WRITE) { - tmp->flags.write_request.flush_all = 1; - } - } - - /*reference for resuming */ - __wb_request_ref (request); - } - } - UNLOCK (&file->lock); + if (lie->ordering.append) + /* all modifications wait for the completion + of outstanding append */ + return _gf_true; -out: - return request; + if (conf->strict_write_ordering) + /* We are sure (lie->gen < req->gen) by now. So + skip overlap check if strict write ordering is + requested and always return "conflict" against a + lower generation lie. 
*/ + return _gf_true; + + return wb_requests_overlap (lie, req); } -wb_file_t * -wb_file_create (xlator_t *this, fd_t *fd, int32_t flags) +gf_boolean_t +wb_liability_has_conflict (wb_inode_t *wb_inode, wb_request_t *req) { - wb_file_t *file = NULL; - wb_conf_t *conf = NULL; + wb_request_t *each = NULL; - GF_VALIDATE_OR_GOTO ("write-behind", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - conf = this->private; - - file = GF_CALLOC (1, sizeof (*file), gf_wb_mt_wb_file_t); - if (file == NULL) { - goto out; + list_for_each_entry (each, &wb_inode->liability, lie) { + if (wb_requests_conflict (each, req)) + return _gf_true; } - INIT_LIST_HEAD (&file->request); - INIT_LIST_HEAD (&file->passive_requests); - - /* - fd_ref() not required, file should never decide the existance of - an fd - */ - file->fd= fd; - file->disable_till = conf->disable_till; - file->this = this; - file->refcount = 1; - file->window_conf = conf->window_size; - file->flags = flags; - - LOCK_INIT (&file->lock); - - fd_ctx_set (fd, this, (uint64_t)(long)file); - -out: - return file; + return _gf_false; } -void -wb_file_destroy (wb_file_t *file) +gf_boolean_t +wb_wip_has_conflict (wb_inode_t *wb_inode, wb_request_t *req) { - int32_t refcount = 0; + wb_request_t *each = NULL; - GF_VALIDATE_OR_GOTO ("write-behind", file, out); + if (req->stub->fop != GF_FOP_WRITE) + /* non-writes fundamentally never conflict with WIP requests */ + return _gf_false; - LOCK (&file->lock); - { - refcount = --file->refcount; - } - UNLOCK (&file->lock); + list_for_each_entry (each, &wb_inode->wip, wip) { + if (each == req) + /* request never conflicts with itself, + though this condition should never occur. 
+ */ + continue; - if (!refcount){ - LOCK_DESTROY (&file->lock); - GF_FREE (file); + if (wb_requests_overlap (each, req)) + return _gf_true; } -out: - return; + return _gf_false; } -int32_t -wb_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf) +static int +__wb_request_unref (wb_request_t *req) { - wb_local_t *local = NULL; - list_head_t *winds = NULL; - wb_file_t *file = NULL; - wb_request_t *request = NULL, *dummy = NULL; - wb_local_t *per_request_local = NULL; - int32_t ret = -1; - fd_t *fd = NULL; + int ret = -1; + wb_inode_t *wb_inode = NULL; - GF_ASSERT (frame); - GF_ASSERT (this); + wb_inode = req->wb_inode; - local = frame->local; - winds = &local->winds; + if (req->refcount <= 0) { + gf_log ("wb-request", GF_LOG_WARNING, + "refcount(%d) is <= 0", req->refcount); + goto out; + } - file = local->file; - GF_VALIDATE_OR_GOTO (this->name, file, out); + ret = --req->refcount; + if (req->refcount == 0) { + list_del_init (&req->todo); + list_del_init (&req->lie); + list_del_init (&req->wip); - LOCK (&file->lock); - { - list_for_each_entry_safe (request, dummy, winds, winds) { - request->flags.write_request.got_reply = 1; + list_del_init (&req->all); + if (list_empty (&wb_inode->all)) { + wb_inode->gen = 0; + /* in case of accounting errors? 
*/ + wb_inode->window_current = 0; + } - if (!request->flags.write_request.write_behind - && (op_ret == -1)) { - per_request_local = request->stub->frame->local; - per_request_local->op_ret = op_ret; - per_request_local->op_errno = op_errno; - } + list_del_init (&req->winds); + list_del_init (&req->unwinds); - if (request->flags.write_request.write_behind) { - file->window_current -= request->write_size; - } + if (req->stub && req->ordering.tempted) { + call_stub_destroy (req->stub); + req->stub = NULL; + } /* else we would have call_resume()'ed */ - __wb_request_unref (request); - } + if (req->iobref) + iobref_unref (req->iobref); - if (op_ret == -1) { - file->op_ret = op_ret; - file->op_errno = op_errno; - } - fd = file->fd; - } - UNLOCK (&file->lock); - - ret = wb_process_queue (frame, file); - if (ret == -1) { - if (errno == ENOMEM) { - LOCK (&file->lock); - { - file->op_ret = -1; - file->op_errno = ENOMEM; - } - UNLOCK (&file->lock); - } + if (req->fd) + fd_unref (req->fd); - gf_log (this->name, GF_LOG_WARNING, - "request queue processing failed"); + GF_FREE (req); } - - /* safe place to do fd_unref */ - fd_unref (fd); - - STACK_DESTROY (frame->root); - out: - return 0; + return ret; } -ssize_t -wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds) +static int +wb_request_unref (wb_request_t *req) { - wb_request_t *dummy = NULL, *request = NULL; - wb_request_t *first_request = NULL, *next = NULL; - size_t total_count = 0, count = 0; - size_t copied = 0; - call_frame_t *sync_frame = NULL; - struct iobref *iobref = NULL; - wb_local_t *local = NULL; - struct iovec *vector = NULL; - ssize_t current_size = 0, bytes = 0; - size_t bytecount = 0; - wb_conf_t *conf = NULL; - fd_t *fd = NULL; - int32_t op_errno = -1; - - GF_VALIDATE_OR_GOTO_WITH_ERROR ((file ? 
file->this->name - : "write-behind"), frame, - out, bytes, -1); - GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, file, out, bytes, - -1); - GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, winds, out, bytes, - -1); - - conf = file->this->private; - list_for_each_entry (request, winds, winds) { - total_count += request->stub->args.writev.count; - if (total_count > 0) { - break; - } - } - - if (total_count == 0) { - gf_log (file->this->name, GF_LOG_TRACE, "no vectors are to be" - "synced"); - goto out; - } - - list_for_each_entry_safe (request, dummy, winds, winds) { - if (!vector) { - vector = GF_MALLOC (VECTORSIZE (MAX_VECTOR_COUNT), - gf_wb_mt_iovec); - if (vector == NULL) { - bytes = -1; - op_errno = ENOMEM; - goto out; - } - - iobref = iobref_new (); - if (iobref == NULL) { - bytes = -1; - op_errno = ENOMEM; - goto out; - } - - local = GF_CALLOC (1, sizeof (*local), - gf_wb_mt_wb_local_t); - if (local == NULL) { - bytes = -1; - op_errno = ENOMEM; - goto out; - } - - INIT_LIST_HEAD (&local->winds); - - first_request = request; - current_size = 0; - } + wb_inode_t *wb_inode = NULL; + int ret = -1; - count += request->stub->args.writev.count; - bytecount = VECTORSIZE (request->stub->args.writev.count); - memcpy (((char *)vector)+copied, - request->stub->args.writev.vector, - bytecount); - copied += bytecount; + GF_VALIDATE_OR_GOTO ("write-behind", req, out); - current_size += request->write_size; + wb_inode = req->wb_inode; - if (request->stub->args.writev.iobref) { - iobref_merge (iobref, - request->stub->args.writev.iobref); - } + LOCK (&wb_inode->lock); + { + ret = __wb_request_unref (req); + } + UNLOCK (&wb_inode->lock); - next = NULL; - if (request->winds.next != winds) { - next = list_entry (request->winds.next, - wb_request_t, winds); - } +out: + return ret; +} - list_del_init (&request->winds); - list_add_tail (&request->winds, &local->winds); - - if ((!next) - || ((count + next->stub->args.writev.count) - > MAX_VECTOR_COUNT) - || ((current_size + 
next->write_size) - > conf->aggregate_size)) { - - sync_frame = copy_frame (frame); - if (sync_frame == NULL) { - bytes = -1; - op_errno = ENOMEM; - goto out; - } - - sync_frame->local = local; - local->file = file; - - LOCK (&file->lock); - { - fd = file->fd; - } - UNLOCK (&file->lock); - - fd_ref (fd); - - bytes += current_size; - STACK_WIND (sync_frame, wb_sync_cbk, - FIRST_CHILD(sync_frame->this), - FIRST_CHILD(sync_frame->this)->fops->writev, - fd, vector, count, - first_request->stub->args.writev.off, - iobref); - iobref_unref (iobref); - GF_FREE (vector); - first_request = NULL; - iobref = NULL; - vector = NULL; - sync_frame = NULL; - local = NULL; - copied = count = 0; - } - } +static wb_request_t * +__wb_request_ref (wb_request_t *req) +{ + GF_VALIDATE_OR_GOTO ("write-behind", req, out); -out: - if (sync_frame != NULL) { - sync_frame->local = NULL; - STACK_DESTROY (sync_frame->root); + if (req->refcount < 0) { + gf_log ("wb-request", GF_LOG_WARNING, + "refcount(%d) is < 0", req->refcount); + req = NULL; + goto out; } - if (local != NULL) { - /* had we winded these requests, we would have unrefed - * in wb_sync_cbk. - */ - list_for_each_entry_safe (request, dummy, &local->winds, - winds) { - wb_request_unref (request); - } + req->refcount++; - GF_FREE (local); - local = NULL; - } +out: + return req; +} - if (iobref != NULL) { - iobref_unref (iobref); - } - if (vector != NULL) { - GF_FREE (vector); - } +wb_request_t * +wb_request_ref (wb_request_t *req) +{ + wb_inode_t *wb_inode = NULL; - if (bytes == -1) { - /* - * had we winded these requests, we would have unrefed - * in wb_sync_cbk. 
- */ - if (local) { - list_for_each_entry_safe (request, dummy, &local->winds, - winds) { - wb_request_unref (request); - } - } + GF_VALIDATE_OR_GOTO ("write-behind", req, out); - if (file != NULL) { - LOCK (&file->lock); - { - file->op_ret = -1; - file->op_errno = op_errno; - } - UNLOCK (&file->lock); - } + wb_inode = req->wb_inode; + LOCK (&wb_inode->lock); + { + req = __wb_request_ref (req); } + UNLOCK (&wb_inode->lock); - return bytes; +out: + return req; } -int32_t -wb_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *buf) +gf_boolean_t +wb_enqueue_common (wb_inode_t *wb_inode, call_stub_t *stub, int tempted) { - wb_local_t *local = NULL; - wb_request_t *request = NULL; - call_frame_t *process_frame = NULL; - wb_file_t *file = NULL; - int32_t ret = -1; - fd_t *fd = NULL; - - GF_ASSERT (frame); - GF_ASSERT (this); - - local = frame->local; - file = local->file; - - request = local->request; - if (request) { - process_frame = copy_frame (frame); - if (process_frame == NULL) { - op_ret = -1; - op_errno = ENOMEM; - } - } + wb_request_t *req = NULL; - STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf); + GF_VALIDATE_OR_GOTO ("write-behind", wb_inode, out); + GF_VALIDATE_OR_GOTO (wb_inode->this->name, stub, out); - if (request != NULL) { - wb_request_unref (request); - } + req = GF_CALLOC (1, sizeof (*req), gf_wb_mt_wb_request_t); + if (!req) + goto out; - if (process_frame != NULL) { - ret = wb_process_queue (process_frame, file); - if (ret == -1) { - if ((errno == ENOMEM) && (file != NULL)) { - LOCK (&file->lock); - { - file->op_ret = -1; - file->op_errno = ENOMEM; - } - UNLOCK (&file->lock); - } - - gf_log (this->name, GF_LOG_WARNING, - "request queue processing failed"); - } + INIT_LIST_HEAD (&req->all); + INIT_LIST_HEAD (&req->todo); + INIT_LIST_HEAD (&req->lie); + INIT_LIST_HEAD (&req->winds); + INIT_LIST_HEAD (&req->unwinds); + INIT_LIST_HEAD (&req->wip); - STACK_DESTROY (process_frame->root); 
- } + req->stub = stub; + req->wb_inode = wb_inode; + req->fop = stub->fop; + req->ordering.tempted = tempted; - if (file) { - LOCK (&file->lock); - { - fd = file->fd; - } - UNLOCK (&file->lock); + if (stub->fop == GF_FOP_WRITE) { + req->write_size = iov_length (stub->args.vector, + stub->args.count); - fd_unref (fd); - } + /* req->write_size can change as we collapse + small writes. But the window needs to grow + only by how much we acknowledge the app. so + copy the original size in orig_size for the + purpose of accounting. + */ + req->orig_size = req->write_size; - return 0; -} + /* Let's be optimistic that we can + lie about it + */ + req->op_ret = req->write_size; + req->op_errno = 0; + if (stub->args.fd->flags & O_APPEND) + req->ordering.append = 1; + } -static int32_t -wb_stat_helper (call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - GF_ASSERT (frame); - GF_ASSERT (this); + req->lk_owner = stub->frame->root->lk_owner; - STACK_WIND (frame, wb_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc); - return 0; -} + switch (stub->fop) { + case GF_FOP_WRITE: + req->ordering.off = stub->args.offset; + req->ordering.size = req->write_size; + req->fd = fd_ref (stub->args.fd); -int32_t -wb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - wb_file_t *file = NULL; - fd_t *iter_fd = NULL; - wb_local_t *local = NULL; - uint64_t tmp_file = 0; - call_stub_t *stub = NULL; - wb_request_t *request = NULL; - int32_t ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, loc, unwind); - - if (loc->inode) { - /* FIXME: fd_lookup extends life of fd till stat returns */ - iter_fd = fd_lookup (loc->inode, frame->root->pid); - if (iter_fd) { - if (!fd_ctx_get (iter_fd, this, &tmp_file)) { - file = (wb_file_t *)(long)tmp_file; - } else { - fd_unref (iter_fd); - iter_fd = NULL; - } - } - } + break; + case GF_FOP_READ: + req->ordering.off = stub->args.offset; + 
req->ordering.size = stub->args.size; - local = GF_CALLOC (1, sizeof (*local), gf_wb_mt_wb_local_t); - if (local == NULL) { - op_errno = ENOMEM; - goto unwind; - } + req->fd = fd_ref (stub->args.fd); - local->file = file; + break; + case GF_FOP_TRUNCATE: + req->ordering.off = stub->args.offset; + req->ordering.size = 0; /* till infinity */ + break; + case GF_FOP_FTRUNCATE: + req->ordering.off = stub->args.offset; + req->ordering.size = 0; /* till infinity */ - frame->local = local; + req->fd = fd_ref (stub->args.fd); - if (file) { - stub = fop_stat_stub (frame, wb_stat_helper, loc); - if (stub == NULL) { - op_errno = ENOMEM; - goto unwind; - } + break; + default: + break; + } - request = wb_enqueue (file, stub); - if (request == NULL) { - op_errno = ENOMEM; - goto unwind; - } + LOCK (&wb_inode->lock); + { + list_add_tail (&req->all, &wb_inode->all); - ret = wb_process_queue (frame, file); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "request queue processing failed"); - } - } else { - STACK_WIND (frame, wb_stat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc); - } + req->gen = wb_inode->gen; - return 0; -unwind: - STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL); + list_add_tail (&req->todo, &wb_inode->todo); + __wb_request_ref (req); /* for wind */ - if (stub) { - call_stub_destroy (stub); + if (req->ordering.tempted) { + list_add_tail (&req->lie, &wb_inode->temptation); + __wb_request_ref (req); /* for unwind */ + } } + UNLOCK (&wb_inode->lock); - if (iter_fd != NULL) { - fd_unref (iter_fd); - } +out: + if (!req) + return _gf_false; - return 0; + return _gf_true; } -int32_t -wb_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *buf) +gf_boolean_t +wb_enqueue (wb_inode_t *wb_inode, call_stub_t *stub) { - wb_local_t *local = NULL; - wb_request_t *request = NULL; - wb_file_t *file = NULL; - int32_t ret = -1; - - GF_ASSERT (frame); - - local = frame->local; - file = 
local->file; - - request = local->request; - if ((file != NULL) && (request != NULL)) { - wb_request_unref (request); - ret = wb_process_queue (frame, file); - if (ret == -1) { - if (errno == ENOMEM) { - op_ret = -1; - op_errno = ENOMEM; - } - - gf_log (this->name, GF_LOG_WARNING, - "request queue processing failed"); - } - } - - STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf); - - return 0; + return wb_enqueue_common (wb_inode, stub, 0); } -int32_t -wb_fstat_helper (call_frame_t *frame, xlator_t *this, fd_t *fd) +gf_boolean_t +wb_enqueue_tempted (wb_inode_t *wb_inode, call_stub_t *stub) { - GF_ASSERT (frame); - GF_ASSERT (this); - - STACK_WIND (frame, wb_fstat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd); - return 0; + return wb_enqueue_common (wb_inode, stub, 1); } -int32_t -wb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd) +wb_inode_t * +__wb_inode_create (xlator_t *this, inode_t *inode) { - wb_file_t *file = NULL; - wb_local_t *local = NULL; - uint64_t tmp_file = 0; - call_stub_t *stub = NULL; - wb_request_t *request = NULL; - int32_t ret = -1; - int op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if ((!IA_ISDIR (fd->inode->ia_type)) - && fd_ctx_get (fd, this, &tmp_file)) { - gf_log (this->name, GF_LOG_WARNING, - "write behind file pointer is" - " not stored in context of fd(%p), returning EBADFD", - fd); - op_errno = EBADFD; - goto unwind; - } - - file = (wb_file_t *)(long)tmp_file; - local = GF_CALLOC (1, sizeof (*local), - gf_wb_mt_wb_local_t); - if (local == NULL) { - op_errno = ENOMEM; - goto unwind; - } + wb_inode_t *wb_inode = NULL; + wb_conf_t *conf = NULL; - local->file = file; + GF_VALIDATE_OR_GOTO (this->name, inode, out); - frame->local = local; + conf = this->private; - if (file) { - stub = fop_fstat_stub (frame, wb_fstat_helper, fd); - if (stub == NULL) { - op_errno = ENOMEM; - goto unwind; - } + wb_inode 
= GF_CALLOC (1, sizeof (*wb_inode), gf_wb_mt_wb_inode_t); + if (!wb_inode) + goto out; - request = wb_enqueue (file, stub); - if (request == NULL) { - op_errno = ENOMEM; - goto unwind; - } + INIT_LIST_HEAD (&wb_inode->all); + INIT_LIST_HEAD (&wb_inode->todo); + INIT_LIST_HEAD (&wb_inode->liability); + INIT_LIST_HEAD (&wb_inode->temptation); + INIT_LIST_HEAD (&wb_inode->wip); - /* - FIXME:should the request queue be emptied in case of error? - */ - ret = wb_process_queue (frame, file); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "request queue processing failed"); - } - } else { - STACK_WIND (frame, wb_fstat_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fstat, fd); - } + wb_inode->this = this; - return 0; + wb_inode->window_conf = conf->window_size; -unwind: - STACK_UNWIND_STRICT (fstat, frame, -1, op_errno, NULL); + LOCK_INIT (&wb_inode->lock); - if (stub) { - call_stub_destroy (stub); - } + __inode_ctx_put (inode, this, (uint64_t)(unsigned long)wb_inode); - return 0; +out: + return wb_inode; } -int32_t -wb_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) +wb_inode_t * +wb_inode_create (xlator_t *this, inode_t *inode) { - wb_local_t *local = NULL; - wb_request_t *request = NULL; - wb_file_t *file = NULL; - call_frame_t *process_frame = NULL; - int32_t ret = -1; - fd_t *fd = NULL; - - GF_ASSERT (frame); - - local = frame->local; - file = local->file; - request = local->request; - - if ((request != NULL) && (file != NULL)) { - process_frame = copy_frame (frame); - if (process_frame == NULL) { - op_ret = -1; - op_errno = ENOMEM; - } - } + wb_inode_t *wb_inode = NULL; - STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, - postbuf); + GF_VALIDATE_OR_GOTO (this->name, inode, out); - if (request) { - wb_request_unref (request); - } - - if (process_frame != NULL) { - ret = wb_process_queue (process_frame, file); - if (ret == -1) { - if ((errno 
== ENOMEM) && (file != NULL)) { - LOCK (&file->lock); - { - file->op_ret = -1; - file->op_errno = ENOMEM; - } - UNLOCK (&file->lock); - } - - gf_log (this->name, GF_LOG_WARNING, - "request queue processing failed"); - } - - STACK_DESTROY (process_frame->root); - } - - if (file) { - LOCK (&file->lock); - { - fd = file->fd; - } - UNLOCK (&file->lock); - - fd_unref (fd); + LOCK (&inode->lock); + { + wb_inode = __wb_inode_ctx_get (this, inode); + if (!wb_inode) + wb_inode = __wb_inode_create (this, inode); } + UNLOCK (&inode->lock); - return 0; +out: + return wb_inode; } -static int32_t -wb_truncate_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, - off_t offset) +void +wb_inode_destroy (wb_inode_t *wb_inode) { - GF_ASSERT (frame); - GF_ASSERT (this); + GF_VALIDATE_OR_GOTO ("write-behind", wb_inode, out); - STACK_WIND (frame, wb_truncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, offset); - - return 0; + LOCK_DESTROY (&wb_inode->lock); + GF_FREE (wb_inode); +out: + return; } -int32_t -wb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) +void +__wb_fulfill_request (wb_request_t *req) { - wb_file_t *file = NULL; - fd_t *iter_fd = NULL; - wb_local_t *local = NULL; - uint64_t tmp_file = 0; - call_stub_t *stub = NULL; - wb_request_t *request = NULL; - int32_t ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, loc, unwind); - - if (loc->inode) { - /* - FIXME: fd_lookup extends life of fd till the execution of - truncate_cbk - */ - iter_fd = fd_lookup (loc->inode, frame->root->pid); - if (iter_fd) { - if (!fd_ctx_get (iter_fd, this, &tmp_file)){ - file = (wb_file_t *)(long)tmp_file; - } else { - fd_unref (iter_fd); - } - } - } + wb_inode_t *wb_inode = NULL; - local = GF_CALLOC (1, sizeof (*local), - gf_wb_mt_wb_local_t); - if (local == NULL) { - op_errno = ENOMEM; - goto unwind; - } + wb_inode = req->wb_inode; - local->file 
= file; + req->ordering.fulfilled = 1; + wb_inode->window_current -= req->total_size; + wb_inode->transit -= req->total_size; - frame->local = local; - if (file) { - stub = fop_truncate_stub (frame, wb_truncate_helper, loc, - offset); - if (stub == NULL) { - op_errno = ENOMEM; - goto unwind; - } + if (!req->ordering.lied) { + /* TODO: fail the req->frame with error if + necessary + */ + } - request = wb_enqueue (file, stub); - if (request == NULL) { - op_errno = ENOMEM; - goto unwind; - } - - ret = wb_process_queue (frame, file); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "request queue processing failed"); - } - } else { - STACK_WIND (frame, wb_truncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, offset); - } - - return 0; - -unwind: - STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL); - - if (stub) { - call_stub_destroy (stub); - } - - return 0; + __wb_request_unref (req); } -int32_t -wb_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) +void +wb_head_done (wb_request_t *head) { - wb_local_t *local = NULL; - wb_request_t *request = NULL; - wb_file_t *file = NULL; - int32_t ret = -1; - - GF_ASSERT (frame); - - local = frame->local; - file = local->file; - request = local->request; - - if ((request != NULL) && (file != NULL)) { - wb_request_unref (request); - ret = wb_process_queue (frame, file); - if (ret == -1) { - if (errno == ENOMEM) { - op_ret = -1; - op_errno = ENOMEM; - } - - gf_log (this->name, GF_LOG_WARNING, - "request queue processing failed"); - } - } - - STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, - postbuf); - - return 0; + wb_request_t *req = NULL; + wb_request_t *tmp = NULL; + wb_inode_t *wb_inode = NULL; + + wb_inode = head->wb_inode; + + LOCK (&wb_inode->lock); + { + list_for_each_entry_safe (req, tmp, &head->winds, winds) { + __wb_fulfill_request (req); + } + __wb_fulfill_request 
(head); + } + UNLOCK (&wb_inode->lock); } -static int32_t -wb_ftruncate_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - off_t offset) +void +wb_fulfill_err (wb_request_t *head, int op_errno) { - GF_ASSERT (frame); - GF_ASSERT (this); - - STACK_WIND (frame, wb_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset); - return 0; + wb_inode_t *wb_inode; + wb_request_t *req; + + wb_inode = head->wb_inode; + + /* for all future requests yet to arrive */ + fd_ctx_set (head->fd, THIS, op_errno); + + LOCK (&wb_inode->lock); + { + /* for all requests already arrived */ + list_for_each_entry (req, &wb_inode->all, all) { + if (req->fd != head->fd) + continue; + req->op_ret = -1; + req->op_errno = op_errno; + } + } + UNLOCK (&wb_inode->lock); } -int32_t -wb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) +int +wb_fulfill_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - wb_file_t *file = NULL; - wb_local_t *local = NULL; - uint64_t tmp_file = 0; - call_stub_t *stub = NULL; - wb_request_t *request = NULL; - int32_t ret = -1; - int op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if ((!IA_ISDIR (fd->inode->ia_type)) - && fd_ctx_get (fd, this, &tmp_file)) { - gf_log (this->name, GF_LOG_WARNING, - "write behind file pointer is" - " not stored in context of fd(%p), returning EBADFD", - fd); - op_errno = EBADFD; - goto unwind; - } + wb_inode_t *wb_inode = NULL; + wb_request_t *head = NULL; - file = (wb_file_t *)(long)tmp_file; + head = frame->local; + frame->local = NULL; - local = GF_CALLOC (1, sizeof (*local), gf_wb_mt_wb_local_t); - if (local == NULL) { - op_errno = ENOMEM; - goto unwind; - } - - local->file = file; + wb_inode = head->wb_inode; - frame->local = local; + if (op_ret == -1) { + wb_fulfill_err 
(head, op_errno); + } else if (op_ret < head->total_size) { + /* + * We've encountered a short write, for whatever reason. + * Set an EIO error for the next fop. This should be + * valid for writev or flush (close). + * + * TODO: Retry the write so we can potentially capture + * a real error condition (i.e., ENOSPC). + */ + wb_fulfill_err (head, EIO); + } - if (file) { - stub = fop_ftruncate_stub (frame, wb_ftruncate_helper, fd, - offset); - if (stub == NULL) { - op_errno = ENOMEM; - goto unwind; - } + wb_head_done (head); - request = wb_enqueue (file, stub); - if (request == NULL) { - op_errno = ENOMEM; - goto unwind; - } + wb_process_queue (wb_inode); - ret = wb_process_queue (frame, file); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "request queue processing failed"); - } - } else { - STACK_WIND (frame, wb_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset); - } + STACK_DESTROY (frame->root); return 0; +} -unwind: - STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno, NULL, NULL); - - if (stub) { - call_stub_destroy (stub); - } - return 0; -} +#define WB_IOV_LOAD(vec, cnt, req, head) do { \ + memcpy (&vec[cnt], req->stub->args.vector, \ + (req->stub->args.count * sizeof(vec[0]))); \ + cnt += req->stub->args.count; \ + head->total_size += req->write_size; \ + } while (0) -int32_t -wb_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *statpre, - struct iatt *statpost) +int +wb_fulfill_head (wb_inode_t *wb_inode, wb_request_t *head) { - wb_local_t *local = NULL; - wb_request_t *request = NULL; - call_frame_t *process_frame = NULL; - wb_file_t *file = NULL; - int32_t ret = -1; - fd_t *fd = NULL; - - GF_ASSERT (frame); - - local = frame->local; - file = local->file; - request = local->request; - - if (request) { - process_frame = copy_frame (frame); - if (process_frame == NULL) { - op_ret = -1; - op_errno = ENOMEM; - } - } + struct iovec 
vector[MAX_VECTOR_COUNT]; + int count = 0; + wb_request_t *req = NULL; + call_frame_t *frame = NULL; + gf_boolean_t fderr = _gf_false; + xlator_t *this = NULL; - STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre, - statpost); + this = THIS; - if (request) { - wb_request_unref (request); - } + /* make sure head->total_size is updated before we run into any + * errors + */ - if (request && (process_frame != NULL)) { - ret = wb_process_queue (process_frame, file); - if (ret == -1) { - if ((errno == ENOMEM) && (file != NULL)) { - LOCK (&file->lock); - { - file->op_ret = -1; - file->op_errno = ENOMEM; - } - UNLOCK (&file->lock); - } - - gf_log (this->name, GF_LOG_WARNING, - "request queue processing failed"); - } + WB_IOV_LOAD (vector, count, head, head); - STACK_DESTROY (process_frame->root); - } + list_for_each_entry (req, &head->winds, winds) { + WB_IOV_LOAD (vector, count, req, head); - if (file) { - LOCK (&file->lock); - { - fd = file->fd; - } - UNLOCK (&file->lock); + iobref_merge (head->stub->args.iobref, + req->stub->args.iobref); + } - fd_unref (fd); + if (wb_fd_err (head->fd, this, NULL)) { + fderr = _gf_true; + goto err; } - return 0; -} - - -static int32_t -wb_setattr_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid) -{ - GF_ASSERT (frame); - GF_ASSERT (this); - - STACK_WIND (frame, wb_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid); - return 0; -} + frame = create_frame (wb_inode->this, wb_inode->this->ctx->pool); + if (!frame) + goto err; + frame->root->lk_owner = head->lk_owner; + frame->local = head; -int32_t -wb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid) -{ - wb_file_t *file = NULL; - fd_t *iter_fd = NULL; - wb_local_t *local = NULL; - uint64_t tmp_file = 0; - call_stub_t *stub = NULL; - wb_request_t *request = NULL; - int32_t ret = -1, op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO 
(frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, loc, unwind); - - local = GF_CALLOC (1, sizeof (*local), gf_wb_mt_wb_local_t); - if (local == NULL) { - op_errno = ENOMEM; - goto unwind; - } + LOCK (&wb_inode->lock); + { + wb_inode->transit += head->total_size; + } + UNLOCK (&wb_inode->lock); - frame->local = local; + STACK_WIND (frame, wb_fulfill_cbk, FIRST_CHILD (frame->this), + FIRST_CHILD (frame->this)->fops->writev, + head->fd, vector, count, + head->stub->args.offset, + head->stub->args.flags, + head->stub->args.iobref, NULL); - if (!(valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME))) { - STACK_WIND (frame, wb_setattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setattr, loc, stbuf, - valid); - goto out; + return 0; +err: + if (!fderr) { + /* frame creation failure */ + fderr = ENOMEM; + wb_fulfill_err (head, fderr); } - if (loc->inode) { - /* - FIXME: fd_lookup extends life of fd till the execution - of wb_utimens_cbk - */ - iter_fd = fd_lookup (loc->inode, frame->root->pid); - if (iter_fd) { - if (!fd_ctx_get (iter_fd, this, &tmp_file)) { - file = (wb_file_t *)(long)tmp_file; - } else { - fd_unref (iter_fd); - } - } + wb_head_done (head); - } + return fderr; +} - local->file = file; - if (file) { - stub = fop_setattr_stub (frame, wb_setattr_helper, loc, stbuf, - valid); - if (stub == NULL) { - op_errno = ENOMEM; - goto unwind; - } +#define NEXT_HEAD(head, req) do { \ + if (head) \ + ret |= wb_fulfill_head (wb_inode, head); \ + head = req; \ + expected_offset = req->stub->args.offset + \ + req->write_size; \ + curr_aggregate = 0; \ + vector_count = 0; \ + } while (0) - request = wb_enqueue (file, stub); - if (request == NULL) { - op_errno = ENOMEM; - goto unwind; - } - ret = wb_process_queue (frame, file); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "request queue processing failed"); - } - } else { - STACK_WIND (frame, wb_setattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setattr, loc, stbuf, - 
valid); - } - - return 0; -unwind: - STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL); - - if (stub) { - call_stub_destroy (stub); - } -out: - return 0; +int +wb_fulfill (wb_inode_t *wb_inode, list_head_t *liabilities) +{ + wb_request_t *req = NULL; + wb_request_t *head = NULL; + wb_request_t *tmp = NULL; + wb_conf_t *conf = NULL; + off_t expected_offset = 0; + size_t curr_aggregate = 0; + size_t vector_count = 0; + int ret = 0; + + conf = wb_inode->this->private; + + list_for_each_entry_safe (req, tmp, liabilities, winds) { + list_del_init (&req->winds); + + if (!head) { + NEXT_HEAD (head, req); + continue; + } + + if (req->fd != head->fd) { + NEXT_HEAD (head, req); + continue; + } + + if (!is_same_lkowner (&req->lk_owner, &head->lk_owner)) { + NEXT_HEAD (head, req); + continue; + } + + if (expected_offset != req->stub->args.offset) { + NEXT_HEAD (head, req); + continue; + } + + if ((curr_aggregate + req->write_size) > conf->aggregate_size) { + NEXT_HEAD (head, req); + continue; + } + + if (vector_count + req->stub->args.count > + MAX_VECTOR_COUNT) { + NEXT_HEAD (head, req); + continue; + } + + list_add_tail (&req->winds, &head->winds); + curr_aggregate += req->write_size; + vector_count += req->stub->args.count; + } + + if (head) + ret |= wb_fulfill_head (wb_inode, head); + + return ret; } -int32_t -wb_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, fd_t *fd) +void +wb_do_unwinds (wb_inode_t *wb_inode, list_head_t *lies) { - int32_t wbflags = 0, flags = 0; - wb_file_t *file = NULL; - wb_conf_t *conf = NULL; - wb_local_t *local = NULL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, this, out, op_errno, - EINVAL); - - conf = this->private; - - local = frame->local; - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno, - EINVAL); + wb_request_t *req = NULL; + wb_request_t *tmp = NULL; + call_frame_t *frame = NULL; + struct iatt buf = {0, }; - flags = 
local->flags; - wbflags = local->wbflags; + list_for_each_entry_safe (req, tmp, lies, unwinds) { + frame = req->stub->frame; - if (op_ret != -1) { - file = wb_file_create (this, fd, flags); - if (file == NULL) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } + STACK_UNWIND_STRICT (writev, frame, req->op_ret, req->op_errno, + &buf, &buf, NULL); /* :O */ + req->stub->frame = NULL; - LOCK (&file->lock); - { - /* If O_DIRECT then, we disable chaching */ - if (((flags & O_DIRECT) == O_DIRECT) - || ((flags & O_ACCMODE) == O_RDONLY) - || (((flags & O_SYNC) == O_SYNC) - && conf->enable_O_SYNC == _gf_true)) { - file->window_conf = 0; - } - - if (wbflags & GF_OPEN_NOWB) { - file->disabled = 1; - } - } - UNLOCK (&file->lock); + list_del_init (&req->unwinds); + wb_request_unref (req); } -out: - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd); - return 0; + return; } -int32_t -wb_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, int32_t wbflags) +void +__wb_pick_unwinds (wb_inode_t *wb_inode, list_head_t *lies) { - wb_local_t *local = NULL; - int32_t op_errno = EINVAL; + wb_request_t *req = NULL; + wb_request_t *tmp = NULL; - local = GF_CALLOC (1, sizeof (*local), gf_wb_mt_wb_local_t); - if (local == NULL) { - op_errno = ENOMEM; - goto unwind; - } + list_for_each_entry_safe (req, tmp, &wb_inode->temptation, lie) { + if (!req->ordering.fulfilled && + wb_inode->window_current > wb_inode->window_conf) + continue; - local->flags = flags; - local->wbflags = wbflags; + list_del_init (&req->lie); + list_move_tail (&req->unwinds, lies); - frame->local = local; + wb_inode->window_current += req->orig_size; - STACK_WIND (frame, wb_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags); - return 0; + if (!req->ordering.fulfilled) { + /* burden increased */ + list_add_tail (&req->lie, &wb_inode->liability); -unwind: - STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL); - return 0; -} + req->ordering.lied = 1; + 
wb_inode->gen++; + } + } -int32_t -wb_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, - struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) -{ - long flags = 0; - wb_file_t *file = NULL; - wb_conf_t *conf = NULL; + return; +} - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, this, out, - op_errno, EINVAL); - conf = this->private; - if (op_ret != -1) { - if (frame->local) { - flags = (long) frame->local; +int +__wb_collapse_small_writes (wb_request_t *holder, wb_request_t *req) +{ + char *ptr = NULL; + struct iobuf *iobuf = NULL; + struct iobref *iobref = NULL; + int ret = -1; + ssize_t required_size = 0; + size_t holder_len = 0; + size_t req_len = 0; + + if (!holder->iobref) { + holder_len = iov_length (holder->stub->args.vector, + holder->stub->args.count); + req_len = iov_length (req->stub->args.vector, + req->stub->args.count); + + required_size = max ((THIS->ctx->page_size), + (holder_len + req_len)); + iobuf = iobuf_get2 (req->wb_inode->this->ctx->iobuf_pool, + required_size); + if (iobuf == NULL) { + goto out; } - file = wb_file_create (this, fd, flags); - if (file == NULL) { - op_ret = -1; - op_errno = ENOMEM; + iobref = iobref_new (); + if (iobref == NULL) { + iobuf_unref (iobuf); goto out; } - LOCK (&file->lock); - { - /* If O_DIRECT then, we disable chaching */ - if (frame->local) { - if (((flags & O_DIRECT) == O_DIRECT) - || ((flags & O_ACCMODE) == O_RDONLY) - || (((flags & O_SYNC) == O_SYNC) - && (conf->enable_O_SYNC == _gf_true))) { - file->window_conf = 0; - } - } + ret = iobref_add (iobref, iobuf); + if (ret != 0) { + iobuf_unref (iobuf); + iobref_unref (iobref); + gf_log (req->wb_inode->this->name, GF_LOG_WARNING, + "cannot add iobuf (%p) into iobref (%p)", + iobuf, iobref); + goto out; } - UNLOCK (&file->lock); - } - - frame->local = NULL; - -out: - STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, - preparent, 
postparent); - return 0; -} - - -int32_t -wb_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, fd_t *fd, dict_t *params) -{ - int32_t op_errno = EINVAL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - GF_VALIDATE_OR_GOTO (frame->this->name, loc, unwind); - - frame->local = (void *)(long)flags; - - STACK_WIND (frame, wb_create_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->create, - loc, flags, mode, fd, params); - return 0; - -unwind: - STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL, NULL, NULL, - NULL, NULL); - return 0; -} + iov_unload (iobuf->ptr, holder->stub->args.vector, + holder->stub->args.count); + holder->stub->args.vector[0].iov_base = iobuf->ptr; + holder->stub->args.count = 1; -/* Mark all the contiguous write requests for winding starting from head of - * request list. Stops marking at the first non-write request found. If - * file is opened with O_APPEND, make sure all the writes marked for winding - * will fit into a single write call to server. 
- */ -size_t -__wb_mark_wind_all (wb_file_t *file, list_head_t *list, list_head_t *winds) -{ - wb_request_t *request = NULL; - size_t size = 0; - char first_request = 1; - off_t offset_expected = 0; - wb_conf_t *conf = NULL; - int count = 0; - - GF_VALIDATE_OR_GOTO ("write-behind", file, out); - GF_VALIDATE_OR_GOTO (file->this->name, list, out); - GF_VALIDATE_OR_GOTO (file->this->name, winds, out); + iobref_unref (holder->stub->args.iobref); + holder->stub->args.iobref = iobref; - conf = file->this->private; - - list_for_each_entry (request, list, list) - { - if ((request->stub == NULL) - || (request->stub->fop != GF_FOP_WRITE)) { - break; - } + iobuf_unref (iobuf); - if (!request->flags.write_request.stack_wound) { - if (first_request) { - first_request = 0; - offset_expected - = request->stub->args.writev.off; - } - - if (request->stub->args.writev.off != offset_expected) { - break; - } - - if ((file->flags & O_APPEND) - && (((size + request->write_size) - > conf->aggregate_size) - || ((count + request->stub->args.writev.count) - > MAX_VECTOR_COUNT))) { - break; - } - - size += request->write_size; - offset_expected += request->write_size; - file->aggregate_current -= request->write_size; - count += request->stub->args.writev.count; - - request->flags.write_request.stack_wound = 1; - list_add_tail (&request->winds, winds); - } + holder->iobref = iobref_ref (iobref); } -out: - return size; -} - - -int32_t -__wb_can_wind (list_head_t *list, char *other_fop_in_queue, - char *non_contiguous_writes, char *incomplete_writes, - char *wind_all) -{ - wb_request_t *request = NULL; - char first_request = 1; - off_t offset_expected = 0; - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("write-behind", list, out); - - list_for_each_entry (request, list, list) - { - if ((request->stub == NULL) - || (request->stub->fop != GF_FOP_WRITE)) { - if (request->stub && other_fop_in_queue) { - *other_fop_in_queue = 1; - } - break; - } + ptr = holder->stub->args.vector[0].iov_base + 
holder->write_size; - if (request->flags.write_request.stack_wound - && !request->flags.write_request.got_reply - && (incomplete_writes != NULL)) { - *incomplete_writes = 1; - break; - } + iov_unload (ptr, req->stub->args.vector, + req->stub->args.count); - if (!request->flags.write_request.stack_wound) { - if (first_request) { - char flush = 0; - first_request = 0; - offset_expected - = request->stub->args.writev.off; - - flush = request->flags.write_request.flush_all; - if (wind_all != NULL) { - *wind_all = flush; - } - } - - if (offset_expected != request->stub->args.writev.off) { - if (non_contiguous_writes) { - *non_contiguous_writes = 1; - } - break; - } - - offset_expected += request->write_size; - } - } + holder->stub->args.vector[0].iov_len += req->write_size; + holder->write_size += req->write_size; + holder->ordering.size += req->write_size; ret = 0; out: @@ -1617,1059 +1002,796 @@ out: } -ssize_t -__wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_conf, - char enable_trickling_writes) +void +__wb_preprocess_winds (wb_inode_t *wb_inode) { - size_t size = 0; - char other_fop_in_queue = 0; - char incomplete_writes = 0; - char non_contiguous_writes = 0; - wb_request_t *request = NULL; - wb_file_t *file = NULL; - char wind_all = 0; - int32_t ret = 0; - - GF_VALIDATE_OR_GOTO ("write-behind", list, out); - GF_VALIDATE_OR_GOTO ("write-behind", winds, out); - - if (list_empty (list)) { - goto out; - } + off_t offset_expected = 0; + ssize_t space_left = 0; + wb_request_t *req = NULL; + wb_request_t *tmp = NULL; + wb_request_t *holder = NULL; + wb_conf_t *conf = NULL; + int ret = 0; + ssize_t page_size = 0; + + /* With asynchronous IO from a VM guest (as a file), there + can be two sequential writes happening in two regions + of the file. But individual (broken down) IO requests + can arrive interleaved. 
+ + TODO: cycle for each such sequence sifting + through the interleaved ops + */ + + page_size = wb_inode->this->ctx->page_size; + conf = wb_inode->this->private; + + list_for_each_entry_safe (req, tmp, &wb_inode->todo, todo) { + if (!req->ordering.tempted) { + if (holder) { + if (wb_requests_conflict (holder, req)) + /* do not hold on write if a + dependent write is in queue */ + holder->ordering.go = 1; + } + /* collapse only non-sync writes */ + continue; + } else if (!holder) { + /* holder is always a non-sync write */ + holder = req; + continue; + } + + offset_expected = holder->stub->args.offset + + holder->write_size; + + if (req->stub->args.offset != offset_expected) { + holder->ordering.go = 1; + holder = req; + continue; + } + + if (!is_same_lkowner (&req->lk_owner, &holder->lk_owner)) { + holder->ordering.go = 1; + holder = req; + continue; + } + + if (req->fd != holder->fd) { + holder->ordering.go = 1; + holder = req; + continue; + } - request = list_entry (list->next, typeof (*request), list); - file = request->file; + space_left = page_size - holder->write_size; - ret = __wb_can_wind (list, &other_fop_in_queue, - &non_contiguous_writes, &incomplete_writes, - &wind_all); - if (ret == -1) { - gf_log (file->this->name, GF_LOG_WARNING, - "cannot decide whether to wind or not"); - goto out; - } + if (space_left < req->write_size) { + holder->ordering.go = 1; + holder = req; + continue; + } - if (!incomplete_writes && ((enable_trickling_writes) - || (wind_all) || (non_contiguous_writes) - || (other_fop_in_queue) - || (file->aggregate_current - >= aggregate_conf))) { - size = __wb_mark_wind_all (file, list, winds); - } + ret = __wb_collapse_small_writes (holder, req); + if (ret) + continue; -out: - return size; -} + /* collapsed request is as good as wound + (from its p.o.v) + */ + list_del_init (&req->todo); + __wb_fulfill_request (req); + /* Only the last @holder in queue which -size_t -__wb_mark_unwind_till (list_head_t *list, list_head_t *unwinds, 
size_t size) -{ - size_t written_behind = 0; - wb_request_t *request = NULL; - wb_file_t *file = NULL; + - does not have any non-buffered-writes following it + - has not yet filled its capacity - if (list_empty (list)) { - goto out; + does not get its 'go' set, in anticipation of the arrival + of consecutive smaller writes. + */ } - request = list_entry (list->next, typeof (*request), list); - file = request->file; - - list_for_each_entry (request, list, list) - { - if ((request->stub == NULL) - || (request->stub->fop != GF_FOP_WRITE)) { - continue; - } + /* but if trickling writes are enabled, then do not hold back + writes if there are no outstanding requests + */ - if (written_behind <= size) { - if (!request->flags.write_request.write_behind) { - written_behind += request->write_size; - request->flags.write_request.write_behind = 1; - list_add_tail (&request->unwinds, unwinds); - - if (!request->flags.write_request.got_reply) { - file->window_current - += request->write_size; - } - } - } else { - break; - } - } + if (conf->trickling_writes && !wb_inode->transit && holder) + holder->ordering.go = 1; -out: - return written_behind; + return; } void -__wb_mark_unwinds (list_head_t *list, list_head_t *unwinds) +__wb_pick_winds (wb_inode_t *wb_inode, list_head_t *tasks, + list_head_t *liabilities) { - wb_request_t *request = NULL; - wb_file_t *file = NULL; - - GF_VALIDATE_OR_GOTO ("write-behind", list, out); - GF_VALIDATE_OR_GOTO ("write-behind", unwinds, out); - - if (list_empty (list)) { - goto out; - } - - request = list_entry (list->next, typeof (*request), list); - file = request->file; + wb_request_t *req = NULL; + wb_request_t *tmp = NULL; - if (file->window_current <= file->window_conf) { - __wb_mark_unwind_till (list, unwinds, - file->window_conf - - file->window_current); - } - -out: - return; -} + list_for_each_entry_safe (req, tmp, &wb_inode->todo, todo) { + if (wb_liability_has_conflict (wb_inode, req)) + continue; + if (req->ordering.tempted && 
!req->ordering.go) + /* wait some more */ + continue; -uint32_t -__wb_get_other_requests (list_head_t *list, list_head_t *other_requests) -{ - wb_request_t *request = NULL; - uint32_t count = 0; + if (req->stub->fop == GF_FOP_WRITE) { + if (wb_wip_has_conflict (wb_inode, req)) + continue; - GF_VALIDATE_OR_GOTO ("write-behind", list, out); - GF_VALIDATE_OR_GOTO ("write-behind", other_requests, out); + list_add_tail (&req->wip, &wb_inode->wip); - list_for_each_entry (request, list, list) { - if ((request->stub == NULL) - || (request->stub->fop == GF_FOP_WRITE)) { - break; - } + if (!req->ordering.tempted) + /* unrefed in wb_writev_cbk */ + req->stub->frame->local = + __wb_request_ref (req); + } - if (!request->flags.other_requests.marked_for_resume) { - request->flags.other_requests.marked_for_resume = 1; - list_add_tail (&request->other_requests, - other_requests); - count++; - } - } + list_del_init (&req->todo); -out: - return count; + if (req->ordering.tempted) + list_add_tail (&req->winds, liabilities); + else + list_add_tail (&req->winds, tasks); + } } -int32_t -wb_stack_unwind (list_head_t *unwinds) +void +wb_do_winds (wb_inode_t *wb_inode, list_head_t *tasks) { - struct iatt buf = {0,}; - wb_request_t *request = NULL, *dummy = NULL; - call_frame_t *frame = NULL; - wb_local_t *local = NULL; - int ret = 0, write_requests_removed = 0; + wb_request_t *req = NULL; + wb_request_t *tmp = NULL; - GF_VALIDATE_OR_GOTO ("write-behind", unwinds, out); + list_for_each_entry_safe (req, tmp, tasks, winds) { + list_del_init (&req->winds); - list_for_each_entry_safe (request, dummy, unwinds, unwinds) { - frame = request->stub->frame; - local = frame->local; + call_resume (req->stub); - STACK_UNWIND (frame, local->op_ret, local->op_errno, &buf, - &buf); - - ret = wb_request_unref (request); - if (ret == 0) { - write_requests_removed++; - } - } - -out: - return write_requests_removed; + wb_request_unref (req); + } } -int32_t -wb_resume_other_requests (call_frame_t *frame, 
wb_file_t *file, - list_head_t *other_requests) +void +wb_process_queue (wb_inode_t *wb_inode) { - int32_t ret = -1; - wb_request_t *request = NULL, *dummy = NULL; - int32_t fops_removed = 0; - char wind = 0; - call_stub_t *stub = NULL; + list_head_t tasks = {0, }; + list_head_t lies = {0, }; + list_head_t liabilities = {0, }; + int retry = 0; - GF_VALIDATE_OR_GOTO ((file ? file->this->name : "write-behind"), frame, - out); - GF_VALIDATE_OR_GOTO (frame->this->name, file, out); - GF_VALIDATE_OR_GOTO (frame->this->name, other_requests, out); + INIT_LIST_HEAD (&tasks); + INIT_LIST_HEAD (&lies); + INIT_LIST_HEAD (&liabilities); - if (list_empty (other_requests)) { - ret = 0; - goto out; - } + do { + LOCK (&wb_inode->lock); + { + __wb_preprocess_winds (wb_inode); - list_for_each_entry_safe (request, dummy, other_requests, - other_requests) { - wind = request->stub->wind; - stub = request->stub; + __wb_pick_winds (wb_inode, &tasks, &liabilities); - LOCK (&file->lock); - { - request->stub = NULL; - } - UNLOCK (&file->lock); + __wb_pick_unwinds (wb_inode, &lies); - if (!wind) { - wb_request_unref (request); - fops_removed++; } + UNLOCK (&wb_inode->lock); - call_resume (stub); - } + wb_do_unwinds (wb_inode, &lies); - ret = 0; + wb_do_winds (wb_inode, &tasks); - if (fops_removed > 0) { - ret = wb_process_queue (frame, file); - if (ret == -1) { - gf_log (frame->this->name, GF_LOG_WARNING, - "request queue processing failed"); - } - } + /* fd might've been marked bad due to previous errors. + * Since, caller of wb_process_queue might be the last fop on + * inode, make sure we keep processing request queue, till there + * are no requests left. 
+ */ + retry = wb_fulfill (wb_inode, &liabilities); + } while (retry); -out: - return ret; + return; } -int32_t -wb_do_ops (call_frame_t *frame, wb_file_t *file, list_head_t *winds, - list_head_t *unwinds, list_head_t *other_requests) +int +wb_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - int32_t ret = -1, write_requests_removed = 0; + wb_request_t *req = NULL; + wb_inode_t *wb_inode; - GF_VALIDATE_OR_GOTO ((file ? file->this->name : "write-behind"), - frame, out); - GF_VALIDATE_OR_GOTO (frame->this->name, file, out); + req = frame->local; + frame->local = NULL; + wb_inode = req->wb_inode; - ret = wb_stack_unwind (unwinds); + wb_request_unref (req); - write_requests_removed = ret; + /* requests could be pending while this was in progress */ + wb_process_queue(wb_inode); - ret = wb_sync (frame, file, winds); - if (ret == -1) { - gf_log (frame->this->name, GF_LOG_WARNING, - "syncing of write requests failed"); - } + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; +} - ret = wb_resume_other_requests (frame, file, other_requests); - if (ret == -1) { - gf_log (frame->this->name, GF_LOG_WARNING, - "cannot resume non-write requests in request queue"); - } - /* wb_stack_unwind does wb_request_unref after unwinding a write - * request. Hence if a write-request was just freed in wb_stack_unwind, - * we have to process request queue once again to unblock requests - * blocked on the writes just unwound. 
- */ - if (write_requests_removed > 0) { - ret = wb_process_queue (frame, file); - if (ret == -1) { - gf_log (frame->this->name, GF_LOG_WARNING, - "request queue processing failed"); - } - } - -out: - return ret; +int +wb_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iovec *vector, int32_t count, off_t offset, + uint32_t flags, struct iobref *iobref, dict_t *xdata) +{ + STACK_WIND (frame, wb_writev_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev, + fd, vector, count, offset, flags, iobref, xdata); + return 0; } -inline int -__wb_copy_into_holder (wb_request_t *holder, wb_request_t *request) +int +wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) { - char *ptr = NULL; - struct iobuf *iobuf = NULL; - struct iobref *iobref = NULL; - int ret = -1; + wb_inode_t *wb_inode = NULL; + wb_conf_t *conf = NULL; + gf_boolean_t wb_disabled = 0; + call_stub_t *stub = NULL; + int ret = -1; + int32_t op_errno = EINVAL; + int o_direct = O_DIRECT; - if (holder->flags.write_request.virgin) { - iobuf = iobuf_get (request->file->this->ctx->iobuf_pool); - if (iobuf == NULL) { - goto out; - } + conf = this->private; - iobref = iobref_new (); - if (iobref == NULL) { - iobuf_unref (iobuf); - goto out; - } + if (wb_fd_err (fd, this, &op_errno)) { + goto unwind; + } - ret = iobref_add (iobref, iobuf); - if (ret != 0) { - iobuf_unref (iobuf); - iobref_unref (iobref); - gf_log (request->file->this->name, GF_LOG_WARNING, - "cannot add iobuf (%p) into iobref (%p)", - iobuf, iobref); - goto out; - } + wb_inode = wb_inode_create (this, fd->inode); + if (!wb_inode) { + op_errno = ENOMEM; + goto unwind; + } - iov_unload (iobuf->ptr, holder->stub->args.writev.vector, - holder->stub->args.writev.count); - holder->stub->args.writev.vector[0].iov_base = iobuf->ptr; + if (!conf->strict_O_DIRECT) + o_direct = 0; - iobref_unref 
(holder->stub->args.writev.iobref); - holder->stub->args.writev.iobref = iobref; + if (fd->flags & (O_SYNC|O_DSYNC|o_direct)) + wb_disabled = 1; - iobuf_unref (iobuf); + if (flags & (O_SYNC|O_DSYNC|o_direct)) + wb_disabled = 1; - holder->flags.write_request.virgin = 0; + if (wb_disabled) + stub = fop_writev_stub (frame, wb_writev_helper, fd, vector, + count, offset, flags, iobref, xdata); + else + stub = fop_writev_stub (frame, NULL, fd, vector, count, offset, + flags, iobref, xdata); + if (!stub) { + op_errno = ENOMEM; + goto unwind; } - ptr = holder->stub->args.writev.vector[0].iov_base + holder->write_size; - - iov_unload (ptr, request->stub->args.writev.vector, - request->stub->args.writev.count); + if (wb_disabled) + ret = wb_enqueue (wb_inode, stub); + else + ret = wb_enqueue_tempted (wb_inode, stub); - holder->stub->args.writev.vector[0].iov_len += request->write_size; - holder->write_size += request->write_size; + if (!ret) { + op_errno = ENOMEM; + goto unwind; + } - request->flags.write_request.stack_wound = 1; - list_move_tail (&request->list, &request->file->passive_requests); - - ret = 0; -out: - return ret; -} + wb_process_queue (wb_inode); + return 0; -/* this procedure assumes that write requests have only one vector to write */ -void -__wb_collapse_write_bufs (list_head_t *requests, size_t page_size) -{ - off_t offset_expected = 0; - size_t space_left = 0; - wb_request_t *request = NULL, *tmp = NULL, *holder = NULL; - int ret = 0; +unwind: + STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, NULL); - GF_VALIDATE_OR_GOTO ("write-behind", requests, out); + if (stub) + call_stub_destroy (stub); - list_for_each_entry_safe (request, tmp, requests, list) { - if ((request->stub == NULL) - || (request->stub->fop != GF_FOP_WRITE) - || (request->flags.write_request.stack_wound)) { - holder = NULL; - continue; - } + return 0; +} - if (request->flags.write_request.write_behind) { - if (holder == NULL) { - holder = request; - continue; - } - - 
offset_expected = holder->stub->args.writev.off - + holder->write_size; - - if (request->stub->args.writev.off != offset_expected) { - holder = request; - continue; - } - - space_left = page_size - holder->write_size; - - if (space_left >= request->write_size) { - ret = __wb_copy_into_holder (holder, request); - if (ret != 0) { - break; - } - - __wb_request_unref (request); - } else { - holder = request; - } - } else { - break; - } - } -out: - return; +int +wb_readv_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, + xdata); + return 0; } -int32_t -wb_process_queue (call_frame_t *frame, wb_file_t *file) +int +wb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - list_head_t winds = {0, }, unwinds = {0, }, other_requests = {0, }; - size_t size = 0; - wb_conf_t *conf = NULL; - uint32_t count = 0; - int32_t ret = -1; - - INIT_LIST_HEAD (&winds); - INIT_LIST_HEAD (&unwinds); - INIT_LIST_HEAD (&other_requests); + wb_inode_t *wb_inode = NULL; + call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO ((file ? file->this->name : "write-behind"), frame, - out); - GF_VALIDATE_OR_GOTO (file->this->name, frame, out); + wb_inode = wb_inode_ctx_get (this, fd->inode); + if (!wb_inode) + goto noqueue; - conf = file->this->private; - GF_VALIDATE_OR_GOTO (file->this->name, conf, out); + stub = fop_readv_stub (frame, wb_readv_helper, fd, size, + offset, flags, xdata); + if (!stub) + goto unwind; - size = conf->aggregate_size; - LOCK (&file->lock); - { - /* - * make sure requests are marked for unwinding and adjacent - * continguous write buffers (each of size less than that of - * an iobuf) are packed properly so that iobufs are filled to - * their maximum capacity, before calling __wb_mark_winds. 
- */ - __wb_mark_unwinds (&file->request, &unwinds); + if (!wb_enqueue (wb_inode, stub)) + goto unwind; - __wb_collapse_write_bufs (&file->request, - file->this->ctx->page_size); + wb_process_queue (wb_inode); - count = __wb_get_other_requests (&file->request, - &other_requests); + return 0; - if (count == 0) { - __wb_mark_winds (&file->request, &winds, size, - conf->enable_trickling_writes); - } +unwind: + STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL, + NULL); + return 0; - } - UNLOCK (&file->lock); +noqueue: + STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, + xdata); + return 0; +} - ret = wb_do_ops (frame, file, &winds, &unwinds, &other_requests); -out: - return ret; +int +wb_flush_bg_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + STACK_DESTROY (frame->root); + return 0; } -int32_t -wb_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) +int +wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - GF_ASSERT (frame); + wb_conf_t *conf = NULL; + wb_inode_t *wb_inode = NULL; + call_frame_t *bg_frame = NULL; + int32_t op_errno = 0; + int op_ret = 0; + + conf = this->private; + + wb_inode = wb_inode_ctx_get (this, fd->inode); + if (!wb_inode) { + op_ret = -1; + op_errno = EINVAL; + goto unwind; + } + + if (wb_fd_err (fd, this, &op_errno)) { + op_ret = -1; + goto unwind; + } + + if (conf->flush_behind) + goto flushbehind; + + STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, fd, xdata); + return 0; + +flushbehind: + bg_frame = copy_frame (frame); + if (!bg_frame) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + STACK_WIND (bg_frame, wb_flush_bg_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, fd, xdata); + /* fall through */ +unwind: + 
STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, NULL); - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf); return 0; } -int32_t -wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, - int32_t count, off_t offset, struct iobref *iobref) +int +wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - wb_file_t *file = NULL; - char wb_disabled = 0; - call_frame_t *process_frame = NULL; - size_t size = 0; - uint64_t tmp_file = 0; - call_stub_t *stub = NULL; - wb_local_t *local = NULL; - wb_request_t *request = NULL; - int32_t ret = -1; - int32_t op_ret = -1, op_errno = EINVAL; + wb_inode_t *wb_inode = NULL; + call_stub_t *stub = NULL; - GF_ASSERT (frame); + wb_inode = wb_inode_ctx_get (this, fd->inode); + if (!wb_inode) + goto noqueue; - GF_VALIDATE_OR_GOTO_WITH_ERROR ("write-behind", this, unwind, op_errno, - EINVAL); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, fd, unwind, op_errno, - EINVAL); + stub = fop_flush_stub (frame, wb_flush_helper, fd, xdata); + if (!stub) + goto unwind; - if (vector != NULL) - size = iov_length (vector, count); + if (!wb_enqueue (wb_inode, stub)) + goto unwind; - if ((!IA_ISDIR (fd->inode->ia_type)) - && fd_ctx_get (fd, this, &tmp_file)) { - gf_log (this->name, GF_LOG_WARNING, - "write behind file pointer is" - " not stored in context of fd(%p), returning EBADFD", - fd); + wb_process_queue (wb_inode); - op_errno = EBADFD; - goto unwind; - } + return 0; - file = (wb_file_t *)(long)tmp_file; - if ((!IA_ISDIR (fd->inode->ia_type)) && (file == NULL)) { - gf_log (this->name, GF_LOG_WARNING, - "wb_file not found for fd %p", fd); - op_errno = EBADFD; - goto unwind; - } +unwind: + STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, NULL); - if (file != NULL) { - LOCK (&file->lock); - { - op_ret = file->op_ret; - op_errno = file->op_errno; - - file->op_ret = 0; - - if ((op_ret == 0) - && (file->disabled || file->disable_till)) { - if (size > file->disable_till) { - 
file->disable_till = 0; - } else { - file->disable_till -= size; - } - wb_disabled = 1; - } - } - UNLOCK (&file->lock); - } else { - wb_disabled = 1; - } + return 0; - if (op_ret == -1) { - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, NULL, - NULL); - return 0; - } +noqueue: + STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, fd, xdata); + return 0; +} - if (wb_disabled) { - STACK_WIND (frame, wb_writev_cbk, FIRST_CHILD (frame->this), - FIRST_CHILD (frame->this)->fops->writev, - fd, vector, count, offset, iobref); - return 0; - } - process_frame = copy_frame (frame); - if (process_frame == NULL) { - op_errno = ENOMEM; - goto unwind; - } - local = GF_CALLOC (1, sizeof (*local), - gf_wb_mt_wb_local_t); - if (local == NULL) { - op_errno = ENOMEM; - goto unwind; - } +int +wb_fsync_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t datasync, dict_t *xdata) +{ + STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); + return 0; +} - frame->local = local; - local->file = file; - stub = fop_writev_stub (frame, NULL, fd, vector, count, offset, iobref); - if (stub == NULL) { - op_errno = ENOMEM; - goto unwind; - } +int +wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, + dict_t *xdata) +{ + wb_inode_t *wb_inode = NULL; + call_stub_t *stub = NULL; + int32_t op_errno = EINVAL; - request = wb_enqueue (file, stub); - if (request == NULL) { - op_errno = ENOMEM; - goto unwind; - } + if (wb_fd_err (fd, this, &op_errno)) + goto unwind; - ret = wb_process_queue (process_frame, file); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "request queue processing failed"); - } + wb_inode = wb_inode_ctx_get (this, fd->inode); + if (!wb_inode) + goto noqueue; - STACK_DESTROY (process_frame->root); + stub = fop_fsync_stub (frame, wb_fsync_helper, fd, datasync, xdata); + if (!stub) + goto unwind; + + if (!wb_enqueue (wb_inode, stub)) + 
goto unwind; + + wb_process_queue (wb_inode); return 0; unwind: - STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL); + STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, NULL); - if (process_frame) { - STACK_DESTROY (process_frame->root); - } + return 0; + +noqueue: + STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); + return 0; +} - if (stub) { - call_stub_destroy (stub); - } +int +wb_stat_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); return 0; } -int32_t -wb_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iovec *vector, int32_t count, - struct iatt *stbuf, struct iobref *iobref) +int +wb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - wb_local_t *local = NULL; - wb_file_t *file = NULL; - wb_request_t *request = NULL; - int32_t ret = 0; + wb_inode_t *wb_inode = NULL; + call_stub_t *stub = NULL; - GF_ASSERT (frame); - local = frame->local; - file = local->file; - request = local->request; + wb_inode = wb_inode_ctx_get (this, loc->inode); + if (!wb_inode) + goto noqueue; - if ((request != NULL) && (file != NULL)) { - wb_request_unref (request); + stub = fop_stat_stub (frame, wb_stat_helper, loc, xdata); + if (!stub) + goto unwind; - ret = wb_process_queue (frame, file); - if (ret == -1) { - if (errno == ENOMEM) { - op_ret = -1; - op_errno = ENOMEM; - } + if (!wb_enqueue (wb_inode, stub)) + goto unwind; - gf_log (frame->this->name, GF_LOG_WARNING, - "request queue processing failed"); - } - } + wb_process_queue (wb_inode); - STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count, - stbuf, iobref); + return 0; +unwind: + STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM, NULL, NULL); + + if (stub) + call_stub_destroy (stub); return 0; + +noqueue: + STACK_WIND 
(frame, default_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); + return 0; } -static int32_t -wb_readv_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset) +int +wb_fstat_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - STACK_WIND (frame, wb_readv_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, fd, size, offset); - + STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); return 0; } -int32_t -wb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset) +int +wb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - wb_file_t *file = NULL; - wb_local_t *local = NULL; - uint64_t tmp_file = 0; - call_stub_t *stub = NULL; - int32_t ret = -1, op_errno = 0; - wb_request_t *request = NULL; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, this, unwind, - op_errno, EINVAL); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, fd, unwind, op_errno, - EINVAL); - - if ((!IA_ISDIR (fd->inode->ia_type)) - && fd_ctx_get (fd, this, &tmp_file)) { - gf_log (this->name, GF_LOG_WARNING, - "write behind file pointer is" - " not stored in context of fd(%p), returning EBADFD", - fd); - op_errno = EBADFD; - goto unwind; - } + wb_inode_t *wb_inode = NULL; + call_stub_t *stub = NULL; - file = (wb_file_t *)(long)tmp_file; - local = GF_CALLOC (1, sizeof (*local), gf_wb_mt_wb_local_t); - if (local == NULL) { - op_errno = ENOMEM; - goto unwind; - } + wb_inode = wb_inode_ctx_get (this, fd->inode); + if (!wb_inode) + goto noqueue; - local->file = file; + stub = fop_fstat_stub (frame, wb_fstat_helper, fd, xdata); + if (!stub) + goto unwind; - frame->local = local; - if (file) { - stub = fop_readv_stub (frame, wb_readv_helper, fd, size, - offset); - if (stub == NULL) { - op_errno = ENOMEM; - goto unwind; - } - - request = wb_enqueue (file, stub); - if (request == NULL) { - call_stub_destroy 
(stub); - op_errno = ENOMEM; - goto unwind; - } + if (!wb_enqueue (wb_inode, stub)) + goto unwind; - ret = wb_process_queue (frame, file); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "request queue processing failed"); - } - } else { - STACK_WIND (frame, wb_readv_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readv, - fd, size, offset); - } + wb_process_queue (wb_inode); return 0; unwind: - STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL); + STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, NULL, NULL); + + if (stub) + call_stub_destroy (stub); + return 0; + +noqueue: + STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); return 0; } -int32_t -wb_ffr_bg_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +int +wb_truncate_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, + off_t offset, dict_t *xdata) { - STACK_DESTROY (frame->root); + STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); return 0; } -int32_t -wb_ffr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno) +int +wb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - wb_local_t *local = NULL; - wb_file_t *file = NULL; + wb_inode_t *wb_inode = NULL; + call_stub_t *stub = NULL; - GF_ASSERT (frame); + wb_inode = wb_inode_create (this, loc->inode); + if (!wb_inode) + goto unwind; - local = frame->local; - file = local->file; + stub = fop_truncate_stub (frame, wb_truncate_helper, loc, + offset, xdata); + if (!stub) + goto unwind; - if (file != NULL) { - LOCK (&file->lock); - { - if (file->op_ret == -1) { - op_ret = file->op_ret; - op_errno = file->op_errno; + if (!wb_enqueue (wb_inode, stub)) + goto unwind; - file->op_ret = 0; - } - } - UNLOCK (&file->lock); - } + wb_process_queue (wb_inode); + + return 0; - STACK_UNWIND_STRICT 
(flush, frame, op_ret, op_errno); +unwind: + STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL); + + if (stub) + call_stub_destroy (stub); return 0; } -int32_t -wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd) +int +wb_ftruncate_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, + off_t offset, dict_t *xdata) { - wb_conf_t *conf = NULL; - wb_local_t *local = NULL; - wb_file_t *file = NULL; - call_frame_t *flush_frame = NULL, *process_frame = NULL; - int32_t op_ret = -1, op_errno = -1, ret = -1; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, this, unwind, - op_errno, EINVAL); - - conf = this->private; + STACK_WIND (frame, default_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; +} - local = frame->local; - file = local->file; - LOCK (&file->lock); - { - op_ret = file->op_ret; - op_errno = file->op_errno; - } - UNLOCK (&file->lock); - - if (local && local->request) { - process_frame = copy_frame (frame); - if (process_frame == NULL) { - op_errno = ENOMEM; - goto unwind; - } +int +wb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) +{ + wb_inode_t *wb_inode = NULL; + call_stub_t *stub = NULL; + int32_t op_errno = 0; - wb_request_unref (local->request); + wb_inode = wb_inode_create (this, fd->inode); + if (!wb_inode) { + op_errno = ENOMEM; + goto unwind; } - if (conf->flush_behind) { - flush_frame = copy_frame (frame); - if (flush_frame == NULL) { - op_errno = ENOMEM; - goto unwind; - } + if (wb_fd_err (fd, this, &op_errno)) + goto unwind; - STACK_WIND (flush_frame, wb_ffr_bg_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, fd); - } else { - STACK_WIND (frame, wb_ffr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, fd); + stub = fop_ftruncate_stub (frame, wb_ftruncate_helper, fd, + offset, xdata); + if (!stub) { + op_errno = ENOMEM; + goto unwind; } - if (process_frame != NULL) { - ret = 
wb_process_queue (process_frame, file); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "request queue processing failed"); - } - - STACK_DESTROY (process_frame->root); + if (!wb_enqueue (wb_inode, stub)) { + op_errno = ENOMEM; + goto unwind; } - if (conf->flush_behind) { - STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno); - } + wb_process_queue (wb_inode); return 0; unwind: - STACK_UNWIND_STRICT (flush, frame, -1, op_errno); + STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno, NULL, NULL, NULL); + + if (stub) + call_stub_destroy (stub); return 0; } -int32_t -wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) +int +wb_setattr_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - wb_conf_t *conf = NULL; - wb_file_t *file = NULL; - wb_local_t *local = NULL; - uint64_t tmp_file = 0; - call_stub_t *stub = NULL; - call_frame_t *flush_frame = NULL; - wb_request_t *request = NULL; - int32_t ret = 0, op_errno = 0; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, this, unwind, - op_errno, EINVAL); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, fd, unwind, op_errno, - EINVAL); - - conf = this->private; - - if ((!IA_ISDIR (fd->inode->ia_type)) - && fd_ctx_get (fd, this, &tmp_file)) { - gf_log (this->name, GF_LOG_WARNING, - "write behind file pointer is" - " not stored in context of fd(%p), returning EBADFD", - fd); - - op_errno = EBADFD; - goto unwind; - } + STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); + return 0; +} - file = (wb_file_t *)(long)tmp_file; - if (file != NULL) { - local = GF_CALLOC (1, sizeof (*local), gf_wb_mt_wb_local_t); - if (local == NULL) { - op_errno = ENOMEM; - goto unwind; - } +int +wb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + wb_inode_t *wb_inode = NULL; + call_stub_t *stub = NULL; - local->file = 
file; + wb_inode = wb_inode_ctx_get (this, loc->inode); + if (!wb_inode) + goto noqueue; - frame->local = local; + stub = fop_setattr_stub (frame, wb_setattr_helper, loc, stbuf, + valid, xdata); + if (!stub) + goto unwind; - stub = fop_flush_stub (frame, wb_flush_helper, fd); - if (stub == NULL) { - op_errno = ENOMEM; - goto unwind; - } + if (!wb_enqueue (wb_inode, stub)) + goto unwind; - request = wb_enqueue (file, stub); - if (request == NULL) { - call_stub_destroy (stub); - op_errno = ENOMEM; - goto unwind; - } - - ret = wb_process_queue (frame, file); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "request queue processing failed"); - } - } else { - if (conf->flush_behind) { - flush_frame = copy_frame (frame); - if (flush_frame == NULL) { - op_errno = ENOMEM; - goto unwind; - } - - STACK_UNWIND_STRICT (flush, frame, 0, 0); - - STACK_WIND (flush_frame, wb_ffr_bg_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, fd); - } else { - STACK_WIND (frame, wb_ffr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, fd); - } - } + wb_process_queue (wb_inode); return 0; - unwind: - STACK_UNWIND_STRICT (flush, frame, -1, op_errno); + STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL, NULL); + + if (stub) + call_stub_destroy (stub); + return 0; + +noqueue: + STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); return 0; } -static int32_t -wb_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf) +int +wb_fsetattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - wb_local_t *local = NULL; - wb_file_t *file = NULL; - wb_request_t *request = NULL; - int32_t ret = -1; + STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); + return 0; +} - GF_ASSERT (frame); - 
local = frame->local; - file = local->file; - request = local->request; +int +wb_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + wb_inode_t *wb_inode = NULL; + call_stub_t *stub = NULL; - if (file != NULL) { - LOCK (&file->lock); - { - if (file->op_ret == -1) { - op_ret = file->op_ret; - op_errno = file->op_errno; + wb_inode = wb_inode_ctx_get (this, fd->inode); + if (!wb_inode) + goto noqueue; - file->op_ret = 0; - } - } - UNLOCK (&file->lock); - - if (request) { - wb_request_unref (request); - ret = wb_process_queue (frame, file); - if (ret == -1) { - if (errno == ENOMEM) { - op_ret = -1; - op_errno = ENOMEM; - } - - gf_log (this->name, GF_LOG_WARNING, - "request queue processing failed"); - } - } + stub = fop_fsetattr_stub (frame, wb_fsetattr_helper, fd, stbuf, + valid, xdata); + if (!stub) + goto unwind; - } + if (!wb_enqueue (wb_inode, stub)) + goto unwind; - STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf); + wb_process_queue (wb_inode); return 0; -} +unwind: + STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL); + if (stub) + call_stub_destroy (stub); + return 0; -static int32_t -wb_fsync_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - int32_t datasync) -{ - STACK_WIND (frame, wb_fsync_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsync, fd, datasync); +noqueue: + STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata); return 0; } -int32_t -wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync) +int +wb_forget (xlator_t *this, inode_t *inode) { - wb_file_t *file = NULL; - wb_local_t *local = NULL; - uint64_t tmp_file = 0; - call_stub_t *stub = NULL; - wb_request_t *request = NULL; - int32_t ret = -1, op_errno = 0; - - GF_ASSERT (frame); - GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, this, unwind, - op_errno, EINVAL); - GF_VALIDATE_OR_GOTO_WITH_ERROR 
(frame->this->name, fd, unwind, - op_errno, EINVAL); - - if ((!IA_ISDIR (fd->inode->ia_type)) - && fd_ctx_get (fd, this, &tmp_file)) { - gf_log (this->name, GF_LOG_WARNING, - "write behind file pointer is" - " not stored in context of fd(%p), returning EBADFD", - fd); - op_errno = EBADFD; - goto unwind; - } - - file = (wb_file_t *)(long)tmp_file; - - local = GF_CALLOC (1, sizeof (*local), gf_wb_mt_wb_local_t); - if (local == NULL) { - op_errno = ENOMEM; - goto unwind; - } + uint64_t tmp = 0; + wb_inode_t *wb_inode = NULL; - local->file = file; + inode_ctx_del (inode, this, &tmp); - frame->local = local; + wb_inode = (wb_inode_t *)(long)tmp; - if (file) { - stub = fop_fsync_stub (frame, wb_fsync_helper, fd, datasync); - if (stub == NULL) { - op_errno = ENOMEM; - goto unwind; - } + if (!wb_inode) + return 0; - request = wb_enqueue (file, stub); - if (request == NULL) { - op_errno = ENOMEM; - call_stub_destroy (stub); - goto unwind; - } + GF_ASSERT (list_empty (&wb_inode->todo)); + GF_ASSERT (list_empty (&wb_inode->liability)); + GF_ASSERT (list_empty (&wb_inode->temptation)); - ret = wb_process_queue (frame, file); - if (ret == -1) { - gf_log (this->name, GF_LOG_WARNING, - "request queue processing failed"); - } - } else { - STACK_WIND (frame, wb_fsync_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsync, fd, datasync); - } + GF_FREE (wb_inode); return 0; - -unwind: - STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL); - return 0; } -int32_t +int wb_release (xlator_t *this, fd_t *fd) { - uint64_t file_ptr = 0; - wb_file_t *file = NULL; - - GF_VALIDATE_OR_GOTO ("write-behind", this, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - - fd_ctx_get (fd, this, &file_ptr); - file = (wb_file_t *) (long) file_ptr; + uint64_t tmp = 0; - if (file != NULL) { - LOCK (&file->lock); - { - GF_ASSERT (list_empty (&file->request)); - } - UNLOCK (&file->lock); - - wb_file_destroy (file); - } + fd_ctx_del (fd, this, &tmp); -out: return 0; } @@ -2678,7 +1800,6 @@ int 
wb_priv_dump (xlator_t *this) { wb_conf_t *conf = NULL; - char key[GF_DUMP_MAX_BUF_LEN] = {0, }; char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; int ret = -1; @@ -2692,18 +1813,10 @@ wb_priv_dump (xlator_t *this) gf_proc_dump_add_section (key_prefix); - gf_proc_dump_build_key (key, key_prefix, "aggregate_size"); - gf_proc_dump_write (key, "%d", conf->aggregate_size); - gf_proc_dump_build_key (key, key_prefix, "window_size"); - gf_proc_dump_write (key, "%d", conf->window_size); - gf_proc_dump_build_key (key, key_prefix, "disable_till"); - gf_proc_dump_write (key, "%d", conf->disable_till); - gf_proc_dump_build_key (key, key_prefix, "enable_O_SYNC"); - gf_proc_dump_write (key, "%d", conf->enable_O_SYNC); - gf_proc_dump_build_key (key, key_prefix, "flush_behind"); - gf_proc_dump_write (key, "%d", conf->flush_behind); - gf_proc_dump_build_key (key, key_prefix, "enable_trickling_writes"); - gf_proc_dump_write (key, "%d", conf->enable_trickling_writes); + gf_proc_dump_write ("aggregate_size", "%d", conf->aggregate_size); + gf_proc_dump_write ("window_size", "%d", conf->window_size); + gf_proc_dump_write ("flush_behind", "%d", conf->flush_behind); + gf_proc_dump_write ("trickling_writes", "%d", conf->trickling_writes); ret = 0; out: @@ -2712,155 +1825,111 @@ out: void -__wb_dump_requests (struct list_head *head, char *prefix, char passive) +__wb_dump_requests (struct list_head *head, char *prefix) { char key[GF_DUMP_MAX_BUF_LEN] = {0, }; char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }, flag = 0; - wb_request_t *request = NULL; + wb_request_t *req = NULL; - list_for_each_entry (request, head, list) { - gf_proc_dump_build_key (key, prefix, passive ? 
"passive-request" - : "active-request"); + list_for_each_entry (req, head, all) { gf_proc_dump_build_key (key_prefix, key, - gf_fop_list[request->fop]); + (char *)gf_fop_list[req->fop]); gf_proc_dump_add_section(key_prefix); - gf_proc_dump_build_key (key, key_prefix, "request-ptr"); - gf_proc_dump_write (key, "%p", request); - - gf_proc_dump_build_key (key, key_prefix, "refcount"); - gf_proc_dump_write (key, "%d", request->refcount); - - if (request->fop == GF_FOP_WRITE) { - flag = request->flags.write_request.stack_wound; - gf_proc_dump_build_key (key, key_prefix, "stack_wound"); - gf_proc_dump_write (key, "%d", flag); - - gf_proc_dump_build_key (key, key_prefix, "size"); - gf_proc_dump_write (key, "%"GF_PRI_SIZET, - request->write_size); - - gf_proc_dump_build_key (key, key_prefix, "offset"); - gf_proc_dump_write (key, "%"PRId64, - request->stub->args.writev.off); - - flag = request->flags.write_request.write_behind; - gf_proc_dump_build_key (key, key_prefix, - "write_behind"); - gf_proc_dump_write (key, "%d", flag); - - flag = request->flags.write_request.got_reply; - gf_proc_dump_build_key (key, key_prefix, "got_reply"); - gf_proc_dump_write (key, "%d", flag); - - flag = request->flags.write_request.virgin; - gf_proc_dump_build_key (key, key_prefix, "virgin"); - gf_proc_dump_write (key, "%d", flag); - - flag = request->flags.write_request.flush_all; - gf_proc_dump_build_key (key, key_prefix, "flush_all"); - gf_proc_dump_write (key, "%d", flag); - } else { - flag = request->flags.other_requests.marked_for_resume; - gf_proc_dump_build_key (key, key_prefix, - "marked_for_resume"); - gf_proc_dump_write (key, "%d", flag); + gf_proc_dump_write ("request-ptr", "%p", req); + + gf_proc_dump_write ("refcount", "%d", req->refcount); + + if (list_empty (&req->todo)) + gf_proc_dump_write ("wound", "yes"); + else + gf_proc_dump_write ("wound", "no"); + + if (req->fop == GF_FOP_WRITE) { + gf_proc_dump_write ("size", "%"GF_PRI_SIZET, + req->write_size); + + gf_proc_dump_write 
("offset", "%"PRId64, + req->stub->args.offset); + + flag = req->ordering.lied; + gf_proc_dump_write ("lied", "%d", flag); + + flag = req->ordering.append; + gf_proc_dump_write ("append", "%d", flag); + + flag = req->ordering.fulfilled; + gf_proc_dump_write ("fulfilled", "%d", flag); + + flag = req->ordering.go; + gf_proc_dump_write ("go", "%d", flag); } } } int -wb_file_dump (xlator_t *this, fd_t *fd) +wb_inode_dump (xlator_t *this, inode_t *inode) { - wb_file_t *file = NULL; - uint64_t tmp_file = 0; - int32_t ret = -1; - char *path = NULL; - char key[GF_DUMP_MAX_BUF_LEN] = {0, }; + wb_inode_t *wb_inode = NULL; + int32_t ret = -1; + char *path = NULL; char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; + char uuid_str[64] = {0,}; - if ((fd == NULL) || (this == NULL)) { - ret = 0; - goto out; - } - - ret = fd_ctx_get (fd, this, &tmp_file); - if (ret == -1) { + if ((inode == NULL) || (this == NULL)) { ret = 0; goto out; } - file = (wb_file_t *)(long)tmp_file; - if (file == NULL) { + wb_inode = wb_inode_ctx_get (this, inode); + if (wb_inode == NULL) { ret = 0; goto out; } gf_proc_dump_build_key (key_prefix, "xlator.performance.write-behind", - "file"); + "wb_inode"); gf_proc_dump_add_section (key_prefix); - __inode_path (fd->inode, NULL, &path); + __inode_path (inode, NULL, &path); if (path != NULL) { - gf_proc_dump_build_key (key, key_prefix, "path"); - gf_proc_dump_write (key, "%s", path); + gf_proc_dump_write ("path", "%s", path); GF_FREE (path); } - gf_proc_dump_build_key (key, key_prefix, "fd"); - gf_proc_dump_write (key, "%p", fd); - - gf_proc_dump_build_key (key, key_prefix, "disabled"); - gf_proc_dump_write (key, "%d", file->disabled); + gf_proc_dump_write ("inode", "%p", inode); - gf_proc_dump_build_key (key, key_prefix, "disable_till"); - gf_proc_dump_write (key, "%lu", file->disable_till); + gf_proc_dump_write ("window_conf", "%"GF_PRI_SIZET, + wb_inode->window_conf); - gf_proc_dump_build_key (key, key_prefix, "window_conf"); - gf_proc_dump_write (key, 
"%"GF_PRI_SIZET, file->window_conf); + gf_proc_dump_write ("window_current", "%"GF_PRI_SIZET, + wb_inode->window_current); - gf_proc_dump_build_key (key, key_prefix, "window_current"); - gf_proc_dump_write (key, "%"GF_PRI_SIZET, file->window_current); - gf_proc_dump_build_key (key, key_prefix, "flags"); - gf_proc_dump_write (key, "%s", (file->flags & O_APPEND) ? "O_APPEND" - : "!O_APPEND"); - - gf_proc_dump_build_key (key, key_prefix, "aggregate_current"); - gf_proc_dump_write (key, "%"GF_PRI_SIZET, file->aggregate_current); - - gf_proc_dump_build_key (key, key_prefix, "refcount"); - gf_proc_dump_write (key, "%d", file->refcount); - - gf_proc_dump_build_key (key, key_prefix, "op_ret"); - gf_proc_dump_write (key, "%d", file->op_ret); - - gf_proc_dump_build_key (key, key_prefix, "op_errno"); - gf_proc_dump_write (key, "%d", file->op_errno); - - LOCK (&file->lock); + ret = TRY_LOCK (&wb_inode->lock); + if (!ret) { - if (!list_empty (&file->request)) { - __wb_dump_requests (&file->request, key_prefix, 0); - } - - if (!list_empty (&file->passive_requests)) { - __wb_dump_requests (&file->passive_requests, key_prefix, - 1); + if (!list_empty (&wb_inode->all)) { + __wb_dump_requests (&wb_inode->all, key_prefix); } + UNLOCK (&wb_inode->lock); } - UNLOCK (&file->lock); + if (ret && wb_inode) + gf_proc_dump_write ("Unable to dump the inode information", + "(Lock acquisition failed) %p (gfid: %s)", + wb_inode, + uuid_utoa_r (inode->gfid, uuid_str)); ret = 0; out: return ret; } -int32_t +int mem_acct_init (xlator_t *this) { int ret = -1; @@ -2894,6 +1963,14 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("flush-behind", conf->flush_behind, options, bool, out); + GF_OPTION_RECONF ("trickling-writes", conf->trickling_writes, options, + bool, out); + + GF_OPTION_RECONF ("strict-O_DIRECT", conf->strict_O_DIRECT, options, + bool, out); + + GF_OPTION_RECONF ("strict-write-ordering", conf->strict_write_ordering, + options, bool, out); ret = 0; out: return ret; @@ 
-2916,7 +1993,7 @@ init (xlator_t *this) if (this->parents == NULL) { gf_log (this->name, GF_LOG_WARNING, - "dangling volume. check volfile"); + "dangling volume. check volfilex"); } conf = GF_CALLOC (1, sizeof (*conf), gf_wb_mt_wb_conf_t); @@ -2924,14 +2001,9 @@ init (xlator_t *this) goto out; } - GF_OPTION_INIT("enable-O_SYNC", conf->enable_O_SYNC, bool, out); - /* configure 'options aggregate-size <size>' */ conf->aggregate_size = WB_AGGREGATE_SIZE; - GF_OPTION_INIT("disable-for-first-nbytes", conf->disable_till, size, - out); - /* configure 'option window-size <size>' */ GF_OPTION_INIT ("cache-size", conf->window_size, size, out); @@ -2954,16 +2026,19 @@ init (xlator_t *this) /* configure 'option flush-behind <on/off>' */ GF_OPTION_INIT ("flush-behind", conf->flush_behind, bool, out); - GF_OPTION_INIT ("enable-trickling-writes", conf->enable_trickling_writes, - bool, out); + GF_OPTION_INIT ("trickling-writes", conf->trickling_writes, bool, out); + + GF_OPTION_INIT ("strict-O_DIRECT", conf->strict_O_DIRECT, bool, out); + + GF_OPTION_INIT ("strict-write-ordering", conf->strict_write_ordering, + bool, out); this->private = conf; ret = 0; out: if (ret) { - if (conf) - GF_FREE (conf); + GF_FREE (conf); } return ret; } @@ -2991,8 +2066,6 @@ out: struct xlator_fops fops = { .writev = wb_writev, - .open = wb_open, - .create = wb_create, .readv = wb_readv, .flush = wb_flush, .fsync = wb_fsync, @@ -3001,17 +2074,22 @@ struct xlator_fops fops = { .truncate = wb_truncate, .ftruncate = wb_ftruncate, .setattr = wb_setattr, + .fsetattr = wb_fsetattr, }; + struct xlator_cbks cbks = { + .forget = wb_forget, .release = wb_release }; + struct xlator_dumpops dumpops = { .priv = wb_priv_dump, - .fdctx = wb_file_dump, + .inodectx = wb_inode_dump, }; + struct volume_options options[] = { { .key = {"flush-behind"}, .type = GF_OPTION_TYPE_BOOL, @@ -3020,29 +2098,31 @@ struct volume_options options[] = { "translator to perform flush in background, by " "returning success (or any errors, 
if any of " "previous writes were failed) to application even " - "before flush is sent to backend filesystem. " + "before flush FOP is sent to backend filesystem. " }, { .key = {"cache-size", "window-size"}, .type = GF_OPTION_TYPE_SIZET, .min = 512 * GF_UNIT_KB, .max = 1 * GF_UNIT_GB, .default_value = "1MB", - .description = "Size of the per-file write-behind buffer. " - - }, - { .key = {"disable-for-first-nbytes"}, - .type = GF_OPTION_TYPE_SIZET, - .min = 0, - .max = 1 * GF_UNIT_MB, - .default_value = "0", + .description = "Size of the write-behind buffer for a single file " + "(inode)." }, - { .key = {"enable-O_SYNC"}, + { .key = {"trickling-writes"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "on", }, - { .key = {"enable-trickling-writes"}, + { .key = {"strict-O_DIRECT"}, .type = GF_OPTION_TYPE_BOOL, - .default_value = "on", + .default_value = "off", + .description = "This option when set to off, ignores the " + "O_DIRECT flag." + }, + { .key = {"strict-write-ordering"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "Do not let later writes overtake earlier writes even " + "if they do not overlap", }, { .key = {NULL} }, }; |
