diff options
Diffstat (limited to 'xlators/performance/io-cache/src')
| -rw-r--r-- | xlators/performance/io-cache/src/Makefile.am | 8 | ||||
| -rw-r--r-- | xlators/performance/io-cache/src/io-cache.c | 894 | ||||
| -rw-r--r-- | xlators/performance/io-cache/src/io-cache.h | 34 | ||||
| -rw-r--r-- | xlators/performance/io-cache/src/ioc-inode.c | 22 | ||||
| -rw-r--r-- | xlators/performance/io-cache/src/ioc-mem-types.h | 20 | ||||
| -rw-r--r-- | xlators/performance/io-cache/src/page.c | 74 |
6 files changed, 499 insertions, 553 deletions
diff --git a/xlators/performance/io-cache/src/Makefile.am b/xlators/performance/io-cache/src/Makefile.am index 6dd270e8f..155be9988 100644 --- a/xlators/performance/io-cache/src/Makefile.am +++ b/xlators/performance/io-cache/src/Makefile.am @@ -1,14 +1,16 @@ xlator_LTLIBRARIES = io-cache.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance -io_cache_la_LDFLAGS = -module -avoidversion +io_cache_la_LDFLAGS = -module -avoid-version io_cache_la_SOURCES = io-cache.c page.c ioc-inode.c io_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la noinst_HEADERS = io-cache.h ioc-mem-types.h -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ - -I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/rbtree -shared -nostartfiles $(GF_CFLAGS) +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(CONTRIBDIR)/rbtree + +AM_CFLAGS = -Wall $(GF_CFLAGS) CLEANFILES = diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c index a8c66ddf3..201777b38 100644 --- a/xlators/performance/io-cache/src/io-cache.c +++ b/xlators/performance/io-cache/src/io-cache.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #ifndef _CONFIG_H @@ -37,13 +28,10 @@ int ioc_log2_page_size; uint32_t ioc_get_priority (ioc_table_t *table, const char *path); -uint32_t -ioc_get_priority (ioc_table_t *table, const char *path); - struct volume_options options[]; -inline uint32_t +static inline uint32_t ioc_hashfn (void *data, int len) { off_t offset; @@ -53,7 +41,7 @@ ioc_hashfn (void *data, int len) return (offset >> ioc_log2_page_size); } -inline ioc_inode_t * +static inline ioc_inode_t * ioc_inode_reupdate (ioc_inode_t *ioc_inode) { ioc_table_t *table = NULL; @@ -66,7 +54,7 @@ ioc_inode_reupdate (ioc_inode_t *ioc_inode) return ioc_inode; } -inline ioc_inode_t * +static inline ioc_inode_t * ioc_get_inode (dict_t *dict, char *name) { ioc_inode_t *ioc_inode = NULL; @@ -157,15 +145,16 @@ ioc_inode_flush (ioc_inode_t *ioc_inode) int32_t ioc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop) + struct iatt *preop, struct iatt *postop, dict_t *xdata) { - STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop); + STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop, + xdata); return 0; } int32_t ioc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid) + struct iatt *stbuf, int32_t valid, dict_t *xdata) { uint64_t ioc_inode = 0; @@ -177,7 +166,7 @@ ioc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); STACK_WIND (frame, ioc_setattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid); + FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata); return 0; } @@ -185,7 +174,7 @@ ioc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t ioc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *stbuf, dict_t *dict, struct iatt *postparent) + struct iatt *stbuf, dict_t *xdata, struct iatt *postparent) { ioc_inode_t *ioc_inode = NULL; ioc_table_t *table = NULL; @@ -264,19 +253,18 @@ out: } STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, stbuf, - dict, postparent); + xdata, postparent); return 0; } int32_t ioc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *xattr_req) + dict_t *xdata) { ioc_local_t *local = NULL; int32_t op_errno = -1, ret = -1; - local = GF_CALLOC (1, sizeof (*local), - gf_ioc_mt_ioc_local_t); + local = mem_get0 (this->local_pool); if (local == NULL) { op_errno = ENOMEM; gf_log (this->name, GF_LOG_ERROR, "out of memory"); @@ -293,7 +281,7 @@ ioc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, frame->local = local; STACK_WIND (frame, ioc_lookup_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->lookup, loc, xattr_req); + FIRST_CHILD (this)->fops->lookup, loc, xdata); return 0; @@ -325,6 +313,20 @@ ioc_forget (xlator_t *this, inode_t *inode) return 0; } +static int32_t +ioc_invalidate(xlator_t *this, inode_t *inode) +{ + uint64_t ioc_addr = 0; + ioc_inode_t *ioc_inode = NULL; + + inode_ctx_get(inode, this, (uint64_t *) &ioc_addr); + ioc_inode = (void *) ioc_addr; + + if (ioc_inode) + ioc_inode_flush(ioc_inode); + + return 0; +} /* * ioc_cache_validate_cbk - @@ -339,7 +341,8 @@ ioc_forget (xlator_t *this, inode_t *inode) */ int32_t ioc_cache_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *stbuf) + int32_t op_ret, int32_t op_errno, struct iatt *stbuf, + dict_t *xdata) { ioc_local_t *local = NULL; ioc_inode_t *ioc_inode = NULL; @@ -455,8 +458,7 @@ ioc_cache_validate (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, int32_t ret = 0; local = frame->local; - validate_local = GF_CALLOC (1, sizeof (ioc_local_t), - gf_ioc_mt_ioc_local_t); + validate_local = mem_get0 (THIS->local_pool); if (validate_local == NULL) { ret = -1; local->op_ret = -1; @@ -471,7 +473,7 @@ ioc_cache_validate (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, ret = -1; local->op_ret = -1; local->op_errno = ENOMEM; - GF_FREE (validate_local); + mem_put (validate_local); gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR, "out of memory"); goto out; @@ -483,13 +485,13 @@ ioc_cache_validate (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, STACK_WIND (validate_frame, ioc_cache_validate_cbk, FIRST_CHILD (frame->this), - FIRST_CHILD (frame->this)->fops->fstat, fd); + FIRST_CHILD (frame->this)->fops->fstat, fd, NULL); out: return ret; } -inline uint32_t +static inline uint32_t is_match (const char *path, const char *pattern) { int32_t ret = 0; @@ -530,7 +532,7 @@ ioc_get_priority (ioc_table_t *table, const char *path) */ int32_t ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, fd_t *fd) + int32_t op_errno, fd_t *fd, dict_t *xdata) { uint64_t tmp_ioc_inode = 0; ioc_local_t *local = NULL; @@ -551,6 +553,13 @@ ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, inode_ctx_get (fd->inode, this, &tmp_ioc_inode); ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; + //TODO: see why inode context is NULL and handle it. + if (!ioc_inode) { + gf_log (this->name, GF_LOG_ERROR, "inode context is " + "NULL (%s)", uuid_utoa (fd->inode->gfid)); + goto out; + } + ioc_table_lock (ioc_inode->table); { list_move_tail (&ioc_inode->inode_lru, @@ -575,10 +584,6 @@ ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, */ fd_ctx_set (fd, this, 1); } - if ((local->wbflags & GF_OPEN_NOWB) != 0) { - /* disable caching as asked by NFS */ - fd_ctx_set (fd, this, 1); - } /* weight = 0, we disable caching on it */ if (weight == 0) { @@ -589,10 +594,10 @@ ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, } out: - GF_FREE (local); + mem_put (local); frame->local = NULL; - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd); + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); return 0; } @@ -614,7 +619,7 @@ int32_t ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { ioc_local_t *local = NULL; ioc_table_t *table = NULL; @@ -649,6 +654,10 @@ ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, || ((table->max_file_size > 0) && (table->max_file_size < ioc_inode->ia_size))) { ret = fd_ctx_set (fd, this, 1); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to set fd ctx", + local->file_loc.path); } } ioc_inode_unlock (ioc_inode); @@ -657,25 +666,35 @@ ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, (uint64_t)(long)ioc_inode); /* If O_DIRECT open, we disable caching on it */ - if (local->flags & O_DIRECT) + if (local->flags & O_DIRECT) { /* * O_DIRECT is only for one fd, not the inode * as a whole */ ret = fd_ctx_set (fd, this, 1); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to set fd ctx", + local->file_loc.path); + } /* if weight == 0, we disable caching on it */ - if (!weight) + if (!weight) { /* we allow a pattern-matched cache disable this way */ ret = fd_ctx_set (fd, this, 1); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to set fd ctx", + local->file_loc.path); + } } out: frame->local = NULL; - GF_FREE (local); + mem_put (local); STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, - preparent, postparent); + preparent, postparent, xdata); return 0; } @@ -685,7 +704,7 @@ int32_t ioc_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { ioc_local_t *local = NULL; ioc_table_t *table = NULL; @@ -725,23 +744,22 @@ out: frame->local = NULL; loc_wipe (&local->file_loc); - GF_FREE (local); + mem_put (local); STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf, - preparent, postparent); + preparent, postparent, xdata); return 0; } int ioc_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, dict_t *params) + dev_t rdev, mode_t umask, dict_t *xdata) { ioc_local_t *local = NULL; int32_t op_errno = -1, ret = -1; - local = GF_CALLOC (1, sizeof (*local), - gf_ioc_mt_ioc_local_t); + local = mem_get0 (this->local_pool); if (local == NULL) { op_errno = ENOMEM; gf_log (this->name, GF_LOG_ERROR, "out of memory"); @@ -760,17 +778,17 @@ ioc_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, STACK_WIND (frame, ioc_mknod_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, - loc, mode, rdev, params); + loc, mode, rdev, umask, xdata); return 0; unwind: if (local != NULL) { loc_wipe (&local->file_loc); - GF_FREE (local); + mem_put (local); } STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, NULL, - NULL, NULL); + NULL, NULL, NULL); return 0; } @@ -786,27 +804,27 @@ unwind: */ int32_t ioc_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, int32_t wbflags) + fd_t *fd, dict_t *xdata) { ioc_local_t *local = NULL; - local = GF_CALLOC (1, sizeof (ioc_local_t), gf_ioc_mt_ioc_local_t); + local = mem_get0 (this->local_pool); if (local == NULL) { gf_log (this->name, GF_LOG_ERROR, "out of memory"); - STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL); + STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL, NULL); return 0; } local->flags = flags; local->file_loc.path = loc->path; local->file_loc.inode = loc->inode; - local->wbflags = wbflags; frame->local = local; STACK_WIND (frame, ioc_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags); + FIRST_CHILD(this)->fops->open, loc, flags, fd, + xdata); return 0; } @@ -823,15 +841,15 @@ ioc_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, */ int32_t ioc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, fd_t *fd, dict_t *params) + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { ioc_local_t *local = NULL; - local = GF_CALLOC (1, sizeof (ioc_local_t), gf_ioc_mt_ioc_local_t); + local = mem_get0 (this->local_pool); if (local == NULL) { gf_log (this->name, GF_LOG_ERROR, "out of memory"); STACK_UNWIND_STRICT (create, frame, -1, ENOMEM, NULL, NULL, - NULL, NULL, NULL); + NULL, NULL, NULL, NULL); return 0; } @@ -841,7 +859,7 @@ ioc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, STACK_WIND (frame, ioc_create_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->create, loc, flags, mode, - fd, params); + umask, fd, xdata); return 0; } @@ -878,10 +896,10 @@ int32_t ioc_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iovec *vector, int32_t count, struct iatt *stbuf, - struct iobref *iobref) + struct iobref *iobref, dict_t *xdata) { STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count, - stbuf, iobref); + stbuf, iobref, xdata); return 0; } @@ -988,7 +1006,8 @@ ioc_dispatch_requests (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, "cache hit for trav_offset=%" PRId64"/local_offset=%"PRId64"", trav_offset, local_offset); - waitq = __ioc_page_wakeup (trav); + waitq = __ioc_page_wakeup (trav, + trav->op_errno); } else { /* if waitq already exists, fstat * revalidate is @@ -1005,7 +1024,8 @@ ioc_dispatch_requests (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, local->op_errno = -ret; need_validate = 0; - waitq = __ioc_page_wakeup (trav); + waitq = __ioc_page_wakeup (trav, + trav->op_errno); ioc_inode_unlock (ioc_inode); ioc_waitq_return (waitq); @@ -1031,13 +1051,14 @@ ioc_dispatch_requests (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, need_validate = 0; gf_log (frame->this->name, GF_LOG_TRACE, "sending validate request for " - "inode(%"PRId64") at offset=%"PRId64"", - fd->inode->ino, trav_offset); + "inode(%s) at offset=%"PRId64"", + uuid_utoa (fd->inode->gfid), trav_offset); ret = ioc_cache_validate (frame, ioc_inode, fd, trav); if (ret == -1) { ioc_inode_lock (ioc_inode); { - waitq = __ioc_page_wakeup (trav); + waitq = __ioc_page_wakeup (trav, + trav->op_errno); } ioc_inode_unlock (ioc_inode); @@ -1073,14 +1094,13 @@ out: */ int32_t ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, - size_t size, off_t offset) + size_t size, off_t offset, uint32_t flags, dict_t *xdata) { uint64_t tmp_ioc_inode = 0; ioc_inode_t *ioc_inode = NULL; ioc_local_t *local = NULL; uint32_t weight = 0; ioc_table_t *table = NULL; - uint32_t num_pages = 0; int32_t op_errno = -1; if (!this) { @@ -1094,7 +1114,7 @@ ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, STACK_WIND (frame, ioc_readv_disabled_cbk, FIRST_CHILD (frame->this), FIRST_CHILD (frame->this)->fops->readv, fd, size, - offset); + offset, flags, xdata); return 0; } @@ -1107,29 +1127,6 @@ ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, goto out; } - - ioc_table_lock (table); - { - if (!table->mem_pool) { - - num_pages = (table->cache_size / table->page_size) - + ((table->cache_size % table->page_size) - ? 1 : 0); - - table->mem_pool - = mem_pool_new (rbthash_entry_t, num_pages); - - if (!table->mem_pool) { - gf_log (this->name, GF_LOG_ERROR, - "Unable to allocate mem_pool"); - op_errno = ENOMEM; - ioc_table_unlock (table); - goto out; - } - } - } - ioc_table_unlock (table); - ioc_inode_lock (ioc_inode); { if (!ioc_inode->cache.page_table) { @@ -1153,12 +1150,11 @@ ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, STACK_WIND (frame, ioc_readv_disabled_cbk, FIRST_CHILD (frame->this), FIRST_CHILD (frame->this)->fops->readv, fd, size, - offset); + offset, flags, xdata); return 0; } - local = (ioc_local_t *) GF_CALLOC (1, sizeof (ioc_local_t), - gf_ioc_mt_ioc_local_t); + local = mem_get0 (this->local_pool); if (local == NULL) { gf_log (this->name, GF_LOG_ERROR, "out of memory"); op_errno = ENOMEM; @@ -1191,7 +1187,8 @@ ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, return 0; out: - STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL); + STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, + NULL); return 0; } @@ -1208,7 +1205,7 @@ out: int32_t ioc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { ioc_local_t *local = NULL; uint64_t ioc_inode = 0; @@ -1219,7 +1216,8 @@ ioc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (ioc_inode) ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf); + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); return 0; } @@ -1237,16 +1235,16 @@ ioc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t ioc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int32_t count, off_t offset, - struct iobref *iobref) + uint32_t flags, struct iobref *iobref, dict_t *xdata) { ioc_local_t *local = NULL; uint64_t ioc_inode = 0; - local = GF_CALLOC (1, sizeof (ioc_local_t), gf_ioc_mt_ioc_local_t); + local = mem_get0 (this->local_pool); if (local == NULL) { gf_log (this->name, GF_LOG_ERROR, "out of memory"); - STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL); + STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; } @@ -1260,7 +1258,7 @@ ioc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, STACK_WIND (frame, ioc_writev_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, - iobref); + flags, iobref, xdata); return 0; } @@ -1279,11 +1277,11 @@ ioc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t ioc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, - postbuf); + postbuf, xdata); return 0; } @@ -1302,11 +1300,11 @@ ioc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t ioc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, - postbuf); + postbuf, xdata); return 0; } @@ -1321,7 +1319,8 @@ ioc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, * */ int32_t -ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) +ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { uint64_t ioc_inode = 0; @@ -1331,7 +1330,7 @@ ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); STACK_WIND (frame, ioc_truncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, offset); + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); return 0; } @@ -1345,7 +1344,8 @@ ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) * */ int32_t -ioc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) +ioc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { uint64_t ioc_inode = 0; @@ -1355,21 +1355,21 @@ ioc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); STACK_WIND (frame, ioc_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset); + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); return 0; } int32_t ioc_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct gf_flock *lock) + int32_t op_errno, struct gf_flock *lock, dict_t *xdata) { - STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock); + STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata); return 0; } int32_t ioc_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, - struct gf_flock *lock) + struct gf_flock *lock, dict_t *xdata) { ioc_inode_t *ioc_inode = NULL; uint64_t tmp_inode = 0; @@ -1379,7 +1379,7 @@ ioc_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, if (!ioc_inode) { gf_log (this->name, GF_LOG_DEBUG, "inode context is NULL: returning EBADFD"); - STACK_UNWIND_STRICT (lk, frame, -1, EBADFD, NULL); + STACK_UNWIND_STRICT (lk, frame, -1, EBADFD, NULL, NULL); return 0; } @@ -1390,11 +1390,92 @@ ioc_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, ioc_inode_unlock (ioc_inode); STACK_WIND (frame, ioc_lk_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->lk, fd, cmd, lock); + FIRST_CHILD (this)->fops->lk, fd, cmd, lock, xdata); return 0; } +int +ioc_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata) +{ + gf_dirent_t *entry = NULL; + + if (op_ret <= 0) + goto unwind; + + list_for_each_entry (entry, &entries->list, list) { + /* TODO: fill things */ + } + +unwind: + STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata); + + return 0; +} +int +ioc_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *dict) +{ + STACK_WIND (frame, ioc_readdirp_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, + fd, size, offset, dict); + + return 0; +} + +static int32_t +ioc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post, xdata); + return 0; +} + +static int32_t +ioc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + uint64_t ioc_inode = 0; + + inode_ctx_get (fd->inode, this, &ioc_inode); + + if (ioc_inode) + ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + + STACK_WIND(frame, ioc_discard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); + return 0; +} + +static int32_t +ioc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + STACK_UNWIND_STRICT(zerofill, frame, op_ret, + op_errno, pre, post, xdata); + return 0; +} + +static int32_t +ioc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + uint64_t ioc_inode = 0; + + inode_ctx_get (fd->inode, this, &ioc_inode); + + if (ioc_inode) + ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + + STACK_WIND(frame, ioc_zerofill_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); + return 0; +} + + int32_t ioc_get_priority_list (const char *opt_str, struct list_head *first) { @@ -1475,13 +1556,9 @@ ioc_get_priority_list (const char *opt_str, struct list_head *first) stripe_str = strtok_r (NULL, ",", &tmp_str); } out: - if (string != NULL) { - GF_FREE (string); - } + GF_FREE (string); - if (dup_str != NULL) { - GF_FREE (dup_str); - } + GF_FREE (dup_str); if (max_pri == -1) { list_for_each_entry_safe (curr, tmp, first, list) { @@ -1513,32 +1590,41 @@ mem_acct_init (xlator_t *this) return ret; } -int -validate_options (xlator_t *this, char **op_errstr) -{ - int ret = 0; - volume_opt_list_t *vol_opt = NULL; - volume_opt_list_t *tmp = NULL; - if (!this) { - gf_log (this->name, GF_LOG_DEBUG, "'this' not a valid ptr"); - ret =-1; +static gf_boolean_t +check_cache_size_ok (xlator_t *this, uint64_t cache_size) +{ + gf_boolean_t ret = _gf_true; + uint64_t total_mem = 0; + uint64_t max_cache_size = 0; + volume_option_t *opt = NULL; + + GF_ASSERT (this); + opt = xlator_volume_option_get (this, "cache-size"); + if (!opt) { + ret = _gf_false; + gf_log (this->name, GF_LOG_ERROR, + "could not get cache-size option"); goto out; } - if (list_empty (&this->volume_options)) - goto out; + total_mem = get_mem_size (); + if (-1 == total_mem) + max_cache_size = opt->max; + else + max_cache_size = total_mem; - vol_opt = list_entry (this->volume_options.next, - volume_opt_list_t, list); - list_for_each_entry_safe (vol_opt, tmp, &this->volume_options, list) { - ret = validate_xlator_volume_options_attacherr (this, - vol_opt->given_opt, - op_errstr); - } + gf_log (this->name, GF_LOG_DEBUG, "Max cache size is %"PRIu64, + max_cache_size); + if (cache_size > max_cache_size) { + ret = _gf_false; + gf_log (this->name, GF_LOG_ERROR, "Cache size %"PRIu64 + " is greater than the max size of %"PRIu64, + cache_size, max_cache_size); + goto out; + } out: - return ret; } @@ -1547,14 +1633,8 @@ reconfigure (xlator_t *this, dict_t *options) { data_t *data = NULL; ioc_table_t *table = NULL; - int32_t cache_timeout = 0; - int64_t min_file_size = 0; - int64_t max_file_size = 0; - char *tmp = NULL; - uint64_t cache_size = 0; - char *cache_size_string = NULL; - int ret = 0; - + int ret = -1; + uint64_t cache_size_new = 0; if (!this || !this->private) goto out; @@ -1562,72 +1642,8 @@ reconfigure (xlator_t *this, dict_t *options) ioc_table_lock (table); { - data = dict_get (options, "cache-timeout"); - if (data) { - cache_timeout = data_to_uint32 (data); - if (cache_timeout < 0){ - gf_log (this->name, GF_LOG_WARNING, - "cache-timeout %d seconds invalid," - " has to be >=0", cache_timeout); - goto out; - } - - - if (cache_timeout > 60){ - gf_log (this->name, GF_LOG_WARNING, - "cache-timeout %d seconds invalid," - " has to be <=60", cache_timeout); - goto out; - } - - table->cache_timeout = cache_timeout; - - gf_log (this->name, GF_LOG_DEBUG, - "Reconfiguring %d seconds to" - " revalidate cache", table->cache_timeout); - } else - table->cache_timeout = 1; - - data = dict_get (options, "cache-size"); - if (data) - cache_size_string = data_to_str (data); - - if (cache_size_string) { - if (gf_string2bytesize (cache_size_string, - &cache_size) != 0) { - gf_log ("io-cache", GF_LOG_ERROR, - "invalid number format \"%s\" of " - "\"option cache-size\" Defaulting" - "to old value", cache_size_string); - goto out; - } - - if (cache_size < (4 * GF_UNIT_MB)) { - gf_log(this->name, GF_LOG_ERROR, - "Reconfiguration" - "'option cache-size %s' failed , " - "Max value can be 4MiB, Defaulting to " - "old value (%"PRIu64")", - cache_size_string, table->cache_size); - goto out; - } - - if (cache_size > (6 * GF_UNIT_GB)) { - gf_log (this->name, GF_LOG_ERROR, - "Reconfiguration" - "'option cache-size %s' failed , " - "Max value can be 6GiB, Defaulting to " - "old value (%"PRIu64")", - cache_size_string, table->cache_size); - goto out; - } - - - gf_log (this->name, GF_LOG_DEBUG, "Reconfiguring " - " cache-size %"PRIu64"", cache_size); - table->cache_size = cache_size; - } else - table->cache_size = IOC_CACHE_SIZE; + GF_OPTION_RECONF ("cache-timeout", table->cache_timeout, + options, int32, unlock); data = dict_get (options, "priority"); if (data) { @@ -1640,80 +1656,46 @@ reconfigure (xlator_t *this, dict_t *options) &table->priority_list); if (table->max_pri == -1) { - ret = -1; - goto out; + goto unlock; } table->max_pri ++; } - min_file_size = table->min_file_size; - data = dict_get (options, "min-file-size"); - if (data) { - tmp = data_to_str (data); - if (tmp != NULL) { - if (gf_string2bytesize (tmp, - (uint64_t *)&min_file_size) - != 0) { - gf_log ("io-cache", GF_LOG_ERROR, - "invalid number format \"%s\" of " - "\"option min-file-size\"", tmp); - ret = -1; - goto out; - } + GF_OPTION_RECONF ("max-file-size", table->max_file_size, + options, size, unlock); - gf_log (this->name, GF_LOG_DEBUG, - "Reconfiguring min-file-size %"PRIu64"", - table->min_file_size); - } - } + GF_OPTION_RECONF ("min-file-size", table->min_file_size, + options, size, unlock); - max_file_size = table->max_file_size; - data = dict_get (options, "max-file-size"); - if (data) { - tmp = data_to_str (data); - if (tmp != NULL) { - if (gf_string2bytesize (tmp, - (uint64_t *)&max_file_size) - != 0) { - gf_log ("io-cache", GF_LOG_ERROR, - "invalid number format \"%s\" of " - "\"option max-file-size\"", tmp); - ret = -1; - goto out; - } - - gf_log (this->name, GF_LOG_DEBUG, - "Reconfiguring max-file-size %"PRIu64"", - table->max_file_size); - } - } - - if ((max_file_size >= 0) && (min_file_size > max_file_size)) { - gf_log ("io-cache", GF_LOG_ERROR, "minimum size (%" + if ((table->max_file_size >= 0) && + (table->min_file_size > table->max_file_size)) { + gf_log (this->name, GF_LOG_ERROR, "minimum size (%" PRIu64") of a file that can be cached is " "greater than maximum size (%"PRIu64"). " "Hence Defaulting to old value", table->min_file_size, table->max_file_size); - goto out; + goto unlock; } - table->min_file_size = min_file_size; - table->max_file_size = max_file_size; - data = dict_get (options, "min-file-size"); - if (data && !data_to_str (data)) - table->min_file_size = 0; + GF_OPTION_RECONF ("cache-size", cache_size_new, + options, size, unlock); + if (!check_cache_size_ok (this, cache_size_new)) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Not reconfiguring cache-size"); + goto unlock; + } + table->cache_size = cache_size_new; - data = dict_get (options, "max-file-size"); - if (data && !data_to_str (data)) - table->max_file_size = 0; + ret = 0; } - +unlock: ioc_table_unlock (table); out: return ret; - } + /* * init - * @this: @@ -1725,11 +1707,10 @@ init (xlator_t *this) ioc_table_t *table = NULL; dict_t *xl_options = NULL; uint32_t index = 0; - char *cache_size_string = NULL, *tmp = NULL; int32_t ret = -1; glusterfs_ctx_t *ctx = NULL; data_t *data = 0; - char *def_val = NULL; + uint32_t num_pages = 0; xl_options = this->options; @@ -1754,60 +1735,17 @@ init (xlator_t *this) table->xl = this; table->page_size = this->ctx->page_size; - if (xlator_get_volopt_info (&this->volume_options, "cache-size", - &def_val, NULL)) { - gf_log (this->name, GF_LOG_ERROR, "Default value of cache-size " - "not found"); - ret = -1; - goto out; - } else { - if (gf_string2bytesize (def_val, &table->cache_size)) { - gf_log (this->name, GF_LOG_ERROR, "Default value of " - "cache-size corrupt"); - ret = -1; - goto out; - } - } + GF_OPTION_INIT ("cache-size", table->cache_size, size, out); - data = dict_get (xl_options, "cache-size"); - if (data) - cache_size_string = data_to_str (data); + GF_OPTION_INIT ("cache-timeout", table->cache_timeout, int32, out); - if (cache_size_string) { - if (gf_string2bytesize (cache_size_string, - &table->cache_size) != 0) { - gf_log ("io-cache", GF_LOG_ERROR, - "invalid number format \"%s\" of " - "\"option cache-size\"", - cache_size_string); - goto out; - } + GF_OPTION_INIT ("min-file-size", table->min_file_size, size, out); - gf_log (this->name, GF_LOG_TRACE, - "using cache-size %"PRIu64"", table->cache_size); - } + GF_OPTION_INIT ("max-file-size", table->max_file_size, size, out); - if (xlator_get_volopt_info (&this->volume_options, "cache-timeout", - &def_val, NULL)) { - gf_log (this->name, GF_LOG_ERROR, "Default value of " - "cache-timeout not found"); + if (!check_cache_size_ok (this, table->cache_size)) { ret = -1; goto out; - } else { - if (gf_string2int32 (def_val, &table->cache_timeout)) { - gf_log (this->name, GF_LOG_ERROR, "Default value of " - "cache-timeout corrupt"); - ret = -1; - goto out; - } - } - - data = dict_get (xl_options, "cache-timeout"); - if (data) { - table->cache_timeout = data_to_uint32 (data); - gf_log (this->name, GF_LOG_TRACE, - "Using %d seconds to revalidate cache", - table->cache_timeout); } INIT_LIST_HEAD (&table->priority_list); @@ -1827,74 +1765,6 @@ init (xlator_t *this) } table->max_pri ++; - if (xlator_get_volopt_info (&this->volume_options, "min-file-size", - &def_val, NULL)) { - gf_log (this->name, GF_LOG_ERROR, "Default value of " - "min-file-size not found"); - ret = -1; - goto out; - } else { - if (gf_string2bytesize (def_val, - (uint64_t *) &table->min_file_size)) { - gf_log (this->name, GF_LOG_ERROR, "Default value of " - "min-file-size corrupt"); - ret = -1; - goto out; - } - } - - data = dict_get (xl_options, "min-file-size"); - if (data) - tmp = data_to_str (data); - - if (tmp != NULL) { - if (gf_string2bytesize (tmp, - (uint64_t *)&table->min_file_size) - != 0) { - gf_log ("io-cache", GF_LOG_ERROR, - "invalid number format \"%s\" of " - "\"option min-file-size\"", tmp); - goto out; - } - - gf_log (this->name, GF_LOG_TRACE, - "using min-file-size %"PRIu64"", table->min_file_size); - } - - if (xlator_get_volopt_info (&this->volume_options, "max-file-size", - &def_val, NULL)) { - gf_log (this->name, GF_LOG_ERROR, "Default value of " - "max-file-size not found"); - ret = -1; - goto out; - } else { - if (gf_string2bytesize (def_val, - (uint64_t *) &table->max_file_size)) { - gf_log (this->name, GF_LOG_ERROR, "Default value of " - "max-file-size corrupt"); - ret = -1; - goto out; - } - } - - tmp = NULL; - data = dict_get (xl_options, "max-file-size"); - if (data) - tmp = data_to_str (data); - - if (tmp != NULL) { - if (gf_string2bytesize (tmp, - (uint64_t *)&table->max_file_size) - != 0) { - gf_log ("io-cache", GF_LOG_ERROR, - "invalid number format \"%s\" of " - "\"option max-file-size\"", tmp); - goto out; - } - - gf_log (this->name, GF_LOG_TRACE, - "using max-file-size %"PRIu64"", table->max_file_size); - } INIT_LIST_HEAD (&table->inodes); if ((table->max_file_size >= 0) @@ -1916,8 +1786,28 @@ init (xlator_t *this) for (index = 0; index < (table->max_pri); index++) INIT_LIST_HEAD (&table->inode_lru[index]); + this->local_pool = mem_pool_new (ioc_local_t, 64); + if (!this->local_pool) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "failed to create local_t's memory pool"); + goto out; + } + pthread_mutex_init (&table->table_lock, NULL); this->private = table; + + num_pages = (table->cache_size / table->page_size) + + ((table->cache_size % table->page_size) + ? 1 : 0); + + table->mem_pool = mem_pool_new (rbthash_entry_t, num_pages); + if (!table->mem_pool) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to allocate mem_pool"); + goto out; + } + ret = 0; ctx = this->ctx; @@ -1946,8 +1836,7 @@ ioc_page_waitq_dump (ioc_page_t *page, char *prefix) while (trav) { frame = trav->data; - gf_proc_dump_build_key (key, prefix, - "waitq.frame[%d]", i++); + sprintf (key, "waitq.frame[%d]", i++); gf_proc_dump_write (key, "%"PRId64, frame->root->unique); trav = trav->next; @@ -1966,9 +1855,8 @@ __ioc_inode_waitq_dump (ioc_inode_t *ioc_inode, char *prefix) while (trav) { page = trav->data; - gf_proc_dump_build_key (key, prefix, - "cache-validation-waitq.page[%d].offset", - i++); + + sprintf (key, "cache-validation-waitq.page[%d].offset", i++); gf_proc_dump_write (key, "%"PRId64, page->offset); trav = trav->next; @@ -1978,21 +1866,32 @@ __ioc_inode_waitq_dump (ioc_inode_t *ioc_inode, char *prefix) void __ioc_page_dump (ioc_page_t *page, char *prefix) { - char key[GF_DUMP_MAX_BUF_LEN] = {0, }; - ioc_page_lock (page); + int ret = -1; + + if (!page) + return; + /* ioc_page_lock can be used to hold the mutex. But in statedump + * its better to use trylock to avoid deadlocks. + */ + ret = pthread_mutex_trylock (&page->page_lock); + if (ret) + goto out; { - gf_proc_dump_build_key (key, prefix, "offset"); - gf_proc_dump_write (key, "%"PRId64, page->offset); - gf_proc_dump_build_key (key, prefix, "size"); - gf_proc_dump_write (key, "%"PRId64, page->size); - gf_proc_dump_build_key (key, prefix, "dirty"); - gf_proc_dump_write (key, "%s", page->dirty ? "yes" : "no"); - gf_proc_dump_build_key (key, prefix, "ready"); - gf_proc_dump_write (key, "%s", page->ready ? "yes" : "no"); + gf_proc_dump_write ("offset", "%"PRId64, page->offset); + gf_proc_dump_write ("size", "%"PRId64, page->size); + gf_proc_dump_write ("dirty", "%s", page->dirty ? "yes" : "no"); + gf_proc_dump_write ("ready", "%s", page->ready ? "yes" : "no"); ioc_page_waitq_dump (page, prefix); } - ioc_page_unlock (page); + pthread_mutex_unlock (&page->page_lock); + +out: + if (ret && page) + gf_proc_dump_write ("Unable to dump the page information", + "(Lock acquisition failed) %p", page); + + return; } void @@ -2002,7 +1901,6 @@ __ioc_cache_dump (ioc_inode_t *ioc_inode, char *prefix) ioc_table_t *table = NULL; ioc_page_t *page = NULL; int i = 0; - struct tm *tm = NULL; char key[GF_DUMP_MAX_BUF_LEN] = {0, }; char timestr[256] = {0, }; @@ -2012,13 +1910,15 @@ __ioc_cache_dump (ioc_inode_t *ioc_inode, char *prefix) table = ioc_inode->table; - tm = localtime (&ioc_inode->cache.tv.tv_sec); - strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm); - snprintf (timestr + strlen (timestr), 256 - strlen (timestr), - ".%"GF_PRI_SUSECONDS, ioc_inode->cache.tv.tv_usec); + if (ioc_inode->cache.tv.tv_sec) { + gf_time_fmt (timestr, sizeof timestr, + ioc_inode->cache.tv.tv_sec, gf_timefmt_FT); + snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr), + ".%"GF_PRI_SUSECONDS, ioc_inode->cache.tv.tv_usec); - gf_proc_dump_build_key (key, prefix, "last-cache-validation-time"); - gf_proc_dump_write (key, "%s", timestr); + gf_proc_dump_write ("last-cache-validation-time", "%s", + timestr); + } for (offset = 0; offset < ioc_inode->ia_size; offset += table->page_size) { @@ -2027,9 +1927,7 @@ __ioc_cache_dump (ioc_inode_t *ioc_inode, char *prefix) continue; } - gf_proc_dump_build_key (key, prefix, - "inode.cache.page[%d]", i++); - + sprintf (key, "inode.cache.page[%d]", i++); __ioc_page_dump (page, key); } out: @@ -2037,72 +1935,113 @@ out: } -void -ioc_inode_dump (ioc_inode_t *ioc_inode, char *prefix) +int +ioc_inode_dump (xlator_t *this, inode_t *inode) { - char key[GF_DUMP_MAX_BUF_LEN] = {0, }; - char uuidbuf[256] = {0, }; - if ((ioc_inode == NULL) || (prefix == NULL)) { + char *path = NULL; + int ret = -1; + char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; + uint64_t tmp_ioc_inode = 0; + ioc_inode_t *ioc_inode = NULL; + gf_boolean_t section_added = _gf_false; + char uuid_str[64] = {0,}; + + if (this == NULL || inode == NULL) + goto out; + + gf_proc_dump_build_key (key_prefix, "io-cache", "inode"); + + inode_ctx_get (inode, this, &tmp_ioc_inode); + ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; + if (ioc_inode == NULL) + goto out; + + /* Similar to ioc_page_dump function its better to use + * pthread_mutex_trylock and not to use gf_log in statedump + * to avoid deadlocks. + */ + ret = pthread_mutex_trylock (&ioc_inode->inode_lock); + if (ret) goto out; - } - ioc_inode_lock (ioc_inode); { - gf_proc_dump_build_key (key, prefix, "\ninode.gfid"); - uuid_unparse (ioc_inode->inode->gfid, uuidbuf); - gf_proc_dump_write (key, "%s", uuidbuf); - gf_proc_dump_build_key (key, prefix, "inode.ino"); - gf_proc_dump_write (key, "%ld", ioc_inode->inode->ino); - gf_proc_dump_build_key (key, prefix, "inode.weight"); - gf_proc_dump_write (key, "%d", ioc_inode->weight); - __ioc_cache_dump (ioc_inode, prefix); - __ioc_inode_waitq_dump (ioc_inode, prefix); + if (uuid_is_null (ioc_inode->inode->gfid)) + goto unlock; + + gf_proc_dump_add_section (key_prefix); + section_added = _gf_true; + + __inode_path (ioc_inode->inode, NULL, &path); + + gf_proc_dump_write ("inode.weight", "%d", ioc_inode->weight); + + if (path) { + gf_proc_dump_write ("path", "%s", path); + GF_FREE (path); + } + + gf_proc_dump_write ("uuid", "%s", uuid_utoa_r + (ioc_inode->inode->gfid, uuid_str)); + __ioc_cache_dump (ioc_inode, key_prefix); + __ioc_inode_waitq_dump (ioc_inode, key_prefix); } - ioc_inode_unlock (ioc_inode); +unlock: + pthread_mutex_unlock (&ioc_inode->inode_lock); + out: - return; + if (ret && ioc_inode) { + if (section_added == _gf_false) + gf_proc_dump_add_section (key_prefix); + gf_proc_dump_write ("Unable to print the status of ioc_inode", + "(Lock acquisition failed) %s", + uuid_utoa (inode->gfid)); + } + return ret; } int ioc_priv_dump (xlator_t *this) { ioc_table_t *priv = NULL; - ioc_inode_t *ioc_inode = NULL; char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; - char key[GF_DUMP_MAX_BUF_LEN] = {0, }; + int ret = -1; + gf_boolean_t add_section = _gf_false; if (!this || !this->private) goto out; priv = this->private; - gf_proc_dump_build_key (key_prefix, "xlator.performance.io-cache", - "priv"); + + gf_proc_dump_build_key (key_prefix, "io-cache", "priv"); gf_proc_dump_add_section (key_prefix); + add_section = _gf_true; - ioc_table_lock (priv); + ret = pthread_mutex_trylock (&priv->table_lock); + if (ret) + goto out; { - gf_proc_dump_build_key (key, key_prefix, "page_size"); - gf_proc_dump_write (key, "%ld", priv->page_size); - gf_proc_dump_build_key (key, key_prefix, "cache_size"); - gf_proc_dump_write (key, "%ld", priv->cache_size); - gf_proc_dump_build_key (key, key_prefix, "cache_used"); - gf_proc_dump_write (key, "%ld", priv->cache_used); - gf_proc_dump_build_key (key, key_prefix, "inode_count"); - gf_proc_dump_write (key, "%u", priv->inode_count); - gf_proc_dump_build_key (key, key_prefix, "cache_timeout"); - gf_proc_dump_write (key, "%u", priv->cache_timeout); - gf_proc_dump_build_key (key, key_prefix, "min-file-size"); - gf_proc_dump_write (key, "%u", priv->min_file_size); - gf_proc_dump_build_key (key, key_prefix, "max-file-size"); - gf_proc_dump_write (key, "%u", priv->max_file_size); - - list_for_each_entry (ioc_inode, &priv->inodes, inode_list) { - ioc_inode_dump (ioc_inode, key_prefix); + gf_proc_dump_write ("page_size", "%ld", priv->page_size); + gf_proc_dump_write ("cache_size", "%ld", priv->cache_size); + gf_proc_dump_write ("cache_used", "%ld", priv->cache_used); + gf_proc_dump_write ("inode_count", "%u", priv->inode_count); + gf_proc_dump_write ("cache_timeout", "%u", priv->cache_timeout); + gf_proc_dump_write ("min-file-size", "%u", priv->min_file_size); + gf_proc_dump_write ("max-file-size", "%u", priv->max_file_size); + } + pthread_mutex_unlock (&priv->table_lock); +out: + if (ret && priv) { + if (!add_section) { + gf_proc_dump_build_key (key_prefix, "xlator." + "performance.io-cache", "priv"); + gf_proc_dump_add_section (key_prefix); } + gf_proc_dump_write ("Unable to dump the state of private " + "structure of io-cache xlator", "(Lock " + "acquisition failed) %s", this->name); } - ioc_table_unlock (priv); -out: + return 0; } @@ -2115,18 +2054,33 @@ out: void fini (xlator_t *this) { - ioc_table_t *table = NULL; + ioc_table_t *table = NULL; + struct ioc_priority *curr = NULL, *tmp = NULL; + int i = 0; table = this->private; if (table == NULL) return; + this->private = NULL; + if (table->mem_pool != NULL) { mem_pool_destroy (table->mem_pool); table->mem_pool = NULL; } + list_for_each_entry_safe (curr, tmp, &table->priority_list, list) { + list_del_init (&curr->list); + GF_FREE (curr->pattern); + GF_FREE (curr); + } + + for (i = 0; i < table->max_pri; i++) { + GF_ASSERT (list_empty (&table->inode_lru[i])); + } + + GF_ASSERT (list_empty (&table->inodes)); pthread_mutex_destroy (&table->table_lock); GF_FREE (table); @@ -2144,22 +2098,28 @@ struct xlator_fops fops = { .lookup = ioc_lookup, .lk = ioc_lk, .setattr = ioc_setattr, - .mknod = ioc_mknod + .mknod = ioc_mknod, + + .readdirp = ioc_readdirp, + .discard = ioc_discard, + .zerofill = ioc_zerofill, }; struct xlator_dumpops dumpops = { .priv = ioc_priv_dump, + .inodectx = ioc_inode_dump, }; struct xlator_cbks cbks = { .forget = ioc_forget, - .release = ioc_release + .release = ioc_release, + .invalidate = ioc_invalidate, }; struct volume_options options[] = { { .key = {"priority"}, - .type = GF_OPTION_TYPE_ANY, + .type = GF_OPTION_TYPE_PRIORITY_LIST, .default_value = "", .description = "Assigns priority to filenames with specific " "patterns so that when a page needs to be ejected " @@ -2178,7 +2138,7 @@ struct volume_options options[] = { { .key = {"cache-size"}, .type = GF_OPTION_TYPE_SIZET, .min = 4 * GF_UNIT_MB, - .max = 6 * GF_UNIT_GB, + .max = 32 * GF_UNIT_GB, .default_value = "32MB", .description = "Size of the read cache." }, diff --git a/xlators/performance/io-cache/src/io-cache.h b/xlators/performance/io-cache/src/io-cache.h index 17d286623..46d758a66 100644 --- a/xlators/performance/io-cache/src/io-cache.h +++ b/xlators/performance/io-cache/src/io-cache.h @@ -1,20 +1,11 @@ /* - Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #ifndef __IO_CACHE_H @@ -84,7 +75,6 @@ struct ioc_fill { struct ioc_local { mode_t mode; int32_t flags; - int32_t wbflags; loc_t file_loc; off_t offset; size_t size; @@ -126,6 +116,8 @@ struct ioc_page { struct ioc_waitq *waitq; struct iobref *iobref; pthread_mutex_t page_lock; + int32_t op_errno; + char stale; }; struct ioc_cache { @@ -164,8 +156,8 @@ struct ioc_table { uint64_t page_size; uint64_t cache_size; uint64_t cache_used; - int64_t min_file_size; - int64_t max_file_size; + uint64_t min_file_size; + uint64_t max_file_size; struct list_head inodes; /* list of inodes cached */ struct list_head active; struct list_head *inode_lru; @@ -196,7 +188,7 @@ int32_t ioc_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iovec *vector, int32_t count, struct iatt *stbuf, - struct iobref *iobref); + struct iobref *iobref, dict_t *xdata); ioc_page_t * __ioc_page_get (ioc_inode_t *ioc_inode, off_t offset); @@ -212,7 +204,7 @@ __ioc_wait_on_page (ioc_page_t *page, call_frame_t *frame, off_t offset, size_t size); ioc_waitq_t * -__ioc_page_wakeup (ioc_page_t *page); +__ioc_page_wakeup (ioc_page_t *page, int32_t op_errno); void ioc_page_flush (ioc_page_t *page); @@ -228,7 +220,7 @@ ioc_waitq_return (ioc_waitq_t *waitq); int32_t ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset, - size_t size); + size_t size, int32_t op_errno); #define ioc_inode_lock(ioc_inode) \ do { \ @@ -337,6 +329,4 @@ ioc_prune (ioc_table_t *table); int32_t ioc_need_prune (ioc_table_t *table); -inline uint32_t -ioc_hashfn (void *data, int len); #endif /* __IO_CACHE_H */ diff --git a/xlators/performance/io-cache/src/ioc-inode.c b/xlators/performance/io-cache/src/ioc-inode.c index 208dbd357..86a54bb14 100644 --- a/xlators/performance/io-cache/src/ioc-inode.c +++ b/xlators/performance/io-cache/src/ioc-inode.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #ifndef _CONFIG_H @@ -123,7 +114,8 @@ ioc_inode_wakeup (call_frame_t *frame, ioc_inode_t *ioc_inode, ioc_inode_lock (ioc_inode); { page_waitq = - __ioc_page_wakeup (waiter_page); + __ioc_page_wakeup (waiter_page, + waiter_page->op_errno); } ioc_inode_unlock (ioc_inode); if (page_waitq) diff --git a/xlators/performance/io-cache/src/ioc-mem-types.h b/xlators/performance/io-cache/src/ioc-mem-types.h index 08596d5f4..9b68f9fce 100644 --- a/xlators/performance/io-cache/src/ioc-mem-types.h +++ b/xlators/performance/io-cache/src/ioc-mem-types.h @@ -1,20 +1,11 @@ /* - Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #ifndef __IOC_MT_H__ @@ -26,7 +17,6 @@ enum gf_ioc_mem_types_ { gf_ioc_mt_iovec = gf_common_mt_end + 1, gf_ioc_mt_ioc_table_t, gf_ioc_mt_char, - gf_ioc_mt_ioc_local_t, gf_ioc_mt_ioc_waitq_t, gf_ioc_mt_ioc_priority, gf_ioc_mt_list_head, diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c index 895775d93..b2e20ba65 100644 --- a/xlators/performance/io-cache/src/page.c +++ b/xlators/performance/io-cache/src/page.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2007-2010 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU Affero General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Affero General Public License for more details. - - You should have received a copy of the GNU Affero General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #ifndef _CONFIG_H @@ -111,6 +102,7 @@ __ioc_page_destroy (ioc_page_t *page) if (page->waitq) { /* frames waiting on this page, do not destroy this page */ page_size = -1; + page->stale = 1; } else { rbthash_remove (page->inode->cache.page_table, &page->offset, sizeof (page->offset)); @@ -416,7 +408,8 @@ ioc_waitq_return (ioc_waitq_t *waitq) int ioc_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iovec *vector, - int32_t count, struct iatt *stbuf, struct iobref *iobref) + int32_t count, struct iatt *stbuf, struct iobref *iobref, + dict_t *xdata) { ioc_local_t *local = NULL; off_t offset = 0; @@ -517,6 +510,7 @@ ioc_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this, * byte replies */ page_size = iov_length(vector, count); page->size = page_size; + page->op_errno = op_errno; iobref_page_size = iobref_size (page->iobref); @@ -524,7 +518,8 @@ ioc_fault_cbk (call_frame_t *frame, void *cookie, xlator_t *this, /* wake up all the frames waiting on * this page, including * the frame which triggered fault */ - waitq = __ioc_page_wakeup (page); + waitq = __ioc_page_wakeup (page, + op_errno); } /* if(page->waitq) */ } /* if(!page)...else */ } /* if(op_ret < 0)...else */ @@ -602,8 +597,7 @@ ioc_page_fault (ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd, goto err; } - fault_local = GF_CALLOC (1, sizeof (ioc_local_t), - gf_ioc_mt_ioc_local_t); + fault_local = mem_get0 (THIS->local_pool); if (fault_local == NULL) { op_ret = -1; op_errno = ENOMEM; @@ -630,7 +624,7 @@ ioc_page_fault (ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd, STACK_WIND (fault_frame, ioc_fault_cbk, FIRST_CHILD(fault_frame->this), FIRST_CHILD(fault_frame->this)->fops->readv, fd, - table->page_size, offset); + table->page_size, offset, 0, NULL); return; err: @@ -651,7 +645,7 @@ err: int32_t __ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset, - size_t size) + size_t size, int32_t op_errno) { ioc_local_t *local = NULL; ioc_fill_t *fill = NULL; @@ -686,7 +680,13 @@ __ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset, /* immediately move this page to the end of the page_lru list */ list_move_tail (&page->page_lru, &ioc_inode->cache.page_lru); /* fill local->pending_size bytes from local->pending_offset */ - if (local->op_ret != -1 && page->size) { + if (local->op_ret != -1) { + local->op_errno = op_errno; + + if (page->size == 0) { + goto done; + } + if (offset > page->offset) /* offset is offset in file, convert it to offset in * page */ @@ -779,6 +779,7 @@ __ioc_frame_fill (ioc_page_t *page, call_frame_t *frame, off_t offset, local->op_ret += copy_size; } +done: ret = 0; out: return ret; @@ -803,7 +804,7 @@ ioc_frame_unwind (call_frame_t *frame) int32_t copied = 0; struct iobref *iobref = NULL; struct iatt stbuf = {0,}; - int32_t op_ret = 0; + int32_t op_ret = 0, op_errno = 0; GF_ASSERT (frame); @@ -812,16 +813,21 @@ ioc_frame_unwind (call_frame_t *frame) gf_log (frame->this->name, GF_LOG_WARNING, "local is NULL"); op_ret = -1; - local->op_errno = ENOMEM; + op_errno = ENOMEM; + goto unwind; + } + + if (local->op_ret < 0) { + op_ret = local->op_ret; + op_errno = local->op_errno; goto unwind; } // ioc_local_lock (local); - frame->local = NULL; iobref = iobref_new (); if (iobref == NULL) { op_ret = -1; - local->op_errno = ENOMEM; + op_errno = ENOMEM; } if (list_empty (&local->fill_list)) { @@ -838,7 +844,7 @@ ioc_frame_unwind (call_frame_t *frame) vector = GF_CALLOC (count, sizeof (*vector), gf_ioc_mt_iovec); if (vector == NULL) { op_ret = -1; - local->op_errno = ENOMEM; + op_errno = ENOMEM; } list_for_each_entry_safe (fill, next, &local->fill_list, list) { @@ -868,8 +874,9 @@ unwind: // ioc_local_unlock (local); - STACK_UNWIND_STRICT (readv, frame, op_ret, local->op_errno, vector, - count, &stbuf, iobref); + frame->local = NULL; + STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, + count, &stbuf, iobref, NULL); if (iobref != NULL) { iobref_unref (iobref); @@ -881,7 +888,8 @@ unwind: } pthread_mutex_destroy (&local->local_lock); - GF_FREE (local); + if (local) + mem_put (local); return; } @@ -923,7 +931,7 @@ ioc_frame_return (call_frame_t *frame) * to be called only when a frame is waiting on an in-transit page */ ioc_waitq_t * -__ioc_page_wakeup (ioc_page_t *page) +__ioc_page_wakeup (ioc_page_t *page, int32_t op_errno) { ioc_waitq_t *waitq = NULL, *trav = NULL; call_frame_t *frame = NULL; @@ -942,12 +950,16 @@ __ioc_page_wakeup (ioc_page_t *page) for (trav = waitq; trav; trav = trav->next) { frame = trav->data; ret = __ioc_frame_fill (page, frame, trav->pending_offset, - trav->pending_size); + trav->pending_size, op_errno); if (ret == -1) { break; } } + if (page->stale) { + __ioc_page_destroy (page); + } + out: return waitq; } |
