diff options
Diffstat (limited to 'xlators/performance/io-cache')
| -rw-r--r-- | xlators/performance/io-cache/src/Makefile.am | 11 | ||||
| -rw-r--r-- | xlators/performance/io-cache/src/io-cache-messages.h | 69 | ||||
| -rw-r--r-- | xlators/performance/io-cache/src/io-cache.c | 3109 | ||||
| -rw-r--r-- | xlators/performance/io-cache/src/io-cache.h | 446 | ||||
| -rw-r--r-- | xlators/performance/io-cache/src/ioc-inode.c | 332 | ||||
| -rw-r--r-- | xlators/performance/io-cache/src/ioc-mem-types.h | 29 | ||||
| -rw-r--r-- | xlators/performance/io-cache/src/page.c | 1523 |
7 files changed, 3314 insertions, 2205 deletions
diff --git a/xlators/performance/io-cache/src/Makefile.am b/xlators/performance/io-cache/src/Makefile.am index b1bf5bfbf71..bfa34ce5502 100644 --- a/xlators/performance/io-cache/src/Makefile.am +++ b/xlators/performance/io-cache/src/Makefile.am @@ -1,14 +1,17 @@ xlator_LTLIBRARIES = io-cache.la xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance -io_cache_la_LDFLAGS = -module -avoidversion +io_cache_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) io_cache_la_SOURCES = io-cache.c page.c ioc-inode.c io_cache_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la -noinst_HEADERS = io-cache.h +noinst_HEADERS = io-cache.h ioc-mem-types.h io-cache-messages.h -AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \ - -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ + -I$(CONTRIBDIR)/rbtree + +AM_CFLAGS = -Wall $(GF_CFLAGS) CLEANFILES = diff --git a/xlators/performance/io-cache/src/io-cache-messages.h b/xlators/performance/io-cache/src/io-cache-messages.h new file mode 100644 index 00000000000..38ad0b14d0e --- /dev/null +++ b/xlators/performance/io-cache/src/io-cache-messages.h @@ -0,0 +1,69 @@ +/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _IO_CACHE_MESSAGES_H_ +#define _IO_CACHE_MESSAGES_H_ + +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. + */ + +GLFS_MSGID(IO_CACHE, IO_CACHE_MSG_ENFORCEMENT_FAILED, + IO_CACHE_MSG_INVALID_ARGUMENT, + IO_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED, IO_CACHE_MSG_NO_MEMORY, + IO_CACHE_MSG_VOL_MISCONFIGURED, IO_CACHE_MSG_INODE_NULL, + IO_CACHE_MSG_PAGE_WAIT_VALIDATE, IO_CACHE_MSG_STR_COVERSION_FAILED, + IO_CACHE_MSG_WASTED_COPY, IO_CACHE_MSG_SET_FD_FAILED, + IO_CACHE_MSG_TABLE_NULL, IO_CACHE_MSG_MEMORY_INIT_FAILED, + IO_CACHE_MSG_NO_CACHE_SIZE_OPT, IO_CACHE_MSG_NOT_RECONFIG_CACHE_SIZE, + IO_CACHE_MSG_CREATE_MEM_POOL_FAILED, + IO_CACHE_MSG_ALLOC_MEM_POOL_FAILED, IO_CACHE_MSG_NULL_PAGE_WAIT, + IO_CACHE_MSG_FRAME_NULL, IO_CACHE_MSG_PAGE_FAULT, + IO_CACHE_MSG_SERVE_READ_REQUEST, IO_CACHE_MSG_LOCAL_NULL, + IO_CACHE_MSG_DEFAULTING_TO_OLD); + +#define IO_CACHE_MSG_NO_MEMORY_STR "out of memory" +#define IO_CACHE_MSG_ENFORCEMENT_FAILED_STR "inode context is NULL" +#define IO_CACHE_MSG_SET_FD_FAILED_STR "failed to set fd ctx" +#define IO_CACHE_MSG_TABLE_NULL_STR "table is NULL" +#define IO_CACHE_MSG_MEMORY_INIT_FAILED_STR "Memory accounting init failed" +#define IO_CACHE_MSG_NO_CACHE_SIZE_OPT_STR "could not get cache-size option" +#define IO_CACHE_MSG_INVALID_ARGUMENT_STR \ + "file size is greater than the max size" +#define IO_CACHE_MSG_NOT_RECONFIG_CACHE_SIZE_STR "Not reconfiguring cache-size" +#define IO_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED_STR \ + "FATAL: io-cache not configured with exactly one child" +#define IO_CACHE_MSG_VOL_MISCONFIGURED_STR "dangling volume. check volfile" +#define IO_CACHE_MSG_CREATE_MEM_POOL_FAILED_STR \ + "failed to create local_t's memory pool" +#define IO_CACHE_MSG_ALLOC_MEM_POOL_FAILED_STR "Unable to allocate mem_pool" +#define IO_CACHE_MSG_STR_COVERSION_FAILED_STR \ + "asprintf failed while converting prt to str" +#define IO_CACHE_MSG_INODE_NULL_STR "ioc_inode is NULL" +#define IO_CACHE_MSG_PAGE_WAIT_VALIDATE_STR \ + "cache validate called without any page waiting to be validated" +#define IO_CACHE_MSG_NULL_PAGE_WAIT_STR "asked to wait on a NULL page" +#define IO_CACHE_MSG_WASTED_COPY_STR "wasted copy" +#define IO_CACHE_MSG_FRAME_NULL_STR "frame>root>rsp_refs is null" +#define IO_CACHE_MSG_PAGE_FAULT_STR "page fault on a NULL frame" +#define IO_CACHE_MSG_SERVE_READ_REQUEST_STR \ + "NULL page has been provided to serve read request" +#define IO_CACHE_MSG_LOCAL_NULL_STR "local is NULL" +#define IO_CACHE_MSG_DEFAULTING_TO_OLD_STR \ + "minimum size of file that can be cached is greater than maximum size. " \ + "Hence Defaulting to old value" +#endif /* _IO_CACHE_MESSAGES_H_ */ diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c index 4d5ffe29298..9375d29c17f 100644 --- a/xlators/performance/io-cache/src/io-cache.c +++ b/xlators/performance/io-cache/src/io-cache.c @@ -1,406 +1,358 @@ /* - Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" +#include <math.h> +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> #include "io-cache.h" +#include "ioc-mem-types.h" +#include <glusterfs/statedump.h> #include <assert.h> #include <sys/time.h> +#include "io-cache-messages.h" +int ioc_log2_page_size; -static uint32_t -ioc_get_priority (ioc_table_t *table, - const char *path); +uint32_t +ioc_get_priority(ioc_table_t *table, const char *path); + +struct volume_options options[]; static uint32_t -ioc_get_priority (ioc_table_t *table, - const char *path); +ioc_hashfn(void *data, int len) +{ + off_t offset; + + offset = *(off_t *)data; -static inline ioc_inode_t * + return (offset >> ioc_log2_page_size); +} + +/* TODO: This function is not used, uncomment when we find a + usage for this function. + +static ioc_inode_t * ioc_inode_reupdate (ioc_inode_t *ioc_inode) { - ioc_table_t *table = ioc_inode->table; + ioc_table_t *table = NULL; + + table = ioc_inode->table; - list_add_tail (&ioc_inode->inode_lru, - &table->inode_lru[ioc_inode->weight]); - - return ioc_inode; + list_add_tail (&ioc_inode->inode_lru, + &table->inode_lru[ioc_inode->weight]); + + return ioc_inode; } -static inline ioc_inode_t * -ioc_get_inode (dict_t *dict, - char *name) + +static ioc_inode_t * +ioc_get_inode (dict_t *dict, char *name) { - ioc_inode_t *ioc_inode = NULL; - data_t *ioc_inode_data = dict_get (dict, name); - ioc_table_t *table = NULL; - - if (ioc_inode_data) { - ioc_inode = data_to_ptr (ioc_inode_data); - table = ioc_inode->table; - - ioc_table_lock (table); - { - if (list_empty (&ioc_inode->inode_lru)) { - ioc_inode = ioc_inode_reupdate (ioc_inode); - } - } - ioc_table_unlock (table); - } - - return ioc_inode; + ioc_inode_t *ioc_inode = NULL; + data_t *ioc_inode_data = NULL; + ioc_table_t *table = NULL; + + ioc_inode_data = dict_get (dict, name); + if (ioc_inode_data) { + ioc_inode = data_to_ptr (ioc_inode_data); + table = ioc_inode->table; + + ioc_table_lock (table); + { + if (list_empty (&ioc_inode->inode_lru)) { + ioc_inode = ioc_inode_reupdate (ioc_inode); + } + } + ioc_table_unlock (table); + } + + return ioc_inode; } +*/ -int32_t -ioc_inode_need_revalidate (ioc_inode_t *ioc_inode) +int +ioc_update_pages(call_frame_t *frame, ioc_inode_t *ioc_inode, + struct iovec *vector, int32_t count, int op_ret, off_t offset) { - int8_t need_revalidate = 0; - struct timeval tv = {0,}; - int32_t ret = -1; - ioc_table_t *table = ioc_inode->table; + size_t size = 0; + off_t rounded_offset = 0, rounded_end = 0, trav_offset = 0, + write_offset = 0; + off_t page_offset = 0, page_end = 0; + ioc_page_t *trav = NULL; + + size = iov_length(vector, count); + size = min(size, op_ret); + + rounded_offset = gf_floor(offset, ioc_inode->table->page_size); + rounded_end = gf_roof(offset + size, ioc_inode->table->page_size); + + trav_offset = rounded_offset; + ioc_inode_lock(ioc_inode); + { + while (trav_offset < rounded_end) { + trav = __ioc_page_get(ioc_inode, trav_offset); + if (trav && trav->ready) { + if (trav_offset == rounded_offset) + page_offset = offset - rounded_offset; + else + page_offset = 0; + + if ((trav_offset + ioc_inode->table->page_size) >= + rounded_end) { + page_end = trav->size - (rounded_end - (offset + size)); + } else { + page_end = trav->size; + } + + iov_range_copy(trav->vector, trav->count, page_offset, vector, + count, write_offset, page_end - page_offset); + } else if (trav) { + if (!trav->waitq) + ioc_inode->table->cache_used -= __ioc_page_destroy(trav); + } + + if (trav_offset == rounded_offset) + write_offset += (ioc_inode->table->page_size - + (offset - rounded_offset)); + else + write_offset += ioc_inode->table->page_size; + + trav_offset += ioc_inode->table->page_size; + } + } + ioc_inode_unlock(ioc_inode); + + return 0; +} - ret = gettimeofday (&tv, NULL); +static gf_boolean_t +ioc_inode_need_revalidate(ioc_inode_t *ioc_inode) +{ + ioc_table_t *table = NULL; - if (time_elapsed (&tv, &ioc_inode->tv) >= table->cache_timeout) - need_revalidate = 1; + GF_ASSERT(ioc_inode); + table = ioc_inode->table; + GF_ASSERT(table); - return need_revalidate; + return (gf_time() - ioc_inode->cache.last_revalidate >= + table->cache_timeout); } /* * __ioc_inode_flush - flush all the cached pages of the given inode * - * @ioc_inode: + * @ioc_inode: * * assumes lock is held */ -int32_t -__ioc_inode_flush (ioc_inode_t *ioc_inode) +int64_t +__ioc_inode_flush(ioc_inode_t *ioc_inode) { - ioc_page_t *curr = NULL, *next = NULL; - int32_t destroy_size = 0; - int32_t ret = 0; - - list_for_each_entry_safe (curr, next, &ioc_inode->pages, pages) { - ret = ioc_page_destroy (curr); - - if (ret != -1) - destroy_size += ret; - } - - return destroy_size; + ioc_page_t *curr = NULL, *next = NULL; + int64_t destroy_size = 0; + int64_t ret = 0; + + list_for_each_entry_safe(curr, next, &ioc_inode->cache.page_lru, page_lru) + { + ret = __ioc_page_destroy(curr); + + if (ret != -1) + destroy_size += ret; + } + + return destroy_size; } void -ioc_inode_flush (ioc_inode_t *ioc_inode) +ioc_inode_flush(ioc_inode_t *ioc_inode) { - int32_t destroy_size = 0; - - ioc_inode_lock (ioc_inode); - { - destroy_size = __ioc_inode_flush (ioc_inode); - } - ioc_inode_unlock (ioc_inode); - - if (destroy_size) { - ioc_table_lock (ioc_inode->table); - { - ioc_inode->table->cache_used -= destroy_size; - } - ioc_table_unlock (ioc_inode->table); - } - - return; + int64_t destroy_size = 0; + + ioc_inode_lock(ioc_inode); + { + destroy_size = __ioc_inode_flush(ioc_inode); + } + ioc_inode_unlock(ioc_inode); + + if (destroy_size) { + ioc_table_lock(ioc_inode->table); + { + ioc_inode->table->cache_used -= destroy_size; + } + ioc_table_unlock(ioc_inode->table); + } + + return; } -/* - * ioc_utimens_cbk - - * - * @frame: - * @cookie: - * @this: - * @op_ret: - * @op_errno: - * @stbuf: - * - */ int32_t -ioc_utimens_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct stat *stbuf) +ioc_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preop, + struct iatt *postop, dict_t *xdata) { - STACK_UNWIND (frame, op_ret, op_errno, stbuf); - return 0; + STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, preop, postop, xdata); + return 0; } -/* - * ioc_utimens - - * - * @frame: - * @this: - * @loc: - * @tv: - * - */ int32_t -ioc_utimens (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - struct timespec *tv) +ioc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, + int32_t valid, dict_t *xdata) { - uint64_t ioc_inode = 0; - inode_ctx_get (loc->inode, this, &ioc_inode); + uint64_t ioc_inode = 0; + + inode_ctx_get(loc->inode, this, &ioc_inode); - if (ioc_inode) - ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + if (ioc_inode && + ((valid & GF_SET_ATTR_ATIME) || (valid & GF_SET_ATTR_MTIME))) + ioc_inode_flush((ioc_inode_t *)(long)ioc_inode); - STACK_WIND (frame, ioc_utimens_cbk, - FIRST_CHILD (this), - FIRST_CHILD (this)->fops->utimens, - loc, tv); - return 0; + STACK_WIND(frame, ioc_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata); + + return 0; } int32_t -ioc_lookup_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - inode_t *inode, - struct stat *stbuf, - dict_t *dict) +ioc_inode_update(xlator_t *this, inode_t *inode, char *path, struct iatt *iabuf) { - ioc_inode_t *ioc_inode = NULL; - ioc_local_t *local = frame->local; - ioc_table_t *table = this->private; - ioc_page_t *page = NULL; - data_t *page_data = NULL; - data_t *content_data = NULL; - char *src = NULL; - char *dst = NULL; - char need_unref = 0; - uint8_t cache_still_valid = 0; - uint32_t weight = 0; - uint64_t tmp_ioc_inode = 0; - char *buf = NULL; - char *tmp = NULL; - int i; - - if (op_ret != 0) - goto out; - - inode_ctx_get (inode, this, &tmp_ioc_inode); - ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; - if (ioc_inode) { - cache_still_valid = ioc_cache_still_valid (ioc_inode, - stbuf); - - if (!cache_still_valid) { - ioc_inode_flush (ioc_inode); - } - /* update the time-stamp of revalidation */ - ioc_inode_lock (ioc_inode); - { - gettimeofday (&ioc_inode->tv, NULL); - } - ioc_inode_unlock (ioc_inode); - - ioc_table_lock (ioc_inode->table); - { - list_move_tail (&ioc_inode->inode_lru, - &table->inode_lru[ioc_inode->weight]); - } - ioc_table_unlock (ioc_inode->table); - } - - if (local && stbuf->st_size && - local->need_xattr >= stbuf->st_size) { - if (!ioc_inode) { - weight = ioc_get_priority (table, - local->file_loc.path); - ioc_inode = ioc_inode_update (table, - inode, weight); - inode_ctx_put (inode, this, - (uint64_t)(long)ioc_inode); - } - - ioc_inode_lock (ioc_inode); - { - content_data = dict_get (dict, "glusterfs.content"); - page = ioc_page_get (ioc_inode, 0); - - if (content_data) { - if (page) { - dict_unref (page->ref); - free (page->vector); - page->vector = NULL; - - ioc_table_lock (table); - { - table->cache_used -= - page->size; - } - ioc_table_unlock (table); - } else { - page = ioc_page_create (ioc_inode, 0); - } - - dst = CALLOC (1, stbuf->st_size); - page->ref = dict_ref (get_new_dict ()); - page_data = data_from_dynptr (dst, - stbuf->st_size); - dict_set (page->ref, NULL, page_data); - - src = data_to_ptr (content_data); - memcpy (dst, src, stbuf->st_size); - - page->vector = CALLOC (1, - sizeof (*page->vector)); - page->vector->iov_base = dst; - page->vector->iov_len = stbuf->st_size; - page->count = 1; - - page->waitq = NULL; - page->size = stbuf->st_size; - page->ready = 1; - - ioc_table_lock (table); - { - table->cache_used += page->size; - } - ioc_table_unlock (table); - - } else { - if (!(page && page->ready)) { - gf_log (this->name, GF_LOG_DEBUG, - "page not present"); - - ioc_inode_unlock (ioc_inode); - STACK_WIND (frame, - ioc_lookup_cbk, - FIRST_CHILD (this), - FIRST_CHILD (this)->fops->lookup, - &local->file_loc, - local->xattr_req); - return 0; - } - buf = CALLOC (1, stbuf->st_size); - tmp = buf; - - for (i = 0; i < page->count; i++) { - memcpy (tmp, page->vector[i].iov_base, - page->vector[i].iov_len); - tmp += page->vector[i].iov_len; - } - - gf_log (this->name, GF_LOG_DEBUG, - "serving file %s from cache", - local->file_loc.path); - - if (!dict) { - need_unref = 1; - dict = dict_ref ( - get_new_dict ()); - } - dict_set (dict, "glusterfs.content", - data_from_dynptr (buf, - stbuf->st_size)); - } - - ioc_inode->mtime = stbuf->st_mtime; - gettimeofday (&ioc_inode->tv, NULL); - } - ioc_inode_unlock (ioc_inode); - - if (content_data && - ioc_need_prune (ioc_inode->table)) { - ioc_prune (ioc_inode->table); - } - } - - out: - STACK_UNWIND (frame, op_ret, op_errno, inode, stbuf, dict); - - if (need_unref) { - dict_unref (dict); - } - - return 0; + ioc_table_t *table = NULL; + uint64_t tmp_ioc_inode = 0; + ioc_inode_t *ioc_inode = NULL; + uint32_t weight = 0xffffffff; + gf_boolean_t cache_still_valid = _gf_false; + + if (!this || !inode) + goto out; + + table = this->private; + + LOCK(&inode->lock); + { + (void)__inode_ctx_get(inode, this, &tmp_ioc_inode); + ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; + + if (!ioc_inode) { + weight = ioc_get_priority(table, path); + + ioc_inode = ioc_inode_create(table, inode, weight); + + (void)__inode_ctx_put(inode, this, (uint64_t)(long)ioc_inode); + } + } + UNLOCK(&inode->lock); + + ioc_inode_lock(ioc_inode); + { + if (ioc_inode->cache.mtime == 0) { + ioc_inode->cache.mtime = iabuf->ia_mtime; + ioc_inode->cache.mtime_nsec = iabuf->ia_mtime_nsec; + } + + ioc_inode->ia_size = iabuf->ia_size; + } + ioc_inode_unlock(ioc_inode); + + cache_still_valid = ioc_cache_still_valid(ioc_inode, iabuf); + + if (!cache_still_valid) { + ioc_inode_flush(ioc_inode); + } + + ioc_table_lock(ioc_inode->table); + { + list_move_tail(&ioc_inode->inode_lru, + &table->inode_lru[ioc_inode->weight]); + } + ioc_table_unlock(ioc_inode->table); + +out: + return 0; } -int32_t -ioc_lookup (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - dict_t *xattr_req) +int32_t +ioc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *stbuf, dict_t *xdata, struct iatt *postparent) { - uint64_t content_limit = 0; - - if (GF_FILE_CONTENT_REQUESTED(xattr_req, &content_limit)) { - uint64_t tmp_ioc_inode = 0; - ioc_inode_t *ioc_inode = NULL; - ioc_page_t *page = NULL; - ioc_local_t *local = CALLOC (1, sizeof (*local)); - - local->need_xattr = content_limit; - local->file_loc.path = loc->path; - local->file_loc.inode = loc->inode; - frame->local = local; - - inode_ctx_get (loc->inode, this, &tmp_ioc_inode); - ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; - - if (ioc_inode) { - ioc_inode_lock (ioc_inode); - { - page = ioc_page_get (ioc_inode, 0); - if ((content_limit <= - ioc_inode->table->page_size) && - page && page->ready) { - local->need_xattr = -1; - } - } - ioc_inode_unlock (ioc_inode); - } - } - - STACK_WIND (frame, - ioc_lookup_cbk, - FIRST_CHILD (this), - FIRST_CHILD (this)->fops->lookup, - loc, - xattr_req); - return 0; + ioc_local_t *local = NULL; + + if (op_ret != 0) + goto out; + + local = frame->local; + if (local == NULL) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + if (!this || !this->private) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + ioc_inode_update(this, inode, (char *)local->file_loc.path, stbuf); + +out: + if (frame->local != NULL) { + local = frame->local; + loc_wipe(&local->file_loc); + } + + STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, xdata, + postparent); + return 0; +} + +int32_t +ioc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + ioc_local_t *local = NULL; + int32_t op_errno = -1, ret = -1; + + local = mem_get0(this->local_pool); + if (local == NULL) { + op_errno = ENOMEM; + gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL); + goto unwind; + } + + ret = loc_copy(&local->file_loc, loc); + if (ret != 0) { + op_errno = ENOMEM; + gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL); + goto unwind; + } + + frame->local = local; + + STACK_WIND(frame, ioc_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + + return 0; + +unwind: + if (local != NULL) { + loc_wipe(&local->file_loc); + mem_put(local); + } + + STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); + + return 0; } /* - * ioc_forget - + * ioc_forget - * * @frame: * @this: @@ -408,22 +360,33 @@ ioc_lookup (call_frame_t *frame, * */ int32_t -ioc_forget (xlator_t *this, - inode_t *inode) +ioc_forget(xlator_t *this, inode_t *inode) { - uint64_t ioc_inode = 0; + uint64_t ioc_inode = 0; + + inode_ctx_get(inode, this, &ioc_inode); - inode_ctx_get (inode, this, &ioc_inode); + if (ioc_inode) + ioc_inode_destroy((ioc_inode_t *)(long)ioc_inode); - if (ioc_inode) - ioc_inode_destroy ((ioc_inode_t *)(long)ioc_inode); - - return 0; + return 0; } +static int32_t +ioc_invalidate(xlator_t *this, inode_t *inode) +{ + uint64_t ioc_inode = 0; + + inode_ctx_get(inode, this, &ioc_inode); -/* - * ioc_cache_validate_cbk - + if (ioc_inode) + ioc_inode_flush((ioc_inode_t *)(uintptr_t)ioc_inode); + + return 0; +} + +/* + * ioc_cache_validate_cbk - * * @frame: * @cookie: @@ -434,94 +397,103 @@ ioc_forget (xlator_t *this, * */ int32_t -ioc_cache_validate_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct stat *stbuf) +ioc_cache_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *stbuf, + dict_t *xdata) { - ioc_local_t *local = frame->local; - ioc_inode_t *ioc_inode = NULL; - size_t destroy_size = 0; - struct stat *local_stbuf = stbuf; - - ioc_inode = local->inode; - - if ((op_ret == -1) || - ((op_ret >= 0) && !ioc_cache_still_valid(ioc_inode, stbuf))) { - gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG, - "cache for inode(%p) is invalid. flushing all pages", - ioc_inode); - /* NOTE: only pages with no waiting frames are flushed by - * ioc_inode_flush. page_fault will be generated for all - * the pages which have waiting frames by ioc_inode_wakeup() - */ - ioc_inode_lock (ioc_inode); - { - destroy_size = __ioc_inode_flush (ioc_inode); - if (op_ret >= 0) - ioc_inode->mtime = stbuf->st_mtime; - } - ioc_inode_unlock (ioc_inode); - local_stbuf = NULL; - } - - if (destroy_size) { - ioc_table_lock (ioc_inode->table); - { - ioc_inode->table->cache_used -= destroy_size; - } - ioc_table_unlock (ioc_inode->table); - } - - if (op_ret < 0) - local_stbuf = NULL; - - ioc_inode_lock (ioc_inode); - { - gettimeofday (&ioc_inode->tv, NULL); - } - ioc_inode_unlock (ioc_inode); - - ioc_inode_wakeup (frame, ioc_inode, local_stbuf); - - /* any page-fault initiated by ioc_inode_wakeup() will have its own - * fd_ref on fd, safe to unref validate frame's private copy - */ - fd_unref (local->fd); - - STACK_DESTROY (frame->root); - - return 0; + ioc_local_t *local = NULL; + ioc_inode_t *ioc_inode = NULL; + size_t destroy_size = 0; + struct iatt *local_stbuf = NULL; + + local = frame->local; + ioc_inode = local->inode; + local_stbuf = stbuf; + + if ((op_ret == -1) || + ((op_ret >= 0) && !ioc_cache_still_valid(ioc_inode, stbuf))) { + gf_msg_debug(ioc_inode->table->xl->name, 0, + "cache for inode(%p) is invalid. flushing all pages", + ioc_inode); + /* NOTE: only pages with no waiting frames are flushed by + * ioc_inode_flush. page_fault will be generated for all + * the pages which have waiting frames by ioc_inode_wakeup() + */ + ioc_inode_lock(ioc_inode); + { + destroy_size = __ioc_inode_flush(ioc_inode); + if (op_ret >= 0) { + ioc_inode->cache.mtime = stbuf->ia_mtime; + ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec; + } + } + ioc_inode_unlock(ioc_inode); + local_stbuf = NULL; + } + + if (destroy_size) { + ioc_table_lock(ioc_inode->table); + { + ioc_inode->table->cache_used -= destroy_size; + } + ioc_table_unlock(ioc_inode->table); + } + + if (op_ret < 0) + local_stbuf = NULL; + + ioc_inode_lock(ioc_inode); + { + ioc_inode->cache.last_revalidate = gf_time(); + } + ioc_inode_unlock(ioc_inode); + + ioc_inode_wakeup(frame, ioc_inode, local_stbuf); + + /* any page-fault initiated by ioc_inode_wakeup() will have its own + * fd_ref on fd, safe to unref validate frame's private copy + */ + fd_unref(local->fd); + dict_unref(local->xattr_req); + + STACK_DESTROY(frame->root); + + return 0; } -static int32_t -ioc_wait_on_inode (ioc_inode_t *ioc_inode, - ioc_page_t *page) +int32_t +ioc_wait_on_inode(ioc_inode_t *ioc_inode, ioc_page_t *page) { - ioc_waitq_t *waiter = NULL, *trav = NULL; - uint32_t page_found = 0; - - trav = ioc_inode->waitq; - - while (trav) { - if (trav->data == page) { - page_found = 1; - break; - } - trav = trav->next; - } - - if (!page_found) { - waiter = CALLOC (1, sizeof (ioc_waitq_t)); - ERR_ABORT (waiter); - waiter->data = page; - waiter->next = ioc_inode->waitq; - ioc_inode->waitq = waiter; - } - - return 0; + ioc_waitq_t *waiter = NULL, *trav = NULL; + uint32_t page_found = 0; + int32_t ret = 0; + + trav = ioc_inode->waitq; + + while (trav) { + if (trav->data == page) { + page_found = 1; + break; + } + trav = trav->next; + } + + if (!page_found) { + waiter = GF_CALLOC(1, sizeof(ioc_waitq_t), gf_ioc_mt_ioc_waitq_t); + if (waiter == NULL) { + gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, ENOMEM, + IO_CACHE_MSG_NO_MEMORY, NULL); + ret = -ENOMEM; + goto out; + } + + waiter->data = page; + waiter->next = ioc_inode->waitq; + ioc_inode->waitq = waiter; + } + +out: + return ret; } /* @@ -532,61 +504,81 @@ ioc_wait_on_inode (ioc_inode_t *ioc_inode, * @fd: * */ -static int32_t -ioc_cache_validate (call_frame_t *frame, - ioc_inode_t *ioc_inode, - fd_t *fd, - ioc_page_t *page) +int32_t +ioc_cache_validate(call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, + ioc_page_t *page) { - call_frame_t *validate_frame = NULL; - ioc_local_t *validate_local = NULL; - - validate_local = CALLOC (1, sizeof (ioc_local_t)); - ERR_ABORT (validate_local); - validate_frame = copy_frame (frame); - validate_local->fd = fd_ref (fd); - validate_local->inode = ioc_inode; - validate_frame->local = validate_local; - - STACK_WIND (validate_frame, - ioc_cache_validate_cbk, - FIRST_CHILD (frame->this), - FIRST_CHILD (frame->this)->fops->fstat, - fd); - - return 0; + call_frame_t *validate_frame = NULL; + ioc_local_t *validate_local = NULL; + ioc_local_t *local = NULL; + int32_t ret = 0; + + local = frame->local; + validate_local = mem_get0(THIS->local_pool); + if (validate_local == NULL) { + ret = -1; + local->op_ret = -1; + local->op_errno = ENOMEM; + gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, 0, + IO_CACHE_MSG_NO_MEMORY, NULL); + goto out; + } + + validate_frame = copy_frame(frame); + if (validate_frame == NULL) { + ret = -1; + local->op_ret = -1; + local->op_errno = ENOMEM; + mem_put(validate_local); + gf_smsg(ioc_inode->table->xl->name, GF_LOG_ERROR, 0, + IO_CACHE_MSG_NO_MEMORY, NULL); + goto out; + } + + validate_local->fd = fd_ref(fd); + validate_local->inode = ioc_inode; + if (local && local->xattr_req) + validate_local->xattr_req = dict_ref(local->xattr_req); + validate_frame->local = validate_local; + + STACK_WIND(validate_frame, ioc_cache_validate_cbk, FIRST_CHILD(frame->this), + FIRST_CHILD(frame->this)->fops->fstat, fd, + validate_local->xattr_req); + +out: + return ret; } -static inline uint32_t -is_match (const char *path, - const char *pattern) +static uint32_t +is_match(const char *path, const char *pattern) { - char *pathname = strdup (path); - int32_t ret = 0; - - ret = fnmatch (pattern, path, FNM_NOESCAPE); - - free (pathname); - - return (ret == 0); + int32_t ret = 0; + + ret = fnmatch(pattern, path, FNM_NOESCAPE); + + return (ret == 0); } -static uint32_t -ioc_get_priority (ioc_table_t *table, - const char *path) +uint32_t +ioc_get_priority(ioc_table_t *table, const char *path) { - uint32_t priority = 0; - struct ioc_priority *curr = NULL; - - list_for_each_entry (curr, &table->priority_list, list) { - if (is_match (path, curr->pattern)) - priority = curr->priority; - } - - return priority; + uint32_t priority = 1; + struct ioc_priority *curr = NULL; + + if (list_empty(&table->priority_list) || !path) + return priority; + + priority = 0; + list_for_each_entry(curr, &table->priority_list, list) + { + if (is_match(path, curr->pattern)) + priority = curr->priority; + } + + return priority; } -/* +/* * ioc_open_cbk - open callback for io cache * * @frame: call frame @@ -598,71 +590,68 @@ ioc_get_priority (ioc_table_t *table, * */ int32_t -ioc_open_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - fd_t *fd) +ioc_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, fd_t *fd, dict_t *xdata) { - uint64_t tmp_ioc_inode = 0; - ioc_local_t *local = frame->local; - ioc_table_t *table = this->private; - ioc_inode_t *ioc_inode = NULL; - inode_t *inode = local->file_loc.inode; - uint32_t weight = 0; - const char *path = local->file_loc.path; - - if (op_ret != -1) { - /* look for ioc_inode corresponding to this fd */ - LOCK (&fd->inode->lock); - { - __inode_ctx_get (fd->inode, this, &tmp_ioc_inode); - ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; - - if (!ioc_inode) { - /* this is the first time someone is opening this - file, assign weight - */ - weight = ioc_get_priority (table, path); - - ioc_inode = ioc_inode_update (table, inode, weight); - __inode_ctx_put (fd->inode, this, - (uint64_t)(long)ioc_inode); - } else { - ioc_table_lock (ioc_inode->table); - { - list_move_tail (&ioc_inode->inode_lru, - &table->inode_lru[ioc_inode->weight]); - } - ioc_table_unlock (ioc_inode->table); - } - - } - UNLOCK (&fd->inode->lock); - - /* If mandatory locking has been enabled on this file, - we disable caching on it */ - if (((inode->st_mode & S_ISGID) && - !(inode->st_mode & S_IXGRP))) { - fd_ctx_set (fd, this, 1); - } - - /* If O_DIRECT open, we disable caching on it */ - if ((local->flags & O_DIRECT)){ - /* O_DIRECT is only for one fd, not the inode - * as a whole - */ - fd_ctx_set (fd, this, 1); - } - } - - FREE (local); - frame->local = NULL; - - STACK_UNWIND (frame, op_ret, op_errno, fd); - - return 0; + uint64_t tmp_ioc_inode = 0; + ioc_local_t *local = NULL; + ioc_table_t *table = NULL; + ioc_inode_t *ioc_inode = NULL; + + local = frame->local; + if (!this || !this->private) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + table = this->private; + + if (op_ret != -1) { + inode_ctx_get(fd->inode, this, &tmp_ioc_inode); + ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; + + // TODO: see why inode context is NULL and handle it. + if (!ioc_inode) { + gf_smsg(this->name, GF_LOG_ERROR, EINVAL, + IO_CACHE_MSG_ENFORCEMENT_FAILED, "inode-gfid=%s", + uuid_utoa(fd->inode->gfid), NULL); + goto out; + } + + ioc_table_lock(ioc_inode->table); + { + list_move_tail(&ioc_inode->inode_lru, + &table->inode_lru[ioc_inode->weight]); + } + ioc_table_unlock(ioc_inode->table); + + ioc_inode_lock(ioc_inode); + { + if ((table->min_file_size > ioc_inode->ia_size) || + ((table->max_file_size > 0) && + (table->max_file_size < ioc_inode->ia_size))) { + fd_ctx_set(fd, this, 1); + } + } + ioc_inode_unlock(ioc_inode); + + /* If O_DIRECT open, we disable caching on it */ + if ((local->flags & O_DIRECT)) { + /* O_DIRECT is only for one fd, not the inode + * as a whole + */ + fd_ctx_set(fd, this, 1); + } + } + +out: + mem_put(local); + frame->local = NULL; + + STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata); + + return 0; } /* @@ -679,54 +668,173 @@ ioc_open_cbk (call_frame_t *frame, * */ int32_t -ioc_create_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - fd_t *fd, - inode_t *inode, - struct stat *buf) +ioc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - ioc_local_t *local = frame->local; - ioc_table_t *table = this->private; - ioc_inode_t *ioc_inode = NULL; - uint32_t weight = 0; - const char *path = local->file_loc.path; - - if (op_ret != -1) { - { - /* assign weight */ - weight = ioc_get_priority (table, path); - - ioc_inode = ioc_inode_update (table, inode, weight); - - inode_ctx_put (fd->inode, this, - (uint64_t)(long)ioc_inode); - } - /* If mandatory locking has been enabled on this file, - we disable caching on it */ - if ((inode->st_mode & S_ISGID) && - !(inode->st_mode & S_IXGRP)) { - fd_ctx_set (fd, this, 1); - } - - /* If O_DIRECT open, we disable caching on it */ - if (local->flags & O_DIRECT){ - /* O_DIRECT is only for one fd, not the inode - * as a whole - */ - fd_ctx_set (fd, this, 1); - } - - } - - frame->local = NULL; - FREE (local); - - STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf); - - return 0; + ioc_local_t *local = NULL; + ioc_table_t *table = NULL; + ioc_inode_t *ioc_inode = NULL; + uint32_t weight = 0xffffffff; + const char *path = NULL; + int ret = -1; + + local = frame->local; + if (!this || !this->private) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + table = this->private; + path = local->file_loc.path; + + if (op_ret != -1) { + /* assign weight */ + weight = ioc_get_priority(table, path); + + ioc_inode = ioc_inode_create(table, inode, weight); + + ioc_inode_lock(ioc_inode); + { + ioc_inode->cache.mtime = buf->ia_mtime; + ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec; + ioc_inode->ia_size = buf->ia_size; + + if ((table->min_file_size > ioc_inode->ia_size) || + ((table->max_file_size > 0) && + (table->max_file_size < ioc_inode->ia_size))) { + ret = fd_ctx_set(fd, this, 1); + if (ret) + gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, + IO_CACHE_MSG_SET_FD_FAILED, "path=%s", + local->file_loc.path, NULL); + } + } + ioc_inode_unlock(ioc_inode); + + inode_ctx_put(fd->inode, this, (uint64_t)(long)ioc_inode); + + /* If O_DIRECT open, we disable caching on it */ + if (local->flags & O_DIRECT) { + /* + * O_DIRECT is only for one fd, not the inode + * as a whole */ + ret = fd_ctx_set(fd, this, 1); + if (ret) + gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, + IO_CACHE_MSG_SET_FD_FAILED, "path=%s", + local->file_loc.path, NULL); + } + + /* if weight == 0, we disable caching on it */ + if (!weight) { + /* we allow a pattern-matched cache disable this way */ + ret = fd_ctx_set(fd, this, 1); + if (ret) + gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, + IO_CACHE_MSG_SET_FD_FAILED, "path=%s", + local->file_loc.path, NULL); + } + } + +out: + frame->local = NULL; + mem_put(local); + + STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent, xdata); + + return 0; +} + +int32_t +ioc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode, struct iatt *buf, + struct iatt *preparent, struct iatt *postparent, dict_t *xdata) +{ + ioc_local_t *local = NULL; + ioc_table_t *table = NULL; + ioc_inode_t *ioc_inode = NULL; + uint32_t weight = 0xffffffff; + const char *path = NULL; + + local = frame->local; + if (!this || !this->private) { + op_ret = -1; + op_errno = EINVAL; + goto out; + } + + table = this->private; + path = local->file_loc.path; + + if (op_ret != -1) { + /* assign weight */ + weight = ioc_get_priority(table, path); + + ioc_inode = ioc_inode_create(table, inode, weight); + + ioc_inode_lock(ioc_inode); + { + ioc_inode->cache.mtime = buf->ia_mtime; + ioc_inode->cache.mtime_nsec = buf->ia_mtime_nsec; + ioc_inode->ia_size = buf->ia_size; + } + ioc_inode_unlock(ioc_inode); + + inode_ctx_put(inode, this, (uint64_t)(long)ioc_inode); + } + +out: + frame->local = NULL; + + loc_wipe(&local->file_loc); + mem_put(local); + + STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, buf, preparent, + postparent, xdata); + return 0; +} + +int +ioc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + dev_t rdev, mode_t umask, dict_t *xdata) +{ + ioc_local_t *local = NULL; + int32_t op_errno = -1, ret = -1; + + local = mem_get0(this->local_pool); + if (local == NULL) { + op_errno = ENOMEM; + gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL); + goto unwind; + } + + ret = loc_copy(&local->file_loc, loc); + if (ret != 0) { + op_errno = ENOMEM; + gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_NO_MEMORY, NULL); + goto unwind; + } + + frame->local = local; + + STACK_WIND(frame, ioc_mknod_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata); + return 0; + +unwind: + if (local != NULL) { + loc_wipe(&local->file_loc); + mem_put(local); + } + + STACK_UNWIND_STRICT(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, + NULL); + + return 0; } /* @@ -738,36 +846,33 @@ ioc_create_cbk (call_frame_t *frame, * */ int32_t -ioc_open (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - int32_t flags, - fd_t *fd) +ioc_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + fd_t *fd, dict_t *xdata) { - - ioc_local_t *local = CALLOC (1, sizeof (ioc_local_t)); - ERR_ABORT (local); - - local->flags = flags; - local->file_loc.path = loc->path; - local->file_loc.inode = loc->inode; - - frame->local = local; - - STACK_WIND (frame, - ioc_open_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, - loc, - flags, - fd); - - return 0; + ioc_local_t *local = NULL; + + local = mem_get0(this->local_pool); + if (local == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL); + STACK_UNWIND_STRICT(open, frame, -1, ENOMEM, NULL, NULL); + return 0; + } + + local->flags = flags; + local->file_loc.path = loc->path; + local->file_loc.inode = loc->inode; + + frame->local = local; + + STACK_WIND(frame, ioc_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + + return 0; } /* * ioc_create - create fop for io cache - * + * * @frame: * @this: * @pathname: @@ -776,213 +881,218 @@ ioc_open (call_frame_t *frame, * */ int32_t -ioc_create (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - int32_t flags, - mode_t mode, - fd_t *fd) +ioc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { - ioc_local_t *local = CALLOC (1, sizeof (ioc_local_t)); - ERR_ABORT (local); - - local->flags = flags; - local->file_loc.path = loc->path; - frame->local = local; - - STACK_WIND (frame, ioc_create_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->create, - loc, flags, mode, fd); - return 0; -} + ioc_local_t *local = NULL; + + local = mem_get0(this->local_pool); + if (local == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL); + STACK_UNWIND_STRICT(create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, + NULL, NULL); + return 0; + } + local->flags = flags; + local->file_loc.path = loc->path; + frame->local = local; + STACK_WIND(frame, ioc_create_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd, + xdata); + return 0; +} /* * ioc_release - release fop for io cache - * + * * @frame: * @this: * @fd: * */ int32_t -ioc_release (xlator_t *this, - fd_t *fd) +ioc_release(xlator_t *this, fd_t *fd) { - return 0; + return 0; } -/* - * ioc_readv_disabled_cbk - * @frame: - * @cookie: - * @this: - * @op_ret: - * @op_errno: - * @vector: - * @count: - * - */ -int32_t -ioc_readv_disabled_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct iovec *vector, - int32_t count, - struct stat *stbuf) -{ - STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf); - return 0; -} - - int32_t -ioc_need_prune (ioc_table_t *table) +ioc_need_prune(ioc_table_t *table) { - int64_t cache_difference = 0; - - ioc_table_lock (table); - { - cache_difference = table->cache_used - table->cache_size; - } - ioc_table_unlock (table); - - if (cache_difference > 0) - return 1; - else - return 0; + int64_t cache_difference = 0; + + ioc_table_lock(table); + { + cache_difference = table->cache_used - table->cache_size; + } + ioc_table_unlock(table); + + if (cache_difference > 0) + return 1; + else + return 0; } /* - * dispatch_requests - - * + * ioc_dispatch_requests - + * * @frame: * @inode: * - * + * */ -static void -dispatch_requests (call_frame_t *frame, - ioc_inode_t *ioc_inode, - fd_t *fd, - off_t offset, - size_t size) +void +ioc_dispatch_requests(call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, + off_t offset, size_t size) { - ioc_local_t *local = frame->local; - ioc_table_t *table = ioc_inode->table; - ioc_page_t *trav = NULL; - ioc_waitq_t *waitq = NULL; - off_t rounded_offset = 0; - off_t rounded_end = 0; - off_t trav_offset = 0; - int32_t fault = 0; - int8_t need_validate = 0; - int8_t might_need_validate = 0; /* if a page exists, do we need - to validate it? */ - - rounded_offset = floor (offset, table->page_size); - rounded_end = roof (offset + size, table->page_size); - trav_offset = rounded_offset; - - /* once a frame does read, it should be waiting on something */ - local->wait_count++; - - /* Requested region can fall in three different pages, - * 1. Ready - region is already in cache, we just have to serve it. - * 2. In-transit - page fault has been generated on this page, we need - * to wait till the page is ready - * 3. Fault - page is not in cache, we have to generate a page fault - */ - - might_need_validate = ioc_inode_need_revalidate (ioc_inode); - - while (trav_offset < rounded_end) { - size_t trav_size = 0; - off_t local_offset = 0; - - ioc_inode_lock (ioc_inode); - //{ - - /* look for requested region in the cache */ - trav = ioc_page_get (ioc_inode, trav_offset); - - local_offset = max (trav_offset, offset); - trav_size = min (((offset+size) - local_offset), - table->page_size); - - if (!trav) { - /* page not in cache, we need to generate page fault */ - trav = ioc_page_create (ioc_inode, trav_offset); - fault = 1; - if (!trav) { - gf_log (frame->this->name, GF_LOG_CRITICAL, - "ioc_page_create returned NULL"); - } - } - - ioc_wait_on_page (trav, frame, local_offset, trav_size); - - if (trav->ready) { - /* page found in cache */ - if (!might_need_validate) { - /* fresh enough */ - gf_log (frame->this->name, GF_LOG_DEBUG, - "cache hit for trav_offset=%"PRId64"" - "/local_offset=%"PRId64"", - trav_offset, local_offset); - waitq = ioc_page_wakeup (trav); - } else { - /* if waitq already exists, fstat revalidate is - already on the way */ - if (!ioc_inode->waitq) { - need_validate = 1; - } - ioc_wait_on_inode (ioc_inode, trav); - } - } - - //} - ioc_inode_unlock (ioc_inode); - - ioc_waitq_return (waitq); - waitq = NULL; - - if (fault) { - fault = 0; - /* new page created, increase the table->cache_used */ - ioc_page_fault (ioc_inode, frame, fd, trav_offset); - } - - if (need_validate) { - need_validate = 0; - gf_log (frame->this->name, GF_LOG_DEBUG, - "sending validate request for " - "inode(%"PRId64") at offset=%"PRId64"", - fd->inode->ino, trav_offset); - ioc_cache_validate (frame, ioc_inode, fd, trav); - } - - trav_offset += table->page_size; - } - - ioc_frame_return (frame); - - if (ioc_need_prune (ioc_inode->table)) { - ioc_prune (ioc_inode->table); - } - - return; + ioc_local_t *local = NULL; + ioc_table_t *table = NULL; + ioc_page_t *trav = NULL; + ioc_waitq_t *waitq = NULL; + off_t rounded_offset = 0; + off_t rounded_end = 0; + off_t trav_offset = 0; + int32_t fault = 0; + size_t trav_size = 0; + off_t local_offset = 0; + int32_t ret = -1; + int8_t need_validate = 0; + int8_t might_need_validate = 0; /* + * if a page exists, do we need + * to validate it? + */ + local = frame->local; + table = ioc_inode->table; + + rounded_offset = gf_floor(offset, table->page_size); + rounded_end = gf_roof(offset + size, table->page_size); + trav_offset = rounded_offset; + + /* once a frame does read, it should be waiting on something */ + local->wait_count++; + + /* Requested region can fall in three different pages, + * 1. Ready - region is already in cache, we just have to serve it. + * 2. In-transit - page fault has been generated on this page, we need + * to wait till the page is ready + * 3. Fault - page is not in cache, we have to generate a page fault + */ + + might_need_validate = ioc_inode_need_revalidate(ioc_inode); + + while (trav_offset < rounded_end) { + ioc_inode_lock(ioc_inode); + { + /* look for requested region in the cache */ + trav = __ioc_page_get(ioc_inode, trav_offset); + + local_offset = max(trav_offset, offset); + trav_size = min(((offset + size) - local_offset), table->page_size); + + if (!trav) { + /* page not in cache, we need to generate page + * fault + */ + trav = __ioc_page_create(ioc_inode, trav_offset); + fault = 1; + if (!trav) { + gf_smsg(frame->this->name, GF_LOG_CRITICAL, ENOMEM, + IO_CACHE_MSG_NO_MEMORY, NULL); + local->op_ret = -1; + local->op_errno = ENOMEM; + ioc_inode_unlock(ioc_inode); + goto out; + } + } + + __ioc_wait_on_page(trav, frame, local_offset, trav_size); + + if (trav->ready) { + /* page found in cache */ + if (!might_need_validate && !ioc_inode->waitq) { + /* fresh enough */ + gf_msg_trace(frame->this->name, 0, + "cache hit for " + "trav_offset=%" PRId64 + "/local_" + "offset=%" PRId64 "", + trav_offset, local_offset); + waitq = __ioc_page_wakeup(trav, trav->op_errno); + } else { + /* if waitq already exists, fstat + * revalidate is + * already on the way + */ + if (!ioc_inode->waitq) { + need_validate = 1; + } + + ret = ioc_wait_on_inode(ioc_inode, trav); + if (ret < 0) { + local->op_ret = -1; + local->op_errno = -ret; + need_validate = 0; + + waitq = __ioc_page_wakeup(trav, trav->op_errno); + ioc_inode_unlock(ioc_inode); + + ioc_waitq_return(waitq); + waitq = NULL; + goto out; + } + } + } + } + ioc_inode_unlock(ioc_inode); + + ioc_waitq_return(waitq); + waitq = NULL; + + if (fault) { + fault = 0; + /* new page created, increase the table->cache_used */ + ioc_page_fault(ioc_inode, frame, fd, trav_offset); + } + + if (need_validate) { + need_validate = 0; + gf_msg_trace(frame->this->name, 0, + "sending validate request for " + "inode(%s) at offset=%" PRId64 "", + uuid_utoa(fd->inode->gfid), trav_offset); + ret = ioc_cache_validate(frame, ioc_inode, fd, trav); + if (ret == -1) { + ioc_inode_lock(ioc_inode); + { + waitq = __ioc_page_wakeup(trav, trav->op_errno); + } + ioc_inode_unlock(ioc_inode); + + ioc_waitq_return(waitq); + waitq = NULL; + goto out; + } + } + + trav_offset += table->page_size; + } + +out: + ioc_frame_return(frame); + + if (ioc_need_prune(ioc_inode->table)) { + ioc_prune(ioc_inode->table); + } + + return; } - /* * ioc_readv - - * + * * @frame: * @this: * @fd: @@ -991,75 +1101,113 @@ dispatch_requests (call_frame_t *frame, * */ int32_t -ioc_readv (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - size_t size, - off_t offset) +ioc_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - uint64_t tmp_ioc_inode = 0; - ioc_inode_t *ioc_inode = NULL; - ioc_local_t *local = NULL; - uint32_t weight = 0; - - inode_ctx_get (fd->inode, this, &tmp_ioc_inode); - ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; - if (!ioc_inode) { - /* caching disabled, go ahead with normal readv */ - STACK_WIND (frame, - ioc_readv_disabled_cbk, - FIRST_CHILD (frame->this), - FIRST_CHILD (frame->this)->fops->readv, - fd, - size, - offset); - return 0; - } - - if (!fd_ctx_get (fd, this, NULL)) { - /* disable caching for this fd, go ahead with normal readv */ - STACK_WIND (frame, - ioc_readv_disabled_cbk, - FIRST_CHILD (frame->this), - FIRST_CHILD (frame->this)->fops->readv, - fd, - size, - offset); - return 0; - } - - local = (ioc_local_t *) CALLOC (1, sizeof (ioc_local_t)); - ERR_ABORT (local); - INIT_LIST_HEAD (&local->fill_list); - - frame->local = local; - local->pending_offset = offset; - local->pending_size = size; - local->offset = offset; - local->size = size; - local->inode = ioc_inode; - - gf_log (this->name, GF_LOG_DEBUG, - "NEW REQ (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET"", - frame, offset, size); - - weight = ioc_inode->weight; - - ioc_table_lock (ioc_inode->table); - { - list_move_tail (&ioc_inode->inode_lru, - &ioc_inode->table->inode_lru[weight]); - } - ioc_table_unlock (ioc_inode->table); - - dispatch_requests (frame, ioc_inode, fd, offset, size); - - return 0; + uint64_t tmp_ioc_inode = 0; + ioc_inode_t *ioc_inode = NULL; + ioc_local_t *local = NULL; + uint32_t weight = 0; + ioc_table_t *table = NULL; + int32_t op_errno = EINVAL; + + if (!this) { + goto out; + } + + inode_ctx_get(fd->inode, this, &tmp_ioc_inode); + ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; + if (!ioc_inode) { + /* caching disabled, go ahead with normal readv */ + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, + xdata); + return 0; + } + + if (flags & O_DIRECT) { + /* disable caching for this fd, if O_DIRECT is used */ + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, + xdata); + return 0; + } + + table = this->private; + + if (!table) { + gf_smsg(this->name, GF_LOG_ERROR, EINVAL, IO_CACHE_MSG_TABLE_NULL, + NULL); + op_errno = EINVAL; + goto out; + } + + ioc_inode_lock(ioc_inode); + { + if (!ioc_inode->cache.page_table) { + ioc_inode->cache.page_table = rbthash_table_init( + this->ctx, IOC_PAGE_TABLE_BUCKET_COUNT, ioc_hashfn, NULL, 0, + table->mem_pool); + + if (ioc_inode->cache.page_table == NULL) { + op_errno = ENOMEM; + ioc_inode_unlock(ioc_inode); + goto out; + } + } + } + ioc_inode_unlock(ioc_inode); + + if (!fd_ctx_get(fd, this, NULL)) { + /* disable caching for this fd, go ahead with normal readv */ + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, + xdata); + return 0; + } + + local = mem_get0(this->local_pool); + if (local == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL); + op_errno = ENOMEM; + goto out; + } + + INIT_LIST_HEAD(&local->fill_list); + + frame->local = local; + local->pending_offset = offset; + local->pending_size = size; + local->offset = offset; + local->size = size; + local->inode = ioc_inode; + local->xattr_req = dict_ref(xdata); + + gf_msg_trace(this->name, 0, + "NEW REQ (%p) offset " + "= %" PRId64 " && size = %" GF_PRI_SIZET "", + frame, offset, size); + + weight = ioc_inode->weight; + + ioc_table_lock(ioc_inode->table); + { + list_move_tail(&ioc_inode->inode_lru, + &ioc_inode->table->inode_lru[weight]); + } + ioc_table_unlock(ioc_inode->table); + + ioc_dispatch_requests(frame, ioc_inode, fd, offset, size); + return 0; + +out: + STACK_UNWIND_STRICT(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL); + return 0; } /* * ioc_writev_cbk - - * + * * @frame: * @cookie: * @this: @@ -1068,28 +1216,36 @@ ioc_readv (call_frame_t *frame, * */ int32_t -ioc_writev_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct stat *stbuf) +ioc_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - ioc_local_t *local = frame->local; - uint64_t ioc_inode = 0; - - inode_ctx_get (local->fd->inode, this, &ioc_inode); - - if (ioc_inode) - ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); - - STACK_UNWIND (frame, op_ret, op_errno, stbuf); - return 0; + ioc_local_t *local = NULL; + uint64_t ioc_inode = 0; + + local = frame->local; + frame->local = NULL; + inode_ctx_get(local->fd->inode, this, &ioc_inode); + + if (op_ret >= 0) { + ioc_update_pages(frame, (ioc_inode_t *)(long)ioc_inode, local->vector, + local->op_ret, op_ret, local->offset); + } + + STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + if (local->iobref) { + iobref_unref(local->iobref); + GF_FREE(local->vector); + } + + mem_put(local); + return 0; } /* * ioc_writev - * + * * @frame: * @this: * @fd: @@ -1099,42 +1255,43 @@ ioc_writev_cbk (call_frame_t *frame, * */ int32_t -ioc_writev (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - struct iovec *vector, - int32_t count, - off_t offset) +ioc_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) { - ioc_local_t *local = NULL; - uint64_t ioc_inode = 0; - - local = CALLOC (1, sizeof (ioc_local_t)); - ERR_ABORT (local); - - /* TODO: why is it not fd_ref'ed */ - local->fd = fd; - frame->local = local; - - inode_ctx_get (fd->inode, this, &ioc_inode); - if (ioc_inode) - ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); - - STACK_WIND (frame, - ioc_writev_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->writev, - fd, - vector, - count, - offset); - - return 0; + ioc_local_t *local = NULL; + uint64_t ioc_inode = 0; + + local = mem_get0(this->local_pool); + if (local == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL); + + STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, NULL, NULL, NULL); + return 0; + } + + /* TODO: why is it not fd_ref'ed */ + local->fd = fd; + frame->local = local; + + inode_ctx_get(fd->inode, this, &ioc_inode); + if (ioc_inode) { + local->iobref = iobref_ref(iobref); + local->vector = iov_dup(vector, count); + local->op_ret = count; + local->offset = offset; + } + + STACK_WIND(frame, ioc_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, + flags, iobref, xdata); + + return 0; } /* * ioc_truncate_cbk - - * + * * @frame: * @cookie: * @this: @@ -1143,52 +1300,65 @@ ioc_writev (call_frame_t *frame, * @buf: * */ -int32_t -ioc_truncate_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct stat *buf) +int32_t +ioc_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { + STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; +} - STACK_UNWIND (frame, op_ret, op_errno, buf); - return 0; +/* + * ioc_ftruncate_cbk - + * + * @frame: + * @cookie: + * @this: + * @op_ret: + * @op_errno: + * @buf: + * + */ +int32_t +ioc_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + return 0; } /* * ioc_truncate - - * + * * @frame: * @this: * @loc: * @offset: * */ -int32_t -ioc_truncate (call_frame_t *frame, - xlator_t *this, - loc_t *loc, - off_t offset) +int32_t +ioc_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - uint64_t ioc_inode = 0; - inode_ctx_get (loc->inode, this, &ioc_inode); - - if (ioc_inode) - ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); - - STACK_WIND (frame, - ioc_truncate_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, - loc, - offset); - return 0; + uint64_t ioc_inode = 0; + + inode_ctx_get(loc->inode, this, &ioc_inode); + + if (ioc_inode) + ioc_inode_flush((ioc_inode_t *)(long)ioc_inode); + + STACK_WIND(frame, ioc_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; } /* * ioc_ftruncate - - * + * * @frame: * @this: * @fd: @@ -1196,279 +1366,866 @@ ioc_truncate (call_frame_t *frame, * */ int32_t -ioc_ftruncate (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - off_t offset) +ioc_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - uint64_t ioc_inode = 0; - inode_ctx_get (fd->inode, this, &ioc_inode); - - if (ioc_inode) - ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); - - STACK_WIND (frame, - ioc_truncate_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, - fd, - offset); - return 0; + uint64_t ioc_inode = 0; + + inode_ctx_get(fd->inode, this, &ioc_inode); + + if (ioc_inode) + ioc_inode_flush((ioc_inode_t *)(long)ioc_inode); + + STACK_WIND(frame, ioc_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; } int32_t -ioc_lk_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct flock *lock) +ioc_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct gf_flock *lock, dict_t *xdata) { - STACK_UNWIND (frame, op_ret, op_errno, lock); - return 0; + STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, lock, xdata); + return 0; } -int32_t -ioc_lk (call_frame_t *frame, - xlator_t *this, - fd_t *fd, - int32_t cmd, - struct flock *lock) +int32_t +ioc_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + struct gf_flock *lock, dict_t *xdata) { - ioc_inode_t *ioc_inode = NULL; - uint64_t tmp_inode = 0; - - inode_ctx_get (fd->inode, this, &tmp_inode); - ioc_inode = (ioc_inode_t *)(long)tmp_inode; - if (!ioc_inode) { - gf_log (this->name, GF_LOG_ERROR, - "inode context is NULL: returning EBADFD"); - STACK_UNWIND (frame, -1, EBADFD, NULL); - return 0; - } - - ioc_inode_lock (ioc_inode); - { - gettimeofday (&ioc_inode->tv, NULL); - } - ioc_inode_unlock (ioc_inode); - - STACK_WIND (frame, ioc_lk_cbk, - FIRST_CHILD (this), - FIRST_CHILD (this)->fops->lk, fd, cmd, lock); - return 0; + ioc_inode_t *ioc_inode = NULL; + uint64_t tmp_inode = 0; + + inode_ctx_get(fd->inode, this, &tmp_inode); + ioc_inode = (ioc_inode_t *)(long)tmp_inode; + if (!ioc_inode) { + gf_msg_debug(this->name, EBADFD, + "inode context is NULL: returning EBADFD"); + STACK_UNWIND_STRICT(lk, frame, -1, EBADFD, NULL, NULL); + return 0; + } + + ioc_inode_lock(ioc_inode); + { + ioc_inode->cache.last_revalidate = gf_time(); + } + ioc_inode_unlock(ioc_inode); + + STACK_WIND(frame, ioc_lk_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata); + + return 0; +} + +int +ioc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, gf_dirent_t *entries, dict_t *xdata) +{ + gf_dirent_t *entry = NULL; + char *path = NULL; + fd_t *fd = NULL; + + fd = frame->local; + frame->local = NULL; + + if (op_ret <= 0) + goto unwind; + + list_for_each_entry(entry, &entries->list, list) + { + inode_path(fd->inode, entry->d_name, &path); + ioc_inode_update(this, entry->inode, path, &entry->d_stat); + GF_FREE(path); + path = NULL; + } + +unwind: + STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata); + + return 0; +} + +int +ioc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *dict) +{ + frame->local = fd; + + STACK_WIND(frame, ioc_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict); + + return 0; +} + +static int32_t +ioc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post, xdata); + return 0; +} + +static int32_t +ioc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + uint64_t ioc_inode = 0; + + inode_ctx_get(fd->inode, this, &ioc_inode); + + if (ioc_inode) + ioc_inode_flush((ioc_inode_t *)(long)ioc_inode); + + STACK_WIND(frame, ioc_discard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); + return 0; +} + +static int32_t +ioc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, pre, post, xdata); + return 0; +} + +static int32_t +ioc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) +{ + uint64_t ioc_inode = 0; + + inode_ctx_get(fd->inode, this, &ioc_inode); + + if (ioc_inode) + ioc_inode_flush((ioc_inode_t *)(long)ioc_inode); + + STACK_WIND(frame, ioc_zerofill_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); + return 0; } int32_t -ioc_get_priority_list (const char *opt_str, struct list_head *first) +ioc_get_priority_list(const char *opt_str, struct list_head *first) { - int32_t max_pri = 0; - char *tmp_str = NULL; - char *tmp_str1 = NULL; - char *tmp_str2 = NULL; - char *dup_str = NULL; - char *stripe_str = NULL; - char *pattern = NULL; - char *priority = NULL; - char *string = strdup (opt_str); - struct ioc_priority *curr = NULL; - - /* Get the pattern for cache priority. - * "option priority *.jpg:1,abc*:2" etc - */ - /* TODO: inode_lru in table is statically hard-coded to 5, - * should be changed to run-time configuration - */ - stripe_str = strtok_r (string, ",", &tmp_str); - while (stripe_str) { - curr = CALLOC (1, sizeof (struct ioc_priority)); - ERR_ABORT (curr); - list_add_tail (&curr->list, first); - - dup_str = strdup (stripe_str); - pattern = strtok_r (dup_str, ":", &tmp_str1); - if (!pattern) - return -1; - priority = strtok_r (NULL, ":", &tmp_str1); - if (!priority) - return -1; - gf_log ("io-cache", - GF_LOG_DEBUG, - "ioc priority : pattern %s : priority %s", - pattern, - priority); - curr->pattern = strdup (pattern); - curr->priority = strtol (priority, &tmp_str2, 0); - if (tmp_str2 && (*tmp_str2)) - return -1; - else - max_pri = max (max_pri, curr->priority); - stripe_str = strtok_r (NULL, ",", &tmp_str); - } - - return max_pri; + int32_t max_pri = 1; + char *tmp_str = NULL; + char *tmp_str1 = NULL; + char *tmp_str2 = NULL; + char *dup_str = NULL; + char *stripe_str = NULL; + char *pattern = NULL; + char *priority = NULL; + char *string = NULL; + struct ioc_priority *curr = NULL, *tmp = NULL; + + string = gf_strdup(opt_str); + if (string == NULL) { + max_pri = -1; + goto out; + } + + /* Get the pattern for cache priority. + * "option priority *.jpg:1,abc*:2" etc + */ + /* TODO: inode_lru in table is statically hard-coded to 5, + * should be changed to run-time configuration + */ + stripe_str = strtok_r(string, ",", &tmp_str); + while (stripe_str) { + curr = GF_CALLOC(1, sizeof(struct ioc_priority), + gf_ioc_mt_ioc_priority); + if (curr == NULL) { + max_pri = -1; + goto out; + } + + list_add_tail(&curr->list, first); + + dup_str = gf_strdup(stripe_str); + if (dup_str == NULL) { + max_pri = -1; + goto out; + } + + pattern = strtok_r(dup_str, ":", &tmp_str1); + if (!pattern) { + max_pri = -1; + goto out; + } + + priority = strtok_r(NULL, ":", &tmp_str1); + if (!priority) { + max_pri = -1; + goto out; + } + + gf_msg_trace("io-cache", 0, "ioc priority : pattern %s : priority %s", + pattern, priority); + + curr->pattern = gf_strdup(pattern); + if (curr->pattern == NULL) { + max_pri = -1; + goto out; + } + + curr->priority = strtol(priority, &tmp_str2, 0); + if (tmp_str2 && (*tmp_str2)) { + max_pri = -1; + goto out; + } else { + max_pri = max(max_pri, curr->priority); + } + + GF_FREE(dup_str); + dup_str = NULL; + + stripe_str = strtok_r(NULL, ",", &tmp_str); + } +out: + GF_FREE(string); + + GF_FREE(dup_str); + + if (max_pri == -1) { + list_for_each_entry_safe(curr, tmp, first, list) + { + list_del_init(&curr->list); + GF_FREE(curr->pattern); + GF_FREE(curr); + } + } + + return max_pri; +} + +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init(this, gf_ioc_mt_end + 1); + + if (ret != 0) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + IO_CACHE_MSG_MEMORY_INIT_FAILED, NULL); + return ret; + } + + return ret; +} + +static gf_boolean_t +check_cache_size_ok(xlator_t *this, uint64_t cache_size) +{ + gf_boolean_t ret = _gf_true; + uint64_t total_mem = 0; + uint64_t max_cache_size = 0; + volume_option_t *opt = NULL; + + GF_ASSERT(this); + opt = xlator_volume_option_get(this, "cache-size"); + if (!opt) { + ret = _gf_false; + gf_smsg(this->name, GF_LOG_ERROR, EINVAL, + IO_CACHE_MSG_NO_CACHE_SIZE_OPT, NULL); + goto out; + } + + total_mem = get_mem_size(); + if (-1 == total_mem) + max_cache_size = opt->max; + else + max_cache_size = total_mem; + + gf_msg_debug(this->name, 0, "Max cache size is %" PRIu64, max_cache_size); + + if (cache_size > max_cache_size) { + ret = _gf_false; + gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT, + "Cache-size=%" PRIu64, cache_size, "max-size=%" PRIu64, + max_cache_size, NULL); + goto out; + } +out: + return ret; +} + +int +reconfigure(xlator_t *this, dict_t *options) +{ + data_t *data = NULL; + ioc_table_t *table = NULL; + int ret = -1; + uint64_t cache_size_new = 0; + if (!this || !this->private) + goto out; + + table = this->private; + + ioc_table_lock(table); + { + GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, + unlock); + + GF_OPTION_RECONF("cache-timeout", table->cache_timeout, options, int32, + unlock); + + data = dict_get(options, "priority"); + if (data) { + char *option_list = data_to_str(data); + + gf_msg_trace(this->name, 0, "option path %s", option_list); + /* parse the list of pattern:priority */ + table->max_pri = ioc_get_priority_list(option_list, + &table->priority_list); + + if (table->max_pri == -1) { + goto unlock; + } + table->max_pri++; + } + + GF_OPTION_RECONF("max-file-size", table->max_file_size, options, + size_uint64, unlock); + + GF_OPTION_RECONF("min-file-size", table->min_file_size, options, + size_uint64, unlock); + + if ((table->max_file_size <= UINT64_MAX) && + (table->min_file_size > table->max_file_size)) { + gf_smsg(this->name, GF_LOG_ERROR, 0, IO_CACHE_MSG_DEFAULTING_TO_OLD, + "minimum-size=%" PRIu64, table->min_file_size, + "maximum-size=%" PRIu64, table->max_file_size, NULL); + goto unlock; + } + + GF_OPTION_RECONF("cache-size", cache_size_new, options, size_uint64, + unlock); + if (!check_cache_size_ok(this, cache_size_new)) { + ret = -1; + gf_smsg(this->name, GF_LOG_ERROR, 0, + IO_CACHE_MSG_NOT_RECONFIG_CACHE_SIZE, NULL); + goto unlock; + } + table->cache_size = cache_size_new; + + ret = 0; + } +unlock: + ioc_table_unlock(table); +out: + return ret; } /* - * init - + * init - * @this: * */ -int32_t -init (xlator_t *this) +int32_t +init(xlator_t *this) +{ + ioc_table_t *table = NULL; + dict_t *xl_options = NULL; + uint32_t index = 0; + int32_t ret = -1; + glusterfs_ctx_t *ctx = NULL; + data_t *data = 0; + uint32_t num_pages = 0; + + xl_options = this->options; + + if (!this->children || this->children->next) { + gf_smsg(this->name, GF_LOG_ERROR, 0, + IO_CACHE_MSG_XLATOR_CHILD_MISCONFIGURED, NULL); + goto out; + } + + if (!this->parents) { + gf_smsg(this->name, GF_LOG_WARNING, 0, IO_CACHE_MSG_VOL_MISCONFIGURED, + NULL); + } + + table = (void *)GF_CALLOC(1, sizeof(*table), gf_ioc_mt_ioc_table_t); + if (table == NULL) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, IO_CACHE_MSG_NO_MEMORY, NULL); + goto out; + } + + table->xl = this; + table->page_size = this->ctx->page_size; + + GF_OPTION_INIT("pass-through", this->pass_through, bool, out); + + GF_OPTION_INIT("cache-size", table->cache_size, size_uint64, out); + + GF_OPTION_INIT("cache-timeout", table->cache_timeout, int32, out); + + GF_OPTION_INIT("min-file-size", table->min_file_size, size_uint64, out); + + GF_OPTION_INIT("max-file-size", table->max_file_size, size_uint64, out); + + if (!check_cache_size_ok(this, table->cache_size)) { + ret = -1; + goto out; + } + + INIT_LIST_HEAD(&table->priority_list); + table->max_pri = 1; + data = dict_get(xl_options, "priority"); + if (data) { + char *option_list = data_to_str(data); + gf_msg_trace(this->name, 0, "option path %s", option_list); + /* parse the list of pattern:priority */ + table->max_pri = ioc_get_priority_list(option_list, + &table->priority_list); + + if (table->max_pri == -1) { + goto out; + } + } + table->max_pri++; + + INIT_LIST_HEAD(&table->inodes); + + if ((table->max_file_size <= UINT64_MAX) && + (table->min_file_size > table->max_file_size)) { + gf_smsg("io-cache", GF_LOG_ERROR, 0, IO_CACHE_MSG_INVALID_ARGUMENT, + "minimum-size=%" PRIu64, table->min_file_size, + "maximum-size=%" PRIu64, table->max_file_size, NULL); + goto out; + } + + table->inode_lru = GF_CALLOC(table->max_pri, sizeof(struct list_head), + gf_ioc_mt_list_head); + if (table->inode_lru == NULL) { + goto out; + } + + for (index = 0; index < (table->max_pri); index++) + INIT_LIST_HEAD(&table->inode_lru[index]); + + this->local_pool = mem_pool_new(ioc_local_t, 64); + if (!this->local_pool) { + ret = -1; + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + IO_CACHE_MSG_CREATE_MEM_POOL_FAILED, NULL); + goto out; + } + + pthread_mutex_init(&table->table_lock, NULL); + this->private = table; + + num_pages = (table->cache_size / table->page_size) + + ((table->cache_size % table->page_size) ? 1 : 0); + + table->mem_pool = mem_pool_new(rbthash_entry_t, num_pages); + if (!table->mem_pool) { + gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, + IO_CACHE_MSG_ALLOC_MEM_POOL_FAILED, NULL); + goto out; + } + + ret = 0; + + ctx = this->ctx; + ioc_log2_page_size = log_base2(ctx->page_size); + +out: + if (ret == -1) { + if (table != NULL) { + GF_FREE(table->inode_lru); + GF_FREE(table); + } + } + + return ret; +} + +void +ioc_page_waitq_dump(ioc_page_t *page, char *prefix) { - ioc_table_t *table; - dict_t *options = this->options; - uint32_t index = 0; - char *page_size_string = NULL; - char *cache_size_string = NULL; - - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_ERROR, - "FATAL: io-cache not configured with exactly " - "one child"); - return -1; - } - - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "dangling volume. check volfile "); - } - - table = (void *) CALLOC (1, sizeof (*table)); - ERR_ABORT (table); - - table->xl = this; - table->page_size = IOC_PAGE_SIZE; - table->cache_size = IOC_CACHE_SIZE; - - if (dict_get (options, "page-size")) - page_size_string = data_to_str (dict_get (options, - "page-size")); - - if (page_size_string) { - if (gf_string2bytesize (page_size_string, - &table->page_size) != 0) { - gf_log ("io-cache", GF_LOG_ERROR, - "invalid number format \"%s\" of " - "\"option page-size\"", - page_size_string); - return -1; - } - gf_log (this->name, GF_LOG_DEBUG, - "using page-size %"PRIu64"", table->page_size); - } - - if (dict_get (options, "cache-size")) - cache_size_string = data_to_str (dict_get (options, - "cache-size")); - if (cache_size_string) { - if (gf_string2bytesize (cache_size_string, - &table->cache_size) != 0) { - gf_log ("io-cache", GF_LOG_ERROR, - "invalid number format \"%s\" of " - "\"option cache-size\"", - cache_size_string); - return -1; - } - - gf_log (this->name, GF_LOG_DEBUG, - "using cache-size %"PRIu64"", table->cache_size); - } - - table->cache_timeout = 1; - - if (dict_get (options, "cache-timeout")) { - table->cache_timeout = - data_to_uint32 (dict_get (options, - "cache-timeout")); - gf_log (this->name, GF_LOG_DEBUG, - "Using %d seconds to revalidate cache", - table->cache_timeout); - } - - INIT_LIST_HEAD (&table->priority_list); - if (dict_get (options, "priority")) { - char *option_list = data_to_str (dict_get (options, - "priority")); - gf_log (this->name, GF_LOG_DEBUG, - "option path %s", option_list); - /* parse the list of pattern:priority */ - table->max_pri = ioc_get_priority_list (option_list, - &table->priority_list); - - if (table->max_pri == -1) - return -1; - } - table->max_pri ++; - INIT_LIST_HEAD (&table->inodes); - - table->inode_lru = CALLOC (table->max_pri, sizeof (struct list_head)); - ERR_ABORT (table->inode_lru); - for (index = 0; index < (table->max_pri); index++) - INIT_LIST_HEAD (&table->inode_lru[index]); - - pthread_mutex_init (&table->table_lock, NULL); - this->private = table; - return 0; + ioc_waitq_t *trav = NULL; + call_frame_t *frame = NULL; + int32_t i = 0; + char key[GF_DUMP_MAX_BUF_LEN] = { + 0, + }; + + trav = page->waitq; + + while (trav) { + frame = trav->data; + sprintf(key, "waitq.frame[%d]", i++); + gf_proc_dump_write(key, "%" PRId64, frame->root->unique); + + trav = trav->next; + } +} + +void +__ioc_inode_waitq_dump(ioc_inode_t *ioc_inode, char *prefix) +{ + ioc_waitq_t *trav = NULL; + ioc_page_t *page = NULL; + int32_t i = 0; + char key[GF_DUMP_MAX_BUF_LEN] = { + 0, + }; + + trav = ioc_inode->waitq; + + while (trav) { + page = trav->data; + + sprintf(key, "cache-validation-waitq.page[%d].offset", i++); + gf_proc_dump_write(key, "%" PRId64, page->offset); + + trav = trav->next; + } +} + +void +__ioc_page_dump(ioc_page_t *page, char *prefix) +{ + int ret = -1; + + if (!page) + return; + /* ioc_page_lock can be used to hold the mutex. But in statedump + * its better to use trylock to avoid deadlocks. + */ + ret = pthread_mutex_trylock(&page->page_lock); + if (ret) + goto out; + { + gf_proc_dump_write("offset", "%" PRId64, page->offset); + gf_proc_dump_write("size", "%" GF_PRI_SIZET, page->size); + gf_proc_dump_write("dirty", "%s", page->dirty ? "yes" : "no"); + gf_proc_dump_write("ready", "%s", page->ready ? "yes" : "no"); + ioc_page_waitq_dump(page, prefix); + } + pthread_mutex_unlock(&page->page_lock); + +out: + if (ret && page) + gf_proc_dump_write("Unable to dump the page information", + "(Lock acquisition failed) %p", page); + + return; +} + +void +__ioc_cache_dump(ioc_inode_t *ioc_inode, char *prefix) +{ + off_t offset = 0; + ioc_table_t *table = NULL; + ioc_page_t *page = NULL; + int i = 0; + char key[GF_DUMP_MAX_BUF_LEN] = { + 0, + }; + char timestr[GF_TIMESTR_SIZE] = { + 0, + }; + + if ((ioc_inode == NULL) || (prefix == NULL)) { + goto out; + } + + table = ioc_inode->table; + + if (ioc_inode->cache.last_revalidate) { + gf_time_fmt(timestr, sizeof timestr, ioc_inode->cache.last_revalidate, + gf_timefmt_FT); + + gf_proc_dump_write("last-cache-validation-time", "%s", timestr); + } + + for (offset = 0; offset < ioc_inode->ia_size; offset += table->page_size) { + page = __ioc_page_get(ioc_inode, offset); + if (page == NULL) { + continue; + } + + sprintf(key, "inode.cache.page[%d]", i++); + __ioc_page_dump(page, key); + } +out: + return; +} + +int +ioc_inode_dump(xlator_t *this, inode_t *inode) +{ + char *path = NULL; + int ret = -1; + char key_prefix[GF_DUMP_MAX_BUF_LEN] = { + 0, + }; + uint64_t tmp_ioc_inode = 0; + ioc_inode_t *ioc_inode = NULL; + gf_boolean_t section_added = _gf_false; + char uuid_str[64] = { + 0, + }; + + if (this == NULL || inode == NULL) + goto out; + + gf_proc_dump_build_key(key_prefix, "io-cache", "inode"); + + inode_ctx_get(inode, this, &tmp_ioc_inode); + ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; + if (ioc_inode == NULL) + goto out; + + /* Similar to ioc_page_dump function its better to use + * pthread_mutex_trylock and not to use gf_log in statedump + * to avoid deadlocks. + */ + ret = pthread_mutex_trylock(&ioc_inode->inode_lock); + if (ret) + goto out; + + { + if (gf_uuid_is_null(ioc_inode->inode->gfid)) + goto unlock; + + gf_proc_dump_add_section("%s", key_prefix); + section_added = _gf_true; + + __inode_path(ioc_inode->inode, NULL, &path); + + gf_proc_dump_write("inode.weight", "%d", ioc_inode->weight); + + if (path) { + gf_proc_dump_write("path", "%s", path); + GF_FREE(path); + } + + gf_proc_dump_write("uuid", "%s", + uuid_utoa_r(ioc_inode->inode->gfid, uuid_str)); + __ioc_cache_dump(ioc_inode, key_prefix); + __ioc_inode_waitq_dump(ioc_inode, key_prefix); + } +unlock: + pthread_mutex_unlock(&ioc_inode->inode_lock); + +out: + if (ret && ioc_inode) { + if (section_added == _gf_false) + gf_proc_dump_add_section("%s", key_prefix); + gf_proc_dump_write("Unable to print the status of ioc_inode", + "(Lock acquisition failed) %s", + uuid_utoa(inode->gfid)); + } + return ret; +} + +int +ioc_priv_dump(xlator_t *this) +{ + ioc_table_t *priv = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN] = { + 0, + }; + int ret = -1; + gf_boolean_t add_section = _gf_false; + + if (!this || !this->private) + goto out; + + priv = this->private; + + gf_proc_dump_build_key(key_prefix, "io-cache", "priv"); + gf_proc_dump_add_section("%s", key_prefix); + add_section = _gf_true; + + ret = pthread_mutex_trylock(&priv->table_lock); + if (ret) + goto out; + { + gf_proc_dump_write("page_size", "%" PRIu64, priv->page_size); + gf_proc_dump_write("cache_size", "%" PRIu64, priv->cache_size); + gf_proc_dump_write("cache_used", "%" PRIu64, priv->cache_used); + gf_proc_dump_write("inode_count", "%u", priv->inode_count); + gf_proc_dump_write("cache_timeout", "%u", priv->cache_timeout); + gf_proc_dump_write("min-file-size", "%" PRIu64, priv->min_file_size); + gf_proc_dump_write("max-file-size", "%" PRIu64, priv->max_file_size); + } + pthread_mutex_unlock(&priv->table_lock); +out: + if (ret && priv) { + if (!add_section) { + gf_proc_dump_build_key(key_prefix, + "xlator." + "performance.io-cache", + "priv"); + gf_proc_dump_add_section("%s", key_prefix); + } + gf_proc_dump_write( + "Unable to dump the state of private " + "structure of io-cache xlator", + "(Lock " + "acquisition failed) %s", + this->name); + } + + return 0; } /* * fini - - * + * * @this: * */ void -fini (xlator_t *this) +fini(xlator_t *this) { - ioc_table_t *table = this->private; - - pthread_mutex_destroy (&table->table_lock); - FREE (table); - - this->private = NULL; - return; + ioc_table_t *table = NULL; + struct ioc_priority *curr = NULL, *tmp = NULL; + + table = this->private; + + if (table == NULL) + return; + + this->private = NULL; + + if (table->mem_pool != NULL) { + mem_pool_destroy(table->mem_pool); + table->mem_pool = NULL; + } + + list_for_each_entry_safe(curr, tmp, &table->priority_list, list) + { + list_del_init(&curr->list); + GF_FREE(curr->pattern); + GF_FREE(curr); + } + + /* inode_lru and inodes list can be empty in case fini() is + * called soon after init()? Hence commenting the below asserts. + */ + /*for (i = 0; i < table->max_pri; i++) { + GF_ASSERT (list_empty (&table->inode_lru[i])); + } + + GF_ASSERT (list_empty (&table->inodes)); + */ + pthread_mutex_destroy(&table->table_lock); + GF_FREE(table); + + this->private = NULL; + return; } struct xlator_fops fops = { - .open = ioc_open, - .create = ioc_create, - .readv = ioc_readv, - .writev = ioc_writev, - .truncate = ioc_truncate, - .ftruncate = ioc_ftruncate, - .utimens = ioc_utimens, - .lookup = ioc_lookup, - .lk = ioc_lk + .open = ioc_open, + .create = ioc_create, + .readv = ioc_readv, + .writev = ioc_writev, + .truncate = ioc_truncate, + .ftruncate = ioc_ftruncate, + .lookup = ioc_lookup, + .lk = ioc_lk, + .setattr = ioc_setattr, + .mknod = ioc_mknod, + + .readdirp = ioc_readdirp, + .discard = ioc_discard, + .zerofill = ioc_zerofill, }; -struct xlator_mops mops = { +struct xlator_dumpops dumpops = { + .priv = ioc_priv_dump, + .inodectx = ioc_inode_dump, }; struct xlator_cbks cbks = { - .forget = ioc_forget, - .release = ioc_release + .forget = ioc_forget, + .release = ioc_release, + .invalidate = ioc_invalidate, }; struct volume_options options[] = { - { .key = {"priority"}, - .type = GF_OPTION_TYPE_ANY - }, - { .key = {"cache-timeout", "force-revalidate-timeout"}, - .type = GF_OPTION_TYPE_INT, - .min = 0, - .max = 60 - }, - { .key = {"page-size"}, - .type = GF_OPTION_TYPE_SIZET, - .min = 16 * GF_UNIT_KB, - .max = 4 * GF_UNIT_MB - }, - { .key = {"cache-size"}, - .type = GF_OPTION_TYPE_SIZET, - .min = 4 * GF_UNIT_MB, - .max = 6 * GF_UNIT_GB - }, - { .key = {NULL} }, + { + .key = {"io-cache"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable io-cache", + .op_version = {GD_OP_VERSION_6_0}, + .flags = OPT_FLAG_SETTABLE, + }, + {.key = {"priority"}, + .type = GF_OPTION_TYPE_PRIORITY_LIST, + .default_value = "", + .description = "Assigns priority to filenames with specific " + "patterns so that when a page needs to be ejected " + "out of the cache, the page of a file whose " + "priority is the lowest will be ejected earlier", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"cache-timeout", "force-revalidate-timeout"}, + .type = GF_OPTION_TYPE_INT, + .min = 0, + .max = 60, + .default_value = "1", + .description = "The cached data for a file will be retained for " + "'cache-refresh-timeout' seconds, after which data " + "re-validation is performed.", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"cache-size"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 4 * GF_UNIT_MB, + .max = INFINITY, + .default_value = "32MB", + .description = "Size of the read cache.", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"min-file-size"}, + .type = GF_OPTION_TYPE_SIZET, + .default_value = "0", + .description = "Minimum file size which would be cached by the " + "io-cache translator.", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"max-file-size"}, + .type = GF_OPTION_TYPE_SIZET, + .default_value = "0", + .description = "Maximum file size which would be cached by the " + "io-cache translator.", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC}, + {.key = {"pass-through"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .op_version = {GD_OP_VERSION_4_1_0}, + .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT, + .tags = {"io-cache"}, + .description = "Enable/Disable io cache translator"}, + {.key = {NULL}}, +}; + +xlator_api_t xlator_api = { + .init = init, + .fini = fini, + .reconfigure = reconfigure, + .mem_acct_init = mem_acct_init, + .op_version = {1}, /* Present from the initial version */ + .dumpops = &dumpops, + .fops = &fops, + .cbks = &cbks, + .options = options, + .identifier = "io-cache", + .category = GF_MAINTAINED, }; diff --git a/xlators/performance/io-cache/src/io-cache.h b/xlators/performance/io-cache/src/io-cache.h index 3cefca16414..14923c75edc 100644 --- a/xlators/performance/io-cache/src/io-cache.h +++ b/xlators/performance/io-cache/src/io-cache.h @@ -1,44 +1,30 @@ /* - Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #ifndef __IO_CACHE_H #define __IO_CACHE_H -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - #include <sys/types.h> -#include "compat-errno.h" - -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" -#include "common-utils.h" -#include "call-stub.h" +#include <glusterfs/compat-errno.h> + +#include <glusterfs/glusterfs.h> +#include <glusterfs/dict.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/rbthash.h> #include <sys/time.h> #include <fnmatch.h> +#include "io-cache-messages.h" -#define IOC_PAGE_SIZE (1024 * 128) /* 128KB */ -#define IOC_CACHE_SIZE (32 * 1024 * 1024) +#define IOC_PAGE_SIZE (1024 * 128) /* 128KB */ +#define IOC_CACHE_SIZE (32 * 1024 * 1024) +#define IOC_PAGE_TABLE_BUCKET_COUNT 1 struct ioc_table; struct ioc_local; @@ -46,106 +32,135 @@ struct ioc_page; struct ioc_inode; struct ioc_priority { - struct list_head list; - char *pattern; - uint32_t priority; + struct list_head list; + char *pattern; + uint32_t priority; }; /* - * ioc_waitq - this structure is used to represents the waiting + * ioc_waitq - this structure is used to represents the waiting * frames on a page * * @next: pointer to next object in waitq * @data: pointer to the frame which is waiting */ struct ioc_waitq { - struct ioc_waitq *next; - void *data; - off_t pending_offset; - size_t pending_size; + struct ioc_waitq *next; + void *data; + off_t pending_offset; + size_t pending_size; }; /* - * ioc_fill - + * ioc_fill - * */ struct ioc_fill { - struct list_head list; /* list of ioc_fill structures of a frame */ - off_t offset; - size_t size; - struct iovec *vector; - int32_t count; - dict_t *refs; + struct list_head list; /* list of ioc_fill structures of a frame */ + off_t offset; + size_t size; + struct iovec *vector; + int32_t count; + struct iobref *iobref; }; struct ioc_local { - mode_t mode; - int32_t flags; - loc_t file_loc; - off_t offset; - size_t size; - int32_t op_ret; - int32_t op_errno; - struct list_head fill_list; /* list of ioc_fill structures */ - off_t pending_offset; /* offset from this frame should continue */ - size_t pending_size; /* size of data this frame is waiting on */ - struct ioc_inode *inode; - int32_t wait_count; - pthread_mutex_t local_lock; - struct ioc_waitq *waitq; - void *stub; - fd_t *fd; - int32_t need_xattr; - dict_t *xattr_req; + mode_t mode; + int32_t flags; + loc_t file_loc; + off_t offset; + size_t size; + int32_t op_ret; + int32_t op_errno; + struct list_head fill_list; /* list of ioc_fill structures */ + off_t pending_offset; /* + * offset from this frame should + * continue + */ + size_t pending_size; /* + * size of data this frame is waiting + * on + */ + struct ioc_inode *inode; + int32_t wait_count; + pthread_mutex_t local_lock; + struct ioc_waitq *waitq; + void *stub; + fd_t *fd; + struct iovec *vector; + struct iobref *iobref; + int32_t need_xattr; + dict_t *xattr_req; }; /* - * ioc_page - structure to store page of data from file + * ioc_page - structure to store page of data from file * */ struct ioc_page { - struct list_head pages; - struct list_head page_lru; - struct ioc_inode *inode; /* inode this page belongs to */ - struct ioc_priority *priority; - char dirty; - char ready; - struct iovec *vector; - int32_t count; - off_t offset; - size_t size; - struct ioc_waitq *waitq; - dict_t *ref; - pthread_mutex_t page_lock; + struct list_head page_lru; + struct ioc_inode *inode; /* inode this page belongs to */ + struct ioc_priority *priority; + char dirty; + char ready; + struct iovec *vector; + int32_t count; + off_t offset; + size_t size; + struct ioc_waitq *waitq; + struct iobref *iobref; + pthread_mutex_t page_lock; + int32_t op_errno; + char stale; +}; + +struct ioc_cache { + rbthash_table_t *page_table; + struct list_head page_lru; + time_t mtime; /* + * seconds component of file mtime + */ + time_t mtime_nsec; /* + * nanosecond component of file mtime + */ + time_t last_revalidate; /* timestamp at last re-validate */ }; struct ioc_inode { - struct ioc_table *table; - struct list_head pages; /* list of pages of this inode */ - struct list_head inode_list; /* list of inodes, maintained by io-cache translator */ - struct list_head inode_lru; - struct list_head page_lru; - struct ioc_waitq *waitq; - pthread_mutex_t inode_lock; - uint32_t weight; /* weight of the inode, increases on each read */ - time_t mtime; /* mtime of the server file when last cached */ - struct timeval tv; /* time-stamp at last re-validate */ + struct ioc_table *table; + off_t ia_size; + struct ioc_cache cache; + struct list_head inode_list; /* + * list of inodes, maintained by + * io-cache translator + */ + struct list_head inode_lru; + struct ioc_waitq *waitq; + pthread_mutex_t inode_lock; + uint32_t weight; /* + * weight of the inode, increases + * on each read + */ + inode_t *inode; }; struct ioc_table { - uint64_t page_size; - uint64_t cache_size; - uint64_t cache_used; - struct list_head inodes; /* list of inodes cached */ - struct list_head active; - struct list_head *inode_lru; - struct list_head priority_list; - int32_t readv_count; - pthread_mutex_t table_lock; - xlator_t *xl; - uint32_t inode_count; - int32_t cache_timeout; - int32_t max_pri; + uint64_t page_size; + uint64_t cache_size; + uint64_t cache_used; + uint64_t min_file_size; + uint64_t max_file_size; + struct list_head inodes; /* list of inodes cached */ + struct list_head active; + struct list_head *inode_lru; + struct list_head priority_list; + int32_t readv_count; + pthread_mutex_t table_lock; + xlator_t *xl; + uint32_t inode_count; + int32_t cache_timeout; + int32_t max_pri; + struct mem_pool *mem_pool; }; typedef struct ioc_table ioc_table_t; @@ -156,175 +171,136 @@ typedef struct ioc_waitq ioc_waitq_t; typedef struct ioc_fill ioc_fill_t; void * -str_to_ptr (char *string); +str_to_ptr(char *string); char * -ptr_to_str (void *ptr); - -int32_t -ioc_readv_disabled_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct iovec *vector, - int32_t count, - struct stat *stbuf); +ptr_to_str(void *ptr); + +int32_t +ioc_readv_disabled_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iovec *vector, + int32_t count, struct iatt *stbuf, struct iobref *iobref, + dict_t *xdata); ioc_page_t * -ioc_page_get (ioc_inode_t *ioc_inode, - off_t offset); +__ioc_page_get(ioc_inode_t *ioc_inode, off_t offset); ioc_page_t * -ioc_page_create (ioc_inode_t *ioc_inode, - off_t offset); +__ioc_page_create(ioc_inode_t *ioc_inode, off_t offset); void -ioc_page_fault (ioc_inode_t *ioc_inode, - call_frame_t *frame, - fd_t *fd, - off_t offset); +ioc_page_fault(ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd, + off_t offset); void -ioc_wait_on_page (ioc_page_t *page, - call_frame_t *frame, - off_t offset, - size_t size); +__ioc_wait_on_page(ioc_page_t *page, call_frame_t *frame, off_t offset, + size_t size); ioc_waitq_t * -ioc_page_wakeup (ioc_page_t *page); +__ioc_page_wakeup(ioc_page_t *page, int32_t op_errno); void -ioc_page_flush (ioc_page_t *page); +ioc_page_flush(ioc_page_t *page); ioc_waitq_t * -ioc_page_error (ioc_page_t *page, - int32_t op_ret, - int32_t op_errno); -void -ioc_page_purge (ioc_page_t *page); +__ioc_page_error(ioc_page_t *page, int32_t op_ret, int32_t op_errno); void -ioc_frame_return (call_frame_t *frame); +ioc_frame_return(call_frame_t *frame); void -ioc_waitq_return (ioc_waitq_t *waitq); +ioc_waitq_return(ioc_waitq_t *waitq); -void -ioc_frame_fill (ioc_page_t *page, - call_frame_t *frame, - off_t offset, - size_t size); - -#define ioc_inode_lock(ioc_inode) \ - do { \ - gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG, \ - "locked inode(%p)", ioc_inode); \ - pthread_mutex_lock (&ioc_inode->inode_lock); \ - } while (0) - - -#define ioc_inode_unlock(ioc_inode) \ - do { \ - gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG, \ - "unlocked inode(%p)", ioc_inode); \ - pthread_mutex_unlock (&ioc_inode->inode_lock); \ - } while (0) - - -#define ioc_table_lock(table) \ - do { \ - gf_log (table->xl->name, GF_LOG_DEBUG, \ - "locked table(%p)", table); \ - pthread_mutex_lock (&table->table_lock); \ - } while (0) - - -#define ioc_table_unlock(table) \ - do { \ - gf_log (table->xl->name, GF_LOG_DEBUG, \ - "unlocked table(%p)", table); \ - pthread_mutex_unlock (&table->table_lock); \ - } while (0) - - -#define ioc_local_lock(local) \ - do { \ - gf_log (local->inode->table->xl->name, GF_LOG_DEBUG, \ - "locked local(%p)", local); \ - pthread_mutex_lock (&local->local_lock); \ - } while (0) - - -#define ioc_local_unlock(local) \ - do { \ - gf_log (local->inode->table->xl->name, GF_LOG_DEBUG, \ - "unlocked local(%p)", local); \ - pthread_mutex_unlock (&local->local_lock); \ - } while (0) - - -#define ioc_page_lock(page) \ - do { \ - gf_log (page->inode->table->xl->name, GF_LOG_DEBUG, \ - "locked page(%p)", page); \ - pthread_mutex_lock (&page->page_lock); \ - } while (0) - - -#define ioc_page_unlock(page) \ - do { \ - gf_log (page->inode->table->xl->name, GF_LOG_DEBUG, \ - "unlocked page(%p)", page); \ - pthread_mutex_unlock (&page->page_lock); \ - } while (0) - - -static inline uint64_t -time_elapsed (struct timeval *now, - struct timeval *then) -{ - uint64_t sec = now->tv_sec - then->tv_sec; - - if (sec) - return sec; - - return 0; -} +int32_t +ioc_frame_fill(ioc_page_t *page, call_frame_t *frame, off_t offset, size_t size, + int32_t op_errno); + +#define ioc_inode_lock(ioc_inode) \ + do { \ + gf_msg_trace(ioc_inode->table->xl->name, 0, "locked inode(%p)", \ + ioc_inode); \ + pthread_mutex_lock(&ioc_inode->inode_lock); \ + } while (0) + +#define ioc_inode_unlock(ioc_inode) \ + do { \ + gf_msg_trace(ioc_inode->table->xl->name, 0, "unlocked inode(%p)", \ + ioc_inode); \ + pthread_mutex_unlock(&ioc_inode->inode_lock); \ + } while (0) + +#define ioc_table_lock(table) \ + do { \ + gf_msg_trace(table->xl->name, 0, "locked table(%p)", table); \ + pthread_mutex_lock(&table->table_lock); \ + } while (0) + +#define ioc_table_unlock(table) \ + do { \ + gf_msg_trace(table->xl->name, 0, "unlocked table(%p)", table); \ + pthread_mutex_unlock(&table->table_lock); \ + } while (0) + +#define ioc_local_lock(local) \ + do { \ + gf_msg_trace(local->inode->table->xl->name, 0, "locked local(%p)", \ + local); \ + pthread_mutex_lock(&local->local_lock); \ + } while (0) + +#define ioc_local_unlock(local) \ + do { \ + gf_msg_trace(local->inode->table->xl->name, 0, "unlocked local(%p)", \ + local); \ + pthread_mutex_unlock(&local->local_lock); \ + } while (0) + +#define ioc_page_lock(page) \ + do { \ + gf_msg_trace(page->inode->table->xl->name, 0, "locked page(%p)", \ + page); \ + pthread_mutex_lock(&page->page_lock); \ + } while (0) + +#define ioc_page_unlock(page) \ + do { \ + gf_msg_trace(page->inode->table->xl->name, 0, "unlocked page(%p)", \ + page); \ + pthread_mutex_unlock(&page->page_lock); \ + } while (0) ioc_inode_t * -ioc_inode_search (ioc_table_t *table, - inode_t *inode); +ioc_inode_search(ioc_table_t *table, inode_t *inode); -void -ioc_inode_destroy (ioc_inode_t *ioc_inode); +void +ioc_inode_destroy(ioc_inode_t *ioc_inode); + +int32_t +ioc_inode_update(xlator_t *this, inode_t *inode, char *path, + struct iatt *iabuf); ioc_inode_t * -ioc_inode_update (ioc_table_t *table, - inode_t *inode, - uint32_t weight); +ioc_inode_create(ioc_table_t *table, inode_t *inode, uint32_t weight); -int64_t -ioc_page_destroy (ioc_page_t *page); +int64_t +__ioc_page_destroy(ioc_page_t *page); -int32_t -__ioc_inode_flush (ioc_inode_t *ioc_inode); +int64_t +__ioc_inode_flush(ioc_inode_t *ioc_inode); void -ioc_inode_flush (ioc_inode_t *ioc_inode); +ioc_inode_flush(ioc_inode_t *ioc_inode); void -ioc_inode_wakeup (call_frame_t *frame, - ioc_inode_t *ioc_inode, - struct stat *stbuf); +ioc_inode_wakeup(call_frame_t *frame, ioc_inode_t *ioc_inode, + struct iatt *stbuf); int8_t -ioc_cache_still_valid (ioc_inode_t *ioc_inode, - struct stat *stbuf); +ioc_cache_still_valid(ioc_inode_t *ioc_inode, struct iatt *stbuf); int32_t -ioc_prune (ioc_table_t *table); +ioc_prune(ioc_table_t *table); int32_t -ioc_need_prune (ioc_table_t *table); +ioc_need_prune(ioc_table_t *table); -#endif /* __READ_AHEAD_H */ +#endif /* __IO_CACHE_H */ diff --git a/xlators/performance/io-cache/src/ioc-inode.c b/xlators/performance/io-cache/src/ioc-inode.c index 917391de5ab..97767d85285 100644 --- a/xlators/performance/io-cache/src/ioc-inode.c +++ b/xlators/performance/io-cache/src/ioc-inode.c @@ -1,29 +1,17 @@ /* - Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - #include "io-cache.h" +#include "ioc-mem-types.h" +extern int ioc_log2_page_size; /* * str_to_ptr - convert a string to pointer @@ -31,12 +19,17 @@ * */ void * -str_to_ptr (char *string) +str_to_ptr(char *string) { - void *ptr = (void *)strtoul (string, NULL, 16); - return ptr; -} + void *ptr = NULL; + GF_VALIDATE_OR_GOTO("io-cache", string, out); + + ptr = (void *)strtoul(string, NULL, 16); + +out: + return ptr; +} /* * ptr_to_str - convert a pointer to string @@ -44,158 +37,191 @@ str_to_ptr (char *string) * */ char * -ptr_to_str (void *ptr) +ptr_to_str(void *ptr) { - char *str; - asprintf (&str, "%p", ptr); - return str; + int ret = 0; + char *str = NULL; + + GF_VALIDATE_OR_GOTO("io-cache", ptr, out); + + ret = gf_asprintf(&str, "%p", ptr); + if (-1 == ret) { + gf_smsg("io-cache", GF_LOG_WARNING, 0, + IO_CACHE_MSG_STR_COVERSION_FAILED, NULL); + str = NULL; + goto out; + } + +out: + return str; } void -ioc_inode_wakeup (call_frame_t *frame, - ioc_inode_t *ioc_inode, - struct stat *stbuf) +ioc_inode_wakeup(call_frame_t *frame, ioc_inode_t *ioc_inode, + struct iatt *stbuf) { - ioc_waitq_t *waiter = NULL, *waited = NULL; - ioc_waitq_t *page_waitq = NULL; - int8_t cache_still_valid = 1; - ioc_local_t *local = frame->local; - int8_t need_fault = 0; - ioc_page_t *waiter_page = NULL; - - ioc_inode_lock (ioc_inode); - { - waiter = ioc_inode->waitq; - ioc_inode->waitq = NULL; - } - ioc_inode_unlock (ioc_inode); - - if (stbuf) - cache_still_valid = ioc_cache_still_valid (ioc_inode, stbuf); - else - cache_still_valid = 0; - - if (!waiter) { - gf_log (frame->this->name, GF_LOG_DEBUG, - "cache validate called without any " - "page waiting to be validated"); - } - - while (waiter) { - waiter_page = waiter->data; - page_waitq = NULL; - - if (waiter_page) { - if (cache_still_valid) { - /* cache valid, wake up page */ - ioc_inode_lock (ioc_inode); - { - page_waitq = - ioc_page_wakeup (waiter_page); - } - ioc_inode_unlock (ioc_inode); - if (page_waitq) - ioc_waitq_return (page_waitq); - } else { - /* cache invalid, generate page fault and set - * page->ready = 0, to avoid double faults - */ - ioc_inode_lock (ioc_inode); - - if (waiter_page->ready) { - waiter_page->ready = 0; - need_fault = 1; - } else { - gf_log (frame->this->name, - GF_LOG_DEBUG, - "validate frame(%p) is waiting" - "for in-transit page = %p", - frame, waiter_page); - } - - ioc_inode_unlock (ioc_inode); - - if (need_fault) { - need_fault = 0; - ioc_page_fault (ioc_inode, frame, - local->fd, - waiter_page->offset); - } - } - } - - waited = waiter; - waiter = waiter->next; - - waited->data = NULL; - free (waited); - } + ioc_waitq_t *waiter = NULL, *waited = NULL; + ioc_waitq_t *page_waitq = NULL; + int8_t cache_still_valid = 1; + ioc_local_t *local = NULL; + int8_t need_fault = 0; + ioc_page_t *waiter_page = NULL; + + GF_VALIDATE_OR_GOTO("io-cache", frame, out); + + local = frame->local; + GF_VALIDATE_OR_GOTO(frame->this->name, local, out); + + if (ioc_inode == NULL) { + local->op_ret = -1; + local->op_errno = EINVAL; + gf_smsg(frame->this->name, GF_LOG_WARNING, 0, IO_CACHE_MSG_INODE_NULL, + NULL); + goto out; + } + + if (stbuf) + cache_still_valid = ioc_cache_still_valid(ioc_inode, stbuf); + else + cache_still_valid = 0; + + ioc_inode_lock(ioc_inode); + { + waiter = ioc_inode->waitq; + if (!waiter) { + gf_smsg(frame->this->name, GF_LOG_WARNING, 0, + IO_CACHE_MSG_PAGE_WAIT_VALIDATE, NULL); + + ioc_inode_unlock(ioc_inode); + goto out; + } + + while (waiter) { + waiter_page = waiter->data; + ioc_inode->waitq = waiter->next; + page_waitq = NULL; + + if (waiter_page) { + if (cache_still_valid) { + /* cache valid, wake up page */ + page_waitq = __ioc_page_wakeup(waiter_page, + waiter_page->op_errno); + if (page_waitq) { + ioc_inode_unlock(ioc_inode); + ioc_waitq_return(page_waitq); + ioc_inode_lock(ioc_inode); + } + } else { + /* cache invalid, generate page fault and set + * page->ready = 0, to avoid double faults + */ + if (waiter_page->ready) { + waiter_page->ready = 0; + need_fault = 1; + } else { + gf_msg_trace(frame->this->name, 0, + "validate " + "frame(%p) is " + "waiting for " + "in-transit" + " page = %p", + frame, waiter_page); + } + + if (need_fault) { + need_fault = 0; + ioc_inode_unlock(ioc_inode); + ioc_page_fault(ioc_inode, frame, local->fd, + waiter_page->offset); + ioc_inode_lock(ioc_inode); + } + } + } + + waited = waiter; + waiter = ioc_inode->waitq; + + waited->data = NULL; + GF_FREE(waited); + } + } + ioc_inode_unlock(ioc_inode); + +out: + return; } -/* - * ioc_inode_update - create a new ioc_inode_t structure and add it to - * the table table. fill in the fields which are derived +/* + * ioc_inode_create - create a new ioc_inode_t structure and add it to + * the table table. fill in the fields which are derived * from inode_t corresponding to the file - * + * * @table: io-table structure * @inode: inode structure * * not for external reference */ ioc_inode_t * -ioc_inode_update (ioc_table_t *table, - inode_t *inode, - uint32_t weight) +ioc_inode_create(ioc_table_t *table, inode_t *inode, uint32_t weight) { - ioc_inode_t *ioc_inode = CALLOC (1, sizeof (ioc_inode_t)); - ERR_ABORT (ioc_inode); - - ioc_inode->table = table; - - /* initialize the list for pages */ - INIT_LIST_HEAD (&ioc_inode->pages); - INIT_LIST_HEAD (&ioc_inode->page_lru); - - ioc_table_lock (table); - - table->inode_count++; - list_add (&ioc_inode->inode_list, &table->inodes); - list_add_tail (&ioc_inode->inode_lru, &table->inode_lru[weight]); - - gf_log (table->xl->name, - GF_LOG_DEBUG, - "adding to inode_lru[%d]", weight); - - ioc_table_unlock (table); - - pthread_mutex_init (&ioc_inode->inode_lock, NULL); - ioc_inode->weight = weight; - - return ioc_inode; -} + ioc_inode_t *ioc_inode = NULL; + GF_VALIDATE_OR_GOTO("io-cache", table, out); -/* + ioc_inode = GF_CALLOC(1, sizeof(ioc_inode_t), gf_ioc_mt_ioc_inode_t); + if (ioc_inode == NULL) { + goto out; + } + + ioc_inode->inode = inode; + ioc_inode->table = table; + INIT_LIST_HEAD(&ioc_inode->cache.page_lru); + pthread_mutex_init(&ioc_inode->inode_lock, NULL); + ioc_inode->weight = weight; + + ioc_table_lock(table); + { + table->inode_count++; + list_add(&ioc_inode->inode_list, &table->inodes); + list_add_tail(&ioc_inode->inode_lru, &table->inode_lru[weight]); + } + ioc_table_unlock(table); + + gf_msg_trace(table->xl->name, 0, "adding to inode_lru[%d]", weight); + +out: + return ioc_inode; +} + +/* * ioc_inode_destroy - destroy an ioc_inode_t object. * * @inode: inode to destroy * - * to be called only from ioc_forget. + * to be called only from ioc_forget. */ void -ioc_inode_destroy (ioc_inode_t *ioc_inode) +ioc_inode_destroy(ioc_inode_t *ioc_inode) { - ioc_table_t *table = ioc_inode->table; - - ioc_table_lock (table); - table->inode_count--; - list_del (&ioc_inode->inode_list); - list_del (&ioc_inode->inode_lru); - ioc_table_unlock (table); - - ioc_inode_flush (ioc_inode); - - pthread_mutex_destroy (&ioc_inode->inode_lock); - free (ioc_inode); -} + ioc_table_t *table = NULL; + + GF_VALIDATE_OR_GOTO("io-cache", ioc_inode, out); + + table = ioc_inode->table; + ioc_table_lock(table); + { + table->inode_count--; + list_del(&ioc_inode->inode_list); + list_del(&ioc_inode->inode_lru); + } + ioc_table_unlock(table); + + ioc_inode_flush(ioc_inode); + rbthash_table_destroy(ioc_inode->cache.page_table); + + pthread_mutex_destroy(&ioc_inode->inode_lock); + GF_FREE(ioc_inode); +out: + return; +} diff --git a/xlators/performance/io-cache/src/ioc-mem-types.h b/xlators/performance/io-cache/src/ioc-mem-types.h new file mode 100644 index 00000000000..20c9a12021e --- /dev/null +++ b/xlators/performance/io-cache/src/ioc-mem-types.h @@ -0,0 +1,29 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __IOC_MT_H__ +#define __IOC_MT_H__ + +#include <glusterfs/mem-types.h> + +enum gf_ioc_mem_types_ { + gf_ioc_mt_iovec = gf_common_mt_end + 1, + gf_ioc_mt_ioc_table_t, + gf_ioc_mt_char, + gf_ioc_mt_ioc_waitq_t, + gf_ioc_mt_ioc_priority, + gf_ioc_mt_list_head, + gf_ioc_mt_call_pool_t, + gf_ioc_mt_ioc_inode_t, + gf_ioc_mt_ioc_fill_t, + gf_ioc_mt_ioc_newpage_t, + gf_ioc_mt_end +}; +#endif diff --git a/xlators/performance/io-cache/src/page.c b/xlators/performance/io-cache/src/page.c index 1acda2ce697..84b1ae6cb20 100644 --- a/xlators/performance/io-cache/src/page.c +++ b/xlators/performance/io-cache/src/page.c @@ -1,112 +1,190 @@ /* - Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com> - This file is part of GlusterFS. - - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. -*/ + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ -#include "glusterfs.h" -#include "logging.h" -#include "dict.h" -#include "xlator.h" +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> #include "io-cache.h" +#include "ioc-mem-types.h" #include <assert.h> #include <sys/time.h> +#include "io-cache-messages.h" +char +ioc_empty(struct ioc_cache *cache) +{ + char is_empty = -1; + + GF_VALIDATE_OR_GOTO("io-cache", cache, out); + + is_empty = list_empty(&cache->page_lru); + +out: + return is_empty; +} ioc_page_t * -ioc_page_get (ioc_inode_t *ioc_inode, - off_t offset) +__ioc_page_get(ioc_inode_t *ioc_inode, off_t offset) { - int8_t found = 0; - ioc_page_t *page = NULL; - ioc_table_t *table = ioc_inode->table; - off_t rounded_offset = floor (offset, table->page_size); - - if (list_empty (&ioc_inode->pages)) { - return NULL; - } - - list_for_each_entry (page, &ioc_inode->pages, pages) { - if (page->offset == rounded_offset) { - found = 1; - break; - } - } - - /* was previously returning ioc_inode itself.., - * 1st of its type and found one more downstairs :O */ - if (!found){ - page = NULL; - } else { - /* push the page to the end of the lru list */ - list_move_tail (&page->page_lru, &ioc_inode->page_lru); - } - - return page; + ioc_page_t *page = NULL; + ioc_table_t *table = NULL; + off_t rounded_offset = 0; + + GF_VALIDATE_OR_GOTO("io-cache", ioc_inode, out); + + table = ioc_inode->table; + GF_VALIDATE_OR_GOTO("io-cache", ioc_inode, out); + + rounded_offset = gf_floor(offset, table->page_size); + + page = rbthash_get(ioc_inode->cache.page_table, &rounded_offset, + sizeof(rounded_offset)); + + if (page != NULL) { + /* push the page to the end of the lru list */ + list_move_tail(&page->page_lru, &ioc_inode->cache.page_lru); + } + +out: + return page; } +ioc_page_t * +ioc_page_get(ioc_inode_t *ioc_inode, off_t offset) +{ + ioc_page_t *page = NULL; + + if (ioc_inode == NULL) { + goto out; + } + + ioc_inode_lock(ioc_inode); + { + page = __ioc_page_get(ioc_inode, offset); + } + ioc_inode_unlock(ioc_inode); + +out: + return page; +} /* - * ioc_page_destroy - + * __ioc_page_destroy - * * @page: * */ int64_t -ioc_page_destroy (ioc_page_t *page) +__ioc_page_destroy(ioc_page_t *page) +{ + int64_t page_size = 0; + + GF_VALIDATE_OR_GOTO("io-cache", page, out); + + if (page->iobref) + page_size = iobref_size(page->iobref); + + if (page->waitq) { + /* frames waiting on this page, do not destroy this page */ + page_size = -1; + page->stale = 1; + } else { + rbthash_remove(page->inode->cache.page_table, &page->offset, + sizeof(page->offset)); + list_del(&page->page_lru); + + gf_msg_trace(page->inode->table->xl->name, 0, + "destroying page = %p, offset = %" PRId64 + " " + "&& inode = %p", + page, page->offset, page->inode); + + if (page->vector) { + iobref_unref(page->iobref); + GF_FREE(page->vector); + page->vector = NULL; + } + + page->inode = NULL; + } + + if (page_size != -1) { + pthread_mutex_destroy(&page->page_lock); + GF_FREE(page); + } + +out: + return page_size; +} + +int64_t +ioc_page_destroy(ioc_page_t *page) { - int64_t page_size = 0; - - page_size = page->size; - - if (page->waitq) { - /* frames waiting on this page, do not destroy this page */ - page_size = -1; - } else { - - list_del (&page->pages); - list_del (&page->page_lru); - - gf_log (page->inode->table->xl->name, GF_LOG_DEBUG, - "destroying page = %p, offset = %"PRId64" " - "&& inode = %p", - page, page->offset, page->inode); - - if (page->vector){ - dict_unref (page->ref); - free (page->vector); - page->vector = NULL; - } - - page->inode = NULL; - - } - - if (page_size != -1) { - pthread_mutex_destroy (&page->page_lock); - free (page); - } - - return page_size; + int64_t ret = 0; + struct ioc_inode *inode = NULL; + + if (page == NULL) { + goto out; + } + + ioc_inode_lock(page->inode); + { + inode = page->inode; + ret = __ioc_page_destroy(page); + } + ioc_inode_unlock(inode); + +out: + return ret; } +int32_t +__ioc_inode_prune(ioc_inode_t *curr, uint64_t *size_pruned, + uint64_t size_to_prune, uint32_t index) +{ + ioc_page_t *page = NULL, *next = NULL; + int32_t ret = 0; + ioc_table_t *table = NULL; + + if (curr == NULL) { + goto out; + } + + table = curr->table; + + list_for_each_entry_safe(page, next, &curr->cache.page_lru, page_lru) + { + *size_pruned += page->size; + ret = __ioc_page_destroy(page); + + if (ret != -1) + table->cache_used -= ret; + + gf_msg_trace(table->xl->name, 0, + "index = %d && " + "table->cache_used = %" PRIu64 + " && table->" + "cache_size = %" PRIu64, + index, table->cache_used, table->cache_size); + + if ((*size_pruned) >= size_to_prune) + break; + } + + if (ioc_empty(&curr->cache)) { + list_del_init(&curr->inode_lru); + } + +out: + return 0; +} /* * ioc_prune - prune the cache. we have a limit to the number of pages we * can have in-memory. @@ -115,153 +193,157 @@ ioc_page_destroy (ioc_page_t *page) * */ int32_t -ioc_prune (ioc_table_t *table) +ioc_prune(ioc_table_t *table) { - ioc_inode_t *curr = NULL, *next_ioc_inode = NULL; - ioc_page_t *page = NULL, *next = NULL; - int32_t ret = -1; - int32_t index = 0; - uint64_t size_to_prune = 0; - uint64_t size_pruned = 0; - - ioc_table_lock (table); - { - size_to_prune = table->cache_used - table->cache_size; - /* take out the least recently used inode */ - for (index=0; index < table->max_pri; index++) { - list_for_each_entry_safe (curr, next_ioc_inode, - &table->inode_lru[index], - inode_lru) { - /* prune page-by-page for this inode, till - * we reach the equilibrium */ - ioc_inode_lock (curr); - /* { */ - - list_for_each_entry_safe (page, next, - &curr->page_lru, - page_lru) { - /* done with all pages, and not - * reached equilibrium yet?? - * continue with next inode in - * lru_list */ - size_pruned += page->size; - ret = ioc_page_destroy (page); - - if (ret != -1) - table->cache_used -= ret; - - gf_log (table->xl->name, - GF_LOG_DEBUG, - "index = %d && table->cache_" - "used = %"PRIu64" && table->" - "cache_size = %"PRIu64, - index, table->cache_used, - table->cache_size); - - if (size_pruned >= size_to_prune) - break; - } /* list_for_each_entry_safe(page...) */ - if (list_empty (&curr->pages)) { - list_del_init (&curr->inode_lru); - } - - /* } */ - ioc_inode_unlock (curr); - - if (size_pruned >= size_to_prune) - break; - } /* list_for_each_entry_safe (curr...) */ - - if (size_pruned >= size_to_prune) - break; - } /* for(index=0;...) */ - - } /* ioc_inode_table locked region end */ - ioc_table_unlock (table); - - return 0; + ioc_inode_t *curr = NULL, *next_ioc_inode = NULL; + int32_t index = 0; + uint64_t size_to_prune = 0; + uint64_t size_pruned = 0; + + GF_VALIDATE_OR_GOTO("io-cache", table, out); + + ioc_table_lock(table); + { + size_to_prune = table->cache_used - table->cache_size; + /* take out the least recently used inode */ + for (index = 0; index < table->max_pri; index++) { + list_for_each_entry_safe(curr, next_ioc_inode, + &table->inode_lru[index], inode_lru) + { + /* prune page-by-page for this inode, till + * we reach the equilibrium */ + ioc_inode_lock(curr); + { + __ioc_inode_prune(curr, &size_pruned, size_to_prune, index); + } + ioc_inode_unlock(curr); + + if (size_pruned >= size_to_prune) + break; + } /* list_for_each_entry_safe (curr...) */ + + if (size_pruned >= size_to_prune) + break; + } /* for(index=0;...) */ + + } /* ioc_inode_table locked region end */ + ioc_table_unlock(table); + +out: + return 0; } /* - * ioc_page_create - create a new page. + * __ioc_page_create - create a new page. * - * @ioc_inode: + * @ioc_inode: * @offset: * */ ioc_page_t * -ioc_page_create (ioc_inode_t *ioc_inode, - off_t offset) +__ioc_page_create(ioc_inode_t *ioc_inode, off_t offset) { - ioc_table_t *table = ioc_inode->table; - ioc_page_t *page = NULL; - off_t rounded_offset = floor (offset, table->page_size); - ioc_page_t *newpage = CALLOC (1, sizeof (*newpage)); - ERR_ABORT (newpage); - - if (ioc_inode) - table = ioc_inode->table; - else { - return NULL; - } - - newpage->offset = rounded_offset; - newpage->inode = ioc_inode; - pthread_mutex_init (&newpage->page_lock, NULL); - - list_add_tail (&newpage->page_lru, &ioc_inode->page_lru); - list_add_tail (&newpage->pages, &ioc_inode->pages); - - page = newpage; - - gf_log ("io-cache", GF_LOG_DEBUG, - "returning new page %p", page); - return page; + ioc_table_t *table = NULL; + ioc_page_t *page = NULL; + off_t rounded_offset = 0; + ioc_page_t *newpage = NULL; + + GF_VALIDATE_OR_GOTO("io-cache", ioc_inode, out); + + table = ioc_inode->table; + GF_VALIDATE_OR_GOTO("io-cache", table, out); + + rounded_offset = gf_floor(offset, table->page_size); + + newpage = GF_CALLOC(1, sizeof(*newpage), gf_ioc_mt_ioc_newpage_t); + if (newpage == NULL) { + goto out; + } + + if (!ioc_inode) { + GF_FREE(newpage); + newpage = NULL; + goto out; + } + + newpage->offset = rounded_offset; + newpage->inode = ioc_inode; + pthread_mutex_init(&newpage->page_lock, NULL); + + rbthash_insert(ioc_inode->cache.page_table, newpage, &rounded_offset, + sizeof(rounded_offset)); + + list_add_tail(&newpage->page_lru, &ioc_inode->cache.page_lru); + + page = newpage; + + gf_msg_trace("io-cache", 0, "returning new page %p", page); + +out: + return page; } -/* - * ioc_wait_on_page - pause a frame to wait till the arrival of a page. - * here we need to handle the case when the frame who calls wait_on_page - * himself has caused page_fault +/* + * ioc_wait_on_page - pause a frame to wait till the arrival of a page. + * here we need to handle the case when the frame who calls wait_on_page + * himself has caused page_fault * * @page: page to wait on * @frame: call frame who is waiting on page * */ void -ioc_wait_on_page (ioc_page_t *page, - call_frame_t *frame, - off_t offset, - size_t size) +__ioc_wait_on_page(ioc_page_t *page, call_frame_t *frame, off_t offset, + size_t size) { - ioc_waitq_t *waitq = NULL; - ioc_local_t *local = frame->local; - - waitq = CALLOC (1, sizeof (*waitq)); - ERR_ABORT (waitq); - - gf_log (frame->this->name, GF_LOG_DEBUG, - "frame(%p) waiting on page = %p, offset=%"PRId64", " - "size=%"GF_PRI_SIZET"", - frame, page, offset, size); - - waitq->data = frame; - waitq->next = page->waitq; - waitq->pending_offset = offset; - waitq->pending_size = size; - page->waitq = waitq; - /* one frame can wait only once on a given page, - * local->wait_count is number of pages a frame is waiting on */ - ioc_local_lock (local); - { - local->wait_count++; - } - ioc_local_unlock (local); + ioc_waitq_t *waitq = NULL; + ioc_local_t *local = NULL; + + GF_VALIDATE_OR_GOTO("io-cache", frame, out); + local = frame->local; + + GF_VALIDATE_OR_GOTO(frame->this->name, local, out); + + if (page == NULL) { + local->op_ret = -1; + local->op_errno = ENOMEM; + gf_smsg(frame->this->name, GF_LOG_WARNING, 0, + IO_CACHE_MSG_NULL_PAGE_WAIT, NULL); + goto out; + } + + waitq = GF_CALLOC(1, sizeof(*waitq), gf_ioc_mt_ioc_waitq_t); + if (waitq == NULL) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto out; + } + + gf_msg_trace(frame->this->name, 0, + "frame(%p) waiting on page = %p, offset=%" PRId64 + ", " + "size=%" GF_PRI_SIZET "", + frame, page, offset, size); + + waitq->data = frame; + waitq->next = page->waitq; + waitq->pending_offset = offset; + waitq->pending_size = size; + page->waitq = waitq; + /* one frame can wait only once on a given page, + * local->wait_count is number of pages a frame is waiting on */ + ioc_local_lock(local); + { + local->wait_count++; + } + ioc_local_unlock(local); + +out: + return; } - /* - * ioc_cache_still_valid - see if cached pages ioc_inode are still valid + * ioc_cache_still_valid - see if cached pages ioc_inode are still valid * against given stbuf * * @ioc_inode: @@ -270,184 +352,203 @@ ioc_wait_on_page (ioc_page_t *page, * assumes ioc_inode is locked */ int8_t -ioc_cache_still_valid (ioc_inode_t *ioc_inode, - struct stat *stbuf) +ioc_cache_still_valid(ioc_inode_t *ioc_inode, struct iatt *stbuf) { - int8_t cache_still_valid = 1; - + int8_t cache_still_valid = 1; + + GF_VALIDATE_OR_GOTO("io-cache", ioc_inode, out); + #if 0 - if (!stbuf || (stbuf->st_mtime != ioc_inode->mtime) || - (stbuf->st_mtim.tv_nsec != ioc_inode->stbuf.st_mtim.tv_nsec)) - cache_still_valid = 0; + if (!stbuf || (stbuf->ia_mtime != ioc_inode->cache.mtime) || + (stbuf->st_mtim.tv_nsec != ioc_inode->stbuf.st_mtim.tv_nsec)) + cache_still_valid = 0; #else - if (!stbuf || (stbuf->st_mtime != ioc_inode->mtime)) - cache_still_valid = 0; + if (!stbuf || (stbuf->ia_mtime != ioc_inode->cache.mtime) || + (stbuf->ia_mtime_nsec != ioc_inode->cache.mtime_nsec)) + cache_still_valid = 0; #endif #if 0 - /* talk with avati@zresearch.com to enable this section */ - if (!ioc_inode->mtime && stbuf) { - cache_still_valid = 1; - ioc_inode->mtime = stbuf->st_mtime; - } + /* talk with avati@gluster.com to enable this section */ + if (!ioc_inode->mtime && stbuf) { + cache_still_valid = 1; + ioc_inode->mtime = stbuf->ia_mtime; + } #endif - return cache_still_valid; +out: + return cache_still_valid; } - void -ioc_waitq_return (ioc_waitq_t *waitq) +ioc_waitq_return(ioc_waitq_t *waitq) { - ioc_waitq_t *trav = NULL; - ioc_waitq_t *next = NULL; - call_frame_t *frame = NULL; + ioc_waitq_t *trav = NULL; + ioc_waitq_t *next = NULL; + call_frame_t *frame = NULL; - for (trav = waitq; trav; trav = next) { - next = trav->next; + for (trav = waitq; trav; trav = next) { + next = trav->next; - frame = trav->data; - ioc_frame_return (frame); - free (trav); - } + frame = trav->data; + ioc_frame_return(frame); + GF_FREE(trav); + } } - int -ioc_fault_cbk (call_frame_t *frame, - void *cookie, - xlator_t *this, - int32_t op_ret, - int32_t op_errno, - struct iovec *vector, - int32_t count, - struct stat *stbuf) +ioc_fault_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iovec *vector, int32_t count, + struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) { - ioc_local_t *local = frame->local; - off_t offset = local->pending_offset; - ioc_inode_t *ioc_inode = local->inode; - ioc_table_t *table = ioc_inode->table; - ioc_page_t *page = NULL; - off_t trav_offset = 0; - size_t payload_size = 0; - int32_t destroy_size = 0; - size_t page_size = 0; - ioc_waitq_t *waitq = NULL; - - trav_offset = offset; - payload_size = op_ret; - - ioc_inode_lock (ioc_inode); - { - if (op_ret == -1 || - (op_ret >= 0 && - !ioc_cache_still_valid(ioc_inode, stbuf))) { - gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG, - "cache for inode(%p) is invalid. flushing " - "all pages", ioc_inode); - destroy_size = __ioc_inode_flush (ioc_inode); - } - - if (op_ret >= 0) - ioc_inode->mtime = stbuf->st_mtime; - - gettimeofday (&ioc_inode->tv, NULL); - - if (op_ret < 0) { - /* error, readv returned -1 */ - page = ioc_page_get (ioc_inode, offset); - if (page) - waitq = ioc_page_error (page, op_ret, - op_errno); - } else { - gf_log (ioc_inode->table->xl->name, GF_LOG_DEBUG, - "op_ret = %d", op_ret); - page = ioc_page_get (ioc_inode, offset); - if (!page) { - /* page was flushed */ - /* some serious bug ? */ - gf_log (this->name, GF_LOG_DEBUG, - "wasted copy: %"PRId64"[+%"PRId64"] " - "ioc_inode=%p", offset, - table->page_size, ioc_inode); - } else { - if (page->vector) { - dict_unref (page->ref); - free (page->vector); - page->vector = NULL; - } - - /* keep a copy of the page for our cache */ - page->vector = iov_dup (vector, count); - page->count = count; - if (frame->root->rsp_refs) { - dict_ref (frame->root->rsp_refs); - page->ref = frame->root->rsp_refs; - } else { - /* TODO: we have got a response to - * our request and no data */ - gf_log (this->name, GF_LOG_CRITICAL, - "frame>root>rsp_refs is null"); - } /* if(frame->root->rsp_refs) */ - - /* page->size should indicate exactly how - * much the readv call to the child - * translator returned. earlier op_ret - * from child translator was used, which - * gave rise to a bug where reads from - * io-cached volume were resulting in 0 - * byte replies */ - page_size = iov_length(vector, count); - - page->size = page_size; - - if (page->waitq) { - /* wake up all the frames waiting on - * this page, including - * the frame which triggered fault */ - waitq = ioc_page_wakeup (page); - } /* if(page->waitq) */ - } /* if(!page)...else */ - } /* if(op_ret < 0)...else */ - } /* ioc_inode locked region end */ - ioc_inode_unlock (ioc_inode); - - ioc_waitq_return (waitq); - - if (page_size) { - ioc_table_lock (table); - { - table->cache_used += page_size; - } - ioc_table_unlock (table); - } - - if (destroy_size) { - ioc_table_lock (table); - { - table->cache_used -= destroy_size; - } - ioc_table_unlock (table); - } - - if (ioc_need_prune (ioc_inode->table)) { - ioc_prune (ioc_inode->table); - } - - gf_log (this->name, GF_LOG_DEBUG, "fault frame %p returned", frame); - pthread_mutex_destroy (&local->local_lock); - - fd_unref (local->fd); - - STACK_DESTROY (frame->root); - return 0; + ioc_local_t *local = NULL; + off_t offset = 0; + ioc_inode_t *ioc_inode = NULL; + ioc_table_t *table = NULL; + ioc_page_t *page = NULL; + int32_t destroy_size = 0; + size_t page_size = 0; + ioc_waitq_t *waitq = NULL; + size_t iobref_page_size = 0; + char zero_filled = 0; + + GF_ASSERT(frame); + + local = frame->local; + GF_ASSERT(local); + + offset = local->pending_offset; + ioc_inode = local->inode; + GF_ASSERT(ioc_inode); + + table = ioc_inode->table; + GF_ASSERT(table); + + zero_filled = ((op_ret >= 0) && (stbuf->ia_mtime == 0)); + + ioc_inode_lock(ioc_inode); + { + if (op_ret == -1 || + !(zero_filled || ioc_cache_still_valid(ioc_inode, stbuf))) { + gf_msg_trace(ioc_inode->table->xl->name, 0, + "cache for inode(%p) is invalid. flushing " + "all pages", + ioc_inode); + destroy_size = __ioc_inode_flush(ioc_inode); + } + + if ((op_ret >= 0) && !zero_filled) { + ioc_inode->cache.mtime = stbuf->ia_mtime; + ioc_inode->cache.mtime_nsec = stbuf->ia_mtime_nsec; + } + + ioc_inode->cache.last_revalidate = gf_time(); + + if (op_ret < 0) { + /* error, readv returned -1 */ + page = __ioc_page_get(ioc_inode, offset); + if (page) + waitq = __ioc_page_error(page, op_ret, op_errno); + } else { + gf_msg_trace(ioc_inode->table->xl->name, 0, "op_ret = %d", op_ret); + page = __ioc_page_get(ioc_inode, offset); + if (!page) { + /* page was flushed */ + /* some serious bug ? */ + gf_smsg(frame->this->name, GF_LOG_WARNING, 0, + IO_CACHE_MSG_WASTED_COPY, "offset=%" PRId64, offset, + "page-size=%" PRId64, table->page_size, "ioc_inode=%p", + ioc_inode, NULL); + } else { + if (page->vector) { + iobref_unref(page->iobref); + GF_FREE(page->vector); + page->vector = NULL; + page->iobref = NULL; + } + + /* keep a copy of the page for our cache */ + page->vector = iov_dup(vector, count); + if (page->vector == NULL) { + page = __ioc_page_get(ioc_inode, offset); + if (page != NULL) + waitq = __ioc_page_error(page, -1, ENOMEM); + goto unlock; + } + + page->count = count; + if (iobref) { + page->iobref = iobref_ref(iobref); + } else { + /* TODO: we have got a response to + * our request and no data */ + gf_smsg(frame->this->name, GF_LOG_CRITICAL, ENOMEM, + IO_CACHE_MSG_FRAME_NULL, NULL); + } /* if(frame->root->rsp_refs) */ + + /* page->size should indicate exactly how + * much the readv call to the child + * translator returned. earlier op_ret + * from child translator was used, which + * gave rise to a bug where reads from + * io-cached volume were resulting in 0 + * byte replies */ + page_size = iov_length(vector, count); + page->size = page_size; + page->op_errno = op_errno; + + iobref_page_size = iobref_size(page->iobref); + + if (page->waitq) { + /* wake up all the frames waiting on + * this page, including + * the frame which triggered fault */ + waitq = __ioc_page_wakeup(page, op_errno); + } /* if(page->waitq) */ + } /* if(!page)...else */ + } /* if(op_ret < 0)...else */ + } /* ioc_inode locked region end */ +unlock: + ioc_inode_unlock(ioc_inode); + + ioc_waitq_return(waitq); + + if (iobref_page_size) { + ioc_table_lock(table); + { + table->cache_used += iobref_page_size; + } + ioc_table_unlock(table); + } + + if (destroy_size) { + ioc_table_lock(table); + { + table->cache_used -= destroy_size; + } + ioc_table_unlock(table); + } + + if (ioc_need_prune(ioc_inode->table)) { + ioc_prune(ioc_inode->table); + } + + gf_msg_trace(frame->this->name, 0, "fault frame %p returned", frame); + pthread_mutex_destroy(&local->local_lock); + + fd_unref(local->fd); + if (local->xattr_req) + dict_unref(local->xattr_req); + + STACK_DESTROY(frame->root); + return 0; } /* * ioc_page_fault - - * + * * @ioc_inode: * @frame: * @fd: @@ -455,145 +556,216 @@ ioc_fault_cbk (call_frame_t *frame, * */ void -ioc_page_fault (ioc_inode_t *ioc_inode, - call_frame_t *frame, - fd_t *fd, - off_t offset) +ioc_page_fault(ioc_inode_t *ioc_inode, call_frame_t *frame, fd_t *fd, + off_t offset) { - ioc_table_t *table = ioc_inode->table; - call_frame_t *fault_frame = copy_frame (frame); - ioc_local_t *fault_local = CALLOC (1, sizeof (ioc_local_t)); - ERR_ABORT (fault_local); - - /* NOTE: copy_frame() means, the frame the fop whose fd_ref we - * are using till now won't be valid till we get reply from server. - * we unref this fd, in fault_cbk */ - fault_local->fd = fd_ref (fd); - - fault_frame->local = fault_local; - pthread_mutex_init (&fault_local->local_lock, NULL); - - INIT_LIST_HEAD (&fault_local->fill_list); - fault_local->pending_offset = offset; - fault_local->pending_size = table->page_size; - fault_local->inode = ioc_inode; - - gf_log (frame->this->name, GF_LOG_DEBUG, - "stack winding page fault for offset = %"PRId64" with " - "frame %p", offset, fault_frame); - - STACK_WIND (fault_frame, ioc_fault_cbk, - FIRST_CHILD(fault_frame->this), - FIRST_CHILD(fault_frame->this)->fops->readv, - fd, table->page_size, offset); - return; + ioc_table_t *table = NULL; + call_frame_t *fault_frame = NULL; + ioc_local_t *fault_local = NULL; + ioc_local_t *local = NULL; + int32_t op_ret = -1, op_errno = -1; + ioc_waitq_t *waitq = NULL; + ioc_page_t *page = NULL; + + GF_ASSERT(ioc_inode); + if (frame == NULL) { + op_ret = -1; + op_errno = EINVAL; + gf_smsg("io-cache", GF_LOG_WARNING, EINVAL, IO_CACHE_MSG_PAGE_FAULT, + NULL); + goto err; + } + + table = ioc_inode->table; + fault_frame = copy_frame(frame); + if (fault_frame == NULL) { + op_ret = -1; + op_errno = ENOMEM; + goto err; + } + + local = frame->local; + fault_local = mem_get0(THIS->local_pool); + if (fault_local == NULL) { + op_ret = -1; + op_errno = ENOMEM; + STACK_DESTROY(fault_frame->root); + goto err; + } + + /* NOTE: copy_frame() means, the frame the fop whose fd_ref we + * are using till now won't be valid till we get reply from server. + * we unref this fd, in fault_cbk */ + fault_local->fd = fd_ref(fd); + + fault_frame->local = fault_local; + pthread_mutex_init(&fault_local->local_lock, NULL); + + INIT_LIST_HEAD(&fault_local->fill_list); + fault_local->pending_offset = offset; + fault_local->pending_size = table->page_size; + fault_local->inode = ioc_inode; + + if (local && local->xattr_req) + fault_local->xattr_req = dict_ref(local->xattr_req); + + gf_msg_trace(frame->this->name, 0, + "stack winding page fault for offset = %" PRId64 + " with " + "frame %p", + offset, fault_frame); + + STACK_WIND(fault_frame, ioc_fault_cbk, FIRST_CHILD(fault_frame->this), + FIRST_CHILD(fault_frame->this)->fops->readv, fd, + table->page_size, offset, 0, fault_local->xattr_req); + return; + +err: + ioc_inode_lock(ioc_inode); + { + page = __ioc_page_get(ioc_inode, offset); + if (page != NULL) { + waitq = __ioc_page_error(page, op_ret, op_errno); + } + } + ioc_inode_unlock(ioc_inode); + + if (waitq != NULL) { + ioc_waitq_return(waitq); + } } -void -ioc_frame_fill (ioc_page_t *page, - call_frame_t *frame, - off_t offset, - size_t size) +int32_t +__ioc_frame_fill(ioc_page_t *page, call_frame_t *frame, off_t offset, + size_t size, int32_t op_errno) { - ioc_local_t *local = frame->local; - ioc_fill_t *fill = NULL; - off_t src_offset = 0; - off_t dst_offset = 0; - ssize_t copy_size = 0; - ioc_inode_t *ioc_inode = page->inode; - - gf_log (frame->this->name, GF_LOG_DEBUG, - "frame (%p) offset = %"PRId64" && size = %"GF_PRI_SIZET" " - "&& page->size = %"GF_PRI_SIZET" && wait_count = %d", - frame, offset, size, page->size, local->wait_count); - - /* immediately move this page to the end of the page_lru list */ - list_move_tail (&page->page_lru, &ioc_inode->page_lru); - /* fill local->pending_size bytes from local->pending_offset */ - if (local->op_ret != -1 && page->size) { - if (offset > page->offset) - /* offset is offset in file, convert it to offset in - * page */ - src_offset = offset - page->offset; - /*FIXME: since offset is the offset within page is the - * else case valid? */ - else - /* local->pending_offset is in previous page. do not - * fill until we have filled all previous pages */ - dst_offset = page->offset - offset; - - /* we have to copy from offset to either end of this page - * or till the requested size */ - copy_size = min (page->size - src_offset, - size - dst_offset); - - if (copy_size < 0) { - /* if page contains fewer bytes and the required offset - is beyond the page size in the page */ - copy_size = src_offset = 0; - } - - gf_log (page->inode->table->xl->name, GF_LOG_DEBUG, - "copy_size = %"GF_PRI_SIZET" && src_offset = " - "%"PRId64" && dst_offset = %"PRId64"", - copy_size, src_offset, dst_offset); - - { - ioc_fill_t *new = CALLOC (1, sizeof (*new)); - ERR_ABORT (new); - new->offset = page->offset; - new->size = copy_size; - new->refs = dict_ref (page->ref); - new->count = iov_subset (page->vector, - page->count, - src_offset, - src_offset + copy_size, - NULL); - new->vector = CALLOC (new->count, - sizeof (struct iovec)); - ERR_ABORT (new->vector); - new->count = iov_subset (page->vector, - page->count, - src_offset, - src_offset + copy_size, - new->vector); - - - - /* add the ioc_fill to fill_list for this frame */ - if (list_empty (&local->fill_list)) { - /* if list is empty, then this is the first - * time we are filling frame, add the - * ioc_fill_t to the end of list */ - list_add_tail (&new->list, &local->fill_list); - } else { - int8_t found = 0; - /* list is not empty, we need to look for - * where this offset fits in list */ - list_for_each_entry (fill, &local->fill_list, - list) { - if (fill->offset > new->offset) { - found = 1; - break; - } - } - - if (found) { - found = 0; - list_add_tail (&new->list, - &fill->list); - } else { - list_add_tail (&new->list, - &local->fill_list); - } - } - } - local->op_ret += copy_size; - } + ioc_local_t *local = NULL; + ioc_fill_t *fill = NULL; + off_t src_offset = 0; + off_t dst_offset = 0; + ssize_t copy_size = 0; + ioc_inode_t *ioc_inode = NULL; + ioc_fill_t *new = NULL; + int8_t found = 0; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("io-cache", frame, out); + + local = frame->local; + GF_VALIDATE_OR_GOTO(frame->this->name, local, out); + + if (page == NULL) { + gf_smsg(frame->this->name, GF_LOG_WARNING, 0, + IO_CACHE_MSG_SERVE_READ_REQUEST, NULL); + local->op_ret = -1; + local->op_errno = EINVAL; + goto out; + } + + ioc_inode = page->inode; + + gf_msg_trace(frame->this->name, 0, + "frame (%p) offset = %" PRId64 " && size = %" GF_PRI_SIZET + " " + "&& page->size = %" GF_PRI_SIZET " && wait_count = %d", + frame, offset, size, page->size, local->wait_count); + + /* immediately move this page to the end of the page_lru list */ + list_move_tail(&page->page_lru, &ioc_inode->cache.page_lru); + /* fill local->pending_size bytes from local->pending_offset */ + if (local->op_ret != -1) { + local->op_errno = op_errno; + + if (page->size == 0) { + goto done; + } + + if (offset > page->offset) + /* offset is offset in file, convert it to offset in + * page */ + src_offset = offset - page->offset; + /*FIXME: since offset is the offset within page is the + * else case valid? */ + else + /* local->pending_offset is in previous page. do not + * fill until we have filled all previous pages */ + dst_offset = page->offset - offset; + + /* we have to copy from offset to either end of this page + * or till the requested size */ + copy_size = min(page->size - src_offset, size - dst_offset); + + if (copy_size < 0) { + /* if page contains fewer bytes and the required offset + is beyond the page size in the page */ + copy_size = src_offset = 0; + } + + gf_msg_trace(page->inode->table->xl->name, 0, + "copy_size = %" GF_PRI_SIZET + " && src_offset = " + "%" PRId64 " && dst_offset = %" PRId64 "", + copy_size, src_offset, dst_offset); + + { + new = GF_CALLOC(1, sizeof(*new), gf_ioc_mt_ioc_fill_t); + if (new == NULL) { + local->op_ret = -1; + local->op_errno = ENOMEM; + goto out; + } + + new->offset = page->offset; + new->size = copy_size; + new->iobref = iobref_ref(page->iobref); + new->count = iov_subset(page->vector, page->count, src_offset, + copy_size, &new->vector, 0); + if (new->count < 0) { + local->op_ret = -1; + local->op_errno = ENOMEM; + + iobref_unref(new->iobref); + GF_FREE(new); + goto out; + } + + /* add the ioc_fill to fill_list for this frame */ + if (list_empty(&local->fill_list)) { + /* if list is empty, then this is the first + * time we are filling frame, add the + * ioc_fill_t to the end of list */ + list_add_tail(&new->list, &local->fill_list); + } else { + found = 0; + /* list is not empty, we need to look for + * where this offset fits in list */ + list_for_each_entry(fill, &local->fill_list, list) + { + if (fill->offset > new->offset) { + found = 1; + break; + } + } + + if (found) { + list_add_tail(&new->list, &fill->list); + } else { + list_add_tail(&new->list, &local->fill_list); + } + } + } + + local->op_ret += copy_size; + } + +done: + ret = 0; +out: + return ret; } /* - * ioc_frame_unwind - frame unwinds only from here + * ioc_frame_unwind - frame unwinds only from here * * @frame: call frame to unwind * @@ -602,73 +774,109 @@ ioc_frame_fill (ioc_page_t *page, * */ static void -ioc_frame_unwind (call_frame_t *frame) +ioc_frame_unwind(call_frame_t *frame) { - ioc_local_t *local = frame->local; - ioc_fill_t *fill = NULL, *next = NULL; - int32_t count = 0; - struct iovec *vector = NULL; - int32_t copied = 0; - dict_t *refs = NULL; - struct stat stbuf = {0,}; - int32_t op_ret = 0; - - // ioc_local_lock (local); - refs = get_new_dict (); - - frame->local = NULL; - - if (list_empty (&local->fill_list)) { - gf_log (frame->this->name, GF_LOG_DEBUG, - "frame(%p) has 0 entries in local->fill_list " - "(offset = %"PRId64" && size = %"GF_PRI_SIZET")", - frame, local->offset, local->size); - } - - list_for_each_entry (fill, &local->fill_list, list) { - count += fill->count; - } - - vector = CALLOC (count, sizeof (*vector)); - ERR_ABORT (vector); - - list_for_each_entry_safe (fill, next, &local->fill_list, list) { - memcpy (((char *)vector) + copied, - fill->vector, - fill->count * sizeof (*vector)); - - copied += (fill->count * sizeof (*vector)); - - dict_copy (fill->refs, refs); - - list_del (&fill->list); - dict_unref (fill->refs); - free (fill->vector); - free (fill); - } - - frame->root->rsp_refs = dict_ref (refs); - - op_ret = iov_length (vector, count); - gf_log (frame->this->name, GF_LOG_DEBUG, - "frame(%p) unwinding with op_ret=%d", frame, op_ret); - - // ioc_local_unlock (local); - - STACK_UNWIND (frame, - op_ret, - local->op_errno, - vector, - count, - &stbuf); - - dict_unref (refs); - - pthread_mutex_destroy (&local->local_lock); - free (local); - free (vector); - - return; + ioc_local_t *local = NULL; + ioc_fill_t *fill = NULL, *next = NULL; + int32_t count = 0; + struct iovec *vector = NULL; + int32_t copied = 0; + struct iobref *iobref = NULL; + struct iatt stbuf = { + 0, + }; + int32_t op_ret = 0, op_errno = 0; + + GF_ASSERT(frame); + + local = frame->local; + if (local == NULL) { + gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM, + IO_CACHE_MSG_LOCAL_NULL, NULL); + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + if (local->op_ret < 0) { + op_ret = local->op_ret; + op_errno = local->op_errno; + goto unwind; + } + + // ioc_local_lock (local); + iobref = iobref_new(); + if (iobref == NULL) { + op_ret = -1; + op_errno = ENOMEM; + } + + if (list_empty(&local->fill_list)) { + gf_msg_trace(frame->this->name, 0, + "frame(%p) has 0 entries in local->fill_list " + "(offset = %" PRId64 " && size = %" GF_PRI_SIZET ")", + frame, local->offset, local->size); + } + + list_for_each_entry(fill, &local->fill_list, list) { count += fill->count; } + + vector = GF_CALLOC(count, sizeof(*vector), gf_ioc_mt_iovec); + if (vector == NULL) { + op_ret = -1; + op_errno = ENOMEM; + } + + list_for_each_entry_safe(fill, next, &local->fill_list, list) + { + /* # TODO: check why this if clause is needed at all. */ + if ((vector != NULL) && (iobref != NULL)) { + memcpy(((char *)vector) + copied, fill->vector, + fill->count * sizeof(*vector)); + + copied += (fill->count * sizeof(*vector)); + + if (iobref_merge(iobref, fill->iobref)) { + op_ret = -1; + op_errno = ENOMEM; + } + } + + list_del(&fill->list); + iobref_unref(fill->iobref); + GF_FREE(fill->vector); + GF_FREE(fill); + } + + if (op_ret != -1) { + op_ret = iov_length(vector, count); + } + +unwind: + gf_msg_trace(frame->this->name, 0, "frame(%p) unwinding with op_ret=%d", + frame, op_ret); + + // ioc_local_unlock (local); + + frame->local = NULL; + STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, &stbuf, + iobref, NULL); + + if (iobref != NULL) { + iobref_unref(iobref); + } + + if (vector != NULL) { + GF_FREE(vector); + vector = NULL; + } + + if (local) { + if (local->xattr_req) + dict_unref(local->xattr_req); + pthread_mutex_destroy(&local->local_lock); + mem_put(local); + } + return; } /* @@ -678,55 +886,117 @@ ioc_frame_unwind (call_frame_t *frame) * to be called only when a frame is waiting on an in-transit page */ void -ioc_frame_return (call_frame_t *frame) +ioc_frame_return(call_frame_t *frame) { - ioc_local_t *local = frame->local; - int32_t wait_count; - assert (local->wait_count > 0); + ioc_local_t *local = NULL; + int32_t wait_count = 0; + + GF_ASSERT(frame); - ioc_local_lock (local); - { - wait_count = --local->wait_count; - } - ioc_local_unlock (local); + local = frame->local; + GF_ASSERT(local->wait_count > 0); - if (!wait_count) { - ioc_frame_unwind (frame); - } + ioc_local_lock(local); + { + wait_count = --local->wait_count; + } + ioc_local_unlock(local); - return; + if (!wait_count) { + ioc_frame_unwind(frame); + } + + return; } -/* +/* * ioc_page_wakeup - * @page: * * to be called only when a frame is waiting on an in-transit page */ ioc_waitq_t * -ioc_page_wakeup (ioc_page_t *page) +__ioc_page_wakeup(ioc_page_t *page, int32_t op_errno) { - ioc_waitq_t *waitq = NULL, *trav = NULL; - call_frame_t *frame = NULL; - - waitq = page->waitq; - page->waitq = NULL; - - trav = waitq; - page->ready = 1; - - gf_log (page->inode->table->xl->name, GF_LOG_DEBUG, - "page is %p && waitq = %p", page, waitq); - - for (trav = waitq; trav; trav = trav->next) { - frame = trav->data; - ioc_frame_fill (page, frame, trav->pending_offset, - trav->pending_size); - } - - return waitq; + ioc_waitq_t *waitq = NULL, *trav = NULL; + call_frame_t *frame = NULL; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("io-cache", page, out); + + waitq = page->waitq; + page->waitq = NULL; + + page->ready = 1; + + gf_msg_trace(page->inode->table->xl->name, 0, "page is %p && waitq = %p", + page, waitq); + + for (trav = waitq; trav; trav = trav->next) { + frame = trav->data; + ret = __ioc_frame_fill(page, frame, trav->pending_offset, + trav->pending_size, op_errno); + if (ret == -1) { + break; + } + } + + if (page->stale) { + __ioc_page_destroy(page); + } + +out: + return waitq; } +/* + * ioc_page_error - + * @page: + * @op_ret: + * @op_errno: + * + */ +ioc_waitq_t * +__ioc_page_error(ioc_page_t *page, int32_t op_ret, int32_t op_errno) +{ + ioc_waitq_t *waitq = NULL, *trav = NULL; + call_frame_t *frame = NULL; + int64_t ret = 0; + ioc_table_t *table = NULL; + ioc_local_t *local = NULL; + + GF_VALIDATE_OR_GOTO("io-cache", page, out); + + waitq = page->waitq; + page->waitq = NULL; + + gf_msg_debug(page->inode->table->xl->name, 0, + "page error for page = %p & waitq = %p", page, waitq); + + for (trav = waitq; trav; trav = trav->next) { + frame = trav->data; + + local = frame->local; + ioc_local_lock(local); + { + if (local->op_ret != -1) { + local->op_ret = op_ret; + local->op_errno = op_errno; + } + } + ioc_local_unlock(local); + } + + table = page->inode->table; + ret = __ioc_page_destroy(page); + + if (ret != -1) { + table->cache_used -= ret; + } + +out: + return waitq; +} /* * ioc_page_error - @@ -736,43 +1006,22 @@ ioc_page_wakeup (ioc_page_t *page) * */ ioc_waitq_t * -ioc_page_error (ioc_page_t *page, - int32_t op_ret, - int32_t op_errno) +ioc_page_error(ioc_page_t *page, int32_t op_ret, int32_t op_errno) { - ioc_waitq_t *waitq = NULL, *trav = NULL; - call_frame_t *frame = NULL; - int64_t ret = 0; - ioc_table_t *table = NULL; - ioc_local_t *local = NULL; - - waitq = page->waitq; - page->waitq = NULL; - - gf_log (page->inode->table->xl->name, GF_LOG_DEBUG, - "page error for page = %p & waitq = %p", page, waitq); - - for (trav = waitq; trav; trav = trav->next) { - - frame = trav->data; - - local = frame->local; - ioc_local_lock (local); - { - if (local->op_ret != -1) { - local->op_ret = op_ret; - local->op_errno = op_errno; - } - } - ioc_local_unlock (local); - } - - table = page->inode->table; - ret = ioc_page_destroy (page); - - if (ret != -1) { - table->cache_used -= ret; - } - - return waitq; + ioc_waitq_t *waitq = NULL; + struct ioc_inode *inode = NULL; + + if (page == NULL) { + goto out; + } + + ioc_inode_lock(page->inode); + { + inode = page->inode; + waitq = __ioc_page_error(page, op_ret, op_errno); + } + ioc_inode_unlock(inode); + +out: + return waitq; } |
