diff options
Diffstat (limited to 'xlators/performance/io-cache/src/io-cache.c')
| -rw-r--r-- | xlators/performance/io-cache/src/io-cache.c | 627 |
1 files changed, 422 insertions, 205 deletions
diff --git a/xlators/performance/io-cache/src/io-cache.c b/xlators/performance/io-cache/src/io-cache.c index 90c14ea7d..201777b38 100644 --- a/xlators/performance/io-cache/src/io-cache.c +++ b/xlators/performance/io-cache/src/io-cache.c @@ -1,20 +1,11 @@ /* - Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ #ifndef _CONFIG_H @@ -37,13 +28,10 @@ int ioc_log2_page_size; uint32_t ioc_get_priority (ioc_table_t *table, const char *path); -uint32_t -ioc_get_priority (ioc_table_t *table, const char *path); - struct volume_options options[]; -inline uint32_t +static inline uint32_t ioc_hashfn (void *data, int len) { off_t offset; @@ -53,7 +41,7 @@ ioc_hashfn (void *data, int len) return (offset >> ioc_log2_page_size); } -inline ioc_inode_t * +static inline ioc_inode_t * ioc_inode_reupdate (ioc_inode_t *ioc_inode) { ioc_table_t *table = NULL; @@ -66,7 +54,7 @@ ioc_inode_reupdate (ioc_inode_t *ioc_inode) return ioc_inode; } -inline ioc_inode_t * +static inline ioc_inode_t * ioc_get_inode (dict_t *dict, char *name) { ioc_inode_t *ioc_inode = NULL; @@ -157,15 +145,16 @@ ioc_inode_flush (ioc_inode_t *ioc_inode) int32_t ioc_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop) + struct iatt *preop, struct iatt *postop, dict_t *xdata) { - STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop); + STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop, + xdata); return 0; } int32_t ioc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct iatt *stbuf, int32_t valid) + struct iatt *stbuf, int32_t valid, dict_t *xdata) { uint64_t ioc_inode = 0; @@ -177,7 +166,7 @@ ioc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); STACK_WIND (frame, ioc_setattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid); + FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata); return 0; } @@ -185,7 +174,7 @@ ioc_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t ioc_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *stbuf, dict_t *dict, struct iatt *postparent) + struct iatt *stbuf, dict_t *xdata, struct iatt *postparent) { ioc_inode_t *ioc_inode = NULL; ioc_table_t *table = NULL; @@ -264,19 +253,18 @@ out: } STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, stbuf, - dict, postparent); + xdata, postparent); return 0; } int32_t ioc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *xattr_req) + dict_t *xdata) { ioc_local_t *local = NULL; int32_t op_errno = -1, ret = -1; - local = GF_CALLOC (1, sizeof (*local), - gf_ioc_mt_ioc_local_t); + local = mem_get0 (this->local_pool); if (local == NULL) { op_errno = ENOMEM; gf_log (this->name, GF_LOG_ERROR, "out of memory"); @@ -293,7 +281,7 @@ ioc_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, frame->local = local; STACK_WIND (frame, ioc_lookup_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->lookup, loc, xattr_req); + FIRST_CHILD (this)->fops->lookup, loc, xdata); return 0; @@ -325,6 +313,20 @@ ioc_forget (xlator_t *this, inode_t *inode) return 0; } +static int32_t +ioc_invalidate(xlator_t *this, inode_t *inode) +{ + uint64_t ioc_addr = 0; + ioc_inode_t *ioc_inode = NULL; + + inode_ctx_get(inode, this, (uint64_t *) &ioc_addr); + ioc_inode = (void *) ioc_addr; + + if (ioc_inode) + ioc_inode_flush(ioc_inode); + + return 0; +} /* * ioc_cache_validate_cbk - @@ -339,7 +341,8 @@ ioc_forget (xlator_t *this, inode_t *inode) */ int32_t ioc_cache_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *stbuf) + int32_t op_ret, int32_t op_errno, struct iatt *stbuf, + dict_t *xdata) { ioc_local_t *local = NULL; ioc_inode_t *ioc_inode = NULL; @@ -455,8 +458,7 @@ ioc_cache_validate (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, int32_t ret = 0; local = frame->local; - validate_local = GF_CALLOC (1, sizeof (ioc_local_t), - gf_ioc_mt_ioc_local_t); + validate_local = mem_get0 (THIS->local_pool); if (validate_local == NULL) { ret = -1; local->op_ret = -1; @@ -471,7 +473,7 @@ ioc_cache_validate (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, ret = -1; local->op_ret = -1; local->op_errno = ENOMEM; - GF_FREE (validate_local); + mem_put (validate_local); gf_log (ioc_inode->table->xl->name, GF_LOG_ERROR, "out of memory"); goto out; @@ -483,13 +485,13 @@ ioc_cache_validate (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, STACK_WIND (validate_frame, ioc_cache_validate_cbk, FIRST_CHILD (frame->this), - FIRST_CHILD (frame->this)->fops->fstat, fd); + FIRST_CHILD (frame->this)->fops->fstat, fd, NULL); out: return ret; } -inline uint32_t +static inline uint32_t is_match (const char *path, const char *pattern) { int32_t ret = 0; @@ -530,7 +532,7 @@ ioc_get_priority (ioc_table_t *table, const char *path) */ int32_t ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, fd_t *fd) + int32_t op_errno, fd_t *fd, dict_t *xdata) { uint64_t tmp_ioc_inode = 0; ioc_local_t *local = NULL; @@ -551,6 +553,13 @@ ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, inode_ctx_get (fd->inode, this, &tmp_ioc_inode); ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; + //TODO: see why inode context is NULL and handle it. + if (!ioc_inode) { + gf_log (this->name, GF_LOG_ERROR, "inode context is " + "NULL (%s)", uuid_utoa (fd->inode->gfid)); + goto out; + } + ioc_table_lock (ioc_inode->table); { list_move_tail (&ioc_inode->inode_lru, @@ -575,10 +584,6 @@ ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, */ fd_ctx_set (fd, this, 1); } - if ((local->wbflags & GF_OPEN_NOWB) != 0) { - /* disable caching as asked by NFS */ - fd_ctx_set (fd, this, 1); - } /* weight = 0, we disable caching on it */ if (weight == 0) { @@ -589,10 +594,10 @@ ioc_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, } out: - GF_FREE (local); + mem_put (local); frame->local = NULL; - STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd); + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); return 0; } @@ -614,7 +619,7 @@ int32_t ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { ioc_local_t *local = NULL; ioc_table_t *table = NULL; @@ -649,6 +654,10 @@ ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, || ((table->max_file_size > 0) && (table->max_file_size < ioc_inode->ia_size))) { ret = fd_ctx_set (fd, this, 1); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to set fd ctx", + local->file_loc.path); } } ioc_inode_unlock (ioc_inode); @@ -657,25 +666,35 @@ ioc_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, (uint64_t)(long)ioc_inode); /* If O_DIRECT open, we disable caching on it */ - if (local->flags & O_DIRECT) + if (local->flags & O_DIRECT) { /* * O_DIRECT is only for one fd, not the inode * as a whole */ ret = fd_ctx_set (fd, this, 1); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to set fd ctx", + local->file_loc.path); + } /* if weight == 0, we disable caching on it */ - if (!weight) + if (!weight) { /* we allow a pattern-matched cache disable this way */ ret = fd_ctx_set (fd, this, 1); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to set fd ctx", + local->file_loc.path); + } } out: frame->local = NULL; - GF_FREE (local); + mem_put (local); STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, - preparent, postparent); + preparent, postparent, xdata); return 0; } @@ -685,7 +704,7 @@ int32_t ioc_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent, - struct iatt *postparent) + struct iatt *postparent, dict_t *xdata) { ioc_local_t *local = NULL; ioc_table_t *table = NULL; @@ -725,23 +744,22 @@ out: frame->local = NULL; loc_wipe (&local->file_loc); - GF_FREE (local); + mem_put (local); STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf, - preparent, postparent); + preparent, postparent, xdata); return 0; } int ioc_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev, dict_t *params) + dev_t rdev, mode_t umask, dict_t *xdata) { ioc_local_t *local = NULL; int32_t op_errno = -1, ret = -1; - local = GF_CALLOC (1, sizeof (*local), - gf_ioc_mt_ioc_local_t); + local = mem_get0 (this->local_pool); if (local == NULL) { op_errno = ENOMEM; gf_log (this->name, GF_LOG_ERROR, "out of memory"); @@ -760,17 +778,17 @@ ioc_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, STACK_WIND (frame, ioc_mknod_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, - loc, mode, rdev, params); + loc, mode, rdev, umask, xdata); return 0; unwind: if (local != NULL) { loc_wipe (&local->file_loc); - GF_FREE (local); + mem_put (local); } STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, NULL, - NULL, NULL); + NULL, NULL, NULL); return 0; } @@ -786,27 +804,27 @@ unwind: */ int32_t ioc_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, int32_t wbflags) + fd_t *fd, dict_t *xdata) { ioc_local_t *local = NULL; - local = GF_CALLOC (1, sizeof (ioc_local_t), gf_ioc_mt_ioc_local_t); + local = mem_get0 (this->local_pool); if (local == NULL) { gf_log (this->name, GF_LOG_ERROR, "out of memory"); - STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL); + STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL, NULL); return 0; } local->flags = flags; local->file_loc.path = loc->path; local->file_loc.inode = loc->inode; - local->wbflags = wbflags; frame->local = local; STACK_WIND (frame, ioc_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags); + FIRST_CHILD(this)->fops->open, loc, flags, fd, + xdata); return 0; } @@ -823,15 +841,15 @@ ioc_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, */ int32_t ioc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - mode_t mode, fd_t *fd, dict_t *params) + mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { ioc_local_t *local = NULL; - local = GF_CALLOC (1, sizeof (ioc_local_t), gf_ioc_mt_ioc_local_t); + local = mem_get0 (this->local_pool); if (local == NULL) { gf_log (this->name, GF_LOG_ERROR, "out of memory"); STACK_UNWIND_STRICT (create, frame, -1, ENOMEM, NULL, NULL, - NULL, NULL, NULL); + NULL, NULL, NULL, NULL); return 0; } @@ -841,7 +859,7 @@ ioc_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, STACK_WIND (frame, ioc_create_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->create, loc, flags, mode, - fd, params); + umask, fd, xdata); return 0; } @@ -878,10 +896,10 @@ int32_t ioc_readv_disabled_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iovec *vector, int32_t count, struct iatt *stbuf, - struct iobref *iobref) + struct iobref *iobref, dict_t *xdata) { STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count, - stbuf, iobref); + stbuf, iobref, xdata); return 0; } @@ -988,7 +1006,8 @@ ioc_dispatch_requests (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, "cache hit for trav_offset=%" PRId64"/local_offset=%"PRId64"", trav_offset, local_offset); - waitq = __ioc_page_wakeup (trav); + waitq = __ioc_page_wakeup (trav, + trav->op_errno); } else { /* if waitq already exists, fstat * revalidate is @@ -1005,7 +1024,8 @@ ioc_dispatch_requests (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, local->op_errno = -ret; need_validate = 0; - waitq = __ioc_page_wakeup (trav); + waitq = __ioc_page_wakeup (trav, + trav->op_errno); ioc_inode_unlock (ioc_inode); ioc_waitq_return (waitq); @@ -1031,13 +1051,14 @@ ioc_dispatch_requests (call_frame_t *frame, ioc_inode_t *ioc_inode, fd_t *fd, need_validate = 0; gf_log (frame->this->name, GF_LOG_TRACE, "sending validate request for " - "inode(%"PRId64") at offset=%"PRId64"", - fd->inode->ino, trav_offset); + "inode(%s) at offset=%"PRId64"", + uuid_utoa (fd->inode->gfid), trav_offset); ret = ioc_cache_validate (frame, ioc_inode, fd, trav); if (ret == -1) { ioc_inode_lock (ioc_inode); { - waitq = __ioc_page_wakeup (trav); + waitq = __ioc_page_wakeup (trav, + trav->op_errno); } ioc_inode_unlock (ioc_inode); @@ -1073,14 +1094,13 @@ out: */ int32_t ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, - size_t size, off_t offset) + size_t size, off_t offset, uint32_t flags, dict_t *xdata) { uint64_t tmp_ioc_inode = 0; ioc_inode_t *ioc_inode = NULL; ioc_local_t *local = NULL; uint32_t weight = 0; ioc_table_t *table = NULL; - uint32_t num_pages = 0; int32_t op_errno = -1; if (!this) { @@ -1094,7 +1114,7 @@ ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, STACK_WIND (frame, ioc_readv_disabled_cbk, FIRST_CHILD (frame->this), FIRST_CHILD (frame->this)->fops->readv, fd, size, - offset); + offset, flags, xdata); return 0; } @@ -1107,29 +1127,6 @@ ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, goto out; } - - ioc_table_lock (table); - { - if (!table->mem_pool) { - - num_pages = (table->cache_size / table->page_size) - + ((table->cache_size % table->page_size) - ? 1 : 0); - - table->mem_pool - = mem_pool_new (rbthash_entry_t, num_pages); - - if (!table->mem_pool) { - gf_log (this->name, GF_LOG_ERROR, - "Unable to allocate mem_pool"); - op_errno = ENOMEM; - ioc_table_unlock (table); - goto out; - } - } - } - ioc_table_unlock (table); - ioc_inode_lock (ioc_inode); { if (!ioc_inode->cache.page_table) { @@ -1153,12 +1150,11 @@ ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, STACK_WIND (frame, ioc_readv_disabled_cbk, FIRST_CHILD (frame->this), FIRST_CHILD (frame->this)->fops->readv, fd, size, - offset); + offset, flags, xdata); return 0; } - local = (ioc_local_t *) GF_CALLOC (1, sizeof (ioc_local_t), - gf_ioc_mt_ioc_local_t); + local = mem_get0 (this->local_pool); if (local == NULL) { gf_log (this->name, GF_LOG_ERROR, "out of memory"); op_errno = ENOMEM; @@ -1191,7 +1187,8 @@ ioc_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, return 0; out: - STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL); + STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, + NULL); return 0; } @@ -1208,7 +1205,7 @@ out: int32_t ioc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { ioc_local_t *local = NULL; uint64_t ioc_inode = 0; @@ -1219,7 +1216,8 @@ ioc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (ioc_inode) ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf); + STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, + xdata); return 0; } @@ -1237,16 +1235,16 @@ ioc_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t ioc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int32_t count, off_t offset, - struct iobref *iobref) + uint32_t flags, struct iobref *iobref, dict_t *xdata) { ioc_local_t *local = NULL; uint64_t ioc_inode = 0; - local = GF_CALLOC (1, sizeof (ioc_local_t), gf_ioc_mt_ioc_local_t); + local = mem_get0 (this->local_pool); if (local == NULL) { gf_log (this->name, GF_LOG_ERROR, "out of memory"); - STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL); + STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL, NULL); return 0; } @@ -1260,7 +1258,7 @@ ioc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, STACK_WIND (frame, ioc_writev_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev, fd, vector, count, offset, - iobref); + flags, iobref, xdata); return 0; } @@ -1279,11 +1277,11 @@ ioc_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t ioc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, - postbuf); + postbuf, xdata); return 0; } @@ -1302,11 +1300,11 @@ ioc_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t ioc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) + struct iatt *postbuf, dict_t *xdata) { STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, - postbuf); + postbuf, xdata); return 0; } @@ -1321,7 +1319,8 @@ ioc_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, * */ int32_t -ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) +ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { uint64_t ioc_inode = 0; @@ -1331,7 +1330,7 @@ ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); STACK_WIND (frame, ioc_truncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->truncate, loc, offset); + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); return 0; } @@ -1345,7 +1344,8 @@ ioc_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) * */ int32_t -ioc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) +ioc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { uint64_t ioc_inode = 0; @@ -1355,21 +1355,21 @@ ioc_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); STACK_WIND (frame, ioc_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset); + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); return 0; } int32_t ioc_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct gf_flock *lock) + int32_t op_errno, struct gf_flock *lock, dict_t *xdata) { - STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock); + STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata); return 0; } int32_t ioc_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, - struct gf_flock *lock) + struct gf_flock *lock, dict_t *xdata) { ioc_inode_t *ioc_inode = NULL; uint64_t tmp_inode = 0; @@ -1379,7 +1379,7 @@ ioc_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, if (!ioc_inode) { gf_log (this->name, GF_LOG_DEBUG, "inode context is NULL: returning EBADFD"); - STACK_UNWIND_STRICT (lk, frame, -1, EBADFD, NULL); + STACK_UNWIND_STRICT (lk, frame, -1, EBADFD, NULL, NULL); return 0; } @@ -1390,11 +1390,92 @@ ioc_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, ioc_inode_unlock (ioc_inode); STACK_WIND (frame, ioc_lk_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->lk, fd, cmd, lock); + FIRST_CHILD (this)->fops->lk, fd, cmd, lock, xdata); + + return 0; +} + +int +ioc_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata) +{ + gf_dirent_t *entry = NULL; + + if (op_ret <= 0) + goto unwind; + + list_for_each_entry (entry, &entries->list, list) { + /* TODO: fill things */ + } + +unwind: + STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata); + + return 0; +} +int +ioc_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *dict) +{ + STACK_WIND (frame, ioc_readdirp_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp, + fd, size, offset, dict); + + return 0; +} +static int32_t +ioc_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post, xdata); + return 0; +} + +static int32_t +ioc_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + uint64_t ioc_inode = 0; + + inode_ctx_get (fd->inode, this, &ioc_inode); + + if (ioc_inode) + ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + + STACK_WIND(frame, ioc_discard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); + return 0; +} + +static int32_t +ioc_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + STACK_UNWIND_STRICT(zerofill, frame, op_ret, + op_errno, pre, post, xdata); return 0; } +static int32_t +ioc_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + uint64_t ioc_inode = 0; + + inode_ctx_get (fd->inode, this, &ioc_inode); + + if (ioc_inode) + ioc_inode_flush ((ioc_inode_t *)(long)ioc_inode); + + STACK_WIND(frame, ioc_zerofill_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); + return 0; +} + + int32_t ioc_get_priority_list (const char *opt_str, struct list_head *first) { @@ -1475,13 +1556,9 @@ ioc_get_priority_list (const char *opt_str, struct list_head *first) stripe_str = strtok_r (NULL, ",", &tmp_str); } out: - if (string != NULL) { - GF_FREE (string); - } + GF_FREE (string); - if (dup_str != NULL) { - GF_FREE (dup_str); - } + GF_FREE (dup_str); if (max_pri == -1) { list_for_each_entry_safe (curr, tmp, first, list) { @@ -1514,13 +1591,50 @@ mem_acct_init (xlator_t *this) } +static gf_boolean_t +check_cache_size_ok (xlator_t *this, uint64_t cache_size) +{ + gf_boolean_t ret = _gf_true; + uint64_t total_mem = 0; + uint64_t max_cache_size = 0; + volume_option_t *opt = NULL; + + GF_ASSERT (this); + opt = xlator_volume_option_get (this, "cache-size"); + if (!opt) { + ret = _gf_false; + gf_log (this->name, GF_LOG_ERROR, + "could not get cache-size option"); + goto out; + } + + total_mem = get_mem_size (); + if (-1 == total_mem) + max_cache_size = opt->max; + else + max_cache_size = total_mem; + + gf_log (this->name, GF_LOG_DEBUG, "Max cache size is %"PRIu64, + max_cache_size); + + if (cache_size > max_cache_size) { + ret = _gf_false; + gf_log (this->name, GF_LOG_ERROR, "Cache size %"PRIu64 + " is greater than the max size of %"PRIu64, + cache_size, max_cache_size); + goto out; + } +out: + return ret; +} + int reconfigure (xlator_t *this, dict_t *options) { data_t *data = NULL; ioc_table_t *table = NULL; int ret = -1; - + uint64_t cache_size_new = 0; if (!this || !this->private) goto out; @@ -1531,9 +1645,6 @@ reconfigure (xlator_t *this, dict_t *options) GF_OPTION_RECONF ("cache-timeout", table->cache_timeout, options, int32, unlock); - GF_OPTION_RECONF ("cache-size", table->cache_size, - options, size, unlock); - data = dict_get (options, "priority"); if (data) { char *option_list = data_to_str (data); @@ -1566,6 +1677,16 @@ reconfigure (xlator_t *this, dict_t *options) goto unlock; } + GF_OPTION_RECONF ("cache-size", cache_size_new, + options, size, unlock); + if (!check_cache_size_ok (this, cache_size_new)) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "Not reconfiguring cache-size"); + goto unlock; + } + table->cache_size = cache_size_new; + ret = 0; } unlock: @@ -1589,6 +1710,7 @@ init (xlator_t *this) int32_t ret = -1; glusterfs_ctx_t *ctx = NULL; data_t *data = 0; + uint32_t num_pages = 0; xl_options = this->options; @@ -1621,6 +1743,11 @@ init (xlator_t *this) GF_OPTION_INIT ("max-file-size", table->max_file_size, size, out); + if (!check_cache_size_ok (this, table->cache_size)) { + ret = -1; + goto out; + } + INIT_LIST_HEAD (&table->priority_list); table->max_pri = 1; data = dict_get (xl_options, "priority"); @@ -1659,8 +1786,28 @@ init (xlator_t *this) for (index = 0; index < (table->max_pri); index++) INIT_LIST_HEAD (&table->inode_lru[index]); + this->local_pool = mem_pool_new (ioc_local_t, 64); + if (!this->local_pool) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "failed to create local_t's memory pool"); + goto out; + } + pthread_mutex_init (&table->table_lock, NULL); this->private = table; + + num_pages = (table->cache_size / table->page_size) + + ((table->cache_size % table->page_size) + ? 1 : 0); + + table->mem_pool = mem_pool_new (rbthash_entry_t, num_pages); + if (!table->mem_pool) { + gf_log (this->name, GF_LOG_ERROR, + "Unable to allocate mem_pool"); + goto out; + } + ret = 0; ctx = this->ctx; @@ -1689,8 +1836,7 @@ ioc_page_waitq_dump (ioc_page_t *page, char *prefix) while (trav) { frame = trav->data; - gf_proc_dump_build_key (key, prefix, - "waitq.frame[%d]", i++); + sprintf (key, "waitq.frame[%d]", i++); gf_proc_dump_write (key, "%"PRId64, frame->root->unique); trav = trav->next; @@ -1709,9 +1855,8 @@ __ioc_inode_waitq_dump (ioc_inode_t *ioc_inode, char *prefix) while (trav) { page = trav->data; - gf_proc_dump_build_key (key, prefix, - "cache-validation-waitq.page[%d].offset", - i++); + + sprintf (key, "cache-validation-waitq.page[%d].offset", i++); gf_proc_dump_write (key, "%"PRId64, page->offset); trav = trav->next; @@ -1721,21 +1866,32 @@ __ioc_inode_waitq_dump (ioc_inode_t *ioc_inode, char *prefix) void __ioc_page_dump (ioc_page_t *page, char *prefix) { - char key[GF_DUMP_MAX_BUF_LEN] = {0, }; - ioc_page_lock (page); + int ret = -1; + + if (!page) + return; + /* ioc_page_lock can be used to hold the mutex. But in statedump + * its better to use trylock to avoid deadlocks. + */ + ret = pthread_mutex_trylock (&page->page_lock); + if (ret) + goto out; { - gf_proc_dump_build_key (key, prefix, "offset"); - gf_proc_dump_write (key, "%"PRId64, page->offset); - gf_proc_dump_build_key (key, prefix, "size"); - gf_proc_dump_write (key, "%"PRId64, page->size); - gf_proc_dump_build_key (key, prefix, "dirty"); - gf_proc_dump_write (key, "%s", page->dirty ? "yes" : "no"); - gf_proc_dump_build_key (key, prefix, "ready"); - gf_proc_dump_write (key, "%s", page->ready ? "yes" : "no"); + gf_proc_dump_write ("offset", "%"PRId64, page->offset); + gf_proc_dump_write ("size", "%"PRId64, page->size); + gf_proc_dump_write ("dirty", "%s", page->dirty ? "yes" : "no"); + gf_proc_dump_write ("ready", "%s", page->ready ? "yes" : "no"); ioc_page_waitq_dump (page, prefix); } - ioc_page_unlock (page); + pthread_mutex_unlock (&page->page_lock); + +out: + if (ret && page) + gf_proc_dump_write ("Unable to dump the page information", + "(Lock acquisition failed) %p", page); + + return; } void @@ -1745,7 +1901,6 @@ __ioc_cache_dump (ioc_inode_t *ioc_inode, char *prefix) ioc_table_t *table = NULL; ioc_page_t *page = NULL; int i = 0; - struct tm *tm = NULL; char key[GF_DUMP_MAX_BUF_LEN] = {0, }; char timestr[256] = {0, }; @@ -1755,13 +1910,15 @@ __ioc_cache_dump (ioc_inode_t *ioc_inode, char *prefix) table = ioc_inode->table; - tm = localtime (&ioc_inode->cache.tv.tv_sec); - strftime (timestr, 256, "%Y-%m-%d %H:%M:%S", tm); - snprintf (timestr + strlen (timestr), 256 - strlen (timestr), - ".%"GF_PRI_SUSECONDS, ioc_inode->cache.tv.tv_usec); + if (ioc_inode->cache.tv.tv_sec) { + gf_time_fmt (timestr, sizeof timestr, + ioc_inode->cache.tv.tv_sec, gf_timefmt_FT); + snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr), + ".%"GF_PRI_SUSECONDS, ioc_inode->cache.tv.tv_usec); - gf_proc_dump_build_key (key, prefix, "last-cache-validation-time"); - gf_proc_dump_write (key, "%s", timestr); + gf_proc_dump_write ("last-cache-validation-time", "%s", + timestr); + } for (offset = 0; offset < ioc_inode->ia_size; offset += table->page_size) { @@ -1770,9 +1927,7 @@ __ioc_cache_dump (ioc_inode_t *ioc_inode, char *prefix) continue; } - gf_proc_dump_build_key (key, prefix, - "inode.cache.page[%d]", i++); - + sprintf (key, "inode.cache.page[%d]", i++); __ioc_page_dump (page, key); } out: @@ -1780,72 +1935,113 @@ out: } -void -ioc_inode_dump (ioc_inode_t *ioc_inode, char *prefix) +int +ioc_inode_dump (xlator_t *this, inode_t *inode) { - char key[GF_DUMP_MAX_BUF_LEN] = {0, }; - char uuidbuf[256] = {0, }; - if ((ioc_inode == NULL) || (prefix == NULL)) { + char *path = NULL; + int ret = -1; + char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; + uint64_t tmp_ioc_inode = 0; + ioc_inode_t *ioc_inode = NULL; + gf_boolean_t section_added = _gf_false; + char uuid_str[64] = {0,}; + + if (this == NULL || inode == NULL) + goto out; + + gf_proc_dump_build_key (key_prefix, "io-cache", "inode"); + + inode_ctx_get (inode, this, &tmp_ioc_inode); + ioc_inode = (ioc_inode_t *)(long)tmp_ioc_inode; + if (ioc_inode == NULL) + goto out; + + /* Similar to ioc_page_dump function its better to use + * pthread_mutex_trylock and not to use gf_log in statedump + * to avoid deadlocks. + */ + ret = pthread_mutex_trylock (&ioc_inode->inode_lock); + if (ret) goto out; - } - ioc_inode_lock (ioc_inode); { - gf_proc_dump_build_key (key, prefix, "\ninode.gfid"); - uuid_unparse (ioc_inode->inode->gfid, uuidbuf); - gf_proc_dump_write (key, "%s", uuidbuf); - gf_proc_dump_build_key (key, prefix, "inode.ino"); - gf_proc_dump_write (key, "%ld", ioc_inode->inode->ino); - gf_proc_dump_build_key (key, prefix, "inode.weight"); - gf_proc_dump_write (key, "%d", ioc_inode->weight); - __ioc_cache_dump (ioc_inode, prefix); - __ioc_inode_waitq_dump (ioc_inode, prefix); + if (uuid_is_null (ioc_inode->inode->gfid)) + goto unlock; + + gf_proc_dump_add_section (key_prefix); + section_added = _gf_true; + + __inode_path (ioc_inode->inode, NULL, &path); + + gf_proc_dump_write ("inode.weight", "%d", ioc_inode->weight); + + if (path) { + gf_proc_dump_write ("path", "%s", path); + GF_FREE (path); + } + + gf_proc_dump_write ("uuid", "%s", uuid_utoa_r + (ioc_inode->inode->gfid, uuid_str)); + __ioc_cache_dump (ioc_inode, key_prefix); + __ioc_inode_waitq_dump (ioc_inode, key_prefix); } - ioc_inode_unlock (ioc_inode); +unlock: + pthread_mutex_unlock (&ioc_inode->inode_lock); + out: - return; + if (ret && ioc_inode) { + if (section_added == _gf_false) + gf_proc_dump_add_section (key_prefix); + gf_proc_dump_write ("Unable to print the status of ioc_inode", + "(Lock acquisition failed) %s", + uuid_utoa (inode->gfid)); + } + return ret; } int ioc_priv_dump (xlator_t *this) { ioc_table_t *priv = NULL; - ioc_inode_t *ioc_inode = NULL; char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; - char key[GF_DUMP_MAX_BUF_LEN] = {0, }; + int ret = -1; + gf_boolean_t add_section = _gf_false; if (!this || !this->private) goto out; priv = this->private; - gf_proc_dump_build_key (key_prefix, "xlator.performance.io-cache", - "priv"); + + gf_proc_dump_build_key (key_prefix, "io-cache", "priv"); gf_proc_dump_add_section (key_prefix); + add_section = _gf_true; - ioc_table_lock (priv); + ret = pthread_mutex_trylock (&priv->table_lock); + if (ret) + goto out; { - gf_proc_dump_build_key (key, key_prefix, "page_size"); - gf_proc_dump_write (key, "%ld", priv->page_size); - gf_proc_dump_build_key (key, key_prefix, "cache_size"); - gf_proc_dump_write (key, "%ld", priv->cache_size); - gf_proc_dump_build_key (key, key_prefix, "cache_used"); - gf_proc_dump_write (key, "%ld", priv->cache_used); - gf_proc_dump_build_key (key, key_prefix, "inode_count"); - gf_proc_dump_write (key, "%u", priv->inode_count); - gf_proc_dump_build_key (key, key_prefix, "cache_timeout"); - gf_proc_dump_write (key, "%u", priv->cache_timeout); - gf_proc_dump_build_key (key, key_prefix, "min-file-size"); - gf_proc_dump_write (key, "%u", priv->min_file_size); - gf_proc_dump_build_key (key, key_prefix, "max-file-size"); - gf_proc_dump_write (key, "%u", priv->max_file_size); - - list_for_each_entry (ioc_inode, &priv->inodes, inode_list) { - ioc_inode_dump (ioc_inode, key_prefix); + gf_proc_dump_write ("page_size", "%ld", priv->page_size); + gf_proc_dump_write ("cache_size", "%ld", priv->cache_size); + gf_proc_dump_write ("cache_used", "%ld", priv->cache_used); + gf_proc_dump_write ("inode_count", "%u", priv->inode_count); + gf_proc_dump_write ("cache_timeout", "%u", priv->cache_timeout); + gf_proc_dump_write ("min-file-size", "%u", priv->min_file_size); + gf_proc_dump_write ("max-file-size", "%u", priv->max_file_size); + } + pthread_mutex_unlock (&priv->table_lock); +out: + if (ret && priv) { + if (!add_section) { + gf_proc_dump_build_key (key_prefix, "xlator." + "performance.io-cache", "priv"); + gf_proc_dump_add_section (key_prefix); } + gf_proc_dump_write ("Unable to dump the state of private " + "structure of io-cache xlator", "(Lock " + "acquisition failed) %s", this->name); } - ioc_table_unlock (priv); -out: + return 0; } @@ -1858,18 +2054,33 @@ out: void fini (xlator_t *this) { - ioc_table_t *table = NULL; + ioc_table_t *table = NULL; + struct ioc_priority *curr = NULL, *tmp = NULL; + int i = 0; table = this->private; if (table == NULL) return; + this->private = NULL; + if (table->mem_pool != NULL) { mem_pool_destroy (table->mem_pool); table->mem_pool = NULL; } + list_for_each_entry_safe (curr, tmp, &table->priority_list, list) { + list_del_init (&curr->list); + GF_FREE (curr->pattern); + GF_FREE (curr); + } + + for (i = 0; i < table->max_pri; i++) { + GF_ASSERT (list_empty (&table->inode_lru[i])); + } + + GF_ASSERT (list_empty (&table->inodes)); pthread_mutex_destroy (&table->table_lock); GF_FREE (table); @@ -1887,22 +2098,28 @@ struct xlator_fops fops = { .lookup = ioc_lookup, .lk = ioc_lk, .setattr = ioc_setattr, - .mknod = ioc_mknod + .mknod = ioc_mknod, + + .readdirp = ioc_readdirp, + .discard = ioc_discard, + .zerofill = ioc_zerofill, }; struct xlator_dumpops dumpops = { .priv = ioc_priv_dump, + .inodectx = ioc_inode_dump, }; struct xlator_cbks cbks = { .forget = ioc_forget, - .release = ioc_release + .release = ioc_release, + .invalidate = ioc_invalidate, }; struct volume_options options[] = { { .key = {"priority"}, - .type = GF_OPTION_TYPE_ANY, + .type = GF_OPTION_TYPE_PRIORITY_LIST, .default_value = "", .description = "Assigns priority to filenames with specific " "patterns so that when a page needs to be ejected " @@ -1921,7 +2138,7 @@ struct volume_options options[] = { { .key = {"cache-size"}, .type = GF_OPTION_TYPE_SIZET, .min = 4 * GF_UNIT_MB, - .max = 6 * GF_UNIT_GB, + .max = 32 * GF_UNIT_GB, .default_value = "32MB", .description = "Size of the read cache." }, |
