diff options
Diffstat (limited to 'xlators/performance/quick-read/src/quick-read.c')
| -rw-r--r-- | xlators/performance/quick-read/src/quick-read.c | 3612 |
1 files changed, 1287 insertions, 2325 deletions
diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c index a952dd55c75..7fe4b3c3a4b 100644 --- a/xlators/performance/quick-read/src/quick-read.c +++ b/xlators/performance/quick-read/src/quick-read.c @@ -1,2682 +1,1644 @@ /* - Copyright (c) 2009-2010 Gluster, Inc. <http://www.gluster.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ +#include <math.h> #include "quick-read.h" -#include "statedump.h" +#include <glusterfs/statedump.h> +#include "quick-read-messages.h" +#include <glusterfs/upcall-utils.h> +#include <glusterfs/atomic.h> -#define QR_DEFAULT_CACHE_SIZE 134217728 +typedef struct qr_local { + inode_t *inode; + uint64_t incident_gen; + fd_t *fd; +} qr_local_t; + +qr_inode_t * +qr_inode_ctx_get(xlator_t *this, inode_t *inode); void -qr_local_free (qr_local_t *local) -{ - if (local == NULL) { - goto out; - } +__qr_inode_prune_data(xlator_t *this, qr_inode_table_t *table, + qr_inode_t *qr_inode); - if (local->stub != NULL) { - call_stub_destroy (local->stub); - } +void +qr_local_wipe(qr_local_t *local) +{ + if (!local) + goto out; - if (local->path != NULL) { - GF_FREE (local->path); - } + if (local->inode) + inode_unref(local->inode); - GF_FREE (local); + if (local->fd) + fd_unref(local->fd); + GF_FREE(local); out: - return; + return; } +uint64_t +__qr_get_generation(xlator_t *this, qr_inode_t *qr_inode) +{ + uint64_t gen = 0, rollover; + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + + priv = this->private; + table = &priv->table; + + gen = GF_ATOMIC_INC(priv->generation); + if (gen == 0) { + qr_inode->gen_rollover = !qr_inode->gen_rollover; + gen = GF_ATOMIC_INC(priv->generation); + __qr_inode_prune_data(this, table, qr_inode); + qr_inode->gen = qr_inode->invalidation_time = gen - 1; + } + + rollover = qr_inode->gen_rollover; + gen |= (rollover << 32); + return gen; +} -int32_t -qr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset); - - -static void -qr_loc_wipe (loc_t *loc) +uint64_t +qr_get_generation(xlator_t *this, inode_t *inode) { - if (loc == NULL) { - goto out; - } + qr_inode_t *qr_inode = NULL; + uint64_t gen = 0; + qr_inode_table_t *table = NULL; + qr_private_t *priv = NULL; - if (loc->path) { - GF_FREE ((char *)loc->path); - loc->path = NULL; - } + priv = this->private; + table = &priv->table; - if (loc->inode) { - inode_unref (loc->inode); - loc->inode = NULL; - } + qr_inode = qr_inode_ctx_get(this, inode); - if (loc->parent) { - inode_unref (loc->parent); - loc->parent = NULL; + if (qr_inode) { + LOCK(&table->lock); + { + gen = __qr_get_generation(this, qr_inode); + } + UNLOCK(&table->lock); + } else { + gen = GF_ATOMIC_INC(priv->generation); + if (gen == 0) { + gen = GF_ATOMIC_INC(priv->generation); } + } -out: - return; + return gen; } - -static int32_t -qr_loc_fill (loc_t *loc, inode_t *inode, char *path) +qr_local_t * +qr_local_get(xlator_t *this, inode_t *inode) { - int32_t ret = -1; - char *parent = NULL; + qr_local_t *local = NULL; - if ((loc == NULL) || (inode == NULL) || (path == NULL)) { - ret = -1; - errno = EINVAL; - goto out; - } + local = GF_CALLOC(1, sizeof(*local), gf_common_mt_char); + if (!local) + goto out; - loc->inode = inode_ref (inode); - loc->path = gf_strdup (path); - loc->ino = inode->ino; + local->incident_gen = qr_get_generation(this, inode); +out: + return local; +} - parent = gf_strdup (path); - if (parent == NULL) { - ret = -1; - goto out; - } +#define QR_STACK_UNWIND(fop, frame, params...) \ + do { \ + qr_local_t *__local = NULL; \ + if (frame) { \ + __local = frame->local; \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT(fop, frame, params); \ + qr_local_wipe(__local); \ + } while (0) - parent = dirname (parent); +void +__qr_inode_prune(xlator_t *this, qr_inode_table_t *table, qr_inode_t *qr_inode, + uint64_t gen); - loc->parent = inode_from_path (inode->table, parent); - if (loc->parent == NULL) { - ret = -1; - errno = EINVAL; - goto out; - } +int +__qr_inode_ctx_set(xlator_t *this, inode_t *inode, qr_inode_t *qr_inode) +{ + uint64_t value = 0; + int ret = -1; - loc->name = strrchr (loc->path, '/'); - ret = 0; -out: - if (ret == -1) { - qr_loc_wipe (loc); + value = (long)qr_inode; - } + ret = __inode_ctx_set(inode, this, &value); - if (parent) { - GF_FREE (parent); - } - - return ret; + return ret; } - -void -qr_resume_pending_ops (qr_fd_ctx_t *qr_fd_ctx) +qr_inode_t * +__qr_inode_ctx_get(xlator_t *this, inode_t *inode) { - struct list_head waiting_ops; - call_stub_t *stub = NULL, *tmp = NULL; - - if (qr_fd_ctx == NULL) { - goto out; - } + qr_inode_t *qr_inode = NULL; + uint64_t value = 0; + int ret = -1; - INIT_LIST_HEAD (&waiting_ops); + ret = __inode_ctx_get(inode, this, &value); + if (ret) + return NULL; - LOCK (&qr_fd_ctx->lock); - { - list_splice_init (&qr_fd_ctx->waiting_ops, - &waiting_ops); - } - UNLOCK (&qr_fd_ctx->lock); + qr_inode = (void *)((long)value); - if (!list_empty (&waiting_ops)) { - list_for_each_entry_safe (stub, tmp, &waiting_ops, list) { - list_del_init (&stub->list); - call_resume (stub); - } - } - -out: - return; + return qr_inode; } - -static void -qr_fd_ctx_free (qr_fd_ctx_t *qr_fd_ctx) +qr_inode_t * +qr_inode_ctx_get(xlator_t *this, inode_t *inode) { - if (qr_fd_ctx == NULL) { - goto out; - } + qr_inode_t *qr_inode = NULL; - assert (list_empty (&qr_fd_ctx->waiting_ops)); + if (inode == NULL) + goto out; - GF_FREE (qr_fd_ctx->path); - GF_FREE (qr_fd_ctx); + LOCK(&inode->lock); + { + qr_inode = __qr_inode_ctx_get(this, inode); + } + UNLOCK(&inode->lock); out: - return; + return qr_inode; } -static inline uint32_t -is_match (const char *path, const char *pattern) +qr_inode_t * +qr_inode_new(xlator_t *this, inode_t *inode) { - int32_t ret = 0; + qr_inode_t *qr_inode = NULL; - ret = fnmatch (pattern, path, FNM_NOESCAPE); - - return (ret == 0); -} + qr_inode = GF_CALLOC(1, sizeof(*qr_inode), gf_qr_mt_qr_inode_t); + if (!qr_inode) + return NULL; -uint32_t -qr_get_priority (qr_conf_t *conf, const char *path) -{ - uint32_t priority = 0; - struct qr_priority *curr = NULL; - - list_for_each_entry (curr, &conf->priority_list, list) { - if (is_match (path, curr->pattern)) - priority = curr->priority; - } + INIT_LIST_HEAD(&qr_inode->lru); - return priority; -} + qr_inode->priority = 0; /* initial priority */ + return qr_inode; +} -/* To be called with this-priv->table.lock held */ qr_inode_t * -__qr_inode_alloc (xlator_t *this, char *path, inode_t *inode) +qr_inode_ctx_get_or_new(xlator_t *this, inode_t *inode) { - qr_inode_t *qr_inode = NULL; - qr_private_t *priv = NULL; - int priority = 0; + qr_inode_t *qr_inode = NULL; + int ret = -1; + qr_private_t *priv = NULL; - priv = this->private; + priv = this->private; - qr_inode = GF_CALLOC (1, sizeof (*qr_inode), gf_qr_mt_qr_inode_t); - if (qr_inode == NULL) { - gf_log (this->name, GF_LOG_ERROR, "out of memory"); - goto out; - } + LOCK(&inode->lock); + { + qr_inode = __qr_inode_ctx_get(this, inode); + if (qr_inode) + goto unlock; - INIT_LIST_HEAD (&qr_inode->lru); - - priority = qr_get_priority (&priv->conf, path); + qr_inode = qr_inode_new(this, inode); + if (!qr_inode) + goto unlock; - list_add_tail (&qr_inode->lru, &priv->table.lru[priority]); + ret = __qr_inode_ctx_set(this, inode, qr_inode); + if (ret) { + __qr_inode_prune(this, &priv->table, qr_inode, 0); + GF_FREE(qr_inode); + qr_inode = NULL; + } + } +unlock: + UNLOCK(&inode->lock); - qr_inode->inode = inode; - qr_inode->priority = priority; -out: - return qr_inode; + return qr_inode; } - -/* To be called with qr_inode->table->lock held */ -void -__qr_inode_free (qr_inode_t *qr_inode) +uint32_t +qr_get_priority(qr_conf_t *conf, const char *path) { - if (qr_inode == NULL) { - goto out; - } - - if (qr_inode->xattr) { - dict_unref (qr_inode->xattr); - } + uint32_t priority = 0; + struct qr_priority *curr = NULL; - list_del (&qr_inode->lru); + list_for_each_entry(curr, &conf->priority_list, list) + { + if (fnmatch(curr->pattern, path, FNM_NOESCAPE) == 0) + priority = curr->priority; + } - GF_FREE (qr_inode); -out: - return; + return priority; } -/* To be called with priv->table.lock held */ void -__qr_cache_prune (xlator_t *this) +__qr_inode_register(xlator_t *this, qr_inode_table_t *table, + qr_inode_t *qr_inode) { - qr_private_t *priv = NULL; - qr_conf_t *conf = NULL; - qr_inode_table_t *table = NULL; - qr_inode_t *curr = NULL, *next = NULL; - int32_t index = 0; - uint64_t size_to_prune = 0; - uint64_t size_pruned = 0; - - priv = this->private; - table = &priv->table; - conf = &priv->conf; - - size_to_prune = table->cache_used - conf->cache_size; - - for (index=0; index < conf->max_pri; index++) { - list_for_each_entry_safe (curr, next, &table->lru[index], lru) { - size_pruned += curr->stbuf.ia_size; - inode_ctx_del (curr->inode, this, NULL); - __qr_inode_free (curr); - if (size_pruned >= size_to_prune) - goto done; - } - } + qr_private_t *priv = NULL; -done: - table->cache_used -= size_pruned; - return; -} + if (!qr_inode->data) + return; -/* To be called with table->lock held */ -inline char -__qr_need_cache_prune (qr_conf_t *conf, qr_inode_table_t *table) -{ - return (table->cache_used > conf->cache_size); -} + priv = this->private; + if (!priv) + return; + if (list_empty(&qr_inode->lru)) + /* first time addition of this qr_inode into table */ + table->cache_used += qr_inode->size; + else + list_del_init(&qr_inode->lru); -int32_t -qr_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct iatt *buf, dict_t *dict, struct iatt *postparent) + list_add_tail(&qr_inode->lru, &table->lru[qr_inode->priority]); + + GF_ATOMIC_INC(priv->qr_counter.files_cached); + + return; +} + +void +qr_inode_set_priority(xlator_t *this, inode_t *inode, const char *path) { - data_t *content = NULL; - qr_inode_t *qr_inode = NULL; - uint64_t value = 0; - int ret = -1; - qr_conf_t *conf = NULL; - qr_inode_table_t *table = NULL; - qr_private_t *priv = NULL; - qr_local_t *local = NULL; - - if ((op_ret == -1) || (dict == NULL)) { - goto out; - } + uint32_t priority = 0; + qr_inode_table_t *table = NULL; + qr_inode_t *qr_inode = NULL; + qr_private_t *priv = NULL; + qr_conf_t *conf = NULL; + + qr_inode = qr_inode_ctx_get(this, inode); + if (!qr_inode) + return; - priv = this->private; - conf = &priv->conf; - table = &priv->table; + priv = this->private; + table = &priv->table; + conf = &priv->conf; - local = frame->local; + if (path) + priority = qr_get_priority(conf, path); + else + /* retain existing priority, just bump LRU */ + priority = qr_inode->priority; - if (buf->ia_size > conf->max_file_size) { - goto out; - } + LOCK(&table->lock); + { + qr_inode->priority = priority; - if (IA_ISDIR (buf->ia_type)) { - goto out; - } + __qr_inode_register(this, table, qr_inode); + } + UNLOCK(&table->lock); +} - if (inode == NULL) { - op_ret = -1; - op_errno = EINVAL; - goto out; - } +void +__qr_inode_prune_data(xlator_t *this, qr_inode_table_t *table, + qr_inode_t *qr_inode) +{ + qr_private_t *priv = NULL; - content = dict_get (dict, GF_CONTENT_KEY); - if (content == NULL) { - goto out; - } + priv = this->private; - LOCK (&table->lock); - { - ret = inode_ctx_get (inode, this, &value); - if (ret == -1) { - qr_inode = __qr_inode_alloc (this, local->path, inode); - if (qr_inode == NULL) { - op_ret = -1; - op_errno = ENOMEM; - goto unlock; - } - - ret = inode_ctx_put (inode, this, - (uint64_t)(long)qr_inode); - if (ret == -1) { - __qr_inode_free (qr_inode); - qr_inode = NULL; - op_ret = -1; - op_errno = EINVAL; - goto unlock; - } - } else { - qr_inode = (qr_inode_t *)(long)value; - if (qr_inode == NULL) { - op_ret = -1; - op_errno = EINVAL; - goto unlock; - } - } - - if (qr_inode->xattr) { - dict_unref (qr_inode->xattr); - qr_inode->xattr = NULL; - table->cache_used -= qr_inode->stbuf.ia_size; - } - - qr_inode->xattr = dict_ref (dict); - qr_inode->stbuf = *buf; - table->cache_used += buf->ia_size; - - gettimeofday (&qr_inode->tv, NULL); - if (__qr_need_cache_prune (conf, table)) { - __qr_cache_prune (this); - } - } -unlock: - UNLOCK (&table->lock); + GF_FREE(qr_inode->data); + qr_inode->data = NULL; + if (!list_empty(&qr_inode->lru)) { + table->cache_used -= qr_inode->size; + qr_inode->size = 0; -out: - /* - * FIXME: content size in dict can be greater than the size application - * requested for. Applications need to be careful till this is fixed. - */ - QR_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf, dict, - postparent); + list_del_init(&qr_inode->lru); - return 0; -} + GF_ATOMIC_DEC(priv->qr_counter.files_cached); + } + memset(&qr_inode->buf, 0, sizeof(qr_inode->buf)); +} -int32_t -qr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) +/* To be called with priv->table.lock held */ +void +__qr_inode_prune(xlator_t *this, qr_inode_table_t *table, qr_inode_t *qr_inode, + uint64_t gen) { - qr_conf_t *conf = NULL; - dict_t *new_req_dict = NULL; - int32_t op_ret = -1, op_errno = -1; - data_t *content = NULL; - uint64_t requested_size = 0, size = 0, value = 0; - char cached = 0; - qr_inode_t *qr_inode = NULL; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - qr_local_t *local = NULL; - - priv = this->private; - conf = &priv->conf; - if (conf == NULL) { - op_ret = -1; - op_errno = EINVAL; - goto unwind; - } - - table = &priv->table; - local = GF_CALLOC (1, sizeof (*local), gf_qr_mt_qr_local_t); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, unwind, op_errno, - ENOMEM); + __qr_inode_prune_data(this, table, qr_inode); + if (gen) + qr_inode->gen = gen; + qr_inode->invalidation_time = __qr_get_generation(this, qr_inode); +} - frame->local = local; +void +qr_inode_prune(xlator_t *this, inode_t *inode, uint64_t gen) +{ + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + qr_inode_t *qr_inode = NULL; - local->path = gf_strdup (loc->path); - GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, unwind, op_errno, - ENOMEM); - LOCK (&table->lock); - { - op_ret = inode_ctx_get (loc->inode, this, &value); - if (op_ret == 0) { - qr_inode = (qr_inode_t *)(long)value; - if (qr_inode != NULL) { - if (qr_inode->xattr) { - cached = 1; - } - } - } - } - UNLOCK (&table->lock); - - if ((xattr_req == NULL) && (conf->max_file_size > 0)) { - new_req_dict = xattr_req = dict_new (); - if (xattr_req == NULL) { - op_ret = -1; - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, "out of memory"); - goto unwind; - } - } + qr_inode = qr_inode_ctx_get(this, inode); + if (!qr_inode) + return; - if (!cached) { - if (xattr_req) { - content = dict_get (xattr_req, GF_CONTENT_KEY); - if (content) { - requested_size = data_to_uint64 (content); - } - } - - if ((conf->max_file_size > 0) - && (conf->max_file_size != requested_size)) { - size = (conf->max_file_size > requested_size) ? - conf->max_file_size : requested_size; - - op_ret = dict_set (xattr_req, GF_CONTENT_KEY, - data_from_uint64 (size)); - if (op_ret < 0) { - op_ret = -1; - op_errno = ENOMEM; - goto unwind; - } - } - } + priv = this->private; + table = &priv->table; - STACK_WIND (frame, qr_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, loc, xattr_req); + LOCK(&table->lock); + { + __qr_inode_prune(this, table, qr_inode, gen); + } + UNLOCK(&table->lock); +} - if (new_req_dict) { - dict_unref (new_req_dict); - } +/* To be called with priv->table.lock held */ +void +__qr_cache_prune(xlator_t *this, qr_inode_table_t *table, qr_conf_t *conf) +{ + qr_inode_t *curr = NULL; + qr_inode_t *next = NULL; + int index = 0; + size_t size_pruned = 0; - return 0; + for (index = 0; index < conf->max_pri; index++) { + list_for_each_entry_safe(curr, next, &table->lru[index], lru) + { + size_pruned += curr->size; -unwind: - QR_STACK_UNWIND (lookup, frame, op_ret, op_errno, NULL, NULL, NULL, - NULL); + __qr_inode_prune(this, table, curr, 0); - if (new_req_dict) { - dict_unref (new_req_dict); + if (table->cache_used < conf->cache_size) + return; } + } - return 0; + return; } +void +qr_cache_prune(xlator_t *this) +{ + qr_private_t *priv = NULL; + qr_conf_t *conf = NULL; + qr_inode_table_t *table = NULL; + + priv = this->private; + table = &priv->table; + conf = &priv->conf; + + LOCK(&table->lock); + { + if (table->cache_used > conf->cache_size) + __qr_cache_prune(this, table, conf); + } + UNLOCK(&table->lock); +} -int32_t -qr_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, fd_t *fd) +void * +qr_content_extract(dict_t *xdata) { - uint64_t value = 0; - int32_t ret = -1; - struct list_head waiting_ops; - qr_local_t *local = NULL; - qr_inode_t *qr_inode = NULL; - qr_fd_ctx_t *qr_fd_ctx = NULL; - call_stub_t *stub = NULL, *tmp = NULL; - char is_open = 0; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - - priv = this->private; - table = &priv->table; - - local = frame->local; - if (local != NULL) { - local->op_ret = op_ret; - local->op_errno = op_errno; - is_open = local->is_open; - } + data_t *data = NULL; + void *content = NULL; + int ret = 0; - INIT_LIST_HEAD (&waiting_ops); + ret = dict_get_with_ref(xdata, GF_CONTENT_KEY, &data); + if (ret < 0 || !data) + return NULL; - ret = fd_ctx_get (fd, this, &value); - if ((ret == -1) && (op_ret != -1)) { - op_ret = -1; - op_errno = EINVAL; - goto out; - } + content = GF_MALLOC(data->len, gf_qr_mt_content_t); + if (!content) + goto out; - if (value) { - qr_fd_ctx = (qr_fd_ctx_t *) (long)value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - qr_fd_ctx->open_in_transit = 0; - - if (op_ret == 0) { - qr_fd_ctx->opened = 1; - } - list_splice_init (&qr_fd_ctx->waiting_ops, - &waiting_ops); - } - UNLOCK (&qr_fd_ctx->lock); - - if (local && local->is_open - && ((local->open_flags & O_TRUNC) == O_TRUNC)) { - LOCK (&table->lock); - { - ret = inode_ctx_del (fd->inode, this, &value); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long) value; - - if (qr_inode != NULL) { - __qr_inode_free (qr_inode); - } - } - } - UNLOCK (&table->lock); - } - - if (!list_empty (&waiting_ops)) { - list_for_each_entry_safe (stub, tmp, &waiting_ops, - list) { - list_del_init (&stub->list); - call_resume (stub); - } - } - } -out: - if (is_open) { - QR_STACK_UNWIND (open, frame, op_ret, op_errno, fd); - } + memcpy(content, data->data, data->len); - return 0; +out: + data_unref(data); + return content; } - -int32_t -qr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, - fd_t *fd, int32_t wbflags) +void +qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data, + struct iatt *buf, uint64_t gen) { - qr_inode_t *qr_inode = NULL; - int32_t ret = -1; - uint64_t filep = 0; - char content_cached = 0; - qr_fd_ctx_t *qr_fd_ctx = NULL, *tmp_fd_ctx = NULL; - int32_t op_ret = -1, op_errno = -1; - qr_local_t *local = NULL; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - - priv = this->private; - table = &priv->table; - - tmp_fd_ctx = qr_fd_ctx = GF_CALLOC (1, sizeof (*qr_fd_ctx), - gf_qr_mt_qr_fd_ctx_t); - if (qr_fd_ctx == NULL) { - op_ret = -1; - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, "out of memory"); - goto unwind; - } + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + uint32_t rollover = 0; - LOCK_INIT (&qr_fd_ctx->lock); - INIT_LIST_HEAD (&qr_fd_ctx->waiting_ops); + rollover = gen >> 32; + gen = gen & 0xffffffff; - qr_fd_ctx->path = gf_strdup (loc->path); - qr_fd_ctx->flags = flags; - qr_fd_ctx->wbflags = wbflags; + priv = this->private; + table = &priv->table; - ret = fd_ctx_set (fd, this, (uint64_t)(long)qr_fd_ctx); - if (ret == -1) { - op_ret = -1; - op_errno = EINVAL; - goto unwind; - } - tmp_fd_ctx = NULL; - - local = GF_CALLOC (1, sizeof (*local), - gf_qr_mt_qr_local_t); - if (local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - gf_log (this->name, GF_LOG_ERROR, "out of memory"); - goto unwind; - } + LOCK(&table->lock); + { + if ((rollover != qr_inode->gen_rollover) || + (gen && qr_inode->gen && (qr_inode->gen >= gen))) + goto unlock; - local->is_open = 1; - local->open_flags = flags; - frame->local = local; - LOCK (&table->lock); - { - ret = inode_ctx_get (fd->inode, this, &filep); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long) filep; - if (qr_inode) { - if (qr_inode->xattr) { - content_cached = 1; - } - } - } - } - UNLOCK (&table->lock); + if ((qr_inode->data == NULL) && (qr_inode->invalidation_time >= gen)) + goto unlock; - if (content_cached && ((flags & O_DIRECTORY) == O_DIRECTORY)) { - op_ret = -1; - op_errno = ENOTDIR; - goto unwind; - } + __qr_inode_prune(this, table, qr_inode, gen); - if (!content_cached || ((flags & O_ACCMODE) == O_WRONLY) - || ((flags & O_TRUNC) == O_TRUNC) - || ((flags & O_DIRECT) == O_DIRECT)) { - LOCK (&qr_fd_ctx->lock); - { - /* - * we really need not set this flag, since open is - * not yet unwounded. - */ - - qr_fd_ctx->open_in_transit = 1; - if ((flags & O_DIRECT) == O_DIRECT) { - qr_fd_ctx->disabled = 1; - } - } - UNLOCK (&qr_fd_ctx->lock); - goto wind; - } else { - op_ret = 0; - op_errno = 0; - goto unwind; - } + qr_inode->data = data; + data = NULL; + qr_inode->size = buf->ia_size; -unwind: - if (tmp_fd_ctx != NULL) { - qr_fd_ctx_free (tmp_fd_ctx); - } + qr_inode->ia_mtime = buf->ia_mtime; + qr_inode->ia_mtime_nsec = buf->ia_mtime_nsec; + qr_inode->ia_ctime = buf->ia_ctime; + qr_inode->ia_ctime_nsec = buf->ia_ctime_nsec; - QR_STACK_UNWIND (open, frame, op_ret, op_errno, fd); - return 0; + qr_inode->buf = *buf; + qr_inode->last_refresh = gf_time(); -wind: - STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, loc, flags, fd, wbflags); - return 0; -} + __qr_inode_register(this, table, qr_inode); + } +unlock: + UNLOCK(&table->lock); + if (data) + GF_FREE(data); -static inline char -qr_time_elapsed (struct timeval *now, struct timeval *then) -{ - return now->tv_sec - then->tv_sec; + qr_cache_prune(this); } - -static inline char -qr_need_validation (qr_conf_t *conf, qr_inode_t *qr_inode) +gf_boolean_t +qr_size_fits(qr_conf_t *conf, struct iatt *buf) { - struct timeval now = {0, }; - char need_validation = 0; - - gettimeofday (&now, NULL); + return (buf->ia_size <= conf->max_file_size); +} - if (qr_time_elapsed (&now, &qr_inode->tv) >= conf->cache_timeout) - need_validation = 1; +gf_boolean_t +qr_mtime_equal(qr_inode_t *qr_inode, struct iatt *buf) +{ + return (qr_inode->ia_mtime == buf->ia_mtime && + qr_inode->ia_mtime_nsec == buf->ia_mtime_nsec); +} - return need_validation; +gf_boolean_t +qr_ctime_equal(qr_inode_t *qr_inode, struct iatt *buf) +{ + return (qr_inode->ia_ctime == buf->ia_ctime && + qr_inode->ia_ctime_nsec == buf->ia_ctime_nsec); } +gf_boolean_t +qr_time_equal(qr_conf_t *conf, qr_inode_t *qr_inode, struct iatt *buf) +{ + if (conf->ctime_invalidation) + return qr_ctime_equal(qr_inode, buf); + else + return qr_mtime_equal(qr_inode, buf); +} -static int32_t -qr_validate_cache_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *buf) +void +__qr_content_refresh(xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf, + uint64_t gen) { - qr_inode_t *qr_inode = NULL; - qr_local_t *local = NULL; - uint64_t value = 0; - int32_t ret = 0; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - call_stub_t *stub = NULL; - - local = frame->local; - if ((local == NULL) || ((local->fd) == NULL)) { - op_ret = -1; - op_errno = EINVAL; - goto unwind; - } - - local->just_validated = 1; + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + qr_conf_t *conf = NULL; + uint32_t rollover = 0; - if (op_ret == -1) { - goto unwind; - } + rollover = gen >> 32; + gen = gen & 0xffffffff; - priv = this->private; - table = &priv->table; + priv = this->private; + table = &priv->table; + conf = &priv->conf; - LOCK (&table->lock); - { - ret = inode_ctx_get (local->fd->inode, this, &value); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long) value; - } - - if (qr_inode != NULL) { - gettimeofday (&qr_inode->tv, NULL); - - if ((qr_inode->stbuf.ia_mtime != buf->ia_mtime) - && (qr_inode->stbuf.ia_mtime_nsec - != buf->ia_mtime_nsec)) { - inode_ctx_del (local->fd->inode, this, NULL); - __qr_inode_free (qr_inode); - } - } - } - UNLOCK (&table->lock); + /* allow for rollover of frame->root->unique */ + if ((rollover != qr_inode->gen_rollover) || + (gen && qr_inode->gen && (qr_inode->gen >= gen))) + goto done; - stub = local->stub; - local->stub = NULL; + if ((qr_inode->data == NULL) && (qr_inode->invalidation_time >= gen)) + goto done; - call_resume (stub); - - return 0; + qr_inode->gen = gen; -unwind: - /* this is actually unwind of readv */ - QR_STACK_UNWIND (readv, frame, op_ret, op_errno, NULL, -1, NULL, NULL); - return 0; -} + if (qr_size_fits(conf, buf) && qr_time_equal(conf, qr_inode, buf)) { + qr_inode->buf = *buf; + qr_inode->last_refresh = gf_time(); + __qr_inode_register(this, table, qr_inode); + } else { + __qr_inode_prune(this, table, qr_inode, gen); + } +done: + return; +} -int32_t -qr_validate_cache_helper (call_frame_t *frame, xlator_t *this, fd_t *fd) +void +qr_content_refresh(xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf, + uint64_t gen) { - qr_local_t *local = NULL; - int32_t op_ret = -1, op_errno = -1; + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; - local = frame->local; - if (local == NULL) { - op_ret = -1; - op_errno = EINVAL; - } else { - op_ret = local->op_ret; - op_errno = local->op_errno; - } + priv = this->private; + table = &priv->table; - if (op_ret == -1) { - qr_validate_cache_cbk (frame, NULL, this, op_ret, op_errno, - NULL); - } else { - STACK_WIND (frame, qr_validate_cache_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fstat, fd); - } - - return 0; + LOCK(&table->lock); + { + __qr_content_refresh(this, qr_inode, buf, gen); + } + UNLOCK(&table->lock); } - -int -qr_validate_cache (call_frame_t *frame, xlator_t *this, fd_t *fd, - call_stub_t *stub) +gf_boolean_t +__qr_cache_is_fresh(xlator_t *this, qr_inode_t *qr_inode) { - int ret = -1; - int flags = 0; - uint64_t value = 0; - loc_t loc = {0, }; - char *path = NULL; - qr_local_t *local = NULL; - qr_fd_ctx_t *qr_fd_ctx = NULL; - call_stub_t *validate_stub = NULL; - char need_open = 0, can_wind = 0; - - local = GF_CALLOC (1, sizeof (*local), - gf_qr_mt_qr_local_t); - if (local == NULL) { - goto out; - } + qr_conf_t *conf = NULL; + qr_private_t *priv = NULL; - local->fd = fd; - local->stub = stub; - frame->local = local; + priv = this->private; + conf = &priv->conf; - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - } + if (qr_inode->last_refresh < priv->last_child_down) + return _gf_false; - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - validate_stub = fop_fstat_stub (frame, - qr_validate_cache_helper, - fd); - if (validate_stub == NULL) { - ret = -1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&validate_stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - - if (ret == -1) { - goto out; - } - } else { - can_wind = 1; - } + if (gf_time() - qr_inode->last_refresh >= conf->cache_timeout) + return _gf_false; - if (need_open) { - ret = qr_loc_fill (&loc, fd->inode, path); - if (ret == -1) { - qr_resume_pending_ops (qr_fd_ctx); - goto out; - } - - STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, - &loc, flags, fd, qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } else if (can_wind) { - STACK_WIND (frame, qr_validate_cache_cbk, - FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fstat, fd); - } + return _gf_true; +} - ret = 0; +int +qr_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode_ret, struct iatt *buf, + dict_t *xdata, struct iatt *postparent) +{ + void *content = NULL; + qr_inode_t *qr_inode = NULL; + inode_t *inode = NULL; + qr_local_t *local = NULL; + + local = frame->local; + inode = local->inode; + + if (op_ret == -1) { + qr_inode_prune(this, inode, local->incident_gen); + goto out; + } + + if (dict_get(xdata, GLUSTERFS_BAD_INODE)) { + qr_inode_prune(this, inode, local->incident_gen); + goto out; + } + + if (dict_get(xdata, "sh-failed")) { + qr_inode_prune(this, inode, local->incident_gen); + goto out; + } + + content = qr_content_extract(xdata); + + if (content) { + /* new content came along, always replace old content */ + qr_inode = qr_inode_ctx_get_or_new(this, inode); + if (!qr_inode) { + /* no harm done */ + GF_FREE(content); + goto out; + } + + qr_content_update(this, qr_inode, content, buf, local->incident_gen); + } else { + /* purge old content if necessary */ + qr_inode = qr_inode_ctx_get(this, inode); + if (!qr_inode) + /* usual path for large files */ + goto out; + + qr_content_refresh(this, qr_inode, buf, local->incident_gen); + } out: - return ret; + QR_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode_ret, buf, xdata, + postparent); + return 0; } - -int32_t -qr_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iovec *vector, int32_t count, - struct iatt *stbuf, struct iobref *iobref) +int +qr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - QR_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, - stbuf, iobref); - return 0; -} + qr_private_t *priv = NULL; + qr_conf_t *conf = NULL; + qr_inode_t *qr_inode = NULL; + int ret = -1; + dict_t *new_xdata = NULL; + qr_local_t *local = NULL; + + priv = this->private; + conf = &priv->conf; + local = qr_local_get(this, loc->inode); + local->inode = inode_ref(loc->inode); + frame->local = local; + + qr_inode = qr_inode_ctx_get(this, loc->inode); + if (qr_inode && qr_inode->data) + /* cached. only validate in qr_lookup_cbk */ + goto wind; + + if (!xdata) + xdata = new_xdata = dict_new(); + + if (!xdata) + goto wind; + + ret = 0; + if (conf->max_file_size) + ret = dict_set(xdata, GF_CONTENT_KEY, + data_from_uint64(conf->max_file_size)); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, QUICK_READ_MSG_DICT_SET_FAILED, + "cannot set key in request dict (%s)", loc->path); +wind: + STACK_WIND(frame, qr_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + if (new_xdata) + dict_unref(new_xdata); -int32_t -qr_readv_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset) -{ - STACK_WIND (frame, qr_readv_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->readv, fd, size, offset); - return 0; + return 0; } - -int32_t -qr_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, - off_t offset) +int +qr_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, gf_dirent_t *entries, dict_t *xdata) { - qr_inode_t *qr_inode = NULL; - int32_t ret = -1, op_ret = -1, op_errno = -1; - uint64_t value = 0; - int count = -1, flags = 0, i = 0; - char content_cached = 0, need_validation = 0; - char need_open = 0, can_wind = 0, need_unwind = 0; - struct iobuf *iobuf = NULL; - struct iobref *iobref = NULL; - struct iatt stbuf = {0, }; - data_t *content = NULL; - qr_fd_ctx_t *qr_fd_ctx = NULL; - call_stub_t *stub = NULL; - loc_t loc = {0, }; - qr_conf_t *conf = NULL; - struct iovec *vector = NULL; - char *path = NULL; - off_t start = 0, end = 0; - size_t len = 0; - struct iobuf_pool *iobuf_pool = NULL; - qr_local_t *local = NULL; - char just_validated = 0; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - - op_ret = 0; - - priv = this->private; - conf = &priv->conf; - table = &priv->table; - - local = frame->local; - - if (local != NULL) { - just_validated = local->just_validated; - } + gf_dirent_t *entry = NULL; + qr_inode_t *qr_inode = NULL; + qr_local_t *local = NULL; - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - if (qr_fd_ctx != NULL) { - if (qr_fd_ctx->disabled) { - goto out; - } - } - } + local = frame->local; - iobuf_pool = this->ctx->iobuf_pool; + if (op_ret <= 0) + goto unwind; - LOCK (&table->lock); - { - ret = inode_ctx_get (fd->inode, this, &value); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long)value; - if (qr_inode) { - if (qr_inode->xattr){ - if (!just_validated - && qr_need_validation (conf, - qr_inode)) { - need_validation = 1; - goto unlock; - } - - content = dict_get (qr_inode->xattr, - GF_CONTENT_KEY); - - - stbuf = qr_inode->stbuf; - content_cached = 1; - list_move_tail (&qr_inode->lru, - &table->lru[qr_inode->priority]); - - if (offset > content->len) { - op_ret = 0; - end = content->len; - } else { - if ((offset + size) - > content->len) { - op_ret = content->len - offset; - end = content->len; - } else { - op_ret = size; - end = offset + size; - } - } - - count = (op_ret / iobuf_pool->page_size); - if ((op_ret % iobuf_pool->page_size) - != 0) { - count++; - } - - if (count == 0) { - op_ret = 0; - goto unlock; - } - - vector = GF_CALLOC (count, - sizeof (*vector), - gf_qr_mt_iovec); - if (vector == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - goto unlock; - } - - iobref = iobref_new (); - if (iobref == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - goto unlock; - } - - for (i = 0; i < count; i++) { - iobuf = iobuf_get (iobuf_pool); - if (iobuf == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - goto unlock; - } - - start = offset + (iobuf_pool->page_size * i); - if (start > end) { - len = 0; - } else { - len = (iobuf_pool->page_size - > (end - start)) - ? (end - start) - : iobuf_pool->page_size; - - memcpy (iobuf->ptr, - content->data + start, - len); - } - - iobref_add (iobref, iobuf); - iobuf_unref (iobuf); - - vector[i].iov_base = iobuf->ptr; - vector[i].iov_len = len; - } - } - } - } - } -unlock: - UNLOCK (&table->lock); + list_for_each_entry(entry, &entries->list, list) + { + if (!entry->inode) + continue; -out: - if (content_cached || need_unwind) { - QR_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, - count, &stbuf, iobref); - - } else if (need_validation) { - stub = fop_readv_stub (frame, qr_readv, fd, size, offset); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - goto out; - } - - op_ret = qr_validate_cache (frame, this, fd, stub); - if (op_ret == -1) { - need_unwind = 1; - op_errno = errno; - call_stub_destroy (stub); - goto out; - } - } else { - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - stub = fop_readv_stub (frame, - qr_readv_helper, - fd, size, - offset); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto fdctx_unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - fdctx_unlock: - UNLOCK (&qr_fd_ctx->lock); - - if (op_ret == -1) { - need_unwind = 1; - goto out; - } - } else { - can_wind = 1; - } - - if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx); - goto out; - } - - STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, - &loc, flags, fd, qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } else if (can_wind) { - STACK_WIND (frame, qr_readv_cbk, - FIRST_CHILD (this), - FIRST_CHILD (this)->fops->readv, fd, size, - offset); - } + qr_inode = qr_inode_ctx_get(this, entry->inode); + if (!qr_inode) + /* no harm */ + continue; - } + qr_content_refresh(this, qr_inode, &entry->d_stat, local->incident_gen); + } - if (vector) { - GF_FREE (vector); - } +unwind: + QR_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata); + return 0; +} - if (iobref) { - iobref_unref (iobref); - } +int +qr_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) +{ + qr_local_t *local = NULL; - return 0; -} + local = qr_local_get(this, NULL); + frame->local = local; + STACK_WIND(frame, qr_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata); + return 0; +} -int32_t -qr_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) +int +qr_readv_cached(call_frame_t *frame, qr_inode_t *qr_inode, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) { - QR_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf); - return 0; -} + xlator_t *this = NULL; + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + int op_ret = -1; + struct iobuf *iobuf = NULL; + struct iobref *iobref = NULL; + struct iovec iov = { + 0, + }; + struct iatt buf = { + 0, + }; + this = frame->this; + priv = this->private; + table = &priv->table; -int32_t -qr_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iovec *vector, int32_t count, off_t off, - struct iobref *iobref) -{ - STACK_WIND (frame, qr_writev_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->writev, fd, vector, count, off, - iobref); - return 0; -} + LOCK(&table->lock); + { + if (!qr_inode->data) + goto unlock; + if (offset >= qr_inode->size) + goto unlock; -int32_t -qr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, - int32_t count, off_t off, struct iobref *iobref) -{ - uint64_t value = 0; - int flags = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_inode_t *qr_inode = NULL; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t op_ret = -1, op_errno = -1, ret = -1; - char can_wind = 0, need_unwind = 0, need_open = 0; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - - priv = this->private; - table = &priv->table; - - ret = fd_ctx_get (fd, this, &value); - - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - } + if (!__qr_cache_is_fresh(this, qr_inode)) + goto unlock; - LOCK (&table->lock); - { - ret = inode_ctx_get (fd->inode, this, &value); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long)value; - if (qr_inode != NULL) { - inode_ctx_del (fd->inode, this, NULL); - __qr_inode_free (qr_inode); - } - } - } - UNLOCK (&table->lock); - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - stub = fop_writev_stub (frame, qr_writev_helper, - fd, vector, count, off, - iobref); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; + op_ret = min(size, (qr_inode->size - offset)); + + iobuf = iobuf_get2(this->ctx->iobuf_pool, op_ret); + if (!iobuf) { + op_ret = -1; + goto unlock; } -out: - if (need_unwind) { - QR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, - NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_writev_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->writev, fd, vector, count, - off, iobref); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx); - goto out; - } - - STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); + iobref = iobref_new(); + if (!iobref) { + op_ret = -1; + goto unlock; } - return 0; -} + iobref_add(iobref, iobuf); + memcpy(iobuf->ptr, qr_inode->data + offset, op_ret); -int32_t -qr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *buf) -{ - QR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf); - return 0; -} + buf = qr_inode->buf; + /* bump LRU */ + __qr_inode_register(frame->this, table, qr_inode); + } +unlock: + UNLOCK(&table->lock); -int32_t -qr_fstat_helper (call_frame_t *frame, xlator_t *this, fd_t *fd) -{ - STACK_WIND (frame, qr_fstat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fstat, fd); - return 0; -} + if (op_ret >= 0) { + iov.iov_base = iobuf->ptr; + iov.iov_len = op_ret; + GF_ATOMIC_INC(priv->qr_counter.cache_hit); + STACK_UNWIND_STRICT(readv, frame, op_ret, 0, &iov, 1, &buf, iobref, + xdata); + } else { + GF_ATOMIC_INC(priv->qr_counter.cache_miss); + } -int32_t -qr_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd) -{ - qr_fd_ctx_t *qr_fd_ctx = NULL; - char need_open = 0, can_wind = 0, need_unwind = 0; - uint64_t value = 0; - int32_t ret = -1, op_ret = -1, op_errno = -1; - call_stub_t *stub = NULL; - loc_t loc = {0, }; - char *path = NULL; - int flags = 0; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - } + if (iobuf) + iobuf_unref(iobuf); - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - stub = fop_fstat_stub (frame, qr_fstat_helper, - fd); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } + if (iobref) + iobref_unref(iobref); -out: - if (need_unwind) { - QR_STACK_UNWIND (fstat, frame, op_ret, op_errno, NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_fstat_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fstat, fd); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx); - goto out; - } - - STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } - - return 0; + return op_ret; } +int +qr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + qr_inode_t *qr_inode = NULL; + qr_inode = qr_inode_ctx_get(this, fd->inode); + if (!qr_inode) + goto wind; + if (qr_readv_cached(frame, qr_inode, size, offset, flags, xdata) < 0) + goto wind; -int32_t -qr_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iatt *preop, struct iatt *postop) -{ - QR_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, preop, postop); - return 0; + return 0; +wind: + STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); + return 0; } - int32_t -qr_fsetattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid) +qr_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) { - STACK_WIND(frame, qr_fsetattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, - valid); - return 0; -} + qr_local_t *local = NULL; + local = frame->local; -int32_t -qr_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, - struct iatt *stbuf, int32_t valid) + qr_inode_prune(this, local->fd->inode, local->incident_gen); + + QR_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, postbuf, xdata); + return 0; +} + +int +qr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov, + int count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) { - uint64_t value = 0; - int flags = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = -1; - char need_open = 0, can_wind = 0, need_unwind = 0; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - } + qr_local_t *local = NULL; - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - stub = fop_fsetattr_stub (frame, - qr_fsetattr_helper, - fd, stbuf, valid); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } + local = qr_local_get(this, fd->inode); + local->fd = fd_ref(fd); -out: - if (need_unwind) { - QR_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, NULL, - NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_fsetattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, - valid); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx); - goto out; - } - - STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } + frame->local = local; - return 0; + STACK_WIND(frame, qr_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, iov, count, offset, flags, + iobref, xdata); + return 0; } - int32_t -qr_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +qr_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - QR_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno); - return 0; -} + qr_local_t *local = NULL; + local = frame->local; + qr_inode_prune(this, local->inode, local->incident_gen); -int32_t -qr_fsetxattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - dict_t *dict, int32_t flags) -{ - STACK_WIND (frame, qr_fsetxattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsetxattr, fd, dict, flags); - return 0; + QR_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf, xdata); + return 0; } - -int32_t -qr_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, - int32_t flags) +int +qr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) { - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - int open_flags = 0; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = -1; - char need_open = 0, can_wind = 0, need_unwind = 0; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - } + qr_local_t *local = NULL; - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - open_flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - stub = fop_fsetxattr_stub (frame, - qr_fsetxattr_helper, - fd, dict, flags); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } + local = qr_local_get(this, loc->inode); + local->inode = inode_ref(loc->inode); + frame->local = local; -out: - if (need_unwind) { - QR_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno); - } else if (can_wind) { - STACK_WIND (frame, qr_fsetxattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsetxattr, fd, dict, - flags); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx); - goto out; - } - - STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, open_flags, - fd, qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } - - return 0; + STACK_WIND(frame, qr_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; } - int32_t -qr_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) +qr_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - QR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict); - return 0; -} + qr_local_t *local = NULL; + local = frame->local; + qr_inode_prune(this, local->fd->inode, local->incident_gen); -int32_t -qr_fgetxattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - const char *name) -{ - STACK_WIND (frame, qr_fgetxattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fgetxattr, fd, name); - return 0; + QR_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, prebuf, postbuf, xdata); + return 0; } - -int32_t -qr_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name) +int +qr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) { - int flags = 0; - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = -1; - char need_open = 0, can_wind = 0, need_unwind = 0; - - /* - * FIXME: Can quick-read use the extended attributes stored in the - * cache? this needs to be discussed. - */ - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - } + qr_local_t *local = NULL; - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - stub = fop_fgetxattr_stub (frame, - qr_fgetxattr_helper, - fd, name); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } + local = qr_local_get(this, fd->inode); + local->fd = fd_ref(fd); + frame->local = local; -out: - if (need_unwind) { - QR_STACK_UNWIND (open, frame, op_ret, op_errno, NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_fgetxattr_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fgetxattr, fd, name); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx); - goto out; - } - - STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } - - return 0; + STACK_WIND(frame, qr_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; } - int32_t -qr_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno) +qr_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) { - QR_STACK_UNWIND (flush, frame, op_ret, op_errno); - return 0; -} + qr_local_t *local = NULL; + local = frame->local; + qr_inode_prune(this, local->fd->inode, local->incident_gen); -int32_t -qr_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd) -{ - STACK_WIND (frame, qr_flush_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->flush, fd); - return 0; + QR_STACK_UNWIND(fallocate, frame, op_ret, op_errno, pre, post, xdata); + return 0; } - -int32_t -qr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) +static int +qr_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int keep_size, + off_t offset, size_t len, dict_t *xdata) { - uint64_t value = 0; - call_stub_t *stub = NULL; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = -1; - char can_wind = 0, need_unwind = 0; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long)value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - if (qr_fd_ctx->opened) { - can_wind = 1; - } else if (qr_fd_ctx->open_in_transit) { - stub = fop_flush_stub (frame, qr_flush_helper, - fd); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } else { - op_ret = 0; - need_unwind = 1; - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } + qr_local_t *local = NULL; - if (need_unwind) { - QR_STACK_UNWIND (flush, frame, op_ret, op_errno); - } else if (can_wind) { - STACK_WIND (frame, qr_flush_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->flush, fd); - } + local = qr_local_get(this, fd->inode); + local->fd = fd_ref(fd); + frame->local = local; - return 0; + STACK_WIND(frame, qr_fallocate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, keep_size, offset, len, + xdata); + return 0; } - int32_t -qr_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +qr_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) { - QR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno); - return 0; + qr_local_t *local = NULL; + + local = frame->local; + qr_inode_prune(this, local->fd->inode, local->incident_gen); + + QR_STACK_UNWIND(discard, frame, op_ret, op_errno, pre, post, xdata); + return 0; } -int32_t -qr_fentrylk_helper (call_frame_t *frame, xlator_t *this, const char *volume, - fd_t *fd, const char *basename, entrylk_cmd cmd, - entrylk_type type) +static int +qr_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) { - STACK_WIND(frame, qr_fentrylk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fentrylk, volume, fd, basename, - cmd, type); - return 0; -} + qr_local_t *local = NULL; + local = qr_local_get(this, fd->inode); + local->fd = fd_ref(fd); + frame->local = local; + + STACK_WIND(frame, qr_discard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); + return 0; +} int32_t -qr_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, - const char *basename, entrylk_cmd cmd, entrylk_type type) +qr_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) { - int flags = 0; - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = -1; - char need_open = 0, can_wind = 0, need_unwind = 0; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long)value; - } + qr_local_t *local = NULL; - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - stub = fop_fentrylk_stub (frame, - qr_fentrylk_helper, - volume, fd, basename, - cmd, type); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } + local = frame->local; + qr_inode_prune(this, local->fd->inode, local->incident_gen); -out: - if (need_unwind) { - QR_STACK_UNWIND (fentrylk, frame, op_ret, op_errno); - } else if (can_wind) { - STACK_WIND (frame, qr_fentrylk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fentrylk, volume, fd, - basename, cmd, type); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx); - goto out; - } - - STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } - - return 0; + QR_STACK_UNWIND(zerofill, frame, op_ret, op_errno, pre, post, xdata); + return 0; } +static int +qr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) +{ + qr_local_t *local = NULL; -int32_t -qr_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + local = qr_local_get(this, fd->inode); + local->fd = fd_ref(fd); + frame->local = local; -{ - QR_STACK_UNWIND (finodelk, frame, op_ret, op_errno); - return 0; + STACK_WIND(frame, qr_zerofill_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); + return 0; } - -int32_t -qr_finodelk_helper (call_frame_t *frame, xlator_t *this, const char *volume, - fd_t *fd, int32_t cmd, struct flock *lock) +int +qr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + dict_t *xdata) { - STACK_WIND (frame, qr_finodelk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->finodelk, volume, fd, cmd, lock); - return 0; -} + qr_inode_set_priority(this, fd->inode, loc->path); + STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +} -int32_t -qr_finodelk (call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd, - int32_t cmd, struct flock *lock) +int +qr_forget(xlator_t *this, inode_t *inode) { - int flags = 0; - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = -1; - char need_open = 0, can_wind = 0, need_unwind = 0; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long)value; - } + qr_inode_t *qr_inode = NULL; - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - stub = fop_finodelk_stub (frame, - qr_finodelk_helper, - volume, fd, cmd, - lock); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } + qr_inode = qr_inode_ctx_get(this, inode); -out: - if (need_unwind) { - QR_STACK_UNWIND (finodelk, frame, op_ret, op_errno); - } else if (can_wind) { - STACK_WIND (frame, qr_finodelk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->finodelk, volume, fd, - cmd, lock); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx); - goto out; - } - - STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } - + if (!qr_inode) return 0; -} + qr_inode_prune(this, inode, qr_get_generation(this, inode)); -int32_t -qr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf) -{ - QR_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf); - return 0; -} + GF_FREE(qr_inode); + return 0; +} int32_t -qr_fsync_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags) +qr_inodectx_dump(xlator_t *this, inode_t *inode) { - STACK_WIND (frame, qr_fsync_cbk, FIRST_CHILD (this), - FIRST_CHILD(this)->fops->fsync, fd, flags); - return 0; + qr_inode_t *qr_inode = NULL; + int32_t ret = -1; + char key_prefix[GF_DUMP_MAX_BUF_LEN] = { + 0, + }; + char buf[GF_TIMESTR_SIZE] = { + 0, + }; + + qr_inode = qr_inode_ctx_get(this, inode); + if (!qr_inode) + goto out; + + gf_proc_dump_build_key(key_prefix, "xlator.performance.quick-read", + "inodectx"); + gf_proc_dump_add_section("%s", key_prefix); + + gf_proc_dump_write("entire-file-cached", "%s", + qr_inode->data ? "yes" : "no"); + + if (qr_inode->last_refresh) { + gf_time_fmt(buf, sizeof buf, qr_inode->last_refresh, gf_timefmt_FT); + gf_proc_dump_write("last-cache-validation-time", "%s", buf); + } + + ret = 0; +out: + return ret; } -int32_t -qr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags) +int +qr_priv_dump(xlator_t *this) { - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - int open_flags = 0; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = -1; - char need_open = 0, can_wind = 0, need_unwind = 0; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long)value; - } + qr_conf_t *conf = NULL; + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + uint32_t file_count = 0; + uint32_t i = 0; + qr_inode_t *curr = NULL; + uint64_t total_size = 0; + char key_prefix[GF_DUMP_MAX_BUF_LEN]; - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - open_flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - stub = fop_fsync_stub (frame, qr_fsync_helper, - fd, flags); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } + if (!this) { + return -1; + } -out: - if (need_unwind) { - QR_STACK_UNWIND (fsync, frame, op_ret, op_errno, NULL, - NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_fsync_cbk, FIRST_CHILD (this), - FIRST_CHILD (this)->fops->fsync, fd, flags); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx); - goto out; - } - - STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, open_flags, - fd, qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } + priv = this->private; + conf = &priv->conf; + if (!conf) + return -1; - return 0; -} + table = &priv->table; + gf_proc_dump_build_key(key_prefix, "xlator.performance.quick-read", "priv"); -int32_t -qr_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf) -{ - int32_t ret = 0; - uint64_t value = 0; - qr_inode_t *qr_inode = NULL; - qr_local_t *local = NULL; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - - if (op_ret == -1) { - goto out; - } + gf_proc_dump_add_section("%s", key_prefix); - priv = this->private; - table = &priv->table; - - local = frame->local; - if ((local == NULL) || (local->fd == NULL) - || (local->fd->inode == NULL)) { - op_ret = -1; - op_errno = EINVAL; - goto out; - } + gf_proc_dump_write("max_file_size", "%" PRIu64, conf->max_file_size); + gf_proc_dump_write("cache_timeout", "%d", conf->cache_timeout); - frame->local = NULL; - - LOCK (&table->lock); - { - ret = inode_ctx_get (local->fd->inode, this, &value); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long) value; - - if (qr_inode) { - if (qr_inode->stbuf.ia_size != postbuf->ia_size) - { - inode_ctx_del (local->fd->inode, this, - NULL); - __qr_inode_free (qr_inode); - } - } - } - } - UNLOCK (&table->lock); + if (!table) { + goto out; + } else { + for (i = 0; i < conf->max_pri; i++) { + list_for_each_entry(curr, &table->lru[i], lru) + { + file_count++; + total_size += curr->size; + } + } + } + + gf_proc_dump_write("total_files_cached", "%d", file_count); + gf_proc_dump_write("total_cache_used", "%" PRIu64, total_size); + gf_proc_dump_write("cache-hit", "%" GF_PRI_ATOMIC, + GF_ATOMIC_GET(priv->qr_counter.cache_hit)); + gf_proc_dump_write("cache-miss", "%" GF_PRI_ATOMIC, + GF_ATOMIC_GET(priv->qr_counter.cache_miss)); + gf_proc_dump_write("cache-invalidations", "%" GF_PRI_ATOMIC, + GF_ATOMIC_GET(priv->qr_counter.file_data_invals)); out: - QR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf, - postbuf); - return 0; + return 0; } - -int32_t -qr_ftruncate_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, - off_t offset) +static int32_t +qr_dump_metrics(xlator_t *this, int fd) { - STACK_WIND (frame, qr_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset); - return 0; + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + + priv = this->private; + table = &priv->table; + + dprintf(fd, "%s.total_files_cached %" PRId64 "\n", this->name, + GF_ATOMIC_GET(priv->qr_counter.files_cached)); + dprintf(fd, "%s.total_cache_used %" PRId64 "\n", this->name, + table->cache_used); + dprintf(fd, "%s.cache-hit %" PRId64 "\n", this->name, + GF_ATOMIC_GET(priv->qr_counter.cache_hit)); + dprintf(fd, "%s.cache-miss %" PRId64 "\n", this->name, + GF_ATOMIC_GET(priv->qr_counter.cache_miss)); + dprintf(fd, "%s.cache-invalidations %" PRId64 "\n", this->name, + GF_ATOMIC_GET(priv->qr_counter.file_data_invals)); + + return 0; } - int32_t -qr_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) +qr_mem_acct_init(xlator_t *this) { - int flags = 0; - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_local_t *local = NULL; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = -1; - char need_open = 0, can_wind = 0, need_unwind = 0; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long)value; - } - - local = GF_CALLOC (1, sizeof (*local), - gf_qr_mt_qr_local_t); - if (local == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - goto out; - } + int ret = -1; - local->fd = fd; - frame->local = local; - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - stub = fop_ftruncate_stub (frame, - qr_ftruncate_helper, - fd, offset); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } + if (!this) + return ret; -out: - if (need_unwind) { - QR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL, - NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_ftruncate_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ftruncate, fd, offset); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx); - goto out; - } - - STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } + ret = xlator_mem_acct_init(this, gf_qr_mt_end + 1); - return 0; -} + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, QUICK_READ_MSG_NO_MEMORY, + "Memory accounting init failed"); + return ret; + } + return ret; +} -int32_t -qr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno, struct flock *lock) +static gf_boolean_t +check_cache_size_ok(xlator_t *this, int64_t cache_size) { - QR_STACK_UNWIND (lk, frame, op_ret, op_errno, lock); - return 0; + int ret = _gf_true; + uint64_t total_mem = 0; + uint64_t max_cache_size = 0; + volume_option_t *opt = NULL; + + GF_ASSERT(this); + opt = xlator_volume_option_get(this, "cache-size"); + if (!opt) { + ret = _gf_false; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, + QUICK_READ_MSG_INVALID_ARGUMENT, + "could not get cache-size option"); + goto out; + } + + total_mem = get_mem_size(); + if (-1 == total_mem) + max_cache_size = opt->max; + else + max_cache_size = total_mem; + + gf_msg_debug(this->name, 0, "Max cache size is %" PRIu64, max_cache_size); + if (cache_size > max_cache_size) { + ret = _gf_false; + gf_msg(this->name, GF_LOG_ERROR, 0, QUICK_READ_MSG_INVALID_ARGUMENT, + "Cache size %" PRIu64 + " is greater than the max size of %" PRIu64, + cache_size, max_cache_size); + goto out; + } +out: + return ret; } - -int32_t -qr_lk_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, - struct flock *lock) +int +qr_reconfigure(xlator_t *this, dict_t *options) { - STACK_WIND (frame, qr_lk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lk, fd, cmd, lock); + int32_t ret = -1; + qr_private_t *priv = NULL; + qr_conf_t *conf = NULL; + uint64_t cache_size_new = 0; - return 0; -} + GF_VALIDATE_OR_GOTO("quick-read", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + GF_VALIDATE_OR_GOTO(this->name, options, out); + priv = this->private; -int32_t -qr_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, - struct flock *lock) -{ - int flags = 0; - uint64_t value = 0; - call_stub_t *stub = NULL; - char *path = NULL; - loc_t loc = {0, }; - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = -1, op_ret = -1, op_errno = -1; - char need_open = 0, can_wind = 0, need_unwind = 0; - - ret = fd_ctx_get (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long)value; - } - - if (qr_fd_ctx) { - LOCK (&qr_fd_ctx->lock); - { - path = qr_fd_ctx->path; - flags = qr_fd_ctx->flags; - - if (!(qr_fd_ctx->opened - || qr_fd_ctx->open_in_transit)) { - need_open = 1; - qr_fd_ctx->open_in_transit = 1; - } - - if (qr_fd_ctx->opened) { - can_wind = 1; - } else { - stub = fop_lk_stub (frame, qr_lk_helper, fd, - cmd, lock); - if (stub == NULL) { - op_ret = -1; - op_errno = ENOMEM; - need_unwind = 1; - qr_fd_ctx->open_in_transit = 0; - goto unlock; - } - - list_add_tail (&stub->list, - &qr_fd_ctx->waiting_ops); - } - } - unlock: - UNLOCK (&qr_fd_ctx->lock); - } else { - can_wind = 1; - } + conf = &priv->conf; + if (!conf) { + goto out; + } + + GF_OPTION_RECONF("cache-timeout", conf->cache_timeout, options, int32, out); + + GF_OPTION_RECONF("quick-read-cache-invalidation", conf->qr_invalidation, + options, bool, out); + GF_OPTION_RECONF("ctime-invalidation", conf->ctime_invalidation, options, + bool, out); + + GF_OPTION_RECONF("cache-size", cache_size_new, options, size_uint64, out); + if (!check_cache_size_ok(this, cache_size_new)) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, QUICK_READ_MSG_INVALID_CONFIG, + "Not reconfiguring cache-size"); + goto out; + } + conf->cache_size = cache_size_new; + + ret = 0; out: - if (need_unwind) { - QR_STACK_UNWIND (lk, frame, op_ret, op_errno, NULL); - } else if (can_wind) { - STACK_WIND (frame, qr_lk_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lk, fd, cmd, lock); - } else if (need_open) { - op_ret = qr_loc_fill (&loc, fd->inode, path); - if (op_ret == -1) { - qr_resume_pending_ops (qr_fd_ctx); - goto out; - } - - STACK_WIND (frame, qr_open_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->open, &loc, flags, fd, - qr_fd_ctx->wbflags); - - qr_loc_wipe (&loc); - } - - return 0; + return ret; } - int32_t -qr_release (xlator_t *this, fd_t *fd) +qr_get_priority_list(const char *opt_str, struct list_head *first) { - qr_fd_ctx_t *qr_fd_ctx = NULL; - int32_t ret = 0; - uint64_t value = 0; - - ret = fd_ctx_del (fd, this, &value); - if (ret == 0) { - qr_fd_ctx = (qr_fd_ctx_t *)(long) value; - if (qr_fd_ctx) { - qr_fd_ctx_free (qr_fd_ctx); - } + int32_t max_pri = 1; + char *tmp_str = NULL; + char *tmp_str1 = NULL; + char *tmp_str2 = NULL; + char *dup_str = NULL; + char *priority_str = NULL; + char *pattern = NULL; + char *priority = NULL; + char *string = NULL; + struct qr_priority *curr = NULL, *tmp = NULL; + + GF_VALIDATE_OR_GOTO("quick-read", opt_str, out); + GF_VALIDATE_OR_GOTO("quick-read", first, out); + + string = gf_strdup(opt_str); + if (string == NULL) { + max_pri = -1; + goto out; + } + + /* Get the pattern for cache priority. + * "option priority *.jpg:1,abc*:2" etc + */ + /* TODO: inode_lru in table is statically hard-coded to 5, + * should be changed to run-time configuration + */ + priority_str = strtok_r(string, ",", &tmp_str); + while (priority_str) { + curr = GF_CALLOC(1, sizeof(*curr), gf_qr_mt_qr_priority_t); + if (curr == NULL) { + max_pri = -1; + goto out; + } + + list_add_tail(&curr->list, first); + + dup_str = gf_strdup(priority_str); + if (dup_str == NULL) { + max_pri = -1; + goto out; + } + + pattern = strtok_r(dup_str, ":", &tmp_str1); + if (!pattern) { + max_pri = -1; + goto out; + } + + priority = strtok_r(NULL, ":", &tmp_str1); + if (!priority) { + max_pri = -1; + goto out; + } + + gf_msg_trace("quick-read", 0, + "quick-read priority : pattern %s : priority %s", pattern, + priority); + + curr->pattern = gf_strdup(pattern); + if (curr->pattern == NULL) { + max_pri = -1; + goto out; + } + + curr->priority = strtol(priority, &tmp_str2, 0); + if (tmp_str2 && (*tmp_str2)) { + max_pri = -1; + goto out; + } else { + max_pri = max(max_pri, curr->priority); } - return 0; -} - + GF_FREE(dup_str); + dup_str = NULL; -int32_t -qr_forget (xlator_t *this, inode_t *inode) -{ - qr_inode_t *qr_inode = NULL; - uint64_t value = 0; - int32_t ret = -1; - qr_private_t *priv = NULL; + priority_str = strtok_r(NULL, ",", &tmp_str); + } +out: + GF_FREE(string); - priv = this->private; + GF_FREE(dup_str); - LOCK (&priv->table.lock); + if (max_pri == -1) { + list_for_each_entry_safe(curr, tmp, first, list) { - ret = inode_ctx_del (inode, this, &value); - if (ret == 0) { - qr_inode = (qr_inode_t *)(long) value; - __qr_inode_free (qr_inode); - } + list_del_init(&curr->list); + GF_FREE(curr->pattern); + GF_FREE(curr); } - UNLOCK (&priv->table.lock); + } - return 0; + return max_pri; } -int -qr_priv_dump (xlator_t *this) +int32_t +qr_init(xlator_t *this) { - qr_conf_t *conf = NULL; - char key[GF_DUMP_MAX_BUF_LEN]; - char key_prefix[GF_DUMP_MAX_BUF_LEN]; - qr_private_t *priv = NULL; - qr_inode_table_t *table = NULL; - uint32_t file_count = 0; - uint32_t i = 0; - qr_inode_t *curr = NULL; - uint64_t total_size = 0; - - if (!this) - return -1; - - priv = this->private; - conf = &priv->conf; - - if (!conf) { - gf_log (this->name, GF_LOG_WARNING, - "conf null in xlator"); - return -1; - } - - table = &priv->table; - + int32_t ret = -1, i = 0; + qr_private_t *priv = NULL; + qr_conf_t *conf = NULL; + + if (!this->children || this->children->next) { + gf_msg(this->name, GF_LOG_ERROR, 0, + QUICK_READ_MSG_XLATOR_CHILD_MISCONFIGURED, + "FATAL: volume (%s) not configured with exactly one " + "child", + this->name); + return -1; + } + + if (!this->parents) { + gf_msg(this->name, GF_LOG_WARNING, 0, QUICK_READ_MSG_VOL_MISCONFIGURED, + "dangling volume. check volfile "); + } + + priv = GF_CALLOC(1, sizeof(*priv), gf_qr_mt_qr_private_t); + if (priv == NULL) { + ret = -1; + goto out; + } + + LOCK_INIT(&priv->table.lock); + conf = &priv->conf; + + GF_OPTION_INIT("max-file-size", conf->max_file_size, size_uint64, out); + + GF_OPTION_INIT("cache-timeout", conf->cache_timeout, int32, out); + + GF_OPTION_INIT("quick-read-cache-invalidation", conf->qr_invalidation, bool, + out); + + GF_OPTION_INIT("cache-size", conf->cache_size, size_uint64, out); + if (!check_cache_size_ok(this, conf->cache_size)) { + ret = -1; + goto out; + } + + GF_OPTION_INIT("ctime-invalidation", conf->ctime_invalidation, bool, out); + + INIT_LIST_HEAD(&conf->priority_list); + conf->max_pri = 1; + if (dict_get(this->options, "priority")) { + char *option_list = data_to_str(dict_get(this->options, "priority")); + gf_msg_trace(this->name, 0, "option path %s", option_list); + /* parse the list of pattern:priority */ + conf->max_pri = qr_get_priority_list(option_list, &conf->priority_list); + + if (conf->max_pri == -1) { + goto out; + } + conf->max_pri++; + } + + priv->table.lru = GF_CALLOC(conf->max_pri, sizeof(*priv->table.lru), + gf_common_mt_list_head); + if (priv->table.lru == NULL) { + ret = -1; + goto out; + } + + for (i = 0; i < conf->max_pri; i++) { + INIT_LIST_HEAD(&priv->table.lru[i]); + } + + ret = 0; + + priv->last_child_down = gf_time(); + GF_ATOMIC_INIT(priv->generation, 0); + this->private = priv; +out: + if ((ret == -1) && priv) { + GF_FREE(priv); + } - gf_proc_dump_build_key (key_prefix, - "xlator.performance.quick-read", - "priv"); + return ret; +} - gf_proc_dump_add_section (key_prefix); +void +qr_inode_table_destroy(qr_private_t *priv) +{ + int i = 0; + qr_conf_t *conf = NULL; - gf_proc_dump_build_key (key, key_prefix, "max_file_size"); - gf_proc_dump_write (key, "%d", conf->max_file_size); - gf_proc_dump_build_key (key, key_prefix, "cache_timeout"); - gf_proc_dump_write (key, "%d", conf->cache_timeout); + conf = &priv->conf; - if (!table) { - gf_log (this->name, GF_LOG_WARNING, - "table is NULL"); - goto out; - } else { - for (i = 0; i < conf->max_pri; i++) { - list_for_each_entry (curr, &table->lru[i], lru) { - file_count++; - total_size += curr->stbuf.ia_size; - } - } + for (i = 0; i < conf->max_pri; i++) { + /* There is a known leak of inodes, hence until + * that is fixed, log the assert as warning. + GF_ASSERT (list_empty (&priv->table.lru[i]));*/ + if (!list_empty(&priv->table.lru[i])) { + gf_msg("quick-read", GF_LOG_INFO, 0, QUICK_READ_MSG_LRU_NOT_EMPTY, + "quick read inode table lru not empty"); } + } - gf_proc_dump_build_key (key, key_prefix, "total_files_cached"); - gf_proc_dump_write (key, "%d", file_count); - gf_proc_dump_build_key (key, key_prefix, "total_cache_used"); - gf_proc_dump_write (key, "%d", total_size); + LOCK_DESTROY(&priv->table.lock); -out: - return 0; + return; } -int32_t -mem_acct_init (xlator_t *this) +void +qr_conf_destroy(qr_conf_t *conf) { - int ret = -1; - - if (!this) - return ret; + struct qr_priority *curr = NULL, *tmp = NULL; - ret = xlator_mem_acct_init (this, gf_qr_mt_end + 1); - - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); - return ret; - } + list_for_each_entry_safe(curr, tmp, &conf->priority_list, list) + { + list_del(&curr->list); + GF_FREE(curr->pattern); + GF_FREE(curr); + } - return ret; + return; } - -int32_t -qr_get_priority_list (const char *opt_str, struct list_head *first) +void +qr_update_child_down_time(xlator_t *this, time_t now) { - int32_t max_pri = 1; - char *tmp_str = NULL; - char *tmp_str1 = NULL; - char *tmp_str2 = NULL; - char *dup_str = NULL; - char *priority_str = NULL; - char *pattern = NULL; - char *priority = NULL; - char *string = NULL; - struct qr_priority *curr = NULL, *tmp = NULL; - - string = gf_strdup (opt_str); - if (string == NULL) { - max_pri = -1; - goto out; - } - - /* Get the pattern for cache priority. - * "option priority *.jpg:1,abc*:2" etc - */ - /* TODO: inode_lru in table is statically hard-coded to 5, - * should be changed to run-time configuration - */ - priority_str = strtok_r (string, ",", &tmp_str); - while (priority_str) { - curr = GF_CALLOC (1, sizeof (*curr), gf_qr_mt_qr_priority_t); - if (curr == NULL) { - max_pri = -1; - goto out; - } - - list_add_tail (&curr->list, first); - - dup_str = gf_strdup (priority_str); - if (dup_str == NULL) { - max_pri = -1; - goto out; - } - - pattern = strtok_r (dup_str, ":", &tmp_str1); - if (!pattern) { - max_pri = -1; - goto out; - } - - priority = strtok_r (NULL, ":", &tmp_str1); - if (!priority) { - max_pri = -1; - goto out; - } - - gf_log ("quick-read", GF_LOG_TRACE, - "quick-read priority : pattern %s : priority %s", - pattern, - priority); - - curr->pattern = gf_strdup (pattern); - if (curr->pattern == NULL) { - max_pri = -1; - goto out; - } - - curr->priority = strtol (priority, &tmp_str2, 0); - if (tmp_str2 && (*tmp_str2)) { - max_pri = -1; - goto out; - } else { - max_pri = max (max_pri, curr->priority); - } - - GF_FREE (dup_str); - dup_str = NULL; - - priority_str = strtok_r (NULL, ",", &tmp_str); - } -out: - if (string != NULL) { - GF_FREE (string); - } + qr_private_t *priv = NULL; - if (dup_str != NULL) { - GF_FREE (dup_str); - } + priv = this->private; - if (max_pri == -1) { - list_for_each_entry_safe (curr, tmp, first, list) { - list_del_init (&curr->list); - GF_FREE (curr->pattern); - GF_FREE (curr); - } - } - - return max_pri; + LOCK(&priv->lock); + { + priv->last_child_down = now; + } + UNLOCK(&priv->lock); } - -int32_t -init (xlator_t *this) +static int +qr_invalidate(xlator_t *this, void *data) { - char *str = NULL; - int32_t ret = -1, i = 0; - qr_private_t *priv = NULL; - qr_conf_t *conf = NULL; - - if (!this->children || this->children->next) { - gf_log (this->name, GF_LOG_ERROR, - "FATAL: volume (%s) not configured with exactly one " - "child", this->name); - return -1; - } - - if (!this->parents) { - gf_log (this->name, GF_LOG_WARNING, - "dangling volume. check volfile "); - } - - priv = GF_CALLOC (1, sizeof (*priv), - gf_qr_mt_qr_private_t); - if (priv == NULL) { - gf_log (this->name, GF_LOG_ERROR, - "out of memory"); - ret = -1; - goto out; - } + struct gf_upcall *up_data = NULL; + struct gf_upcall_cache_invalidation *up_ci = NULL; + inode_t *inode = NULL; + int ret = 0; + inode_table_t *itable = NULL; + qr_private_t *priv = NULL; - LOCK_INIT (&priv->table.lock); - conf = &priv->conf; - conf->max_file_size = 65536; - ret = dict_get_str (this->options, "max-file-size", - &str); - if (ret == 0) { - ret = gf_string2bytesize (str, &conf->max_file_size); - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, - "invalid number format \"%s\" of \"option " - "max-file-size\"", - str); - ret = -1; - goto out; - } - } + up_data = (struct gf_upcall *)data; - conf->cache_timeout = 1; - ret = dict_get_str (this->options, "cache-timeout", &str); - if (ret == 0) { - ret = gf_string2uint_base10 (str, - (unsigned int *)&conf->cache_timeout); - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, - "invalid cache-timeout value %s", str); - ret = -1; - goto out; - } - } - - conf->cache_size = QR_DEFAULT_CACHE_SIZE; - ret = dict_get_str (this->options, "cache-size", &str); - if (ret == 0) { - ret = gf_string2bytesize (str, &conf->cache_size); - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, - "invalid cache-size value %s", str); - ret = -1; - goto out; - } - } + if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION) + goto out; - INIT_LIST_HEAD (&conf->priority_list); - conf->max_pri = 1; - if (dict_get (this->options, "priority")) { - char *option_list = data_to_str (dict_get (this->options, - "priority")); - gf_log (this->name, GF_LOG_TRACE, - "option path %s", option_list); - /* parse the list of pattern:priority */ - conf->max_pri = qr_get_priority_list (option_list, - &conf->priority_list); - - if (conf->max_pri == -1) { - goto out; - } - conf->max_pri ++; - } - - priv->table.lru = GF_CALLOC (conf->max_pri, - sizeof (*priv->table.lru), - gf_common_mt_list_head); - if (priv->table.lru == NULL) { - ret = -1; - gf_log (this->name, GF_LOG_ERROR, "out of memory"); - goto out; - } + priv = this->private; + up_ci = (struct gf_upcall_cache_invalidation *)up_data->data; - for (i = 0; i < conf->max_pri; i++) { - INIT_LIST_HEAD (&priv->table.lru[i]); + if (up_ci && (up_ci->flags & UP_WRITE_FLAGS)) { + GF_ATOMIC_INC(priv->qr_counter.file_data_invals); + itable = ((xlator_t *)this->graph->top)->itable; + inode = inode_find(itable, up_data->gfid); + if (!inode) { + ret = -1; + goto out; } + qr_inode_prune(this, inode, qr_get_generation(this, inode)); + } - ret = 0; - - this->private = priv; out: - if ((ret == -1) && priv) { - GF_FREE (priv); - } + if (inode) + inode_unref(inode); - return ret; + return ret; } +int +qr_notify(xlator_t *this, int event, void *data, ...) +{ + int ret = 0; + qr_private_t *priv = NULL; + qr_conf_t *conf = NULL; + + priv = this->private; + conf = &priv->conf; + + switch (event) { + case GF_EVENT_CHILD_DOWN: + case GF_EVENT_SOME_DESCENDENT_DOWN: + qr_update_child_down_time(this, gf_time()); + break; + case GF_EVENT_UPCALL: + if (conf->qr_invalidation) + ret = qr_invalidate(this, data); + break; + default: + break; + } + + if (default_notify(this, event, data) != 0) + ret = -1; + + return ret; +} void -fini (xlator_t *this) +qr_fini(xlator_t *this) { - return; -} + qr_private_t *priv = NULL; + if (this == NULL) { + goto out; + } -struct xlator_fops fops = { - .lookup = qr_lookup, - .open = qr_open, - .readv = qr_readv, - .writev = qr_writev, - .fstat = qr_fstat, - .fsetxattr = qr_fsetxattr, - .fgetxattr = qr_fgetxattr, - .flush = qr_flush, - .fentrylk = qr_fentrylk, - .finodelk = qr_finodelk, - .fsync = qr_fsync, - .ftruncate = qr_ftruncate, - .lk = qr_lk, - .fsetattr = qr_fsetattr, -}; + priv = this->private; + if (priv == NULL) { + goto out; + } + qr_inode_table_destroy(priv); + qr_conf_destroy(&priv->conf); + this->private = NULL; + + GF_FREE(priv); +out: + return; +} -struct xlator_cbks cbks = { - .forget = qr_forget, - .release = qr_release, +struct xlator_fops qr_fops = {.lookup = qr_lookup, + .readdirp = qr_readdirp, + .open = qr_open, + .readv = qr_readv, + .writev = qr_writev, + .truncate = qr_truncate, + .ftruncate = qr_ftruncate, + .fallocate = qr_fallocate, + .discard = qr_discard, + .zerofill = qr_zerofill}; + +struct xlator_cbks qr_cbks = { + .forget = qr_forget, }; -struct xlator_dumpops dumpops = { - .priv = qr_priv_dump, +struct xlator_dumpops qr_dumpops = { + .priv = qr_priv_dump, + .inodectx = qr_inodectx_dump, }; -struct volume_options options[] = { - { .key = {"priority"}, - .type = GF_OPTION_TYPE_ANY - }, - { .key = {"cache-size"}, - .type = GF_OPTION_TYPE_SIZET, - .min = 0, - .max = 6 * GF_UNIT_GB, - }, - { .key = {"cache-timeout"}, - .type = GF_OPTION_TYPE_INT, - .min = 1, - .max = 60 - }, - { .key = {"max-file-size"}, - .type = GF_OPTION_TYPE_SIZET, - .min = 0, - .max = 1 * GF_UNIT_KB * 1000, - }, +struct volume_options qr_options[] = { + { + .key = {"quick-read"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable quick-read", + .op_version = {GD_OP_VERSION_6_0}, + .flags = OPT_FLAG_SETTABLE, + }, + {.key = {"priority"}, .type = GF_OPTION_TYPE_ANY}, + {.key = {"cache-size"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 0, + .max = INFINITY, + .default_value = "128MB", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .description = "Size of small file read cache."}, + { + .key = {"cache-timeout"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "1", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + }, + { + .key = {"max-file-size"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 0, + .max = 1 * GF_UNIT_KB * 1000, + .default_value = "64KB", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + }, + { + .key = {"quick-read-cache-invalidation"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .op_version = {GD_OP_VERSION_4_0_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .description = "When \"on\", invalidates/updates the metadata cache," + " on receiving the cache-invalidation notifications", + }, + { + .key = {"ctime-invalidation"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .op_version = {GD_OP_VERSION_5_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .description = "Quick-read by default uses mtime to identify changes " + "to file data. However there are applications like " + "rsync which explicitly set mtime making it unreliable " + "for the purpose of identifying change in file content " + ". Since ctime also changes when content of a file " + " changes and it cannot be set explicitly, it becomes " + " suitable for identifying staleness of cached data. " + "This option makes quick-read to prefer ctime over " + "mtime to validate its cache. However, using ctime " + "can result in false positives as ctime changes with " + "just attribute changes like permission without " + "changes to file data. So, use this only when mtime " + "is not reliable", + }, + {.key = {NULL}}}; + +xlator_api_t xlator_api = { + .init = qr_init, + .fini = qr_fini, + .notify = qr_notify, + .reconfigure = qr_reconfigure, + .mem_acct_init = qr_mem_acct_init, + .dump_metrics = qr_dump_metrics, + .op_version = {1}, /* Present from the initial version */ + .dumpops = &qr_dumpops, + .fops = &qr_fops, + .cbks = &qr_cbks, + .options = qr_options, + .identifier = "quick-read", + .category = GF_MAINTAINED, }; |
