diff options
Diffstat (limited to 'xlators/performance/quick-read')
| -rw-r--r-- | xlators/performance/quick-read/Makefile.am | 3 | ||||
| -rw-r--r-- | xlators/performance/quick-read/src/Makefile.am | 16 | ||||
| -rw-r--r-- | xlators/performance/quick-read/src/quick-read-mem-types.h | 23 | ||||
| -rw-r--r-- | xlators/performance/quick-read/src/quick-read-messages.h | 31 | ||||
| -rw-r--r-- | xlators/performance/quick-read/src/quick-read.c | 1644 | ||||
| -rw-r--r-- | xlators/performance/quick-read/src/quick-read.h | 91 |
6 files changed, 1808 insertions, 0 deletions
diff --git a/xlators/performance/quick-read/Makefile.am b/xlators/performance/quick-read/Makefile.am new file mode 100644 index 00000000000..d471a3f9243 --- /dev/null +++ b/xlators/performance/quick-read/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/performance/quick-read/src/Makefile.am b/xlators/performance/quick-read/src/Makefile.am new file mode 100644 index 00000000000..8eb6cece738 --- /dev/null +++ b/xlators/performance/quick-read/src/Makefile.am @@ -0,0 +1,16 @@ +xlator_LTLIBRARIES = quick-read.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/performance + +quick_read_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) + +quick_read_la_SOURCES = quick-read.c +quick_read_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +noinst_HEADERS = quick-read.h quick-read-mem-types.h quick-read-messages.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +CLEANFILES = diff --git a/xlators/performance/quick-read/src/quick-read-mem-types.h b/xlators/performance/quick-read/src/quick-read-mem-types.h new file mode 100644 index 00000000000..e4aef8549ff --- /dev/null +++ b/xlators/performance/quick-read/src/quick-read-mem-types.h @@ -0,0 +1,23 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef __QR_MEM_TYPES_H__ +#define __QR_MEM_TYPES_H__ + +#include <glusterfs/mem-types.h> + +enum gf_qr_mem_types_ { + gf_qr_mt_qr_inode_t = gf_common_mt_end + 1, + gf_qr_mt_content_t, + gf_qr_mt_qr_priority_t, + gf_qr_mt_qr_private_t, + gf_qr_mt_end +}; +#endif diff --git a/xlators/performance/quick-read/src/quick-read-messages.h b/xlators/performance/quick-read/src/quick-read-messages.h new file mode 100644 index 00000000000..da9724a3c9c --- /dev/null +++ b/xlators/performance/quick-read/src/quick-read-messages.h @@ -0,0 +1,31 @@ +/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _QUICK_READ_MESSAGES_H_ +#define _QUICK_READ_MESSAGES_H_ + +#include <glusterfs/glfs-message-id.h> + +/* To add new message IDs, append new identifiers at the end of the list. + * + * Never remove a message ID. If it's not used anymore, you can rename it or + * leave it as it is, but not delete it. This is to prevent reutilization of + * IDs by other messages. + * + * The component name must match one of the entries defined in + * glfs-message-id.h. 
+ */ + +GLFS_MSGID(QUICK_READ, QUICK_READ_MSG_ENFORCEMENT_FAILED, + QUICK_READ_MSG_INVALID_ARGUMENT, + QUICK_READ_MSG_XLATOR_CHILD_MISCONFIGURED, QUICK_READ_MSG_NO_MEMORY, + QUICK_READ_MSG_VOL_MISCONFIGURED, QUICK_READ_MSG_DICT_SET_FAILED, + QUICK_READ_MSG_INVALID_CONFIG, QUICK_READ_MSG_LRU_NOT_EMPTY); + +#endif /* _QUICK_READ_MESSAGES_H_ */ diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c new file mode 100644 index 00000000000..7fe4b3c3a4b --- /dev/null +++ b/xlators/performance/quick-read/src/quick-read.c @@ -0,0 +1,1644 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#include <math.h> +#include "quick-read.h" +#include <glusterfs/statedump.h> +#include "quick-read-messages.h" +#include <glusterfs/upcall-utils.h> +#include <glusterfs/atomic.h> + +typedef struct qr_local { + inode_t *inode; + uint64_t incident_gen; + fd_t *fd; +} qr_local_t; + +qr_inode_t * +qr_inode_ctx_get(xlator_t *this, inode_t *inode); + +void +__qr_inode_prune_data(xlator_t *this, qr_inode_table_t *table, + qr_inode_t *qr_inode); + +void +qr_local_wipe(qr_local_t *local) +{ + if (!local) + goto out; + + if (local->inode) + inode_unref(local->inode); + + if (local->fd) + fd_unref(local->fd); + + GF_FREE(local); +out: + return; +} + +uint64_t +__qr_get_generation(xlator_t *this, qr_inode_t *qr_inode) +{ + uint64_t gen = 0, rollover; + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + + priv = this->private; + table = &priv->table; + + gen = GF_ATOMIC_INC(priv->generation); + if (gen == 0) { + qr_inode->gen_rollover = !qr_inode->gen_rollover; + gen = GF_ATOMIC_INC(priv->generation); + 
__qr_inode_prune_data(this, table, qr_inode); + qr_inode->gen = qr_inode->invalidation_time = gen - 1; + } + + rollover = qr_inode->gen_rollover; + gen |= (rollover << 32); + return gen; +} + +uint64_t +qr_get_generation(xlator_t *this, inode_t *inode) +{ + qr_inode_t *qr_inode = NULL; + uint64_t gen = 0; + qr_inode_table_t *table = NULL; + qr_private_t *priv = NULL; + + priv = this->private; + table = &priv->table; + + qr_inode = qr_inode_ctx_get(this, inode); + + if (qr_inode) { + LOCK(&table->lock); + { + gen = __qr_get_generation(this, qr_inode); + } + UNLOCK(&table->lock); + } else { + gen = GF_ATOMIC_INC(priv->generation); + if (gen == 0) { + gen = GF_ATOMIC_INC(priv->generation); + } + } + + return gen; +} + +qr_local_t * +qr_local_get(xlator_t *this, inode_t *inode) +{ + qr_local_t *local = NULL; + + local = GF_CALLOC(1, sizeof(*local), gf_common_mt_char); + if (!local) + goto out; + + local->incident_gen = qr_get_generation(this, inode); +out: + return local; +} + +#define QR_STACK_UNWIND(fop, frame, params...) 
\ + do { \ + qr_local_t *__local = NULL; \ + if (frame) { \ + __local = frame->local; \ + frame->local = NULL; \ + } \ + STACK_UNWIND_STRICT(fop, frame, params); \ + qr_local_wipe(__local); \ + } while (0) + +void +__qr_inode_prune(xlator_t *this, qr_inode_table_t *table, qr_inode_t *qr_inode, + uint64_t gen); + +int +__qr_inode_ctx_set(xlator_t *this, inode_t *inode, qr_inode_t *qr_inode) +{ + uint64_t value = 0; + int ret = -1; + + value = (long)qr_inode; + + ret = __inode_ctx_set(inode, this, &value); + + return ret; +} + +qr_inode_t * +__qr_inode_ctx_get(xlator_t *this, inode_t *inode) +{ + qr_inode_t *qr_inode = NULL; + uint64_t value = 0; + int ret = -1; + + ret = __inode_ctx_get(inode, this, &value); + if (ret) + return NULL; + + qr_inode = (void *)((long)value); + + return qr_inode; +} + +qr_inode_t * +qr_inode_ctx_get(xlator_t *this, inode_t *inode) +{ + qr_inode_t *qr_inode = NULL; + + if (inode == NULL) + goto out; + + LOCK(&inode->lock); + { + qr_inode = __qr_inode_ctx_get(this, inode); + } + UNLOCK(&inode->lock); + +out: + return qr_inode; +} + +qr_inode_t * +qr_inode_new(xlator_t *this, inode_t *inode) +{ + qr_inode_t *qr_inode = NULL; + + qr_inode = GF_CALLOC(1, sizeof(*qr_inode), gf_qr_mt_qr_inode_t); + if (!qr_inode) + return NULL; + + INIT_LIST_HEAD(&qr_inode->lru); + + qr_inode->priority = 0; /* initial priority */ + + return qr_inode; +} + +qr_inode_t * +qr_inode_ctx_get_or_new(xlator_t *this, inode_t *inode) +{ + qr_inode_t *qr_inode = NULL; + int ret = -1; + qr_private_t *priv = NULL; + + priv = this->private; + + LOCK(&inode->lock); + { + qr_inode = __qr_inode_ctx_get(this, inode); + if (qr_inode) + goto unlock; + + qr_inode = qr_inode_new(this, inode); + if (!qr_inode) + goto unlock; + + ret = __qr_inode_ctx_set(this, inode, qr_inode); + if (ret) { + __qr_inode_prune(this, &priv->table, qr_inode, 0); + GF_FREE(qr_inode); + qr_inode = NULL; + } + } +unlock: + UNLOCK(&inode->lock); + + return qr_inode; +} + +uint32_t +qr_get_priority(qr_conf_t 
*conf, const char *path) +{ + uint32_t priority = 0; + struct qr_priority *curr = NULL; + + list_for_each_entry(curr, &conf->priority_list, list) + { + if (fnmatch(curr->pattern, path, FNM_NOESCAPE) == 0) + priority = curr->priority; + } + + return priority; +} + +void +__qr_inode_register(xlator_t *this, qr_inode_table_t *table, + qr_inode_t *qr_inode) +{ + qr_private_t *priv = NULL; + + if (!qr_inode->data) + return; + + priv = this->private; + if (!priv) + return; + + if (list_empty(&qr_inode->lru)) + /* first time addition of this qr_inode into table */ + table->cache_used += qr_inode->size; + else + list_del_init(&qr_inode->lru); + + list_add_tail(&qr_inode->lru, &table->lru[qr_inode->priority]); + + GF_ATOMIC_INC(priv->qr_counter.files_cached); + + return; +} + +void +qr_inode_set_priority(xlator_t *this, inode_t *inode, const char *path) +{ + uint32_t priority = 0; + qr_inode_table_t *table = NULL; + qr_inode_t *qr_inode = NULL; + qr_private_t *priv = NULL; + qr_conf_t *conf = NULL; + + qr_inode = qr_inode_ctx_get(this, inode); + if (!qr_inode) + return; + + priv = this->private; + table = &priv->table; + conf = &priv->conf; + + if (path) + priority = qr_get_priority(conf, path); + else + /* retain existing priority, just bump LRU */ + priority = qr_inode->priority; + + LOCK(&table->lock); + { + qr_inode->priority = priority; + + __qr_inode_register(this, table, qr_inode); + } + UNLOCK(&table->lock); +} + +void +__qr_inode_prune_data(xlator_t *this, qr_inode_table_t *table, + qr_inode_t *qr_inode) +{ + qr_private_t *priv = NULL; + + priv = this->private; + + GF_FREE(qr_inode->data); + qr_inode->data = NULL; + + if (!list_empty(&qr_inode->lru)) { + table->cache_used -= qr_inode->size; + qr_inode->size = 0; + + list_del_init(&qr_inode->lru); + + GF_ATOMIC_DEC(priv->qr_counter.files_cached); + } + + memset(&qr_inode->buf, 0, sizeof(qr_inode->buf)); +} + +/* To be called with priv->table.lock held */ +void +__qr_inode_prune(xlator_t *this, qr_inode_table_t 
*table, qr_inode_t *qr_inode, + uint64_t gen) +{ + __qr_inode_prune_data(this, table, qr_inode); + if (gen) + qr_inode->gen = gen; + qr_inode->invalidation_time = __qr_get_generation(this, qr_inode); +} + +void +qr_inode_prune(xlator_t *this, inode_t *inode, uint64_t gen) +{ + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + qr_inode_t *qr_inode = NULL; + + qr_inode = qr_inode_ctx_get(this, inode); + if (!qr_inode) + return; + + priv = this->private; + table = &priv->table; + + LOCK(&table->lock); + { + __qr_inode_prune(this, table, qr_inode, gen); + } + UNLOCK(&table->lock); +} + +/* To be called with priv->table.lock held */ +void +__qr_cache_prune(xlator_t *this, qr_inode_table_t *table, qr_conf_t *conf) +{ + qr_inode_t *curr = NULL; + qr_inode_t *next = NULL; + int index = 0; + size_t size_pruned = 0; + + for (index = 0; index < conf->max_pri; index++) { + list_for_each_entry_safe(curr, next, &table->lru[index], lru) + { + size_pruned += curr->size; + + __qr_inode_prune(this, table, curr, 0); + + if (table->cache_used < conf->cache_size) + return; + } + } + + return; +} + +void +qr_cache_prune(xlator_t *this) +{ + qr_private_t *priv = NULL; + qr_conf_t *conf = NULL; + qr_inode_table_t *table = NULL; + + priv = this->private; + table = &priv->table; + conf = &priv->conf; + + LOCK(&table->lock); + { + if (table->cache_used > conf->cache_size) + __qr_cache_prune(this, table, conf); + } + UNLOCK(&table->lock); +} + +void * +qr_content_extract(dict_t *xdata) +{ + data_t *data = NULL; + void *content = NULL; + int ret = 0; + + ret = dict_get_with_ref(xdata, GF_CONTENT_KEY, &data); + if (ret < 0 || !data) + return NULL; + + content = GF_MALLOC(data->len, gf_qr_mt_content_t); + if (!content) + goto out; + + memcpy(content, data->data, data->len); + +out: + data_unref(data); + return content; +} + +void +qr_content_update(xlator_t *this, qr_inode_t *qr_inode, void *data, + struct iatt *buf, uint64_t gen) +{ + qr_private_t *priv = NULL; + qr_inode_table_t 
*table = NULL; + uint32_t rollover = 0; + + rollover = gen >> 32; + gen = gen & 0xffffffff; + + priv = this->private; + table = &priv->table; + + LOCK(&table->lock); + { + if ((rollover != qr_inode->gen_rollover) || + (gen && qr_inode->gen && (qr_inode->gen >= gen))) + goto unlock; + + if ((qr_inode->data == NULL) && (qr_inode->invalidation_time >= gen)) + goto unlock; + + __qr_inode_prune(this, table, qr_inode, gen); + + qr_inode->data = data; + data = NULL; + qr_inode->size = buf->ia_size; + + qr_inode->ia_mtime = buf->ia_mtime; + qr_inode->ia_mtime_nsec = buf->ia_mtime_nsec; + qr_inode->ia_ctime = buf->ia_ctime; + qr_inode->ia_ctime_nsec = buf->ia_ctime_nsec; + + qr_inode->buf = *buf; + qr_inode->last_refresh = gf_time(); + + __qr_inode_register(this, table, qr_inode); + } +unlock: + UNLOCK(&table->lock); + + if (data) + GF_FREE(data); + + qr_cache_prune(this); +} + +gf_boolean_t +qr_size_fits(qr_conf_t *conf, struct iatt *buf) +{ + return (buf->ia_size <= conf->max_file_size); +} + +gf_boolean_t +qr_mtime_equal(qr_inode_t *qr_inode, struct iatt *buf) +{ + return (qr_inode->ia_mtime == buf->ia_mtime && + qr_inode->ia_mtime_nsec == buf->ia_mtime_nsec); +} + +gf_boolean_t +qr_ctime_equal(qr_inode_t *qr_inode, struct iatt *buf) +{ + return (qr_inode->ia_ctime == buf->ia_ctime && + qr_inode->ia_ctime_nsec == buf->ia_ctime_nsec); +} + +gf_boolean_t +qr_time_equal(qr_conf_t *conf, qr_inode_t *qr_inode, struct iatt *buf) +{ + if (conf->ctime_invalidation) + return qr_ctime_equal(qr_inode, buf); + else + return qr_mtime_equal(qr_inode, buf); +} + +void +__qr_content_refresh(xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf, + uint64_t gen) +{ + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + qr_conf_t *conf = NULL; + uint32_t rollover = 0; + + rollover = gen >> 32; + gen = gen & 0xffffffff; + + priv = this->private; + table = &priv->table; + conf = &priv->conf; + + /* allow for rollover of frame->root->unique */ + if ((rollover != 
qr_inode->gen_rollover) || + (gen && qr_inode->gen && (qr_inode->gen >= gen))) + goto done; + + if ((qr_inode->data == NULL) && (qr_inode->invalidation_time >= gen)) + goto done; + + qr_inode->gen = gen; + + if (qr_size_fits(conf, buf) && qr_time_equal(conf, qr_inode, buf)) { + qr_inode->buf = *buf; + qr_inode->last_refresh = gf_time(); + __qr_inode_register(this, table, qr_inode); + } else { + __qr_inode_prune(this, table, qr_inode, gen); + } + +done: + return; +} + +void +qr_content_refresh(xlator_t *this, qr_inode_t *qr_inode, struct iatt *buf, + uint64_t gen) +{ + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + + priv = this->private; + table = &priv->table; + + LOCK(&table->lock); + { + __qr_content_refresh(this, qr_inode, buf, gen); + } + UNLOCK(&table->lock); +} + +gf_boolean_t +__qr_cache_is_fresh(xlator_t *this, qr_inode_t *qr_inode) +{ + qr_conf_t *conf = NULL; + qr_private_t *priv = NULL; + + priv = this->private; + conf = &priv->conf; + + if (qr_inode->last_refresh < priv->last_child_down) + return _gf_false; + + if (gf_time() - qr_inode->last_refresh >= conf->cache_timeout) + return _gf_false; + + return _gf_true; +} + +int +qr_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, inode_t *inode_ret, struct iatt *buf, + dict_t *xdata, struct iatt *postparent) +{ + void *content = NULL; + qr_inode_t *qr_inode = NULL; + inode_t *inode = NULL; + qr_local_t *local = NULL; + + local = frame->local; + inode = local->inode; + + if (op_ret == -1) { + qr_inode_prune(this, inode, local->incident_gen); + goto out; + } + + if (dict_get(xdata, GLUSTERFS_BAD_INODE)) { + qr_inode_prune(this, inode, local->incident_gen); + goto out; + } + + if (dict_get(xdata, "sh-failed")) { + qr_inode_prune(this, inode, local->incident_gen); + goto out; + } + + content = qr_content_extract(xdata); + + if (content) { + /* new content came along, always replace old content */ + qr_inode = qr_inode_ctx_get_or_new(this, inode); 
+ if (!qr_inode) { + /* no harm done */ + GF_FREE(content); + goto out; + } + + qr_content_update(this, qr_inode, content, buf, local->incident_gen); + } else { + /* purge old content if necessary */ + qr_inode = qr_inode_ctx_get(this, inode); + if (!qr_inode) + /* usual path for large files */ + goto out; + + qr_content_refresh(this, qr_inode, buf, local->incident_gen); + } +out: + QR_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode_ret, buf, xdata, + postparent); + return 0; +} + +int +qr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + qr_private_t *priv = NULL; + qr_conf_t *conf = NULL; + qr_inode_t *qr_inode = NULL; + int ret = -1; + dict_t *new_xdata = NULL; + qr_local_t *local = NULL; + + priv = this->private; + conf = &priv->conf; + local = qr_local_get(this, loc->inode); + local->inode = inode_ref(loc->inode); + frame->local = local; + + qr_inode = qr_inode_ctx_get(this, loc->inode); + if (qr_inode && qr_inode->data) + /* cached. only validate in qr_lookup_cbk */ + goto wind; + + if (!xdata) + xdata = new_xdata = dict_new(); + + if (!xdata) + goto wind; + + ret = 0; + if (conf->max_file_size) + ret = dict_set(xdata, GF_CONTENT_KEY, + data_from_uint64(conf->max_file_size)); + if (ret) + gf_msg(this->name, GF_LOG_WARNING, 0, QUICK_READ_MSG_DICT_SET_FAILED, + "cannot set key in request dict (%s)", loc->path); +wind: + STACK_WIND(frame, qr_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + + if (new_xdata) + dict_unref(new_xdata); + + return 0; +} + +int +qr_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, + int op_errno, gf_dirent_t *entries, dict_t *xdata) +{ + gf_dirent_t *entry = NULL; + qr_inode_t *qr_inode = NULL; + qr_local_t *local = NULL; + + local = frame->local; + + if (op_ret <= 0) + goto unwind; + + list_for_each_entry(entry, &entries->list, list) + { + if (!entry->inode) + continue; + + qr_inode = qr_inode_ctx_get(this, entry->inode); + if (!qr_inode) + /* no 
harm */ + continue; + + qr_content_refresh(this, qr_inode, &entry->d_stat, local->incident_gen); + } + +unwind: + QR_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata); + return 0; +} + +int +qr_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, dict_t *xdata) +{ + qr_local_t *local = NULL; + + local = qr_local_get(this, NULL); + frame->local = local; + + STACK_WIND(frame, qr_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata); + return 0; +} + +int +qr_readv_cached(call_frame_t *frame, qr_inode_t *qr_inode, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + xlator_t *this = NULL; + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + int op_ret = -1; + struct iobuf *iobuf = NULL; + struct iobref *iobref = NULL; + struct iovec iov = { + 0, + }; + struct iatt buf = { + 0, + }; + + this = frame->this; + priv = this->private; + table = &priv->table; + + LOCK(&table->lock); + { + if (!qr_inode->data) + goto unlock; + + if (offset >= qr_inode->size) + goto unlock; + + if (!__qr_cache_is_fresh(this, qr_inode)) + goto unlock; + + op_ret = min(size, (qr_inode->size - offset)); + + iobuf = iobuf_get2(this->ctx->iobuf_pool, op_ret); + if (!iobuf) { + op_ret = -1; + goto unlock; + } + + iobref = iobref_new(); + if (!iobref) { + op_ret = -1; + goto unlock; + } + + iobref_add(iobref, iobuf); + + memcpy(iobuf->ptr, qr_inode->data + offset, op_ret); + + buf = qr_inode->buf; + + /* bump LRU */ + __qr_inode_register(frame->this, table, qr_inode); + } +unlock: + UNLOCK(&table->lock); + + if (op_ret >= 0) { + iov.iov_base = iobuf->ptr; + iov.iov_len = op_ret; + + GF_ATOMIC_INC(priv->qr_counter.cache_hit); + STACK_UNWIND_STRICT(readv, frame, op_ret, 0, &iov, 1, &buf, iobref, + xdata); + } else { + GF_ATOMIC_INC(priv->qr_counter.cache_miss); + } + + if (iobuf) + iobuf_unref(iobuf); + + if (iobref) + iobref_unref(iobref); + + return op_ret; +} + +int +qr_readv(call_frame_t 
*frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + qr_inode_t *qr_inode = NULL; + + qr_inode = qr_inode_ctx_get(this, fd->inode); + if (!qr_inode) + goto wind; + + if (qr_readv_cached(frame, qr_inode, size, offset, flags, xdata) < 0) + goto wind; + + return 0; +wind: + STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata); + return 0; +} + +int32_t +qr_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) +{ + qr_local_t *local = NULL; + + local = frame->local; + + qr_inode_prune(this, local->fd->inode, local->incident_gen); + + QR_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, postbuf, xdata); + return 0; +} + +int +qr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov, + int count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) +{ + qr_local_t *local = NULL; + + local = qr_local_get(this, fd->inode); + local->fd = fd_ref(fd); + + frame->local = local; + + STACK_WIND(frame, qr_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, iov, count, offset, flags, + iobref, xdata); + return 0; +} + +int32_t +qr_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + qr_local_t *local = NULL; + + local = frame->local; + qr_inode_prune(this, local->inode, local->incident_gen); + + QR_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf, xdata); + return 0; +} + +int +qr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) +{ + qr_local_t *local = NULL; + + local = qr_local_get(this, loc->inode); + local->inode = inode_ref(loc->inode); + frame->local = local; + + STACK_WIND(frame, qr_truncate_cbk, FIRST_CHILD(this), + 
FIRST_CHILD(this)->fops->truncate, loc, offset, xdata); + return 0; +} + +int32_t +qr_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + qr_local_t *local = NULL; + + local = frame->local; + qr_inode_prune(this, local->fd->inode, local->incident_gen); + + QR_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, prebuf, postbuf, xdata); + return 0; +} + +int +qr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) +{ + qr_local_t *local = NULL; + + local = qr_local_get(this, fd->inode); + local->fd = fd_ref(fd); + frame->local = local; + + STACK_WIND(frame, qr_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata); + return 0; +} + +int32_t +qr_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + qr_local_t *local = NULL; + + local = frame->local; + qr_inode_prune(this, local->fd->inode, local->incident_gen); + + QR_STACK_UNWIND(fallocate, frame, op_ret, op_errno, pre, post, xdata); + return 0; +} + +static int +qr_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int keep_size, + off_t offset, size_t len, dict_t *xdata) +{ + qr_local_t *local = NULL; + + local = qr_local_get(this, fd->inode); + local->fd = fd_ref(fd); + frame->local = local; + + STACK_WIND(frame, qr_fallocate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fallocate, fd, keep_size, offset, len, + xdata); + return 0; +} + +int32_t +qr_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + qr_local_t *local = NULL; + + local = frame->local; + qr_inode_prune(this, local->fd->inode, local->incident_gen); + + QR_STACK_UNWIND(discard, frame, op_ret, op_errno, pre, post, xdata); + return 0; +} + +static int 
+qr_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + qr_local_t *local = NULL; + + local = qr_local_get(this, fd->inode); + local->fd = fd_ref(fd); + frame->local = local; + + STACK_WIND(frame, qr_discard_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata); + return 0; +} + +int32_t +qr_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *pre, + struct iatt *post, dict_t *xdata) +{ + qr_local_t *local = NULL; + + local = frame->local; + qr_inode_prune(this, local->fd->inode, local->incident_gen); + + QR_STACK_UNWIND(zerofill, frame, op_ret, op_errno, pre, post, xdata); + return 0; +} + +static int +qr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + off_t len, dict_t *xdata) +{ + qr_local_t *local = NULL; + + local = qr_local_get(this, fd->inode); + local->fd = fd_ref(fd); + frame->local = local; + + STACK_WIND(frame, qr_zerofill_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata); + return 0; +} + +int +qr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd, + dict_t *xdata) +{ + qr_inode_set_priority(this, fd->inode, loc->path); + + STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +} + +int +qr_forget(xlator_t *this, inode_t *inode) +{ + qr_inode_t *qr_inode = NULL; + + qr_inode = qr_inode_ctx_get(this, inode); + + if (!qr_inode) + return 0; + + qr_inode_prune(this, inode, qr_get_generation(this, inode)); + + GF_FREE(qr_inode); + + return 0; +} + +int32_t +qr_inodectx_dump(xlator_t *this, inode_t *inode) +{ + qr_inode_t *qr_inode = NULL; + int32_t ret = -1; + char key_prefix[GF_DUMP_MAX_BUF_LEN] = { + 0, + }; + char buf[GF_TIMESTR_SIZE] = { + 0, + }; + + qr_inode = qr_inode_ctx_get(this, inode); + if (!qr_inode) + goto out; + + gf_proc_dump_build_key(key_prefix, 
"xlator.performance.quick-read", + "inodectx"); + gf_proc_dump_add_section("%s", key_prefix); + + gf_proc_dump_write("entire-file-cached", "%s", + qr_inode->data ? "yes" : "no"); + + if (qr_inode->last_refresh) { + gf_time_fmt(buf, sizeof buf, qr_inode->last_refresh, gf_timefmt_FT); + gf_proc_dump_write("last-cache-validation-time", "%s", buf); + } + + ret = 0; +out: + return ret; +} + +int +qr_priv_dump(xlator_t *this) +{ + qr_conf_t *conf = NULL; + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + uint32_t file_count = 0; + uint32_t i = 0; + qr_inode_t *curr = NULL; + uint64_t total_size = 0; + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + + if (!this) { + return -1; + } + + priv = this->private; + conf = &priv->conf; + if (!conf) + return -1; + + table = &priv->table; + + gf_proc_dump_build_key(key_prefix, "xlator.performance.quick-read", "priv"); + + gf_proc_dump_add_section("%s", key_prefix); + + gf_proc_dump_write("max_file_size", "%" PRIu64, conf->max_file_size); + gf_proc_dump_write("cache_timeout", "%d", conf->cache_timeout); + + if (!table) { + goto out; + } else { + for (i = 0; i < conf->max_pri; i++) { + list_for_each_entry(curr, &table->lru[i], lru) + { + file_count++; + total_size += curr->size; + } + } + } + + gf_proc_dump_write("total_files_cached", "%d", file_count); + gf_proc_dump_write("total_cache_used", "%" PRIu64, total_size); + gf_proc_dump_write("cache-hit", "%" GF_PRI_ATOMIC, + GF_ATOMIC_GET(priv->qr_counter.cache_hit)); + gf_proc_dump_write("cache-miss", "%" GF_PRI_ATOMIC, + GF_ATOMIC_GET(priv->qr_counter.cache_miss)); + gf_proc_dump_write("cache-invalidations", "%" GF_PRI_ATOMIC, + GF_ATOMIC_GET(priv->qr_counter.file_data_invals)); + +out: + return 0; +} + +static int32_t +qr_dump_metrics(xlator_t *this, int fd) +{ + qr_private_t *priv = NULL; + qr_inode_table_t *table = NULL; + + priv = this->private; + table = &priv->table; + + dprintf(fd, "%s.total_files_cached %" PRId64 "\n", this->name, + 
GF_ATOMIC_GET(priv->qr_counter.files_cached)); + dprintf(fd, "%s.total_cache_used %" PRId64 "\n", this->name, + table->cache_used); + dprintf(fd, "%s.cache-hit %" PRId64 "\n", this->name, + GF_ATOMIC_GET(priv->qr_counter.cache_hit)); + dprintf(fd, "%s.cache-miss %" PRId64 "\n", this->name, + GF_ATOMIC_GET(priv->qr_counter.cache_miss)); + dprintf(fd, "%s.cache-invalidations %" PRId64 "\n", this->name, + GF_ATOMIC_GET(priv->qr_counter.file_data_invals)); + + return 0; +} + +int32_t +qr_mem_acct_init(xlator_t *this) +{ + int ret = -1; + + if (!this) + return ret; + + ret = xlator_mem_acct_init(this, gf_qr_mt_end + 1); + + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, QUICK_READ_MSG_NO_MEMORY, + "Memory accounting init failed"); + return ret; + } + + return ret; +} + +static gf_boolean_t +check_cache_size_ok(xlator_t *this, int64_t cache_size) +{ + int ret = _gf_true; + uint64_t total_mem = 0; + uint64_t max_cache_size = 0; + volume_option_t *opt = NULL; + + GF_ASSERT(this); + opt = xlator_volume_option_get(this, "cache-size"); + if (!opt) { + ret = _gf_false; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, + QUICK_READ_MSG_INVALID_ARGUMENT, + "could not get cache-size option"); + goto out; + } + + total_mem = get_mem_size(); + if (-1 == total_mem) + max_cache_size = opt->max; + else + max_cache_size = total_mem; + + gf_msg_debug(this->name, 0, "Max cache size is %" PRIu64, max_cache_size); + if (cache_size > max_cache_size) { + ret = _gf_false; + gf_msg(this->name, GF_LOG_ERROR, 0, QUICK_READ_MSG_INVALID_ARGUMENT, + "Cache size %" PRIu64 + " is greater than the max size of %" PRIu64, + cache_size, max_cache_size); + goto out; + } +out: + return ret; +} + +int +qr_reconfigure(xlator_t *this, dict_t *options) +{ + int32_t ret = -1; + qr_private_t *priv = NULL; + qr_conf_t *conf = NULL; + uint64_t cache_size_new = 0; + + GF_VALIDATE_OR_GOTO("quick-read", this, out); + GF_VALIDATE_OR_GOTO(this->name, this->private, out); + GF_VALIDATE_OR_GOTO(this->name, options, 
out); + + priv = this->private; + + conf = &priv->conf; + if (!conf) { + goto out; + } + + GF_OPTION_RECONF("cache-timeout", conf->cache_timeout, options, int32, out); + + GF_OPTION_RECONF("quick-read-cache-invalidation", conf->qr_invalidation, + options, bool, out); + + GF_OPTION_RECONF("ctime-invalidation", conf->ctime_invalidation, options, + bool, out); + + GF_OPTION_RECONF("cache-size", cache_size_new, options, size_uint64, out); + if (!check_cache_size_ok(this, cache_size_new)) { + ret = -1; + gf_msg(this->name, GF_LOG_ERROR, EINVAL, QUICK_READ_MSG_INVALID_CONFIG, + "Not reconfiguring cache-size"); + goto out; + } + conf->cache_size = cache_size_new; + + ret = 0; +out: + return ret; +} + /* qr_get_priority_list: parse opt_str, a comma-separated list of pattern:priority pairs, appending one struct qr_priority per pair to the list headed by first. Returns the largest priority parsed (never less than the initial value 1). On an allocation or parse failure every entry found on first is unlinked and freed and -1 is returned. NOTE(review): the strtol() result is only checked for trailing characters, so negative or out-of-range priorities are accepted as they are - confirm that callers tolerate this. */ +int32_t +qr_get_priority_list(const char *opt_str, struct list_head *first) +{ + int32_t max_pri = 1; + char *tmp_str = NULL; + char *tmp_str1 = NULL; + char *tmp_str2 = NULL; + char *dup_str = NULL; + char *priority_str = NULL; + char *pattern = NULL; + char *priority = NULL; + char *string = NULL; + struct qr_priority *curr = NULL, *tmp = NULL; + + GF_VALIDATE_OR_GOTO("quick-read", opt_str, out); + GF_VALIDATE_OR_GOTO("quick-read", first, out); + + /* tokenise a private copy: strtok_r() writes into the buffer it splits */ string = gf_strdup(opt_str); + if (string == NULL) { + max_pri = -1; + goto out; + } + + /* Get the pattern for cache priority. 
+ * "option priority *.jpg:1,abc*:2" etc + */ + /* TODO: inode_lru in table is statically hard-coded to 5, + * should be changed to run-time configuration + */ + priority_str = strtok_r(string, ",", &tmp_str); + while (priority_str) { + curr = GF_CALLOC(1, sizeof(*curr), gf_qr_mt_qr_priority_t); + if (curr == NULL) { + max_pri = -1; + goto out; + } + + /* link the entry right away so that the cleanup loop at out: also frees it */ list_add_tail(&curr->list, first); + + dup_str = gf_strdup(priority_str); + if (dup_str == NULL) { + max_pri = -1; + goto out; + } + + pattern = strtok_r(dup_str, ":", &tmp_str1); + if (!pattern) { + max_pri = -1; + goto out; + } + + priority = strtok_r(NULL, ":", &tmp_str1); + if (!priority) { + max_pri = -1; + goto out; + } + + gf_msg_trace("quick-read", 0, + "quick-read priority : pattern %s : priority %s", pattern, + priority); + + curr->pattern = gf_strdup(pattern); + if (curr->pattern == NULL) { + max_pri = -1; + goto out; + } + + /* base 0 lets strtol() accept decimal, octal and hexadecimal notation; any trailing character makes the whole option invalid */ curr->priority = strtol(priority, &tmp_str2, 0); + if (tmp_str2 && (*tmp_str2)) { + max_pri = -1; + goto out; + } else { + max_pri = max(max_pri, curr->priority); + } + + GF_FREE(dup_str); + dup_str = NULL; + + priority_str = strtok_r(NULL, ",", &tmp_str); + } +out: + GF_FREE(string); + + GF_FREE(dup_str); + + if (max_pri == -1) { + list_for_each_entry_safe(curr, tmp, first, list) + { + list_del_init(&curr->list); + GF_FREE(curr->pattern); + GF_FREE(curr); + } + } + + return max_pri; +} + /* qr_init: init entry point (see xlator_api). Fails unless this has exactly one child and only warns when it has no parent. Allocates the private structure, reads the options into priv->conf, parses the optional priority option and allocates conf->max_pri LRU list heads. Returns 0 on success and -1 on failure. NOTE(review): only priv->table.lock is LOCK_INIT'ed in this function, while qr_update_child_down_time() takes priv->lock. NOTE(review): the failure path frees priv only; if the LRU allocation fails after the priority list was parsed, the list entries are not released, and the initialised table lock is not destroyed. */ +int32_t +qr_init(xlator_t *this) +{ + int32_t ret = -1, i = 0; + qr_private_t *priv = NULL; + qr_conf_t *conf = NULL; + + if (!this->children || this->children->next) { + gf_msg(this->name, GF_LOG_ERROR, 0, + QUICK_READ_MSG_XLATOR_CHILD_MISCONFIGURED, + "FATAL: volume (%s) not configured with exactly one " + "child", + this->name); + return -1; + } + + if (!this->parents) { + gf_msg(this->name, GF_LOG_WARNING, 0, QUICK_READ_MSG_VOL_MISCONFIGURED, + "dangling volume. 
check volfile "); + } + + priv = GF_CALLOC(1, sizeof(*priv), gf_qr_mt_qr_private_t); + if (priv == NULL) { + ret = -1; + goto out; + } + + LOCK_INIT(&priv->table.lock); + conf = &priv->conf; + + GF_OPTION_INIT("max-file-size", conf->max_file_size, size_uint64, out); + + GF_OPTION_INIT("cache-timeout", conf->cache_timeout, int32, out); + + GF_OPTION_INIT("quick-read-cache-invalidation", conf->qr_invalidation, bool, + out); + + GF_OPTION_INIT("cache-size", conf->cache_size, size_uint64, out); + if (!check_cache_size_ok(this, conf->cache_size)) { + ret = -1; + goto out; + } + + GF_OPTION_INIT("ctime-invalidation", conf->ctime_invalidation, bool, out); + + INIT_LIST_HEAD(&conf->priority_list); + conf->max_pri = 1; + if (dict_get(this->options, "priority")) { + char *option_list = data_to_str(dict_get(this->options, "priority")); + gf_msg_trace(this->name, 0, "option path %s", option_list); + /* parse the list of pattern:priority */ + conf->max_pri = qr_get_priority_list(option_list, &conf->priority_list); + + if (conf->max_pri == -1) { + goto out; + } + /* max_pri sizes the lru array below; presumably the array is indexed by priority value, hence highest priority plus one - confirm against the users of table.lru */ conf->max_pri++; + } + + priv->table.lru = GF_CALLOC(conf->max_pri, sizeof(*priv->table.lru), + gf_common_mt_list_head); + if (priv->table.lru == NULL) { + ret = -1; + goto out; + } + + for (i = 0; i < conf->max_pri; i++) { + INIT_LIST_HEAD(&priv->table.lru[i]); + } + + ret = 0; + + priv->last_child_down = gf_time(); + GF_ATOMIC_INIT(priv->generation, 0); + this->private = priv; +out: + if ((ret == -1) && priv) { + GF_FREE(priv); + } + + return ret; +} + /* qr_inode_table_destroy: log at INFO level every LRU list that is still non-empty, then destroy the table lock. NOTE(review): the priv->table.lru array allocated in qr_init() is freed neither here nor in qr_fini(). */ +void +qr_inode_table_destroy(qr_private_t *priv) +{ + int i = 0; + qr_conf_t *conf = NULL; + + conf = &priv->conf; + + for (i = 0; i < conf->max_pri; i++) { + /* There is a known leak of inodes, hence until + * that is fixed, log the assert as warning. 
+ GF_ASSERT (list_empty (&priv->table.lru[i]));*/ + if (!list_empty(&priv->table.lru[i])) { + gf_msg("quick-read", GF_LOG_INFO, 0, QUICK_READ_MSG_LRU_NOT_EMPTY, + "quick read inode table lru not empty"); + } + } + + LOCK_DESTROY(&priv->table.lock); + + return; +} + /* qr_conf_destroy: unlink and free every struct qr_priority on conf->priority_list together with its pattern string. */ +void +qr_conf_destroy(qr_conf_t *conf) +{ + struct qr_priority *curr = NULL, *tmp = NULL; + + list_for_each_entry_safe(curr, tmp, &conf->priority_list, list) + { + list_del(&curr->list); + GF_FREE(curr->pattern); + GF_FREE(curr); + } + + return; +} + /* qr_update_child_down_time: store now in priv->last_child_down while holding priv->lock. */ +void +qr_update_child_down_time(xlator_t *this, time_t now) +{ + qr_private_t *priv = NULL; + + priv = this->private; + + LOCK(&priv->lock); + { + priv->last_child_down = now; + } + UNLOCK(&priv->lock); +} + /* qr_invalidate: handle an upcall whose payload is data. Event types other than GF_UPCALL_CACHE_INVALIDATION are ignored and 0 is returned. When the invalidation flags contain any UP_WRITE_FLAGS bit, the file_data_invals counter is incremented, the inode is looked up by gfid in the inode table of the top xlator of the graph and handed to qr_inode_prune(). Returns -1 when that lookup finds no inode, 0 otherwise. */ +static int +qr_invalidate(xlator_t *this, void *data) +{ + struct gf_upcall *up_data = NULL; + struct gf_upcall_cache_invalidation *up_ci = NULL; + inode_t *inode = NULL; + int ret = 0; + inode_table_t *itable = NULL; + qr_private_t *priv = NULL; + + up_data = (struct gf_upcall *)data; + + if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION) + goto out; + + priv = this->private; + up_ci = (struct gf_upcall_cache_invalidation *)up_data->data; + + if (up_ci && (up_ci->flags & UP_WRITE_FLAGS)) { + GF_ATOMIC_INC(priv->qr_counter.file_data_invals); + itable = ((xlator_t *)this->graph->top)->itable; + inode = inode_find(itable, up_data->gfid); + if (!inode) { + ret = -1; + goto out; + } + qr_inode_prune(this, inode, qr_get_generation(this, inode)); + } + +out: + /* presumably releases the reference handed out by inode_find() - confirm */ if (inode) + inode_unref(inode); + + return ret; +} + /* qr_notify: notify entry point (see xlator_api). Records the current time on child-down events, passes upcalls to qr_invalidate() when conf->qr_invalidation is set, and then calls default_notify() for every event. Returns -1 when default_notify() reports a failure, otherwise the result of the local handling (0, or the value returned by qr_invalidate()). */ +int +qr_notify(xlator_t *this, int event, void *data, ...) 
+{ + int ret = 0; + qr_private_t *priv = NULL; + qr_conf_t *conf = NULL; + + priv = this->private; + conf = &priv->conf; + + switch (event) { + case GF_EVENT_CHILD_DOWN: + case GF_EVENT_SOME_DESCENDENT_DOWN: + qr_update_child_down_time(this, gf_time()); + break; + case GF_EVENT_UPCALL: + if (conf->qr_invalidation) + ret = qr_invalidate(this, data); + break; + default: + break; + } + + /* the event is passed on whatever the local handling returned */ if (default_notify(this, event, data) != 0) + ret = -1; + + return ret; +} + /* qr_fini: fini entry point (see xlator_api). Does nothing when this or this->private is NULL; otherwise destroys the inode table lock, frees the priority list, clears this->private and frees the private structure. NOTE(review): priv->lock is not destroyed and priv->table.lru is not freed on this path. */ +void +qr_fini(xlator_t *this) +{ + qr_private_t *priv = NULL; + + if (this == NULL) { + goto out; + } + + priv = this->private; + if (priv == NULL) { + goto out; + } + + qr_inode_table_destroy(priv); + qr_conf_destroy(&priv->conf); + + this->private = NULL; + + GF_FREE(priv); +out: + return; +} + /* File operations for which this xlator supplies its own handler. */ +struct xlator_fops qr_fops = {.lookup = qr_lookup, + .readdirp = qr_readdirp, + .open = qr_open, + .readv = qr_readv, + .writev = qr_writev, + .truncate = qr_truncate, + .ftruncate = qr_ftruncate, + .fallocate = qr_fallocate, + .discard = qr_discard, + .zerofill = qr_zerofill}; + /* Callback table: only forget is set. */ +struct xlator_cbks qr_cbks = { + .forget = qr_forget, +}; + /* Dump handlers for the private structure and for inode contexts. */ +struct xlator_dumpops qr_dumpops = { + .priv = qr_priv_dump, + .inodectx = qr_inodectx_dump, +}; + /* Option table; the last entry, with a NULL key, terminates it. */ +struct volume_options qr_options[] = { + { + .key = {"quick-read"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "off", + .description = "enable/disable quick-read", + .op_version = {GD_OP_VERSION_6_0}, + .flags = OPT_FLAG_SETTABLE, + }, + {.key = {"priority"}, .type = GF_OPTION_TYPE_ANY}, + {.key = {"cache-size"}, + .type = GF_OPTION_TYPE_SIZET, + .min = 0, + .max = INFINITY, + .default_value = "128MB", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + .description = "Size of small file read cache."}, + { + .key = {"cache-timeout"}, + .type = GF_OPTION_TYPE_INT, + .default_value = "1", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + }, + { + .key = {"max-file-size"}, + .type = 
GF_OPTION_TYPE_SIZET, + .min = 0, + .max = 1 * GF_UNIT_KB * 1000, + .default_value = "64KB", + .op_version = {1}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + }, + { + .key = {"quick-read-cache-invalidation"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .op_version = {GD_OP_VERSION_4_0_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + /* NOTE(review): the text below talks about the metadata cache, whereas this option is what gates qr_invalidate() in qr_notify(); the wording may have been inherited from elsewhere - confirm */ .description = "When \"on\", invalidates/updates the metadata cache," + " on receiving the cache-invalidation notifications", + }, + { + .key = {"ctime-invalidation"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .op_version = {GD_OP_VERSION_5_0}, + .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC, + /* NOTE(review): once concatenated, the literals below produce a space before one full stop and two doubled spaces in the help text */ .description = "Quick-read by default uses mtime to identify changes " + "to file data. However there are applications like " + "rsync which explicitly set mtime making it unreliable " + "for the purpose of identifying change in file content " + ". Since ctime also changes when content of a file " + " changes and it cannot be set explicitly, it becomes " + " suitable for identifying staleness of cached data. " + "This option makes quick-read to prefer ctime over " + "mtime to validate its cache. However, using ctime " + "can result in false positives as ctime changes with " + "just attribute changes like permission without " + "changes to file data. 
So, use this only when mtime " + "is not reliable", + }, + {.key = {NULL}}}; + /* Descriptor tying together the entry points and tables of this xlator. */ +xlator_api_t xlator_api = { + .init = qr_init, + .fini = qr_fini, + .notify = qr_notify, + .reconfigure = qr_reconfigure, + .mem_acct_init = qr_mem_acct_init, + .dump_metrics = qr_dump_metrics, + .op_version = {1}, /* Present from the initial version */ + .dumpops = &qr_dumpops, + .fops = &qr_fops, + .cbks = &qr_cbks, + .options = qr_options, + .identifier = "quick-read", + .category = GF_MAINTAINED, +}; diff --git a/xlators/performance/quick-read/src/quick-read.h b/xlators/performance/quick-read/src/quick-read.h new file mode 100644 index 00000000000..20fcc70b3a7 --- /dev/null +++ b/xlators/performance/quick-read/src/quick-read.h @@ -0,0 +1,91 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. 
+*/ + +#ifndef __QUICK_READ_H +#define __QUICK_READ_H + +#include <glusterfs/glusterfs.h> +#include <glusterfs/logging.h> +#include <glusterfs/dict.h> +#include <glusterfs/xlator.h> +#include <glusterfs/list.h> +#include <glusterfs/compat.h> +#include <glusterfs/compat-errno.h> +#include <glusterfs/common-utils.h> +#include <glusterfs/call-stub.h> +#include <glusterfs/defaults.h> +#include <libgen.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fnmatch.h> +#include "quick-read-mem-types.h" + /* Presumably the per-inode cache entry; the code that fills and reads it is outside this view - confirm. */ +struct qr_inode { + void *data; + size_t size; + int priority; + uint32_t ia_mtime; + uint32_t ia_mtime_nsec; + uint32_t ia_ctime; + uint32_t ia_ctime_nsec; + uint32_t gen_rollover; + struct iatt buf; + time_t last_refresh; + struct list_head lru; + uint64_t gen; + uint64_t invalidation_time; +}; +typedef struct qr_inode qr_inode_t; + /* One pattern:priority rule; presumably the type of the entries linked on the priority_list of struct qr_conf - confirm. */ +struct qr_priority { + char *pattern; + int32_t priority; + struct list_head list; +}; +typedef struct qr_priority qr_priority_t; + /* Configuration values of one xlator instance, embedded in struct qr_private as conf. */ +struct qr_conf { + uint64_t max_file_size; + int32_t cache_timeout; + uint64_t cache_size; + int max_pri; + gf_boolean_t qr_invalidation; + gf_boolean_t ctime_invalidation; + struct list_head priority_list; +}; +typedef struct qr_conf qr_conf_t; + /* Cache bookkeeping: a used-size counter, a pointer to list heads and a lock. */ +struct qr_inode_table { + uint64_t cache_used; + struct list_head *lru; + gf_lock_t lock; +}; +typedef struct qr_inode_table qr_inode_table_t; + +struct qr_statistics { + gf_atomic_t cache_hit; + gf_atomic_t cache_miss; + gf_atomic_t file_data_invals; /* No. of invalidates received from upcall */ + gf_atomic_t files_cached; +}; + /* Private state of one xlator instance; presumably what this->private points to - confirm. */ +struct qr_private { + qr_conf_t conf; + qr_inode_table_t table; + time_t last_child_down; + gf_lock_t lock; + struct qr_statistics qr_counter; + gf_atomic_int32_t generation; +}; +typedef struct qr_private qr_private_t; + +#endif /* #ifndef __QUICK_READ_H */ |
