summaryrefslogtreecommitdiffstats
path: root/xlators/performance/readdir-ahead/src
diff options
context:
space:
mode:
Diffstat (limited to 'xlators/performance/readdir-ahead/src')
-rw-r--r--xlators/performance/readdir-ahead/src/Makefile.am7
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h12
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-messages.h97
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c1652
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.h96
5 files changed, 1271 insertions, 593 deletions
diff --git a/xlators/performance/readdir-ahead/src/Makefile.am b/xlators/performance/readdir-ahead/src/Makefile.am
index e54ab168a09..3d6b6ae951f 100644
--- a/xlators/performance/readdir-ahead/src/Makefile.am
+++ b/xlators/performance/readdir-ahead/src/Makefile.am
@@ -6,9 +6,12 @@ readdir_ahead_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
readdir_ahead_la_SOURCES = readdir-ahead.c
readdir_ahead_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = readdir-ahead.h readdir-ahead-mem-types.h readdir-ahead-messages.h
+noinst_HEADERS = readdir-ahead.h readdir-ahead-mem-types.h \
+ readdir-ahead-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
-AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h
index 39e2c536975..498ffae7f64 100644
--- a/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h
@@ -8,17 +8,17 @@
cases as published by the Free Software Foundation.
*/
-
#ifndef __RDA_MEM_TYPES_H__
#define __RDA_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_rda_mem_types_ {
- gf_rda_mt_rda_local = gf_common_mt_end + 1,
- gf_rda_mt_rda_fd_ctx,
- gf_rda_mt_rda_priv,
- gf_rda_mt_end
+ gf_rda_mt_rda_local = gf_common_mt_end + 1,
+ gf_rda_mt_rda_fd_ctx,
+ gf_rda_mt_rda_priv,
+ gf_rda_mt_inode_ctx_t,
+ gf_rda_mt_end
};
#endif
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h b/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h
index 0e19348b954..28ec14dd845 100644
--- a/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead-messages.h
@@ -10,96 +10,21 @@
#ifndef _READDIR_AHEAD_MESSAGES_H_
#define _READDIR_AHEAD_MESSAGES_H_
-#include "glfs-message-id.h"
+#include <glusterfs/glfs-message-id.h>
-/*! \file readdir-ahead-messages.h
- * \brief READDIR_AHEAD log-message IDs and their descriptions
+/* To add new message IDs, append new identifiers at the end of the list.
*
- */
-
-/* NOTE: Rules for message additions
- * 1) Each instance of a message is _better_ left with a unique message ID, even
- * if the message format is the same. Reasoning is that, if the message
- * format needs to change in one instance, the other instances are not
- * impacted or the new change does not change the ID of the instance being
- * modified.
- * 2) Addition of a message,
- * - Should increment the GLFS_NUM_MESSAGES
- * - Append to the list of messages defined, towards the end
- * - Retain macro naming as glfs_msg_X (for redability across developers)
- * NOTE: Rules for message format modifications
- * 3) Check acorss the code if the message ID macro in question is reused
- * anywhere. If reused then then the modifications should ensure correctness
- * everywhere, or needs a new message ID as (1) above was not adhered to. If
- * not used anywhere, proceed with the required modification.
- * NOTE: Rules for message deletion
- * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
- * anywhere, then can be deleted, but will leave a hole by design, as
- * addition rules specify modification to the end of the list and not filling
- * holes.
- */
-
-#define GLFS_READDIR_AHEAD_BASE GLFS_MSGID_COMP_READDIR_AHEAD
-#define GLFS_READDIR_AHEAD_NUM_MESSAGES 5
-#define GLFS_MSGID_END (GLFS_READDIR_AHEAD_BASE +\
- GLFS_READDIR_AHEAD_NUM_MESSAGES + 1)
-
-/* Messages with message IDs */
-#define glfs_msg_start_x GLFS_READDIR_AHEAD_BASE, "Invalid: Start of messages"
-
-
-
-
-/*!
- * @messageid
- * @diagnosis
- * @recommendedaction None
- *
- */
-
-#define READDIR_AHEAD_MSG_XLATOR_CHILD_MISCONFIGURED\
- (GLFS_READDIR_AHEAD_BASE + 1)
-
-/*!
- * @messageid
- * @diagnosis
- * @recommendedaction None
- *
- */
-
-#define READDIR_AHEAD_MSG_VOL_MISCONFIGURED (GLFS_READDIR_AHEAD_BASE + 2)
-
-/*!
- * @messageid
- * @diagnosis
- * @recommendedaction None
- *
- */
-
-#define READDIR_AHEAD_MSG_NO_MEMORY (GLFS_READDIR_AHEAD_BASE + 3)
-
-/*!
- * @messageid
- * @diagnosis
- * @recommendedaction None
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
*
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
*/
-#define READDIR_AHEAD_MSG_DIR_RELEASE_PENDING_STUB \
- (GLFS_READDIR_AHEAD_BASE + 4)
-
-/*!
- * @messageid
- * @diagnosis
- * @recommendedaction None
- *
- */
-
-#define READDIR_AHEAD_MSG_OUT_OF_SEQUENCE (GLFS_READDIR_AHEAD_BASE + 5)
-
-
-/*------------*/
-#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
-
+GLFS_MSGID(READDIR_AHEAD, READDIR_AHEAD_MSG_XLATOR_CHILD_MISCONFIGURED,
+ READDIR_AHEAD_MSG_VOL_MISCONFIGURED, READDIR_AHEAD_MSG_NO_MEMORY,
+ READDIR_AHEAD_MSG_DIR_RELEASE_PENDING_STUB,
+ READDIR_AHEAD_MSG_OUT_OF_SEQUENCE, READDIR_AHEAD_MSG_DICT_OP_FAILED);
#endif /* _READDIR_AHEAD_MESSAGES_H_ */
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.c b/xlators/performance/readdir-ahead/src/readdir-ahead.c
index c3daf916e97..4ba7ee7077a 100644
--- a/xlators/performance/readdir-ahead/src/readdir-ahead.c
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead.c
@@ -23,68 +23,242 @@
* preloads on the directory.
*/
-#include "glusterfs.h"
-#include "xlator.h"
-#include "call-stub.h"
+#include <math.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/call-stub.h>
#include "readdir-ahead.h"
#include "readdir-ahead-mem-types.h"
-#include "defaults.h"
+#include <glusterfs/defaults.h>
#include "readdir-ahead-messages.h"
-static int rda_fill_fd(call_frame_t *, xlator_t *, fd_t *);
+static int
+rda_fill_fd(call_frame_t *, xlator_t *, fd_t *);
+
+static void
+rda_local_wipe(struct rda_local *local)
+{
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->xattrs)
+ dict_unref(local->xattrs);
+ if (local->inode)
+ inode_unref(local->inode);
+}
/*
* Get (or create) the fd context for storing prepopulated directory
* entries.
*/
-static struct
-rda_fd_ctx *get_rda_fd_ctx(fd_t *fd, xlator_t *this)
-{
- uint64_t val;
- struct rda_fd_ctx *ctx;
-
- LOCK(&fd->lock);
-
- if (__fd_ctx_get(fd, this, &val) < 0) {
- ctx = GF_CALLOC(1, sizeof(struct rda_fd_ctx),
- gf_rda_mt_rda_fd_ctx);
- if (!ctx)
- goto out;
-
- LOCK_INIT(&ctx->lock);
- INIT_LIST_HEAD(&ctx->entries.list);
- ctx->state = RDA_FD_NEW;
- /* ctx offset values initialized to 0 */
- ctx->xattrs = NULL;
-
- if (__fd_ctx_set(fd, this, (uint64_t) ctx) < 0) {
- GF_FREE(ctx);
- ctx = NULL;
- goto out;
- }
- } else {
- ctx = (struct rda_fd_ctx *) val;
- }
+static struct rda_fd_ctx *
+get_rda_fd_ctx(fd_t *fd, xlator_t *this)
+{
+ uint64_t val;
+ struct rda_fd_ctx *ctx;
+
+ LOCK(&fd->lock);
+
+ if (__fd_ctx_get(fd, this, &val) < 0) {
+ ctx = GF_CALLOC(1, sizeof(struct rda_fd_ctx), gf_rda_mt_rda_fd_ctx);
+ if (!ctx)
+ goto out;
+
+ LOCK_INIT(&ctx->lock);
+ INIT_LIST_HEAD(&ctx->entries.list);
+ ctx->state = RDA_FD_NEW;
+ /* ctx offset values initialized to 0 */
+ ctx->xattrs = NULL;
+
+ if (__fd_ctx_set(fd, this, (uint64_t)(uintptr_t)ctx) < 0) {
+ GF_FREE(ctx);
+ ctx = NULL;
+ goto out;
+ }
+ } else {
+ ctx = (struct rda_fd_ctx *)(uintptr_t)val;
+ }
out:
- UNLOCK(&fd->lock);
- return ctx;
+ UNLOCK(&fd->lock);
+ return ctx;
+}
+
+static rda_inode_ctx_t *
+__rda_inode_ctx_get(inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
+ uint64_t ctx_uint = 0;
+ rda_inode_ctx_t *ctx_p = NULL;
+
+ ret = __inode_ctx_get1(inode, this, &ctx_uint);
+ if (ret == 0)
+ return (rda_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+ ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_rda_mt_inode_ctx_t);
+ if (!ctx_p)
+ return NULL;
+
+ GF_ATOMIC_INIT(ctx_p->generation, 0);
+
+ ctx_uint = (uint64_t)(uintptr_t)ctx_p;
+ ret = __inode_ctx_set1(inode, this, &ctx_uint);
+ if (ret < 0) {
+ GF_FREE(ctx_p);
+ return NULL;
+ }
+
+ return ctx_p;
+}
+
+static int
+__rda_inode_ctx_update_iatts(inode_t *inode, xlator_t *this,
+ struct iatt *stbuf_in, struct iatt *stbuf_out,
+ uint64_t generation)
+{
+ rda_inode_ctx_t *ctx_p = NULL;
+ struct iatt tmp_stat = {
+ 0,
+ };
+
+ ctx_p = __rda_inode_ctx_get(inode, this);
+ if (!ctx_p)
+ return -1;
+
+ if ((!stbuf_in) || (stbuf_in->ia_ctime == 0)) {
+ /* A fop modified a file but valid stbuf is not provided.
+ * Can't update iatt to reflect results of fop and hence
+ * invalidate the iatt stored in dentry.
+ *
+ * An example of this case can be response of write request
+ * that is cached in write-behind.
+ */
+ if (stbuf_in)
+ tmp_stat = *stbuf_in;
+ else
+ tmp_stat = ctx_p->statbuf;
+ memset(&ctx_p->statbuf, 0, sizeof(ctx_p->statbuf));
+ gf_uuid_copy(ctx_p->statbuf.ia_gfid, tmp_stat.ia_gfid);
+ ctx_p->statbuf.ia_type = tmp_stat.ia_type;
+ GF_ATOMIC_INC(ctx_p->generation);
+ } else {
+ if (ctx_p->statbuf.ia_ctime) {
+ if (stbuf_in->ia_ctime < ctx_p->statbuf.ia_ctime) {
+ goto out;
+ }
+
+ if ((stbuf_in->ia_ctime == ctx_p->statbuf.ia_ctime) &&
+ (stbuf_in->ia_ctime_nsec < ctx_p->statbuf.ia_ctime_nsec)) {
+ goto out;
+ }
+ } else {
+ if ((generation != -1) &&
+ (generation != GF_ATOMIC_GET(ctx_p->generation)))
+ goto out;
+ }
+
+ ctx_p->statbuf = *stbuf_in;
+ }
+
+out:
+ if (stbuf_out)
+ *stbuf_out = ctx_p->statbuf;
+
+ return 0;
+}
+
+static int
+rda_inode_ctx_update_iatts(inode_t *inode, xlator_t *this,
+ struct iatt *stbuf_in, struct iatt *stbuf_out,
+ uint64_t generation)
+{
+ int ret = -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __rda_inode_ctx_update_iatts(inode, this, stbuf_in, stbuf_out,
+ generation);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
}
/*
* Reset the tracking state of the context.
*/
static void
-rda_reset_ctx(struct rda_fd_ctx *ctx)
-{
- ctx->state = RDA_FD_NEW;
- ctx->cur_offset = 0;
- ctx->cur_size = 0;
- ctx->next_offset = 0;
- ctx->op_errno = 0;
- gf_dirent_free(&ctx->entries);
- if (ctx->xattrs) {
- dict_unref (ctx->xattrs);
- ctx->xattrs = NULL;
+rda_reset_ctx(xlator_t *this, struct rda_fd_ctx *ctx)
+{
+ struct rda_priv *priv = NULL;
+
+ priv = this->private;
+
+ ctx->state = RDA_FD_NEW;
+ ctx->cur_offset = 0;
+ ctx->next_offset = 0;
+ ctx->op_errno = 0;
+
+ gf_dirent_free(&ctx->entries);
+ GF_ATOMIC_SUB(priv->rda_cache_size, ctx->cur_size);
+ ctx->cur_size = 0;
+
+ if (ctx->xattrs) {
+ dict_unref(ctx->xattrs);
+ ctx->xattrs = NULL;
+ }
+}
+
+static void
+rda_mark_inode_dirty(xlator_t *this, inode_t *inode)
+{
+ inode_t *parent = NULL;
+ fd_t *fd = NULL;
+ uint64_t val = 0;
+ int32_t ret = 0;
+ struct rda_fd_ctx *fd_ctx = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ parent = inode_parent(inode, NULL, NULL);
+ if (parent) {
+ LOCK(&parent->lock);
+ {
+ list_for_each_entry(fd, &parent->fd_list, inode_list)
+ {
+ val = 0;
+ fd_ctx_get(fd, this, &val);
+ if (val == 0)
+ continue;
+
+ fd_ctx = (void *)(uintptr_t)val;
+ uuid_utoa_r(inode->gfid, gfid);
+ if (!GF_ATOMIC_GET(fd_ctx->prefetching))
+ continue;
+
+ LOCK(&fd_ctx->lock);
+ {
+ if (GF_ATOMIC_GET(fd_ctx->prefetching)) {
+ if (fd_ctx->writes_during_prefetch == NULL)
+ fd_ctx->writes_during_prefetch = dict_new();
+
+ ret = dict_set_int8(fd_ctx->writes_during_prefetch,
+ gfid, 1);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "marking to invalidate stats of %s from an "
+ "in progress "
+ "prefetching has failed, might result in "
+ "stale stat to "
+ "application",
+ gfid);
+ }
+ }
+ }
+ UNLOCK(&fd_ctx->lock);
+ }
}
+ UNLOCK(&parent->lock);
+ inode_unref(parent);
+ }
+
+ return;
}
/*
@@ -95,12 +269,33 @@ rda_reset_ctx(struct rda_fd_ctx *ctx)
static gf_boolean_t
rda_can_serve_readdirp(struct rda_fd_ctx *ctx, size_t request_size)
{
- if ((ctx->state & RDA_FD_EOD) ||
- (ctx->state & RDA_FD_ERROR) ||
- (!(ctx->state & RDA_FD_PLUGGED) && (ctx->cur_size > 0)))
- return _gf_true;
+ if ((ctx->state & RDA_FD_EOD) || (ctx->state & RDA_FD_ERROR) ||
+ (!(ctx->state & RDA_FD_PLUGGED) && (ctx->cur_size > 0)) ||
+ (request_size && ctx->cur_size >= request_size))
+ return _gf_true;
+
+ return _gf_false;
+}
+
+void
+rda_inode_ctx_get_iatt(inode_t *inode, xlator_t *this, struct iatt *attr)
+{
+ rda_inode_ctx_t *ctx_p = NULL;
+
+ if (!inode || !this || !attr)
+ goto out;
+
+ LOCK(&inode->lock);
+ {
+ ctx_p = __rda_inode_ctx_get(inode, this);
+ if (ctx_p) {
+ *attr = ctx_p->statbuf;
+ }
+ }
+ UNLOCK(&inode->lock);
- return _gf_false;
+out:
+ return;
}
/*
@@ -108,242 +303,337 @@ rda_can_serve_readdirp(struct rda_fd_ctx *ctx, size_t request_size)
* buffer. ctx must be locked.
*/
static int32_t
-__rda_serve_readdirp(xlator_t *this, gf_dirent_t *entries, size_t request_size,
- struct rda_fd_ctx *ctx)
+__rda_fill_readdirp(xlator_t *this, gf_dirent_t *entries, size_t request_size,
+ struct rda_fd_ctx *ctx)
{
- gf_dirent_t *dirent, *tmp;
- size_t dirent_size, size = 0;
- int32_t count = 0;
- struct rda_priv *priv = this->private;
+ gf_dirent_t *dirent, *tmp;
+ size_t dirent_size, size = 0;
+ int32_t count = 0;
+ struct rda_priv *priv = NULL;
+ struct iatt tmp_stat = {
+ 0,
+ };
+
+ priv = this->private;
+
+ list_for_each_entry_safe(dirent, tmp, &ctx->entries.list, list)
+ {
+ dirent_size = gf_dirent_size(dirent->d_name);
+ if (size + dirent_size > request_size)
+ break;
+
+ memset(&tmp_stat, 0, sizeof(tmp_stat));
+
+ if (dirent->inode && (!((strcmp(dirent->d_name, ".") == 0) ||
+ (strcmp(dirent->d_name, "..") == 0)))) {
+ rda_inode_ctx_get_iatt(dirent->inode, this, &tmp_stat);
+ dirent->d_stat = tmp_stat;
+ }
- list_for_each_entry_safe(dirent, tmp, &ctx->entries.list, list) {
- dirent_size = gf_dirent_size(dirent->d_name);
- if (size + dirent_size > request_size)
- break;
+ size += dirent_size;
+ list_del_init(&dirent->list);
+ ctx->cur_size -= dirent_size;
- size += dirent_size;
- list_del_init(&dirent->list);
- ctx->cur_size -= dirent_size;
+ GF_ATOMIC_SUB(priv->rda_cache_size, dirent_size);
- list_add_tail(&dirent->list, &entries->list);
- ctx->cur_offset = dirent->d_off;
- count++;
- }
+ list_add_tail(&dirent->list, &entries->list);
+ ctx->cur_offset = dirent->d_off;
+ count++;
+ }
- if (ctx->cur_size <= priv->rda_low_wmark)
- ctx->state |= RDA_FD_PLUGGED;
+ if (ctx->cur_size <= priv->rda_low_wmark)
+ ctx->state |= RDA_FD_PLUGGED;
- return count;
+ return count;
}
static int32_t
-rda_readdirp_stub(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *xdata)
+__rda_serve_readdirp(xlator_t *this, struct rda_fd_ctx *ctx, size_t size,
+ gf_dirent_t *entries, int *op_errno)
{
- gf_dirent_t entries;
- int32_t ret;
- struct rda_fd_ctx *ctx;
- int op_errno = 0;
-
- ctx = get_rda_fd_ctx(fd, this);
- INIT_LIST_HEAD(&entries.list);
- ret = __rda_serve_readdirp(this, &entries, size, ctx);
+ int32_t ret = 0;
- if (!ret && (ctx->state & RDA_FD_ERROR)) {
- ret = -1;
- ctx->state &= ~RDA_FD_ERROR;
+ ret = __rda_fill_readdirp(this, entries, size, ctx);
- /*
- * the preload has stopped running in the event of an error, so
- * pass all future requests along
- */
- ctx->state |= RDA_FD_BYPASS;
- }
+ if (!ret && (ctx->state & RDA_FD_ERROR)) {
+ ret = -1;
+ ctx->state &= ~RDA_FD_ERROR;
/*
- * Use the op_errno sent by lower layers as xlators above will check
- * the op_errno for identifying whether readdir is completed or not.
+ * the preload has stopped running in the event of an error, so
+ * pass all future requests along
*/
- op_errno = ctx->op_errno;
-
- STACK_UNWIND_STRICT(readdirp, frame, ret, op_errno, &entries, xdata);
- gf_dirent_free(&entries);
-
- return 0;
+ ctx->state |= RDA_FD_BYPASS;
+ }
+ /*
+ * Use the op_errno sent by lower layers as xlators above will check
+ * the op_errno for identifying whether readdir is completed or not.
+ */
+ *op_errno = ctx->op_errno;
+
+ return ret;
}
static int32_t
rda_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off, dict_t *xdata)
-{
- struct rda_fd_ctx *ctx;
- call_stub_t *stub;
- int fill = 0;
-
- ctx = get_rda_fd_ctx(fd, this);
- if (!ctx)
- goto err;
-
- if (ctx->state & RDA_FD_BYPASS)
- goto bypass;
-
- LOCK(&ctx->lock);
-
- /* recheck now that we have the lock */
- if (ctx->state & RDA_FD_BYPASS) {
- UNLOCK(&ctx->lock);
- goto bypass;
- }
-
- /*
- * If a new read comes in at offset 0 and the buffer has been
- * completed, reset the context and kickstart the filler again.
- */
- if (!off && (ctx->state & RDA_FD_EOD) && (ctx->cur_size == 0)) {
- rda_reset_ctx(ctx);
- /*
- * Unref and discard the 'list of xattrs to be fetched'
- * stored during opendir call. This is done above - inside
- * rda_reset_ctx().
- * Now, ref the xdata passed by md-cache in actual readdirp()
- * call and use that for all subsequent internal readdirp()
- * requests issued by this xlator.
- */
- ctx->xattrs = dict_ref (xdata);
- fill = 1;
- }
-
- /*
- * If a readdir occurs at an unexpected offset or we already have a
- * request pending, admit defeat and just get out of the way.
- */
- if (off != ctx->cur_offset || ctx->stub) {
- ctx->state |= RDA_FD_BYPASS;
- UNLOCK(&ctx->lock);
- goto bypass;
- }
-
- stub = fop_readdirp_stub(frame, rda_readdirp_stub, fd, size, off, xdata);
- if (!stub) {
- UNLOCK(&ctx->lock);
- goto err;
- }
-
- /*
- * If we haven't bypassed the preload, this means we can either serve
- * the request out of the preload or the request that enables us to do
- * so is in flight...
- */
- if (rda_can_serve_readdirp(ctx, size))
- call_resume(stub);
- else
- ctx->stub = stub;
-
- UNLOCK(&ctx->lock);
-
- if (fill)
- rda_fill_fd(frame, this, fd);
-
- return 0;
+ off_t off, dict_t *xdata)
+{
+ struct rda_fd_ctx *ctx = NULL;
+ int fill = 0;
+ gf_dirent_t entries;
+ int ret = 0;
+ int op_errno = 0;
+ gf_boolean_t serve = _gf_false;
+
+ ctx = get_rda_fd_ctx(fd, this);
+ if (!ctx)
+ goto err;
+
+ if (ctx->state & RDA_FD_BYPASS)
+ goto bypass;
+
+ INIT_LIST_HEAD(&entries.list);
+ LOCK(&ctx->lock);
+
+ /* recheck now that we have the lock */
+ if (ctx->state & RDA_FD_BYPASS) {
+ UNLOCK(&ctx->lock);
+ goto bypass;
+ }
+
+ /*
+ * If a new read comes in at offset 0 and the buffer has been
+ * completed, reset the context and kickstart the filler again.
+ */
+ if (!off && (ctx->state & RDA_FD_EOD) && (ctx->cur_size == 0)) {
+ rda_reset_ctx(this, ctx);
+ /*
+ * Unref and discard the 'list of xattrs to be fetched'
+ * stored during opendir call. This is done above - inside
+ * rda_reset_ctx().
+ * Now, ref the xdata passed by md-cache in actual readdirp()
+ * call and use that for all subsequent internal readdirp()
+ * requests issued by this xlator.
+ */
+ ctx->xattrs = dict_ref(xdata);
+ fill = 1;
+ }
+
+ /*
+ * If a readdir occurs at an unexpected offset or we already have a
+ * request pending, admit defeat and just get out of the way.
+ */
+ if (off != ctx->cur_offset || ctx->stub) {
+ ctx->state |= RDA_FD_BYPASS;
+ UNLOCK(&ctx->lock);
+ goto bypass;
+ }
+
+ /*
+ * If we haven't bypassed the preload, this means we can either serve
+ * the request out of the preload or the request that enables us to do
+ * so is in flight...
+ */
+ if (rda_can_serve_readdirp(ctx, size)) {
+ ret = __rda_serve_readdirp(this, ctx, size, &entries, &op_errno);
+ serve = _gf_true;
+
+ if (op_errno == ENOENT &&
+ !((ctx->state & RDA_FD_EOD) && (ctx->cur_size == 0)))
+ op_errno = 0;
+ } else {
+ ctx->stub = fop_readdirp_stub(frame, NULL, fd, size, off, xdata);
+ if (!ctx->stub) {
+ UNLOCK(&ctx->lock);
+ goto err;
+ }
+
+ if (!(ctx->state & RDA_FD_RUNNING)) {
+ fill = 1;
+ if (!ctx->xattrs)
+ ctx->xattrs = dict_ref(xdata);
+ ctx->state |= RDA_FD_RUNNING;
+ }
+ }
+
+ UNLOCK(&ctx->lock);
+
+ if (serve) {
+ STACK_UNWIND_STRICT(readdirp, frame, ret, op_errno, &entries, xdata);
+ gf_dirent_free(&entries);
+ }
+
+ if (fill)
+ rda_fill_fd(frame, this, fd);
+
+ return 0;
bypass:
- STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
- return 0;
+ STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+ return 0;
err:
- STACK_UNWIND_STRICT(readdirp, frame, -1, ENOMEM, NULL, NULL);
- return 0;
+ STACK_UNWIND_STRICT(readdirp, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
}
static int32_t
rda_fill_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- gf_dirent_t *dirent, *tmp;
- struct rda_local *local = frame->local;
- struct rda_fd_ctx *ctx = local->ctx;
- struct rda_priv *priv = this->private;
- int fill = 1;
-
- LOCK(&ctx->lock);
-
- /* Verify that the preload buffer is still pending on this data. */
- if (ctx->next_offset != local->offset) {
- gf_msg(this->name, GF_LOG_ERROR,
- 0, READDIR_AHEAD_MSG_OUT_OF_SEQUENCE,
- "Out of sequence directory preload.");
- ctx->state |= (RDA_FD_BYPASS|RDA_FD_ERROR);
- ctx->op_errno = EUCLEAN;
-
- goto out;
- }
-
- if (entries) {
- list_for_each_entry_safe(dirent, tmp, &entries->list, list) {
- list_del_init(&dirent->list);
- /* must preserve entry order */
- list_add_tail(&dirent->list, &ctx->entries.list);
-
- ctx->cur_size += gf_dirent_size(dirent->d_name);
- ctx->next_offset = dirent->d_off;
- }
- }
-
- if (ctx->cur_size >= priv->rda_high_wmark)
- ctx->state &= ~RDA_FD_PLUGGED;
-
- if (!op_ret) {
- /* we've hit eod */
- ctx->state &= ~RDA_FD_RUNNING;
- ctx->state |= RDA_FD_EOD;
- ctx->op_errno = op_errno;
- } else if (op_ret == -1) {
- /* kill the preload and pend the error */
- ctx->state &= ~RDA_FD_RUNNING;
- ctx->state |= RDA_FD_ERROR;
- ctx->op_errno = op_errno;
- }
-
- /*
- * NOTE: The strict bypass logic in readdirp() means a pending request
- * is always based on ctx->cur_offset.
- */
- if (ctx->stub &&
- rda_can_serve_readdirp(ctx, ctx->stub->args.size)) {
- call_resume(ctx->stub);
- ctx->stub = NULL;
- }
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ gf_dirent_t *dirent = NULL;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t serve_entries;
+ struct rda_local *local = frame->local;
+ struct rda_fd_ctx *ctx = local->ctx;
+ struct rda_priv *priv = this->private;
+ int fill = 1;
+ size_t dirent_size = 0;
+ int ret = 0;
+ gf_boolean_t serve = _gf_false;
+ call_stub_t *stub = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+ uint64_t generation = 0;
+ call_frame_t *fill_frame = NULL;
+
+ INIT_LIST_HEAD(&serve_entries.list);
+ LOCK(&ctx->lock);
+
+ /* Verify that the preload buffer is still pending on this data. */
+ if (ctx->next_offset != local->offset) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, READDIR_AHEAD_MSG_OUT_OF_SEQUENCE,
+ "Out of sequence directory preload.");
+ ctx->state |= (RDA_FD_BYPASS | RDA_FD_ERROR);
+ ctx->op_errno = EUCLEAN;
+
+ goto out;
+ }
+
+ if (entries) {
+ list_for_each_entry_safe(dirent, tmp, &entries->list, list)
+ {
+ list_del_init(&dirent->list);
+
+ /* must preserve entry order */
+ list_add_tail(&dirent->list, &ctx->entries.list);
+ if (dirent->inode) {
+ /* If ctxp->stat is invalidated, don't update it
+ * with dirent->d_stat as we don't have
+ * generation number of the inode when readdirp
+ * request was initiated. So, we pass 0 for
+ * generation number
+ */
+
+ generation = -1;
+ if (ctx->writes_during_prefetch) {
+ memset(gfid, 0, sizeof(gfid));
+ uuid_utoa_r(dirent->inode->gfid, gfid);
+ if (dict_get(ctx->writes_during_prefetch, gfid))
+ generation = 0;
+ }
+
+ if (!((strcmp(dirent->d_name, ".") == 0) ||
+ (strcmp(dirent->d_name, "..") == 0))) {
+ rda_inode_ctx_update_iatts(dirent->inode, this,
+ &dirent->d_stat, &dirent->d_stat,
+ generation);
+ }
+ }
+
+ dirent_size = gf_dirent_size(dirent->d_name);
+
+ ctx->cur_size += dirent_size;
+
+ GF_ATOMIC_ADD(priv->rda_cache_size, dirent_size);
+
+ ctx->next_offset = dirent->d_off;
+ }
+ }
+
+ if (ctx->writes_during_prefetch) {
+ dict_unref(ctx->writes_during_prefetch);
+ ctx->writes_during_prefetch = NULL;
+ }
+
+ GF_ATOMIC_DEC(ctx->prefetching);
+
+ if (ctx->cur_size >= priv->rda_high_wmark)
+ ctx->state &= ~RDA_FD_PLUGGED;
+
+ if (!op_ret || op_errno == ENOENT) {
+ /* we've hit eod */
+ ctx->state &= ~RDA_FD_RUNNING;
+ ctx->state |= RDA_FD_EOD;
+ ctx->op_errno = op_errno;
+ } else if (op_ret == -1) {
+ /* kill the preload and pend the error */
+ ctx->state &= ~RDA_FD_RUNNING;
+ ctx->state |= RDA_FD_ERROR;
+ ctx->op_errno = op_errno;
+ }
+
+ /*
+ * NOTE: The strict bypass logic in readdirp() means a pending request
+ * is always based on ctx->cur_offset.
+ */
+ if (ctx->stub && rda_can_serve_readdirp(ctx, ctx->stub->args.size)) {
+ ret = __rda_serve_readdirp(this, ctx, ctx->stub->args.size,
+ &serve_entries, &op_errno);
+ serve = _gf_true;
+ stub = ctx->stub;
+ ctx->stub = NULL;
+ }
out:
- /*
- * If we have been marked for bypass and have no pending stub, clear the
- * run state so we stop preloading the context with entries.
- */
- if ((ctx->state & RDA_FD_BYPASS) && !ctx->stub)
- ctx->state &= ~RDA_FD_RUNNING;
-
- if (!(ctx->state & RDA_FD_RUNNING)) {
- fill = 0;
+ /*
+ * If we have been marked for bypass and have no pending stub, clear the
+ * run state so we stop preloading the context with entries.
+ */
+ if (!ctx->stub &&
+ ((ctx->state & RDA_FD_BYPASS) ||
+ GF_ATOMIC_GET(priv->rda_cache_size) > priv->rda_cache_limit))
+ ctx->state &= ~RDA_FD_RUNNING;
+
+ if (!(ctx->state & RDA_FD_RUNNING)) {
+ fill = 0;
if (ctx->xattrs) {
- /*
- * fill = 0 and hence rda_fill_fd() won't be invoked.
- * unref for ref taken in rda_fill_fd()
- */
- dict_unref (ctx->xattrs);
- ctx->xattrs = NULL;
+ /*
+ * fill = 0 and hence rda_fill_fd() won't be invoked.
+ * unref for ref taken in rda_fill_fd()
+ */
+ dict_unref(ctx->xattrs);
+ ctx->xattrs = NULL;
}
- STACK_DESTROY(ctx->fill_frame->root);
- ctx->fill_frame = NULL;
- }
- UNLOCK(&ctx->lock);
+ fill_frame = ctx->fill_frame;
+ ctx->fill_frame = NULL;
+ }
+
+ if (op_errno == ENOENT &&
+ !((ctx->state & RDA_FD_EOD) && (ctx->cur_size == 0)))
+ op_errno = 0;
+
+ UNLOCK(&ctx->lock);
+ if (fill_frame) {
+ rda_local_wipe(fill_frame->local);
+ STACK_DESTROY(fill_frame->root);
+ }
+
+ if (serve) {
+ STACK_UNWIND_STRICT(readdirp, stub->frame, ret, op_errno,
+ &serve_entries, xdata);
+ gf_dirent_free(&serve_entries);
+ call_stub_destroy(stub);
+ }
- if (fill)
- rda_fill_fd(frame, this, local->fd);
+ if (fill)
+ rda_fill_fd(frame, this, local->fd);
- return 0;
+ return 0;
}
/*
@@ -352,331 +642,741 @@ out:
static int
rda_fill_fd(call_frame_t *frame, xlator_t *this, fd_t *fd)
{
- call_frame_t *nframe = NULL;
- struct rda_local *local = NULL;
- struct rda_local *orig_local = frame->local;
- struct rda_fd_ctx *ctx;
- off_t offset;
- struct rda_priv *priv = this->private;
+ call_frame_t *nframe = NULL;
+ struct rda_local *local = NULL;
+ struct rda_local *orig_local = frame->local;
+ struct rda_fd_ctx *ctx;
+ off_t offset;
+ struct rda_priv *priv = this->private;
+
+ ctx = get_rda_fd_ctx(fd, this);
+ if (!ctx)
+ goto err;
+
+ LOCK(&ctx->lock);
+
+ if (ctx->state & RDA_FD_NEW) {
+ ctx->state &= ~RDA_FD_NEW;
+ ctx->state |= RDA_FD_RUNNING;
+ if (priv->rda_low_wmark)
+ ctx->state |= RDA_FD_PLUGGED;
+ }
+
+ offset = ctx->next_offset;
+
+ if (!ctx->fill_frame) {
+ nframe = copy_frame(frame);
+ if (!nframe) {
+ UNLOCK(&ctx->lock);
+ goto err;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ UNLOCK(&ctx->lock);
+ goto err;
+ }
- ctx = get_rda_fd_ctx(fd, this);
- if (!ctx)
- goto err;
+ local->ctx = ctx;
+ local->fd = fd_ref(fd);
+ nframe->local = local;
- LOCK(&ctx->lock);
+ ctx->fill_frame = nframe;
- if (ctx->state & RDA_FD_NEW) {
- ctx->state &= ~RDA_FD_NEW;
- ctx->state |= RDA_FD_RUNNING;
- if (priv->rda_low_wmark)
- ctx->state |= RDA_FD_PLUGGED;
- }
+ if (!ctx->xattrs && orig_local && orig_local->xattrs) {
+ /* when this function is invoked by rda_opendir_cbk */
+ ctx->xattrs = dict_ref(orig_local->xattrs);
+ }
+ } else {
+ nframe = ctx->fill_frame;
+ local = nframe->local;
+ }
- offset = ctx->next_offset;
+ local->offset = offset;
+ GF_ATOMIC_INC(ctx->prefetching);
- if (!ctx->fill_frame) {
- nframe = copy_frame(frame);
- if (!nframe) {
- UNLOCK(&ctx->lock);
- goto err;
- }
+ UNLOCK(&ctx->lock);
- local = mem_get0(this->local_pool);
- if (!local) {
- UNLOCK(&ctx->lock);
- goto err;
- }
+ STACK_WIND(nframe, rda_fill_fd_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, priv->rda_req_size,
+ offset, ctx->xattrs);
- local->ctx = ctx;
- local->fd = fd;
- nframe->local = local;
+ return 0;
- ctx->fill_frame = nframe;
+err:
+ if (nframe) {
+ rda_local_wipe(nframe->local);
+ FRAME_DESTROY(nframe);
+ }
- if (!ctx->xattrs && orig_local && orig_local->xattrs) {
- /* when this function is invoked by rda_opendir_cbk */
- ctx->xattrs = dict_ref(orig_local->xattrs);
+ return -1;
+}
+
+static int32_t
+rda_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ if (!op_ret)
+ rda_fill_fd(frame, this, fd);
+
+ RDA_STACK_UNWIND(opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+static int32_t
+rda_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ int op_errno = 0;
+ struct rda_local *local = NULL;
+
+ if (xdata) {
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto unwind;
}
- } else {
- nframe = ctx->fill_frame;
- local = nframe->local;
- }
- local->offset = offset;
+ /*
+ * Retrieve list of keys set by md-cache xlator and store it
+ * in local to be consumed in rda_opendir_cbk
+ */
+ local->xattrs = dict_copy_with_ref(xdata, NULL);
+ frame->local = local;
+ }
+
+ STACK_WIND(frame, rda_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(opendir, frame, -1, op_errno, fd, xdata);
+ return 0;
+}
+
+static int32_t
+rda_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
- UNLOCK(&ctx->lock);
+ if (op_ret < 0)
+ goto unwind;
- STACK_WIND(nframe, rda_fill_fd_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp, fd,
- priv->rda_req_size, offset, ctx->xattrs);
+ local = frame->local;
- return 0;
+ rda_mark_inode_dirty(this, local->inode);
-err:
- if (nframe)
- FRAME_DESTROY(nframe);
+ rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out,
+ local->generation);
- return -1;
+unwind:
+ RDA_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, &postbuf_out,
+ xdata);
+ return 0;
}
+static int32_t
+rda_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t off, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(writev, frame, this, fd->inode, xdata, fd,
+ vector, count, off, flags, iobref);
+ return 0;
+}
-static int
-rda_unpack_mdc_loaded_keys_to_dict(char *payload, dict_t *dict)
+static int32_t
+rda_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- int ret = -1;
- char *mdc_key = NULL;
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
- if (!payload || !dict) {
- goto out;
- }
+ if (op_ret < 0)
+ goto unwind;
- mdc_key = strtok(payload, " ");
- while (mdc_key != NULL) {
- ret = dict_set_int8 (dict, mdc_key, 0);
- if (ret) {
- goto out;
- }
- mdc_key = strtok(NULL, " ");
- }
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out,
+ local->generation);
-out:
- return ret;
+unwind:
+ RDA_STACK_UNWIND(fallocate, frame, op_ret, op_errno, prebuf, &postbuf_out,
+ xdata);
+ return 0;
}
+static int32_t
+rda_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(fallocate, frame, this, fd->inode, xdata, fd,
+ keep_size, offset, len);
+ return 0;
+}
static int32_t
-rda_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+rda_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- struct rda_local *local = frame->local;
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
- if (!op_ret)
- rda_fill_fd(frame, this, fd);
+ if (op_ret < 0)
+ goto unwind;
- frame->local = NULL;
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out,
+ local->generation);
- STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, xdata);
+unwind:
+ RDA_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, &postbuf_out,
+ xdata);
+ return 0;
+}
- if (local && local->xattrs) {
- /* unref for dict_new() done in rda_opendir */
- dict_unref (local->xattrs);
- local->xattrs = NULL;
- }
+static int32_t
+rda_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(zerofill, frame, this, fd->inode, xdata, fd,
+ offset, len);
+ return 0;
+}
+
+static int32_t
+rda_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
- if (local)
- mem_put (local);
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out,
+ local->generation);
- return 0;
+unwind:
+ RDA_STACK_UNWIND(discard, frame, op_ret, op_errno, prebuf, &postbuf_out,
+ xdata);
+ return 0;
}
static int32_t
-rda_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
- dict_t *xdata)
-{
- int ret = -1;
- int op_errno = 0;
- char *payload = NULL;
- struct rda_local *local = NULL;
- dict_t *xdata_from_req = NULL;
-
- if (xdata) {
- /*
- * Retrieve list of keys set by md-cache xlator and store it
- * in local to be consumed in rda_opendir_cbk
- */
- ret = dict_get_str (xdata, GF_MDC_LOADED_KEY_NAMES, &payload);
- if (ret)
- goto wind;
-
- xdata_from_req = dict_new();
- if (!xdata_from_req) {
- op_errno = ENOMEM;
- goto unwind;
- }
+rda_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(discard, frame, this, fd->inode, xdata, fd,
+ offset, len);
+ return 0;
+}
- ret = rda_unpack_mdc_loaded_keys_to_dict((char *) payload,
- xdata_from_req);
- if (ret) {
- dict_unref(xdata_from_req);
- goto wind;
- }
+static int32_t
+rda_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
- local = mem_get0(this->local_pool);
- if (!local) {
- dict_unref(xdata_from_req);
- op_errno = ENOMEM;
- goto unwind;
- }
+ if (op_ret < 0)
+ goto unwind;
- local->xattrs = xdata_from_req;
- frame->local = local;
- }
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out,
+ local->generation);
-wind:
- if (xdata)
- /* Remove the key after consumption. */
- dict_del (xdata, GF_MDC_LOADED_KEY_NAMES);
+unwind:
+ RDA_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, prebuf, &postbuf_out,
+ xdata);
+ return 0;
+}
- STACK_WIND(frame, rda_opendir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
- return 0;
+static int32_t
+rda_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(ftruncate, frame, this, fd->inode, xdata, fd,
+ offset);
+ return 0;
+}
+
+static int32_t
+rda_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, postbuf, &postbuf_out,
+ local->generation);
unwind:
- STACK_UNWIND_STRICT(opendir, frame, -1, op_errno, fd, xdata);
- return 0;
+ RDA_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, &postbuf_out,
+ xdata);
+ return 0;
+}
+
+static int32_t
+rda_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(truncate, frame, this, loc->inode, xdata, loc,
+ offset);
+ return 0;
+}
+
+static int32_t
+rda_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL,
+ local->generation);
+unwind:
+ RDA_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+static int32_t
+rda_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(setxattr, frame, this, loc->inode, xdata, loc,
+ dict, flags);
+ return 0;
+}
+
+static int32_t
+rda_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL,
+ local->generation);
+unwind:
+ RDA_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+static int32_t
+rda_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(fsetxattr, frame, this, fd->inode, xdata, fd,
+ dict, flags);
+ return 0;
+}
+
+static int32_t
+rda_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, statpost, &postbuf_out,
+ local->generation);
+
+unwind:
+ RDA_STACK_UNWIND(setattr, frame, op_ret, op_errno, statpre, &postbuf_out,
+ xdata);
+ return 0;
+}
+
+static int32_t
+rda_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(setattr, frame, this, loc->inode, xdata, loc,
+ stbuf, valid);
+ return 0;
+}
+
+static int32_t
+rda_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+ struct iatt postbuf_out = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, statpost, &postbuf_out,
+ local->generation);
+
+unwind:
+ RDA_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, statpre, &postbuf_out,
+ xdata);
+ return 0;
+}
+
+static int32_t
+rda_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(fsetattr, frame, this, fd->inode, xdata, fd,
+ stbuf, valid);
+ return 0;
+}
+
+static int32_t
+rda_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL,
+ local->generation);
+unwind:
+ RDA_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+static int32_t
+rda_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(removexattr, frame, this, loc->inode, xdata,
+ loc, name);
+ return 0;
+}
+
+static int32_t
+rda_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct rda_local *local = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ local = frame->local;
+ rda_mark_inode_dirty(this, local->inode);
+ rda_inode_ctx_update_iatts(local->inode, this, NULL, NULL,
+ local->generation);
+unwind:
+ RDA_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+static int32_t
+rda_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ RDA_COMMON_MODIFICATION_FOP(fremovexattr, frame, this, fd->inode, xdata, fd,
+ name);
+ return 0;
}
static int32_t
rda_releasedir(xlator_t *this, fd_t *fd)
{
- uint64_t val;
- struct rda_fd_ctx *ctx;
+ uint64_t val;
+ struct rda_fd_ctx *ctx;
+
+ if (fd_ctx_del(fd, this, &val) < 0)
+ return -1;
+
+ ctx = (struct rda_fd_ctx *)(uintptr_t)val;
+ if (!ctx)
+ return 0;
+
+ rda_reset_ctx(this, ctx);
+
+ if (ctx->fill_frame)
+ STACK_DESTROY(ctx->fill_frame->root);
+
+ if (ctx->stub)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ READDIR_AHEAD_MSG_DIR_RELEASE_PENDING_STUB,
+ "released a directory with a pending stub");
- if (fd_ctx_del(fd, this, &val) < 0)
- return -1;
+ GF_FREE(ctx);
+ return 0;
+}
- ctx = (struct rda_fd_ctx *) val;
- if (!ctx)
- return 0;
+static int
+rda_forget(xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx_uint = 0;
+ rda_inode_ctx_t *ctx = NULL;
- rda_reset_ctx(ctx);
+ inode_ctx_del1(inode, this, &ctx_uint);
+ if (!ctx_uint)
+ return 0;
- if (ctx->fill_frame)
- STACK_DESTROY(ctx->fill_frame->root);
+ ctx = (rda_inode_ctx_t *)(uintptr_t)ctx_uint;
- if (ctx->stub)
- gf_msg(this->name, GF_LOG_ERROR, 0,
- READDIR_AHEAD_MSG_DIR_RELEASE_PENDING_STUB,
- "released a directory with a pending stub");
+ GF_FREE(ctx);
- GF_FREE(ctx);
- return 0;
+ return 0;
}
int32_t
mem_acct_init(xlator_t *this)
{
- int ret = -1;
+ int ret = -1;
- if (!this)
- goto out;
+ if (!this)
+ goto out;
- ret = xlator_mem_acct_init(this, gf_rda_mt_end + 1);
+ ret = xlator_mem_acct_init(this, gf_rda_mt_end + 1);
- if (ret != 0)
- gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
- READDIR_AHEAD_MSG_NO_MEMORY, "Memory accounting init"
- "failed");
+ if (ret != 0)
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, READDIR_AHEAD_MSG_NO_MEMORY,
+ "Memory accounting init "
+ "failed");
out:
- return ret;
+ return ret;
}
int
reconfigure(xlator_t *this, dict_t *options)
{
- struct rda_priv *priv = this->private;
-
- GF_OPTION_RECONF("rda-request-size", priv->rda_req_size, options,
- uint32, err);
- GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options, size_uint64,
- err);
- GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options, size_uint64,
- err);
-
- return 0;
+ struct rda_priv *priv = this->private;
+
+ GF_OPTION_RECONF("rda-request-size", priv->rda_req_size, options,
+ size_uint64, err);
+ GF_OPTION_RECONF("rda-low-wmark", priv->rda_low_wmark, options, size_uint64,
+ err);
+ GF_OPTION_RECONF("rda-high-wmark", priv->rda_high_wmark, options,
+ size_uint64, err);
+ GF_OPTION_RECONF("rda-cache-limit", priv->rda_cache_limit, options,
+ size_uint64, err);
+ GF_OPTION_RECONF("parallel-readdir", priv->parallel_readdir, options, bool,
+ err);
+ GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, err);
+
+ return 0;
err:
- return -1;
+ return -1;
}
int
init(xlator_t *this)
{
- struct rda_priv *priv = NULL;
+ struct rda_priv *priv = NULL;
- GF_VALIDATE_OR_GOTO("readdir-ahead", this, err);
+ GF_VALIDATE_OR_GOTO("readdir-ahead", this, err);
- if (!this->children || this->children->next) {
- gf_msg(this->name, GF_LOG_ERROR, 0,
- READDIR_AHEAD_MSG_XLATOR_CHILD_MISCONFIGURED,
- "FATAL: readdir-ahead not configured with exactly one"
- " child");
- goto err;
- }
+ if (!this->children || this->children->next) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ READDIR_AHEAD_MSG_XLATOR_CHILD_MISCONFIGURED,
+ "FATAL: readdir-ahead not configured with exactly one"
+ " child");
+ goto err;
+ }
- if (!this->parents) {
- gf_msg(this->name, GF_LOG_WARNING, 0,
- READDIR_AHEAD_MSG_VOL_MISCONFIGURED,
- "dangling volume. check volfile ");
- }
+ if (!this->parents) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ READDIR_AHEAD_MSG_VOL_MISCONFIGURED,
+ "dangling volume. check volfile ");
+ }
- priv = GF_CALLOC(1, sizeof(struct rda_priv), gf_rda_mt_rda_priv);
- if (!priv)
- goto err;
- this->private = priv;
+ priv = GF_CALLOC(1, sizeof(struct rda_priv), gf_rda_mt_rda_priv);
+ if (!priv)
+ goto err;
+ this->private = priv;
- this->local_pool = mem_pool_new(struct rda_local, 32);
- if (!this->local_pool)
- goto err;
+ GF_ATOMIC_INIT(priv->rda_cache_size, 0);
- GF_OPTION_INIT("rda-request-size", priv->rda_req_size, uint32, err);
- GF_OPTION_INIT("rda-low-wmark", priv->rda_low_wmark, size_uint64, err);
- GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size_uint64, err);
+ this->local_pool = mem_pool_new(struct rda_local, 32);
+ if (!this->local_pool)
+ goto err;
- return 0;
+ GF_OPTION_INIT("rda-request-size", priv->rda_req_size, size_uint64, err);
+ GF_OPTION_INIT("rda-low-wmark", priv->rda_low_wmark, size_uint64, err);
+ GF_OPTION_INIT("rda-high-wmark", priv->rda_high_wmark, size_uint64, err);
+ GF_OPTION_INIT("rda-cache-limit", priv->rda_cache_limit, size_uint64, err);
+ GF_OPTION_INIT("parallel-readdir", priv->parallel_readdir, bool, err);
+ GF_OPTION_INIT("pass-through", this->pass_through, bool, err);
+
+ return 0;
err:
- if (this->local_pool)
- mem_pool_destroy(this->local_pool);
- if (priv)
- GF_FREE(priv);
+ if (this->local_pool)
+ mem_pool_destroy(this->local_pool);
+ if (priv)
+ GF_FREE(priv);
- return -1;
+ return -1;
}
-
void
fini(xlator_t *this)
{
- GF_VALIDATE_OR_GOTO ("readdir-ahead", this, out);
+ GF_VALIDATE_OR_GOTO("readdir-ahead", this, out);
- GF_FREE(this->private);
+ GF_FREE(this->private);
out:
- return;
+ return;
}
struct xlator_fops fops = {
- .opendir = rda_opendir,
- .readdirp = rda_readdirp,
+ .opendir = rda_opendir,
+ .readdirp = rda_readdirp,
+ /* inode write */
+ /* TODO: invalidate a dentry's stats if its pointing to a directory
+ * when entry operations happen in that directory
+ */
+ .writev = rda_writev,
+ .truncate = rda_truncate,
+ .ftruncate = rda_ftruncate,
+ .fallocate = rda_fallocate,
+ .discard = rda_discard,
+ .zerofill = rda_zerofill,
+ /* metadata write */
+ .setxattr = rda_setxattr,
+ .fsetxattr = rda_fsetxattr,
+ .setattr = rda_setattr,
+ .fsetattr = rda_fsetattr,
+ .removexattr = rda_removexattr,
+ .fremovexattr = rda_fremovexattr,
};
struct xlator_cbks cbks = {
- .releasedir = rda_releasedir,
+ .releasedir = rda_releasedir,
+ .forget = rda_forget,
};
struct volume_options options[] = {
- { .key = {"rda-request-size"},
- .type = GF_OPTION_TYPE_INT,
- .min = 4096,
- .max = 131072,
- .default_value = "131072",
- .description = "readdir-ahead request size",
- },
- { .key = {"rda-low-wmark"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = 0,
- .max = 10 * GF_UNIT_MB,
- .default_value = "4096",
- .description = "the value under which we plug",
- },
- { .key = {"rda-high-wmark"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = 0,
- .max = 100 * GF_UNIT_MB,
- .default_value = "131072",
- .description = "the value over which we unplug",
- },
- { .key = {NULL} },
+ {
+ .key = {"readdir-ahead"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable readdir-ahead",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {
+ .key = {"rda-request-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 4096,
+ .max = 131072,
+ .default_value = "131072",
+ .description = "size of buffer in readdirp calls initiated by "
+ "readdir-ahead ",
+ },
+ {
+ .key = {"rda-low-wmark"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = 10 * GF_UNIT_MB,
+ .default_value = "4096",
+ .description = "the value under which readdir-ahead plugs",
+ },
+ {
+ .key = {"rda-high-wmark"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = 100 * GF_UNIT_MB,
+ .default_value = "128KB",
+ .description = "the value over which readdir-ahead unplugs",
+ },
+ {
+ .key = {"rda-cache-limit"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .min = 0,
+ .max = INFINITY,
+ .default_value = "10MB",
+ .description = "maximum size of cache consumed by readdir-ahead "
+ "xlator. This value is global and total memory "
+ "consumption by readdir-ahead is capped by this "
+ "value, irrespective of the number/size of "
+ "directories cached",
+ },
+ {.key = {"parallel-readdir"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .op_version = {GD_OP_VERSION_3_10_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .default_value = "off",
+ .description = "If this option is enabled, the readdir operation "
+ "is performed in parallel on all the bricks, thus "
+ "improving the performance of readdir. Note that "
+ "the performance improvement is higher in large "
+ "clusters"},
+ {.key = {"pass-through"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"readdir-ahead"},
+ .description = "Enable/Disable readdir ahead translator"},
+ {.key = {NULL}},
};
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "readdir-ahead",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/performance/readdir-ahead/src/readdir-ahead.h b/xlators/performance/readdir-ahead/src/readdir-ahead.h
index f030f10a0af..619c41059ff 100644
--- a/xlators/performance/readdir-ahead/src/readdir-ahead.h
+++ b/xlators/performance/readdir-ahead/src/readdir-ahead.h
@@ -12,37 +12,87 @@
#define __READDIR_AHEAD_H
/* state flags */
-#define RDA_FD_NEW (1 << 0)
-#define RDA_FD_RUNNING (1 << 1)
-#define RDA_FD_EOD (1 << 2)
-#define RDA_FD_ERROR (1 << 3)
-#define RDA_FD_BYPASS (1 << 4)
-#define RDA_FD_PLUGGED (1 << 5)
+#define RDA_FD_NEW (1 << 0)
+#define RDA_FD_RUNNING (1 << 1)
+#define RDA_FD_EOD (1 << 2)
+#define RDA_FD_ERROR (1 << 3)
+#define RDA_FD_BYPASS (1 << 4)
+#define RDA_FD_PLUGGED (1 << 5)
+
+#define RDA_COMMON_MODIFICATION_FOP(name, frame, this, __inode, __xdata, \
+ args...) \
+ do { \
+ struct rda_local *__local = NULL; \
+ rda_inode_ctx_t *ctx_p = NULL; \
+ \
+ __local = mem_get0(this->local_pool); \
+ __local->inode = inode_ref(__inode); \
+ LOCK(&__inode->lock); \
+ { \
+ ctx_p = __rda_inode_ctx_get(__inode, this); \
+ } \
+ UNLOCK(&__inode->lock); \
+ __local->generation = GF_ATOMIC_GET(ctx_p->generation); \
+ \
+ frame->local = __local; \
+ if (__xdata) \
+ __local->xattrs = dict_ref(__xdata); \
+ \
+ STACK_WIND(frame, rda_##name##_cbk, FIRST_CHILD(this), \
+ FIRST_CHILD(this)->fops->name, args, __xdata); \
+ } while (0)
+
+#define RDA_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ struct rda_local *__local = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ if (__local) { \
+ rda_local_wipe(__local); \
+ mem_put(__local); \
+ } \
+ } while (0)
struct rda_fd_ctx {
- off_t cur_offset; /* current head of the ctx */
- size_t cur_size; /* current size of the preload */
- off_t next_offset; /* tail of the ctx */
- uint32_t state;
- gf_lock_t lock;
- gf_dirent_t entries;
- call_frame_t *fill_frame;
- call_stub_t *stub;
- int op_errno;
- dict_t *xattrs; /* md-cache keys to be sent in readdirp() */
+ off_t cur_offset; /* current head of the ctx */
+ size_t cur_size; /* current size of the preload */
+ off_t next_offset; /* tail of the ctx */
+ uint32_t state;
+ gf_lock_t lock;
+ gf_dirent_t entries;
+ call_frame_t *fill_frame;
+ call_stub_t *stub;
+ int op_errno;
+ dict_t *xattrs; /* md-cache keys to be sent in readdirp() */
+ dict_t *writes_during_prefetch;
+ gf_atomic_t prefetching;
};
struct rda_local {
- struct rda_fd_ctx *ctx;
- fd_t *fd;
- off_t offset;
- dict_t *xattrs; /* md-cache keys to be sent in readdirp() */
+ struct rda_fd_ctx *ctx;
+ fd_t *fd;
+ dict_t *xattrs; /* md-cache keys to be sent in readdirp() */
+ inode_t *inode;
+ off_t offset;
+ uint64_t generation;
+ int32_t skip_dir;
};
struct rda_priv {
- uint32_t rda_req_size;
- uint64_t rda_low_wmark;
- uint64_t rda_high_wmark;
+ uint64_t rda_req_size;
+ uint64_t rda_low_wmark;
+ uint64_t rda_high_wmark;
+ uint64_t rda_cache_limit;
+ gf_atomic_t rda_cache_size;
+ gf_boolean_t parallel_readdir;
};
+typedef struct rda_inode_ctx {
+ struct iatt statbuf;
+ gf_atomic_t generation;
+} rda_inode_ctx_t;
+
#endif /* __READDIR_AHEAD_H */