summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Anand Avati <avati@redhat.com> 2012-09-13 22:26:59 -0700
committer Anand Avati <avati@redhat.com> 2012-10-01 12:45:18 -0700
commit c903de38da917239fe905fc6efa1f413d120fc04 (patch)
tree 6387677f13c700305e704dad7d274f656526287b
parent dd8eb06e26138e149252365043706067782416af (diff)
write-behind: implement causal ordering and other cleanup
Rules of causal ordering implemented: - If request A arrives after the acknowledgement (to the app, i.e., STACK_UNWIND) of another request B, then request B is said to have 'caused' request A. - (corollary) Two requests which, at any point of time, are simultaneously unacknowledged in the system can never 'cause' each other (wb_inode->gen is based on this) - If request A is caused by request B, AND request A's region has an overlap with request B's region, then the fulfillment of request A is guaranteed to happen after the fulfillment of B. - FD of origin is not considered for the determination of causal ordering. - Append operation's region is considered the whole file. Other cleanup: - wb_file_t not required any more. - wb_local_t not required any more. - O_RDONLY fd's operations now go through the queue to make sure writes in the requested region get fulfilled before getting processed. - O_SYNC fd's operations now go through the queue to make sure previously acknowledged writes on the file (via other fds) are fulfilled before getting processed. - Option to not honor O_SYNC is now removed. - Option to ignore O_DIRECT is added (useful when running a VM and the drive appears with NCQ/TCQ or WCE=1 for the guest.) - Option to disable_first_nbytes is removed (as the cause of the bug which required this was diagnosed to be missing TCP_NODELAY.) - General cleanup and better conformance to coding style and convention. Change-Id: Ib44fb72da3727246b4a85174cb568c2f0231f6de BUG: 857673 Signed-off-by: Anand Avati <avati@redhat.com> Reviewed-on: http://review.gluster.org/3947 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Jeff Darcy <jdarcy@redhat.com> Reviewed-by: Amar Tumballi <amarts@redhat.com>
-rw-r--r-- xlators/mgmt/glusterd/src/glusterd-volgen.c 2
-rw-r--r-- xlators/performance/write-behind/src/write-behind.c 3486
2 files changed, 1070 insertions, 2418 deletions
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 685470d6473..acfe8386515 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -164,6 +164,8 @@ static struct volopt_map_entry glusterd_volopt_map[] = {
{"performance.disk-usage-limit", "performance/quota", NULL, NULL, NO_DOC, 0},
{"performance.min-free-disk-limit", "performance/quota", NULL, NULL, NO_DOC, 0},
{"performance.write-behind-window-size", "performance/write-behind", "cache-size", NULL, DOC},
+ {"performance.strict-o-direct", "performance/write-behind", "strict-O_DIRECT", NULL, DOC},
+ {"performance.strict-write-ordering", "performance/write-behind", "strict-write-ordering", NULL, DOC},
{"performance.read-ahead-page-count", "performance/read-ahead", "page-count", NULL, DOC},
{"network.frame-timeout", "protocol/client", NULL, NULL, NO_DOC, 0},
diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
index ad1e5f03111..53506d948ba 100644
--- a/xlators/performance/write-behind/src/write-behind.c
+++ b/xlators/performance/write-behind/src/write-behind.c
@@ -8,8 +8,6 @@
cases as published by the Free Software Foundation.
*/
-/*TODO: check for non null wb_file_data before getting wb_file */
-
#ifndef _CONFIG_H
#define _CONFIG_H
@@ -26,6 +24,7 @@
#include "common-utils.h"
#include "call-stub.h"
#include "statedump.h"
+#include "defaults.h"
#include "write-behind-mem-types.h"
#define MAX_VECTOR_COUNT 8
@@ -34,96 +33,146 @@
typedef struct list_head list_head_t;
struct wb_conf;
-struct wb_page;
struct wb_inode;
typedef struct wb_inode {
- size_t window_conf;
- size_t window_current;
- size_t aggregate_current;
- int32_t op_ret;
+ ssize_t window_conf;
+ ssize_t window_current;
+ ssize_t transit; /* size of data stack_wound, and yet
+ to be fulfilled (wb_fulfill_cbk).
+ used for trickling_writes
+ */
+
+ int32_t op_ret; /* Last found op_ret and op_errno
+ while completing a liability
+ operation. Will be picked by
+ the next arriving writev/flush/fsync
+ */
int32_t op_errno;
- list_head_t request;
- list_head_t passive_requests;
+
+ list_head_t all; /* All requests, from enqueue() till destroy().
+ Used only for resetting generation
+ number when empty.
+ */
+ list_head_t todo; /* Work to do (i.e, STACK_WIND to server).
+ Once we STACK_WIND, the entry is taken
+ off the list. If it is non-sync write,
+ then we continue to track it via @liability
+ or @temptation depending on the status
+ of its writeback.
+ */
+ list_head_t liability; /* Non-sync writes which are lied
+ (STACK_UNWIND'ed to caller) but ack
+ from server not yet complete. This
+ is the "liability" which we hold, and
+ must guarantee that dependent operations
+ which arrive later (which overlap, etc.)
+ are issued only after their dependencies
+ in this list are "fulfilled".
+
+ Server acks for entries in this list
+ shrink the window.
+
+ The sum total of all req->write_size
+ of entries in this list must be kept less
+ than the permitted window size.
+ */
+ list_head_t temptation; /* Operations for which we are tempted
+ to 'lie' (write-behind), but temporarily
+ holding off (because of insufficient
+ window capacity, etc.)
+
+ This is the list to look at to grow
+ the window (in __wb_pick_unwinds()).
+
+ Entries typically get chosen for
+ write-behind from this list, and therefore
+ get "upgraded" to the "liability" list.
+ */
+ uint64_t gen; /* Liability generation number. Represents
+ the current 'state' of liability. Every
+ new addition to the liability list bumps
+ the generation number.
+
+ a newly arrived request is only required
+ to perform causal checks against the entries
+ in the liability list which were present
+ at the time of its addition. the generation
+ number at the time of its addition is stored
+ in the request and used during checks.
+
+ the liability list can grow while the request
+ waits in the todo list waiting for its
+ dependent operations to complete. however
+ it is not of the request's concern to depend
+ itself on those new entries which arrived
+ after it arrived (i.e, those that have a
+ liability generation higher than itself)
+ */
gf_lock_t lock;
xlator_t *this;
-}wb_inode_t;
-
-typedef struct wb_file {
- int32_t flags;
- int disabled;
- fd_t *fd;
- size_t disable_till;
- enum _gf_boolean dont_wind;
-} wb_file_t;
+} wb_inode_t;
typedef struct wb_request {
- list_head_t list;
+ list_head_t all;
+ list_head_t todo;
+ list_head_t lie; /* either in @liability or @temptation */
list_head_t winds;
list_head_t unwinds;
- list_head_t other_requests;
+
call_stub_t *stub;
- size_t write_size;
+
+ size_t write_size; /* currently held size
+ (after collapsing) */
+ size_t orig_size; /* size which arrived with the request.
+ This is the size by which we grow
+ the window when unwinding the frame.
+ */
+ size_t total_size; /* valid only in @head in wb_fulfill().
+ This is the size with which we perform
+ STACK_WIND to server and therefore the
+ amount by which we shrink the window.
+ */
+
+ int op_ret;
+ int op_errno;
+
int32_t refcount;
wb_inode_t *wb_inode;
glusterfs_fop_t fop;
gf_lkowner_t lk_owner;
- union {
- struct {
- char write_behind;
- char stack_wound;
- char got_reply;
- char virgin;
- char flush_all; /* while trying to sync to back-end,
- * don't wait till a data of size
- * equal to configured aggregate-size
- * is accumulated, instead sync
- * whatever data currently present in
- * request queue.
- */
-
- }write_request;
-
- struct {
- char marked_for_resume;
- }other_requests;
- }flags;
+ struct iobref *iobref;
+ uint64_t gen; /* inode liability state at the time of
+ request arrival */
+
+ fd_t *fd;
+ struct {
+ size_t size; /* 0 size == till infinity */
+ off_t off;
+ int append:1; /* offset is invalid. only one
+ outstanding append at a time */
+ int tempted:1; /* true only for non-sync writes */
+ int lied:1; /* sin committed */
+ int fulfilled:1; /* got server acknowledgement */
+ int go:1; /* enough aggregating, good to go */
+ } ordering;
} wb_request_t;
-struct wb_conf {
+
+typedef struct wb_conf {
uint64_t aggregate_size;
uint64_t window_size;
- uint64_t disable_till;
- gf_boolean_t enable_O_SYNC;
gf_boolean_t flush_behind;
- gf_boolean_t enable_trickling_writes;
-};
+ gf_boolean_t trickling_writes;
+ gf_boolean_t strict_write_ordering;
+ gf_boolean_t strict_O_DIRECT;
+} wb_conf_t;
-typedef struct wb_local {
- list_head_t winds;
- int32_t flags;
- fd_t *fd;
- wb_request_t *request;
- int op_ret;
- int op_errno;
- call_frame_t *frame;
- int32_t reply_count;
- wb_inode_t *wb_inode;
-} wb_local_t;
-
-typedef struct wb_conf wb_conf_t;
-typedef struct wb_page wb_page_t;
-int32_t
-wb_process_queue (call_frame_t *frame, wb_inode_t *wb_inode);
-
-ssize_t
-wb_sync (call_frame_t *frame, wb_inode_t *wb_inode, list_head_t *winds);
+void
+wb_process_queue (wb_inode_t *wb_inode);
-ssize_t
-__wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_size,
- char enable_trickling_writes);
wb_inode_t *
__wb_inode_ctx_get (xlator_t *this, inode_t *inode)
@@ -156,37 +205,6 @@ out:
}
-wb_file_t *
-__wb_fd_ctx_get (xlator_t *this, fd_t *fd)
-{
- wb_file_t *wb_file = NULL;
- uint64_t value = 0;
-
- __fd_ctx_get (fd, this, &value);
- wb_file = (wb_file_t *)(unsigned long)value;
-
- return wb_file;
-}
-
-
-wb_file_t *
-wb_fd_ctx_get (xlator_t *this, fd_t *fd)
-{
- wb_file_t *wb_file = NULL;
-
- GF_VALIDATE_OR_GOTO ("write-behind", this, out);
- GF_VALIDATE_OR_GOTO (this->name, fd, out);
-
- LOCK (&fd->lock);
- {
- wb_file = __wb_fd_ctx_get (this, fd);
- }
- UNLOCK (&fd->lock);
-
-out:
- return wb_file;
-}
-
/*
Below is a succinct explanation of the code deciding whether two regions
overlap, from Pavan <tcp@gluster.com>.
@@ -211,19 +229,26 @@ out:
}
*/
-static inline char
-wb_requests_overlap (wb_request_t *request1, wb_request_t *request2)
+gf_boolean_t
+wb_requests_overlap (wb_request_t *req1, wb_request_t *req2)
{
- off_t r1_start = 0, r1_end = 0, r2_start = 0, r2_end = 0;
+ off_t r1_start = 0;
+ off_t r1_end = 0;
+ off_t r2_start = 0;
+ off_t r2_end = 0;
enum _gf_boolean do_overlap = 0;
- r1_start = request1->stub->args.writev.off;
- r1_end = r1_start + iov_length (request1->stub->args.writev.vector,
- request1->stub->args.writev.count);
+ r1_start = req1->ordering.off;
+ if (req1->ordering.size)
+ r1_end = r1_start + req1->ordering.size - 1;
+ else
+ r1_end = ULLONG_MAX;
- r2_start = request2->stub->args.writev.off;
- r2_end = r2_start + iov_length (request2->stub->args.writev.vector,
- request2->stub->args.writev.count);
+ r2_start = req2->ordering.off;
+ if (req2->ordering.size)
+ r2_end = r2_start + req2->ordering.size - 1;
+ else
+ r2_end = ULLONG_MAX;
do_overlap = ((r1_end >= r2_start) && (r2_end >= r1_start));
@@ -231,72 +256,112 @@ wb_requests_overlap (wb_request_t *request1, wb_request_t *request2)
}
-static inline char
-wb_overlap (list_head_t *list, wb_request_t *request)
+gf_boolean_t
+wb_requests_conflict (wb_request_t *lie, wb_request_t *req)
{
- char overlap = 0;
- wb_request_t *tmp = NULL;
+ wb_conf_t *conf = NULL;
- GF_VALIDATE_OR_GOTO ("write-behind", list, out);
- GF_VALIDATE_OR_GOTO ("write-behind", request, out);
+ conf = req->wb_inode->this->private;
- list_for_each_entry (tmp, list, list) {
- if (tmp == request) {
- break;
- }
+ if (lie == req)
+ /* request cannot conflict with itself */
+ return _gf_false;
- overlap = wb_requests_overlap (tmp, request);
- if (overlap) {
- break;
- }
+ if (lie->gen >= req->gen)
+ /* this liability entry was behind
+ us in the todo list */
+ return _gf_false;
+
+ if (lie->ordering.append)
+ /* all modifications wait for the completion
+ of outstanding append */
+ return _gf_true;
+
+ if (conf->strict_write_ordering)
+ /* We are sure (lie->gen < req->gen) by now. So
+ skip overlap check if strict write ordering is
+ requested and always return "conflict" against a
+ lower generation lie. */
+ return _gf_true;
+
+ return wb_requests_overlap (lie, req);
+}
+
+
+gf_boolean_t
+wb_liability_has_conflict (wb_inode_t *wb_inode, wb_request_t *req)
+{
+ wb_request_t *each = NULL;
+
+ list_for_each_entry (each, &wb_inode->liability, lie) {
+ if (wb_requests_conflict (each, req))
+ return _gf_true;
}
-out:
- return overlap;
+ return _gf_false;
}
static int
-__wb_request_unref (wb_request_t *this)
+__wb_request_unref (wb_request_t *req)
{
- int ret = -1;
+ int ret = -1;
+ wb_inode_t *wb_inode = NULL;
- GF_VALIDATE_OR_GOTO ("write-behind", this, out);
+ wb_inode = req->wb_inode;
- if (this->refcount <= 0) {
+ if (req->refcount <= 0) {
gf_log ("wb-request", GF_LOG_WARNING,
- "refcount(%d) is <= 0", this->refcount);
+ "refcount(%d) is <= 0", req->refcount);
goto out;
}
- ret = --this->refcount;
- if (this->refcount == 0) {
- list_del_init (&this->list);
- if (this->stub && this->stub->fop == GF_FOP_WRITE) {
- call_stub_destroy (this->stub);
- }
+ ret = --req->refcount;
+ if (req->refcount == 0) {
+ list_del_init (&req->todo);
+ list_del_init (&req->lie);
- GF_FREE (this);
- }
+ list_del_init (&req->all);
+ if (list_empty (&wb_inode->all)) {
+ wb_inode->gen = 0;
+ /* in case of accounting errors? */
+ wb_inode->window_current = 0;
+ }
+
+ list_del_init (&req->winds);
+ list_del_init (&req->unwinds);
+
+ if (req->stub && req->ordering.tempted) {
+ call_stub_destroy (req->stub);
+ req->stub = NULL;
+ } /* else we would have call_resume()'ed */
+
+ if (req->iobref)
+ iobref_unref (req->iobref);
+ if (req->fd)
+ fd_unref (req->fd);
+
+ GF_FREE (req);
+ }
out:
return ret;
}
static int
-wb_request_unref (wb_request_t *this)
+wb_request_unref (wb_request_t *req)
{
wb_inode_t *wb_inode = NULL;
int ret = -1;
- GF_VALIDATE_OR_GOTO ("write-behind", this, out);
+ GF_VALIDATE_OR_GOTO ("write-behind", req, out);
- wb_inode = this->wb_inode;
+ wb_inode = req->wb_inode;
LOCK (&wb_inode->lock);
{
- ret = __wb_request_unref (this);
+ ret = __wb_request_unref (req);
}
UNLOCK (&wb_inode->lock);
@@ -306,117 +371,155 @@ out:
static wb_request_t *
-__wb_request_ref (wb_request_t *this)
+__wb_request_ref (wb_request_t *req)
{
- GF_VALIDATE_OR_GOTO ("write-behind", this, out);
+ GF_VALIDATE_OR_GOTO ("write-behind", req, out);
- if (this->refcount < 0) {
+ if (req->refcount < 0) {
gf_log ("wb-request", GF_LOG_WARNING,
- "refcount(%d) is < 0", this->refcount);
- this = NULL;
+ "refcount(%d) is < 0", req->refcount);
+ req = NULL;
goto out;
}
- this->refcount++;
+ req->refcount++;
out:
- return this;
+ return req;
}
wb_request_t *
-wb_request_ref (wb_request_t *this)
+wb_request_ref (wb_request_t *req)
{
wb_inode_t *wb_inode = NULL;
- GF_VALIDATE_OR_GOTO ("write-behind", this, out);
+ GF_VALIDATE_OR_GOTO ("write-behind", req, out);
- wb_inode = this->wb_inode;
+ wb_inode = req->wb_inode;
LOCK (&wb_inode->lock);
{
- this = __wb_request_ref (this);
+ req = __wb_request_ref (req);
}
UNLOCK (&wb_inode->lock);
out:
- return this;
+ return req;
}
-wb_request_t *
-wb_enqueue (wb_inode_t *wb_inode, call_stub_t *stub)
+gf_boolean_t
+wb_enqueue_common (wb_inode_t *wb_inode, call_stub_t *stub, int tempted)
{
- wb_request_t *request = NULL, *tmp = NULL;
- call_frame_t *frame = NULL;
- wb_local_t *local = NULL;
- struct iovec *vector = NULL;
- int32_t count = 0;
+ wb_request_t *req = NULL;
GF_VALIDATE_OR_GOTO ("write-behind", wb_inode, out);
GF_VALIDATE_OR_GOTO (wb_inode->this->name, stub, out);
- request = GF_CALLOC (1, sizeof (*request), gf_wb_mt_wb_request_t);
- if (request == NULL) {
+ req = GF_CALLOC (1, sizeof (*req), gf_wb_mt_wb_request_t);
+ if (!req)
goto out;
- }
- INIT_LIST_HEAD (&request->list);
- INIT_LIST_HEAD (&request->winds);
- INIT_LIST_HEAD (&request->unwinds);
- INIT_LIST_HEAD (&request->other_requests);
+ INIT_LIST_HEAD (&req->all);
+ INIT_LIST_HEAD (&req->todo);
+ INIT_LIST_HEAD (&req->lie);
+ INIT_LIST_HEAD (&req->winds);
+ INIT_LIST_HEAD (&req->unwinds);
- request->stub = stub;
- request->wb_inode = wb_inode;
- request->fop = stub->fop;
-
- frame = stub->frame;
- local = frame->local;
- if (local) {
- local->request = request;
- }
+ req->stub = stub;
+ req->wb_inode = wb_inode;
+ req->fop = stub->fop;
+ req->ordering.tempted = tempted;
if (stub->fop == GF_FOP_WRITE) {
- vector = stub->args.writev.vector;
- count = stub->args.writev.count;
+ req->write_size = iov_length (stub->args.writev.vector,
+ stub->args.writev.count);
- request->write_size = iov_length (vector, count);
- if (local) {
- local->op_ret = request->write_size;
- local->op_errno = 0;
- }
+ /* req->write_size can change as we collapse
+ small writes. But the window needs to grow
+ only by how much we acknowledge the app. so
+ copy the original size in orig_size for the
+ purpose of accounting.
+ */
+ req->orig_size = req->write_size;
+
+ /* Let's be optimistic that we can
+ lie about it
+ */
+ req->op_ret = req->write_size;
+ req->op_errno = 0;
- request->flags.write_request.virgin = 1;
+ if (stub->args.writev.fd->flags & O_APPEND)
+ req->ordering.append = 1;
}
- request->lk_owner = frame->root->lk_owner;
+ req->lk_owner = stub->frame->root->lk_owner;
+
+ switch (stub->fop) {
+ case GF_FOP_WRITE:
+ req->ordering.off = stub->args.writev.off;
+ req->ordering.size = req->write_size;
+
+ req->fd = fd_ref (stub->args.writev.fd);
+
+ break;
+ case GF_FOP_READ:
+ req->ordering.off = stub->args.readv.off;
+ req->ordering.size = stub->args.readv.size;
+
+ req->fd = fd_ref (stub->args.readv.fd);
+
+ break;
+ case GF_FOP_TRUNCATE:
+ req->ordering.off = stub->args.truncate.off;
+ req->ordering.size = 0; /* till infinity */
+ break;
+ case GF_FOP_FTRUNCATE:
+ req->ordering.off = stub->args.ftruncate.off;
+ req->ordering.size = 0; /* till infinity */
+
+ req->fd = fd_ref (stub->args.ftruncate.fd);
+
+ break;
+ default:
+ break;
+ }
LOCK (&wb_inode->lock);
{
- list_add_tail (&request->list, &wb_inode->request);
- if (stub->fop == GF_FOP_WRITE) {
- /* reference for stack winding */
- __wb_request_ref (request);
-
- /* reference for stack unwinding */
- __wb_request_ref (request);
-
- wb_inode->aggregate_current += request->write_size;
- } else {
- list_for_each_entry (tmp, &wb_inode->request, list) {
- if (tmp->stub && tmp->stub->fop
- == GF_FOP_WRITE) {
- tmp->flags.write_request.flush_all = 1;
- }
- }
-
- /*reference for resuming */
- __wb_request_ref (request);
- }
+ list_add_tail (&req->all, &wb_inode->all);
+
+ req->gen = wb_inode->gen;
+
+ list_add_tail (&req->todo, &wb_inode->todo);
+ __wb_request_ref (req); /* for wind */
+
+ if (req->ordering.tempted) {
+ list_add_tail (&req->lie, &wb_inode->temptation);
+ __wb_request_ref (req); /* for unwind */
+ }
}
UNLOCK (&wb_inode->lock);
out:
- return request;
+ if (!req)
+ return _gf_false;
+
+ return _gf_true;
+}
+
+
+gf_boolean_t
+wb_enqueue (wb_inode_t *wb_inode, call_stub_t *stub)
+{
+ return wb_enqueue_common (wb_inode, stub, 0);
+}
+
+
+gf_boolean_t
+wb_enqueue_tempted (wb_inode_t *wb_inode, call_stub_t *stub)
+{
+ return wb_enqueue_common (wb_inode, stub, 1);
}
@@ -426,18 +529,18 @@ __wb_inode_create (xlator_t *this, inode_t *inode)
wb_inode_t *wb_inode = NULL;
wb_conf_t *conf = NULL;
- GF_VALIDATE_OR_GOTO ("write-behind", this, out);
GF_VALIDATE_OR_GOTO (this->name, inode, out);
conf = this->private;
wb_inode = GF_CALLOC (1, sizeof (*wb_inode), gf_wb_mt_wb_inode_t);
- if (wb_inode == NULL) {
+ if (!wb_inode)
goto out;
- }
- INIT_LIST_HEAD (&wb_inode->request);
- INIT_LIST_HEAD (&wb_inode->passive_requests);
+ INIT_LIST_HEAD (&wb_inode->all);
+ INIT_LIST_HEAD (&wb_inode->todo);
+ INIT_LIST_HEAD (&wb_inode->liability);
+ INIT_LIST_HEAD (&wb_inode->temptation);
wb_inode->this = this;
@@ -452,58 +555,18 @@ out:
}
-wb_file_t *
-wb_file_create (xlator_t *this, fd_t *fd, int32_t flags)
-{
- wb_file_t *file = NULL;
- wb_conf_t *conf = NULL;
-
- GF_VALIDATE_OR_GOTO ("write-behind", this, out);
- GF_VALIDATE_OR_GOTO (this->name, fd, out);
-
- conf = this->private;
-
- file = GF_CALLOC (1, sizeof (*file), gf_wb_mt_wb_file_t);
- if (file == NULL) {
- goto out;
- }
-
- /*
- fd_ref() not required, file should never decide the existence of
- an fd
- */
- file->fd= fd;
- /* If O_DIRECT then, we disable chaching */
- if (((flags & O_DIRECT) == O_DIRECT)
- || ((flags & O_ACCMODE) == O_RDONLY)
- || (((flags & O_SYNC) == O_SYNC)
- && conf->enable_O_SYNC == _gf_true)) {
- file->disabled = 1;
- }
-
- file->flags = flags;
-
- fd_ctx_set (fd, this, (uint64_t)(unsigned long)file);
-
-out:
- return file;
-}
-
-
wb_inode_t *
wb_inode_create (xlator_t *this, inode_t *inode)
{
wb_inode_t *wb_inode = NULL;
- GF_VALIDATE_OR_GOTO ("write-behind", this, out);
GF_VALIDATE_OR_GOTO (this->name, inode, out);
LOCK (&inode->lock);
{
wb_inode = __wb_inode_ctx_get (this, inode);
- if (wb_inode == NULL) {
+ if (!wb_inode)
wb_inode = __wb_inode_create (this, inode);
- }
}
UNLOCK (&inode->lock);
@@ -524,2390 +587,1049 @@ out:
}
-int32_t
-wb_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
+void
+__wb_fulfill_request (wb_request_t *req)
{
- wb_inode_t *wb_inode = NULL;
+ wb_inode_t *wb_inode = NULL;
- if (op_ret < 0) {
- goto unwind;
- }
+ wb_inode = req->wb_inode;
- wb_inode = wb_inode_create (this, inode);
- if (wb_inode == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
+ req->ordering.fulfilled = 1;
+ wb_inode->window_current -= req->total_size;
+ wb_inode->transit -= req->total_size;
-unwind:
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
- dict, postparent);
+ if (!req->ordering.lied) {
+ /* TODO: fail the req->frame with error if
+ necessary
+ */
+ }
- return 0;
+ __wb_request_unref (req);
}
-int32_t
-wb_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xdata)
+void
+wb_head_done (wb_request_t *head)
{
- STACK_WIND (frame, wb_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
- return 0;
-}
-
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ wb_inode_t *wb_inode = NULL;
-int32_t
-wb_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
- dict_t *xdata)
-{
- wb_local_t *local = NULL;
- list_head_t *winds = NULL;
- wb_inode_t *wb_inode = NULL;
- wb_request_t *request = NULL, *dummy = NULL;
- wb_local_t *per_request_local = NULL;
- int32_t ret = -1;
- int32_t total_write_size = 0;
- fd_t *fd = NULL;
+ wb_inode = head->wb_inode;
- GF_ASSERT (frame);
- GF_ASSERT (this);
+ LOCK (&wb_inode->lock);
+ {
+ list_for_each_entry_safe (req, tmp, &head->winds, winds) {
+ __wb_fulfill_request (req);
+ }
+ __wb_fulfill_request (head);
+ }
+ UNLOCK (&wb_inode->lock);
+}
- local = frame->local;
- winds = &local->winds;
- fd = local->fd;
+void
+wb_inode_err (wb_inode_t *wb_inode, int op_errno)
+{
+ LOCK (&wb_inode->lock);
+ {
+ wb_inode->op_ret = -1;
+ wb_inode->op_errno = op_errno;
+ }
+ UNLOCK (&wb_inode->lock);
+}
- wb_inode = wb_inode_ctx_get (this, fd->inode);
- GF_VALIDATE_OR_GOTO (this->name, wb_inode, out);
- LOCK (&wb_inode->lock);
- {
- list_for_each_entry_safe (request, dummy, winds, winds) {
- request->flags.write_request.got_reply = 1;
-
- if (!request->flags.write_request.write_behind
- && (op_ret == -1)) {
- per_request_local = request->stub->frame->local;
- per_request_local->op_ret = op_ret;
- per_request_local->op_errno = op_errno;
- }
-
- if (request->flags.write_request.write_behind) {
- wb_inode->window_current -= request->write_size;
- total_write_size += request->write_size;
- }
-
- __wb_request_unref (request);
- }
+int
+wb_fulfill_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ wb_inode_t *wb_inode = NULL;
+ wb_request_t *head = NULL;
- if (op_ret == -1) {
- wb_inode->op_ret = op_ret;
- wb_inode->op_errno = op_errno;
- } else if (op_ret < total_write_size) {
- /*
- * We've encountered a short write, for whatever reason.
- * Set an EIO error for the next fop. This should be
- * valid for writev or flush (close).
- *
- * TODO: Retry the write so we can potentially capture
- * a real error condition (i.e., ENOSPC).
- */
- wb_inode->op_ret = -1;
- wb_inode->op_errno = EIO;
- }
- }
- UNLOCK (&wb_inode->lock);
+ head = frame->local;
+ frame->local = NULL;
- ret = wb_process_queue (frame, wb_inode);
- if (ret == -1) {
- if (errno == ENOMEM) {
- LOCK (&wb_inode->lock);
- {
- wb_inode->op_ret = -1;
- wb_inode->op_errno = ENOMEM;
- }
- UNLOCK (&wb_inode->lock);
- }
+ wb_inode = head->wb_inode;
- gf_log (this->name, GF_LOG_WARNING,
- "request queue processing failed");
- }
+ if (op_ret == -1) {
+ wb_inode_err (wb_inode, op_errno);
+ } else if (op_ret < head->total_size) {
+ /*
+ * We've encountered a short write, for whatever reason.
+ * Set an EIO error for the next fop. This should be
+ * valid for writev or flush (close).
+ *
+ * TODO: Retry the write so we can potentially capture
+ * a real error condition (i.e., ENOSPC).
+ */
+ wb_inode_err (wb_inode, EIO);
+ }
- /* safe place to do fd_unref */
- fd_unref (fd);
+ wb_head_done (head);
- frame->local = NULL;
-
- if (local != NULL) {
- mem_put (frame->local);
- }
+ wb_process_queue (wb_inode);
STACK_DESTROY (frame->root);
-out:
return 0;
}
-ssize_t
-wb_sync (call_frame_t *frame, wb_inode_t *wb_inode, list_head_t *winds)
-{
- wb_request_t *dummy = NULL, *request = NULL;
- wb_request_t *first_request = NULL, *next = NULL;
- size_t total_count = 0, count = 0;
- size_t copied = 0;
- call_frame_t *sync_frame = NULL;
- struct iobref *iobref = NULL;
- wb_local_t *local = NULL;
- struct iovec *vector = NULL;
- ssize_t current_size = 0, bytes = 0;
- size_t bytecount = 0;
- wb_conf_t *conf = NULL;
- fd_t *fd = NULL;
- int32_t op_errno = -1;
- off_t next_offset_expected = 0;
- gf_lkowner_t lk_owner = {0, };
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR ((wb_inode ? wb_inode->this->name
- : "write-behind"), frame,
- out, bytes, -1);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, wb_inode, out, bytes,
- -1);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, winds, out, bytes,
- -1);
-
- conf = wb_inode->this->private;
- list_for_each_entry (request, winds, winds) {
- total_count += request->stub->args.writev.count;
- if (total_count > 0) {
- break;
- }
- }
+#define WB_IOV_LOAD(vec, cnt, req, head) do { \
+ memcpy (&vec[cnt], req->stub->args.writev.vector, \
+ (req->stub->args.writev.count * sizeof(vec[0]))); \
+ cnt += req->stub->args.writev.count; \
+ head->total_size += req->write_size; \
+ } while (0)
- if (total_count == 0) {
- gf_log (wb_inode->this->name, GF_LOG_TRACE,
- "no vectors are to be synced");
- goto out;
- }
- list_for_each_entry_safe (request, dummy, winds, winds) {
- if (!vector) {
- vector = GF_MALLOC (VECTORSIZE (MAX_VECTOR_COUNT),
- gf_wb_mt_iovec);
- if (vector == NULL) {
- bytes = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- iobref = iobref_new ();
- if (iobref == NULL) {
- bytes = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- local = mem_get0 (THIS->local_pool);
- if (local == NULL) {
- bytes = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- INIT_LIST_HEAD (&local->winds);
-
- first_request = request;
- current_size = 0;
-
- next_offset_expected = request->stub->args.writev.off
- + request->write_size;
- lk_owner = request->lk_owner;
- }
+void
+wb_fulfill_head (wb_inode_t *wb_inode, wb_request_t *head)
+{
+ struct iovec vector[MAX_VECTOR_COUNT];
+ int count = 0;
+ wb_request_t *req = NULL;
+ call_frame_t *frame = NULL;
- count += request->stub->args.writev.count;
- bytecount = VECTORSIZE (request->stub->args.writev.count);
- memcpy (((char *)vector)+copied,
- request->stub->args.writev.vector,
- bytecount);
- copied += bytecount;
+ frame = create_frame (wb_inode->this, wb_inode->this->ctx->pool);
+ if (!frame)
+ goto enomem;
- current_size += request->write_size;
+ WB_IOV_LOAD (vector, count, head, head);
- if (request->stub->args.writev.iobref) {
- iobref_merge (iobref,
- request->stub->args.writev.iobref);
- }
+ list_for_each_entry (req, &head->winds, winds) {
+ WB_IOV_LOAD (vector, count, req, head);
- next = NULL;
- if (request->winds.next != winds) {
- next = list_entry (request->winds.next,
- wb_request_t, winds);
- }
+ iobref_merge (head->stub->args.writev.iobref,
+ req->stub->args.writev.iobref);
+ }
- list_del_init (&request->winds);
- list_add_tail (&request->winds, &local->winds);
-
- if ((!next)
- || ((count + next->stub->args.writev.count)
- > MAX_VECTOR_COUNT)
- || ((current_size + next->write_size)
- > conf->aggregate_size)
- || (next_offset_expected != next->stub->args.writev.off)
- || (!is_same_lkowner (&lk_owner, &next->lk_owner))
- || (request->stub->args.writev.fd
- != next->stub->args.writev.fd)) {
-
- sync_frame = copy_frame (frame);
- if (sync_frame == NULL) {
- bytes = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- frame->root->lk_owner = lk_owner;
-
- local->wb_inode = wb_inode;
- sync_frame->local = local;
-
- local->fd = fd = fd_ref (request->stub->args.writev.fd);
-
- bytes += current_size;
- STACK_WIND (sync_frame, wb_sync_cbk,
- FIRST_CHILD(sync_frame->this),
- FIRST_CHILD(sync_frame->this)->fops->writev,
- fd, vector, count,
- first_request->stub->args.writev.off,
- first_request->stub->args.writev.flags,
- iobref, NULL);
+ frame->root->lk_owner = head->lk_owner;
+ frame->local = head;
- iobref_unref (iobref);
- GF_FREE (vector);
- first_request = NULL;
- iobref = NULL;
- vector = NULL;
- sync_frame = NULL;
- local = NULL;
- copied = count = 0;
- }
- }
+ LOCK (&wb_inode->lock);
+ {
+ wb_inode->transit += head->total_size;
+ }
+ UNLOCK (&wb_inode->lock);
-out:
- if (sync_frame != NULL) {
- sync_frame->local = NULL;
- STACK_DESTROY (sync_frame->root);
- }
+ STACK_WIND (frame, wb_fulfill_cbk, FIRST_CHILD (frame->this),
+ FIRST_CHILD (frame->this)->fops->writev,
+ head->fd, vector, count,
+ head->stub->args.writev.off,
+ head->stub->args.writev.flags,
+ head->stub->args.writev.iobref, NULL);
- if (local != NULL) {
- /* had we winded these requests, we would have unrefed
- * in wb_sync_cbk.
- */
- list_for_each_entry_safe (request, dummy, &local->winds,
- winds) {
- wb_request_unref (request);
- }
+ return;
+enomem:
+ wb_inode_err (wb_inode, ENOMEM);
- mem_put (local);
- local = NULL;
- }
+ wb_head_done (head);
- if (iobref != NULL) {
- iobref_unref (iobref);
- }
+ return;
+}
- GF_FREE (vector);
-
- if (bytes == -1) {
- /*
- * had we winded these requests, we would have unrefed
- * in wb_sync_cbk.
- */
- if (local) {
- list_for_each_entry_safe (request, dummy, &local->winds,
- winds) {
- wb_request_unref (request);
- }
- }
- if (wb_inode != NULL) {
- LOCK (&wb_inode->lock);
- {
- wb_inode->op_ret = -1;
- wb_inode->op_errno = op_errno;
- }
- UNLOCK (&wb_inode->lock);
- }
- }
-
- return bytes;
-}
+/*
+ * NEXT_HEAD: helper for wb_fulfill() only. Winds whatever has been
+ * aggregated under the current @head (if any) through wb_fulfill_head()
+ * and restarts aggregation with @req as the new head. It reads and writes
+ * the caller's locals wb_inode, expected_offset, curr_aggregate and
+ * vector_count.
+ *
+ * The counters are seeded with the new head's own size and iovec count,
+ * not zero: the head is wound together with the requests chained on it,
+ * so it has to be counted against conf->aggregate_size and
+ * MAX_VECTOR_COUNT too.
+ */
+#define NEXT_HEAD(head, req) do { \
+ if (head) \
+ wb_fulfill_head (wb_inode, head); \
+ head = req; \
+ expected_offset = req->stub->args.writev.off + \
+ req->write_size; \
+ curr_aggregate = req->write_size; \
+ vector_count = req->stub->args.writev.count; \
+ } while (0)
-int32_t
-wb_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *buf, dict_t *xdata)
+/*
+ * wb_fulfill: wind the write requests queued on @liabilities (linked
+ * through req->winds), merging neighbours into a single write when
+ * possible.
+ *
+ * A request is chained onto the current head's winds list only if it has
+ * the same fd and lk_owner as the head, starts exactly where the
+ * aggregate built so far ends, and keeps the aggregate within
+ * conf->aggregate_size bytes and MAX_VECTOR_COUNT iovecs. Otherwise the
+ * current head is wound with wb_fulfill_head() and the request becomes
+ * the new head. The last head is wound after the loop.
+ */
+void
+wb_fulfill (wb_inode_t *wb_inode, list_head_t *liabilities)
{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- call_frame_t *process_frame = NULL;
- wb_inode_t *wb_inode = NULL;
- int32_t ret = -1;
-
- GF_ASSERT (frame);
- GF_ASSERT (this);
+ wb_request_t *req = NULL;
+ wb_request_t *head = NULL;
+ wb_request_t *tmp = NULL;
+ wb_conf_t *conf = NULL;
+ off_t expected_offset = 0;
+ size_t curr_aggregate = 0;
+ size_t vector_count = 0;
- local = frame->local;
- wb_inode = local->wb_inode;
+ conf = wb_inode->this->private;
- request = local->request;
- if (request) {
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- }
+ list_for_each_entry_safe (req, tmp, liabilities, winds) {
+ list_del_init (&req->winds);
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
+ if (!head) {
+ NEXT_HEAD (head, req);
+ continue;
+ }
- if (request != NULL) {
- wb_request_unref (request);
- }
+ if (req->fd != head->fd) {
+ NEXT_HEAD (head, req);
+ continue;
+ }
- if (process_frame != NULL) {
- ret = wb_process_queue (process_frame, wb_inode);
- if (ret == -1) {
- if ((errno == ENOMEM) && (wb_inode != NULL)) {
- LOCK (&wb_inode->lock);
- {
- wb_inode->op_ret = -1;
- wb_inode->op_errno = ENOMEM;
- }
- UNLOCK (&wb_inode->lock);
- }
-
- gf_log (this->name, GF_LOG_WARNING,
- "request queue processing failed");
- }
+ if (!is_same_lkowner (&req->lk_owner, &head->lk_owner)) {
+ NEXT_HEAD (head, req);
+ continue;
+ }
- STACK_DESTROY (process_frame->root);
- }
+ if (expected_offset != req->stub->args.writev.off) {
+ NEXT_HEAD (head, req);
+ continue;
+ }
- return 0;
-}
+ if ((curr_aggregate + req->write_size) > conf->aggregate_size) {
+ NEXT_HEAD (head, req);
+ continue;
+ }
+ if (vector_count + req->stub->args.writev.count >
+ MAX_VECTOR_COUNT) {
+ NEXT_HEAD (head, req);
+ continue;
+ }
-static int32_t
-wb_stat_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- GF_ASSERT (frame);
- GF_ASSERT (this);
+ list_add_tail (&req->winds, &head->winds);
+ curr_aggregate += req->write_size;
+ vector_count += req->stub->args.writev.count;
+ /* the aggregate now ends after @req; without this a later
+ request starting at the same offset as @req would be
+ appended at the wrong position */
+ expected_offset += req->write_size;
+ }
- STACK_WIND (frame, wb_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc, xdata);
- return 0;
+ if (head)
+ wb_fulfill_head (wb_inode, head);
+ return;
}
-int32_t
-wb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+/*
+ * wb_do_unwinds: acknowledge every write request queued on @lies
+ * (linked through req->unwinds) back to its caller.
+ *
+ * Each request's stub frame is unwound as a writev with the request's
+ * op_ret/op_errno and a zero-filled iatt for both stat arguments, the
+ * stub's frame pointer is cleared, and the request is taken off the
+ * list and unref'ed.
+ */
+void
+wb_do_unwinds (wb_inode_t *wb_inode, list_head_t *lies)
{
- wb_inode_t *wb_inode = NULL;
- wb_local_t *local = NULL;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1, op_errno = EINVAL;
-
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- GF_VALIDATE_OR_GOTO (frame->this->name, loc, unwind);
-
- if (loc->inode) {
- wb_inode = wb_inode_ctx_get (this, loc->inode);
- }
-
- local = mem_get0 (this->local_pool);
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- local->wb_inode = wb_inode;
-
- frame->local = local;
-
- if (wb_inode) {
- stub = fop_stat_stub (frame, wb_stat_helper, loc, xdata);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- request = wb_enqueue (wb_inode, stub);
- if (request == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ call_frame_t *frame = NULL;
+ struct iatt buf = {0, };
- ret = wb_process_queue (frame, wb_inode);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "request queue processing failed");
- }
- } else {
- STACK_WIND (frame, wb_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc, xdata);
- }
+ list_for_each_entry_safe (req, tmp, lies, unwinds) {
+ frame = req->stub->frame;
- return 0;
-unwind:
- STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL, NULL);
+ STACK_UNWIND_STRICT (writev, frame, req->op_ret, req->op_errno,
+ &buf, &buf, NULL); /* :O */
+ req->stub->frame = NULL;
- if (stub) {
- call_stub_destroy (stub);
+ list_del_init (&req->unwinds);
+ wb_request_unref (req);
}
- return 0;
+ return;
}
-int32_t
-wb_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *buf, dict_t *xdata)
+/*
+ * __wb_pick_unwinds: move requests from wb_inode->temptation onto @lies
+ * so that the caller can acknowledge them. wb_process_queue() calls this
+ * with wb_inode->lock held.
+ *
+ * A request that is not yet fulfilled is left alone while
+ * window_current exceeds window_conf. Every picked request adds its
+ * orig_size to window_current. A picked request that is not yet
+ * fulfilled is additionally put on wb_inode->liability, marked as lied,
+ * and bumps wb_inode->gen.
+ */
+void
+__wb_pick_unwinds (wb_inode_t *wb_inode, list_head_t *lies)
{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- wb_inode_t *wb_inode = NULL;
- int32_t ret = -1;
-
- GF_ASSERT (frame);
-
- local = frame->local;
- wb_inode = local->wb_inode;
-
- request = local->request;
- if ((wb_inode != NULL) && (request != NULL)) {
- wb_request_unref (request);
- ret = wb_process_queue (frame, wb_inode);
- if (ret == -1) {
- if (errno == ENOMEM) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
-
- gf_log (this->name, GF_LOG_WARNING,
- "request queue processing failed");
- }
- }
-
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
-
- return 0;
-}
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ list_for_each_entry_safe (req, tmp, &wb_inode->temptation, lie) {
+ if (!req->ordering.fulfilled &&
+ wb_inode->window_current > wb_inode->window_conf)
+ continue;
-int32_t
-wb_fstat_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- GF_ASSERT (frame);
- GF_ASSERT (this);
-
- STACK_WIND (frame, wb_fstat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, xdata);
- return 0;
-}
+ list_del_init (&req->lie);
+ list_move_tail (&req->unwinds, lies);
+ wb_inode->window_current += req->orig_size;
-int32_t
-wb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- wb_inode_t *wb_inode = NULL;
- wb_local_t *local = NULL;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
- int op_errno = EINVAL;
+ if (!req->ordering.fulfilled) {
+ /* burden increased */
+ list_add_tail (&req->lie, &wb_inode->liability);
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
+ req->ordering.lied = 1;
- wb_inode = wb_inode_ctx_get (this, fd->inode);
- if ((!IA_ISDIR (fd->inode->ia_type)) && (wb_inode == NULL)) {
- gf_log (this->name, GF_LOG_WARNING,
- "wb_inode not found for fd %p", fd);
- op_errno = EBADFD;
- goto unwind;
- }
+ wb_inode->gen++;
+ }
+ }
- local = mem_get0 (this->local_pool);
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ return;
+}
- local->wb_inode = wb_inode;
- frame->local = local;
+/*
+ * __wb_collapse_small_writes: append the payload of @req to @holder so
+ * that both can be wound as one write.
+ *
+ * On the first collapse into @holder (holder->iobref not set) a fresh
+ * iobuf/iobref pair is allocated, the holder's own vectors are copied
+ * into the iobuf, and the holder's stub is rewritten to a single iovec
+ * backed by it. The data of @req is then copied behind the holder's data
+ * and the holder's iov_len, write_size and ordering.size grow by
+ * req->write_size.
+ *
+ * The capacity of the iobuf is not checked here (see the TODO below);
+ * __wb_preprocess_winds() only calls this when the result fits within
+ * page_size.
+ *
+ * Returns 0 on success and non-zero if the iobuf/iobref could not be set
+ * up, in which case nothing has been collapsed.
+ */
+int
+__wb_collapse_small_writes (wb_request_t *holder, wb_request_t *req)
+{
+ char *ptr = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ int ret = -1;
- if (wb_inode) {
- stub = fop_fstat_stub (frame, wb_fstat_helper, fd, xdata);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
+ if (!holder->iobref) {
+ /* TODO: check the required size */
+ iobuf = iobuf_get (req->wb_inode->this->ctx->iobuf_pool);
+ if (iobuf == NULL) {
+ goto out;
}
- request = wb_enqueue (wb_inode, stub);
- if (request == NULL) {
- op_errno = ENOMEM;
- goto unwind;
+ iobref = iobref_new ();
+ if (iobref == NULL) {
+ iobuf_unref (iobuf);
+ goto out;
}
- /*
- FIXME:should the request queue be emptied in case of error?
- */
- ret = wb_process_queue (frame, wb_inode);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "request queue processing failed");
+ ret = iobref_add (iobref, iobuf);
+ if (ret != 0) {
+ iobuf_unref (iobuf);
+ iobref_unref (iobref);
+ gf_log (req->wb_inode->this->name, GF_LOG_WARNING,
+ "cannot add iobuf (%p) into iobref (%p)",
+ iobuf, iobref);
+ goto out;
}
- } else {
- STACK_WIND (frame, wb_fstat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, xdata);
- }
-
- return 0;
-
-unwind:
- STACK_UNWIND_STRICT (fstat, frame, -1, op_errno, NULL, NULL);
-
- if (stub) {
- call_stub_destroy (stub);
- }
- return 0;
-}
-
-
-int32_t
-wb_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- wb_inode_t *wb_inode = NULL;
- call_frame_t *process_frame = NULL;
- int32_t ret = -1;
+ iov_unload (iobuf->ptr, holder->stub->args.writev.vector,
+ holder->stub->args.writev.count);
+ holder->stub->args.writev.vector[0].iov_base = iobuf->ptr;
+ holder->stub->args.writev.count = 1;
- GF_ASSERT (frame);
+ iobref_unref (holder->stub->args.writev.iobref);
+ holder->stub->args.writev.iobref = iobref;
- local = frame->local;
- wb_inode = local->wb_inode;
- request = local->request;
+ iobuf_unref (iobuf);
- if ((request != NULL) && (wb_inode != NULL)) {
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
+ holder->iobref = iobref_ref (iobref);
}
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
- postbuf, xdata);
-
- if (request) {
- wb_request_unref (request);
- }
+ ptr = holder->stub->args.writev.vector[0].iov_base + holder->write_size;
- if (process_frame != NULL) {
- ret = wb_process_queue (process_frame, wb_inode);
- if (ret == -1) {
- if ((errno == ENOMEM) && (wb_inode != NULL)) {
- LOCK (&wb_inode->lock);
- {
- wb_inode->op_ret = -1;
- wb_inode->op_errno = ENOMEM;
- }
- UNLOCK (&wb_inode->lock);
- }
-
- gf_log (this->name, GF_LOG_WARNING,
- "request queue processing failed");
- }
+ iov_unload (ptr, req->stub->args.writev.vector,
+ req->stub->args.writev.count);
- STACK_DESTROY (process_frame->root);
- }
+ holder->stub->args.writev.vector[0].iov_len += req->write_size;
+ holder->write_size += req->write_size;
+ holder->ordering.size += req->write_size;
- return 0;
+ ret = 0;
+out:
+ return ret;
}
-static int32_t
-wb_truncate_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset, dict_t *xdata)
+/*
+ * __wb_preprocess_winds: walk wb_inode->todo and collapse small,
+ * back-to-back write-behind writes into a "holder" request before winds
+ * are picked. wb_process_queue() calls this with wb_inode->lock held.
+ *
+ * Only requests with ordering.tempted set take part. A non-tempted
+ * request is skipped, but releases the current holder (ordering.go = 1)
+ * if it conflicts with it. A tempted request is merged into the holder
+ * when it starts where the holder ends, has the same lk_owner and the
+ * combined size stays within the context's page_size; otherwise the
+ * holder is released and the request becomes the new holder. A merged
+ * request is removed from todo and treated as fulfilled.
+ *
+ * The last holder is released only when conf->trickling_writes is set
+ * and wb_inode->transit is zero.
+ */
+void
+__wb_preprocess_winds (wb_inode_t *wb_inode)
{
- GF_ASSERT (frame);
- GF_ASSERT (this);
-
- STACK_WIND (frame, wb_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ off_t offset_expected = 0;
+ size_t space_left = 0;
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
+ wb_request_t *holder = NULL;
+ wb_conf_t *conf = NULL;
+ int ret = 0;
+ size_t page_size = 0;
+
+ /* With asynchronous IO from a VM guest (as a file), there
+ can be two sequential writes happening in two regions
+ of the file. But individual (broken down) IO requests
+ can arrive interleaved.
+
+ TODO: cycle for each such sequence sifting
+ through the interleaved ops
+ */
+
+ page_size = wb_inode->this->ctx->page_size;
+ conf = wb_inode->this->private;
+
+ list_for_each_entry_safe (req, tmp, &wb_inode->todo, todo) {
+ if (!req->ordering.tempted) {
+ if (holder) {
+ if (wb_requests_conflict (holder, req))
+ /* do not hold on write if a
+ dependent write is in queue */
+ holder->ordering.go = 1;
+ }
+ /* collapse only non-sync writes */
+ continue;
+ } else if (!holder) {
+ /* holder is always a non-sync write */
+ holder = req;
+ continue;
+ }
- return 0;
-}
+ offset_expected = holder->stub->args.writev.off
+ + holder->write_size;
+ if (req->stub->args.writev.off != offset_expected) {
+ holder->ordering.go = 1;
+ holder = req;
+ continue;
+ }
-int32_t
-wb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
-{
- wb_inode_t *wb_inode = NULL;
- wb_local_t *local = NULL;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1, op_errno = EINVAL;
+ if (!is_same_lkowner (&req->lk_owner, &holder->lk_owner)) {
+ holder->ordering.go = 1;
+ holder = req;
+ continue;
+ }
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- GF_VALIDATE_OR_GOTO (frame->this->name, loc, unwind);
+ space_left = page_size - holder->write_size;
- if (loc->inode) {
- wb_inode = wb_inode_ctx_get (this, loc->inode);
- }
+ if (space_left < req->write_size) {
+ holder->ordering.go = 1;
+ holder = req;
+ continue;
+ }
- local = mem_get0 (this->local_pool);
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ ret = __wb_collapse_small_writes (holder, req);
+ if (ret)
+ continue;
- local->wb_inode = wb_inode;
+ /* collapsed request is as good as wound
+ (from its p.o.v)
+ */
+ list_del_init (&req->todo);
+ __wb_fulfill_request (req);
- frame->local = local;
- if (wb_inode) {
- stub = fop_truncate_stub (frame, wb_truncate_helper, loc,
- offset, xdata);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ /* Only the last @holder in queue which
- request = wb_enqueue (wb_inode, stub);
- if (request == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ - does not have any non-buffered-writes following it
+ - has not yet filled its capacity
- ret = wb_process_queue (frame, wb_inode);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "request queue processing failed");
- }
- } else {
- STACK_WIND (frame, wb_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset,
- xdata);
+ does not get its 'go' set, in anticipation of the arrival
+ of consecutive smaller writes.
+ */
}
- return 0;
-
-unwind:
- STACK_UNWIND_STRICT (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ /* but if trickling writes are enabled, then do not hold back
+ writes if there are no outstanding requests
+ */
- if (stub) {
- call_stub_destroy (stub);
- }
+ if (conf->trickling_writes && !wb_inode->transit && holder)
+ holder->ordering.go = 1;
- return 0;
+ return;
}
-int32_t
-wb_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+/*
+ * __wb_pick_winds: take the requests that may be wound now off
+ * wb_inode->todo. wb_process_queue() calls this with wb_inode->lock
+ * held.
+ *
+ * A request stays on todo if wb_liability_has_conflict() reports a
+ * conflict for it, or if it is a tempted write whose ordering.go is not
+ * set yet. Picked requests are linked through req->winds: tempted writes
+ * go onto @liabilities, everything else onto @tasks.
+ */
+void
+__wb_pick_winds (wb_inode_t *wb_inode, list_head_t *tasks,
+ list_head_t *liabilities)
{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- wb_inode_t *wb_inode = NULL;
- int32_t ret = -1;
-
- GF_ASSERT (frame);
-
- local = frame->local;
- wb_inode = local->wb_inode;
- request = local->request;
-
- if ((request != NULL) && (wb_inode != NULL)) {
- wb_request_unref (request);
- ret = wb_process_queue (frame, wb_inode);
- if (ret == -1) {
- if (errno == ENOMEM) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
-
- gf_log (this->name, GF_LOG_WARNING,
- "request queue processing failed");
- }
- }
-
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
- postbuf, xdata);
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
- return 0;
-}
+ list_for_each_entry_safe (req, tmp, &wb_inode->todo, todo) {
+ if (wb_liability_has_conflict (wb_inode, req))
+ continue;
+ if (req->ordering.tempted && !req->ordering.go)
+ /* wait some more */
+ continue;
-static int32_t
-wb_ftruncate_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset, dict_t *xdata)
-{
- GF_ASSERT (frame);
- GF_ASSERT (this);
+ list_del_init (&req->todo);
- STACK_WIND (frame, wb_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
- return 0;
+ if (req->ordering.tempted)
+ list_add_tail (&req->winds, liabilities);
+ else
+ list_add_tail (&req->winds, tasks);
+ }
}
-int32_t
-wb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
+/*
+ * wb_do_winds: resume every request queued on @tasks (linked through
+ * req->winds). Each request is taken off the list, its stub is resumed
+ * with call_resume() and the request is unref'ed.
+ */
+void
+wb_do_winds (wb_inode_t *wb_inode, list_head_t *tasks)
{
- wb_inode_t *wb_inode = NULL;
- wb_local_t *local = NULL;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
- int op_errno = EINVAL;
-
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
-
- wb_inode = wb_inode_ctx_get (this, fd->inode);
- if ((!IA_ISDIR (fd->inode->ia_type)) && (wb_inode == NULL)) {
- gf_log (this->name, GF_LOG_WARNING,
- "wb_inode not found for fd %p", fd);
- op_errno = EBADFD;
- goto unwind;
- }
-
- local = mem_get0 (this->local_pool);
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- local->wb_inode = wb_inode;
-
- frame->local = local;
-
- if (wb_inode) {
- stub = fop_ftruncate_stub (frame, wb_ftruncate_helper, fd,
- offset, xdata);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- request = wb_enqueue (wb_inode, stub);
- if (request == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- ret = wb_process_queue (frame, wb_inode);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "request queue processing failed");
- }
- } else {
- STACK_WIND (frame, wb_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
- }
-
- return 0;
+ wb_request_t *req = NULL;
+ wb_request_t *tmp = NULL;
-unwind:
- STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+ list_for_each_entry_safe (req, tmp, tasks, winds) {
+ list_del_init (&req->winds);
- if (stub) {
- call_stub_destroy (stub);
- }
+ call_resume (req->stub);
- return 0;
+ wb_request_unref (req);
+ }
}
-int32_t
-wb_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
+/*
+ * wb_process_queue: run one pass over the request queues of @wb_inode.
+ *
+ * With wb_inode->lock held, small writes are collapsed, the requests
+ * that can be wound are split into @tasks and @liabilities, and the
+ * writes that can be acknowledged are collected on @lies. The actual
+ * unwinding, resuming and fulfilling happens after the lock is dropped,
+ * in that order.
+ */
+void
+wb_process_queue (wb_inode_t *wb_inode)
{
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- call_frame_t *process_frame = NULL;
- wb_inode_t *wb_inode = NULL;
- int32_t ret = -1;
+ list_head_t tasks = {0, };
+ list_head_t lies = {0, };
+ list_head_t liabilities = {0, };
- GF_ASSERT (frame);
+ INIT_LIST_HEAD (&tasks);
+ INIT_LIST_HEAD (&lies);
+ INIT_LIST_HEAD (&liabilities);
- local = frame->local;
- wb_inode = local->wb_inode;
- request = local->request;
+ LOCK (&wb_inode->lock);
+ {
+ __wb_preprocess_winds (wb_inode);
- if (request) {
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- }
+ __wb_pick_winds (wb_inode, &tasks, &liabilities);
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre,
- statpost, xdata);
+ __wb_pick_unwinds (wb_inode, &lies);
- if (request) {
- wb_request_unref (request);
}
+ UNLOCK (&wb_inode->lock);
- if (request && (process_frame != NULL)) {
- ret = wb_process_queue (process_frame, wb_inode);
- if (ret == -1) {
- if ((errno == ENOMEM) && (wb_inode != NULL)) {
- LOCK (&wb_inode->lock);
- {
- wb_inode->op_ret = -1;
- wb_inode->op_errno = ENOMEM;
- }
- UNLOCK (&wb_inode->lock);
- }
-
- gf_log (this->name, GF_LOG_WARNING,
- "request queue processing failed");
- }
+ wb_do_unwinds (wb_inode, &lies);
- STACK_DESTROY (process_frame->root);
- }
+ wb_do_winds (wb_inode, &tasks);
- return 0;
+ wb_fulfill (wb_inode, &liabilities);
+
+ return;
}
-static int32_t
-wb_setattr_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+/*
+ * wb_writev_helper: resume function of the stub that wb_writev() builds
+ * for writes with write-behind disabled. Winds the writev unchanged to
+ * the first child, with default_writev_cbk as callback. Always
+ * returns 0.
+ */
+int
+wb_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- GF_ASSERT (frame);
- GF_ASSERT (this);
-
- STACK_WIND (frame, wb_setattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
- return 0;
+ STACK_WIND (frame, default_writev_cbk,
+ FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
+ fd, vector, count, offset, flags, iobref, xdata);
+ return 0;
}
-int32_t
-wb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+/*
+ * wb_writev: writev entry point of the translator.
+ *
+ * Write-behind is disabled for this call when the fd was opened with
+ * O_SYNC or O_DSYNC (or with O_DIRECT while conf->strict_O_DIRECT is
+ * set), or when the per-call @flags carry O_SYNC, O_DSYNC or O_DIRECT.
+ *
+ * An error recorded on the inode by an earlier fulfillment
+ * (wb_inode->op_ret < 0) is consumed here: the write is failed with
+ * wb_inode->op_errno and wb_inode->op_ret is reset to 0.
+ *
+ * Otherwise the write is wrapped in a call stub and queued: with
+ * wb_enqueue() and wb_writev_helper as resume function when write-behind
+ * is disabled, with wb_enqueue_tempted() and no resume function when it
+ * is not. The queue is then processed.
+ *
+ * Always returns 0; failures are reported by unwinding @frame with -1
+ * (ENOMEM when the inode context, the stub or the queue entry cannot be
+ * set up).
+ */
+int
+wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- wb_inode_t *wb_inode = NULL;
- wb_local_t *local = NULL;
- call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1, op_errno = EINVAL;
-
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- GF_VALIDATE_OR_GOTO (frame->this->name, loc, unwind);
+ wb_inode_t *wb_inode = NULL;
+ wb_conf_t *conf = NULL;
+ gf_boolean_t wb_disabled = 0;
+ call_stub_t *stub = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
+ int o_direct = O_DIRECT;
+
+ conf = this->private;
+ wb_inode = wb_inode_create (this, fd->inode);
+ if (!wb_inode) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (!conf->strict_O_DIRECT)
+ o_direct = 0;
+
+ if (fd->flags & (O_SYNC|O_DSYNC|o_direct))
+ wb_disabled = 1;
+
+ if (flags & (O_SYNC|O_DSYNC|O_DIRECT))
+ /* O_DIRECT flag in params of writev must _always_ be honored */
+ wb_disabled = 1;
+
+ op_errno = 0;
+ LOCK (&wb_inode->lock);
+ {
+ /* pick up a previous error in fulfillment */
+ if (wb_inode->op_ret < 0)
+ op_errno = wb_inode->op_errno;
+
+ wb_inode->op_ret = 0;
+ }
+ UNLOCK (&wb_inode->lock);
+
+ if (op_errno)
+ goto unwind;
- local = mem_get0 (this->local_pool);
- if (local == NULL) {
+ if (wb_disabled)
+ stub = fop_writev_stub (frame, wb_writev_helper, fd, vector,
+ count, offset, flags, iobref, xdata);
+ else
+ stub = fop_writev_stub (frame, NULL, fd, vector, count, offset,
+ flags, iobref, xdata);
+ if (!stub) {
op_errno = ENOMEM;
goto unwind;
}
- frame->local = local;
-
- if (loc->inode) {
- wb_inode = wb_inode_ctx_get (this, loc->inode);
- }
+ if (wb_disabled)
+ ret = wb_enqueue (wb_inode, stub);
+ else
+ ret = wb_enqueue_tempted (wb_inode, stub);
- local->wb_inode = wb_inode;
+ if (!ret) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- if (wb_inode) {
- stub = fop_setattr_stub (frame, wb_setattr_helper, loc, stbuf,
- valid, xdata);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- request = wb_enqueue (wb_inode, stub);
- if (request == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- ret = wb_process_queue (frame, wb_inode);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "request queue processing failed");
- }
- } else {
- STACK_WIND (frame, wb_setattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr, loc, stbuf,
- valid, xdata);
- }
+ wb_process_queue (wb_inode);
return 0;
+
unwind:
- STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, NULL);
- if (stub) {
+ if (stub)
call_stub_destroy (stub);
- }
return 0;
}
-void
-wb_disable_all (xlator_t *this, fd_t *origfd)
-{
- inode_t *inode = NULL;
- fd_t *otherfd = NULL;
- wb_file_t *wb_file = NULL;
-
- inode = origfd->inode;
-
- LOCK(&inode->lock);
- {
- list_for_each_entry (otherfd, &inode->fd_list, inode_list) {
- if (otherfd == origfd) {
- continue;
- }
-
- wb_file = wb_fd_ctx_get (this, otherfd);
- if (wb_file == NULL) {
- continue;
- }
-
- gf_log(this->name,GF_LOG_DEBUG,
- "disabling wb on %p because %p is O_SYNC",
- otherfd, origfd);
- wb_file->disabled = 1;
- }
- }
- UNLOCK(&inode->lock);
-}
-int32_t
-wb_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd, dict_t *xdata)
+/*
+ * wb_readv_helper: resume function of the stub queued by wb_readv().
+ * Winds the readv unchanged to the first child, with default_readv_cbk
+ * as callback. Always returns 0.
+ */
+int
+wb_readv_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- int32_t flags = 0;
- wb_file_t *file = NULL;
- wb_local_t *local = NULL;
-
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, this, out, op_errno,
- EINVAL);
- local = frame->local;
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno,
- EINVAL);
-
- flags = local->flags;
-
- if (op_ret != -1) {
- file = wb_file_create (this, fd, flags);
- if (file == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
- }
-
-out:
- frame->local = NULL;
- if (local != NULL) {
- mem_put (local);
- }
-
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+ STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
return 0;
}
-int32_t
-wb_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
+/*
+ * wb_readv: readv entry point of the translator.
+ *
+ * If the inode has no write-behind context the read is wound straight
+ * to the first child. Otherwise it is wrapped in a call stub (resumed
+ * through wb_readv_helper), put on the inode's queue with wb_enqueue()
+ * and the queue is processed.
+ *
+ * Always returns 0. If the stub cannot be created or queued, @frame is
+ * unwound with -1/ENOMEM and a stub that was created but not queued is
+ * destroyed, as wb_writev() does on its error path.
+ */
+int
+wb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- wb_local_t *local = NULL;
- int32_t op_errno = EINVAL;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- local = mem_get0 (this->local_pool);
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ wb_inode = wb_inode_ctx_get (this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- local->flags = flags;
+ stub = fop_readv_stub (frame, wb_readv_helper, fd, size,
+ offset, flags, xdata);
+ if (!stub)
+ goto unwind;
- frame->local = local;
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
+
+ wb_process_queue (wb_inode);
- STACK_WIND (frame, wb_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
return 0;
unwind:
- STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL, NULL);
+ STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL,
+ NULL);
+
+ /* stub is still ours when wb_enqueue() failed */
+ if (stub)
+ call_stub_destroy (stub);
return 0;
-}
-
-
-int32_t
-wb_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- long flags = 0;
- wb_inode_t *wb_inode = NULL;
- wb_file_t *file = NULL;
- wb_local_t *local = NULL;
-
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, this, out,
- op_errno, EINVAL);
-
- if (op_ret != -1) {
- if (frame->local) {
- flags = (long) frame->local;
- }
-
- file = wb_file_create (this, fd, flags);
- if (file == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- LOCK (&inode->lock);
- {
- wb_inode = __wb_inode_create (this, inode);
- if (wb_inode == NULL) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- }
- UNLOCK (&inode->lock);
- }
-
- frame->local = NULL;
-
-out:
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent, xdata);
-
- if (local != NULL) {
- mem_put (local);
- }
+noqueue:
+ STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
return 0;
}
-int32_t
-wb_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+/*
+ * wb_flush_bg_cbk: callback of the background flush wound by
+ * wb_flush_helper() when flush-behind is enabled. The result of the
+ * flush is ignored; the frame's stack is destroyed. Always returns 0.
+ */
+int
+wb_flush_bg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- int32_t op_errno = EINVAL;
- wb_local_t *local = NULL;
-
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind);
- GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind);
- GF_VALIDATE_OR_GOTO (frame->this->name, loc, unwind);
-
- local = mem_get0 (this->local_pool);
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- local->flags = flags;
-
- frame->local = local;
-
- STACK_WIND (frame, wb_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, umask, fd, xdata);
- return 0;
-
-unwind:
- STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL, NULL);
+ STACK_DESTROY (frame->root);
return 0;
}
-/* Mark all the contiguous write requests for winding starting from head of
- * request list. Stops marking at the first non-write request found. If
- * file is opened with O_APPEND, make sure all the writes marked for winding
- * will fit into a single write call to server.
- */
-size_t
-__wb_mark_wind_all (wb_inode_t *wb_inode, list_head_t *list, list_head_t *winds)
+/*
+ * wb_flush_helper: resume function of the flush stub; performs the
+ * actual flush.
+ *
+ * Without a write-behind context for the inode the flush is failed with
+ * EINVAL. An error recorded on the inode by an earlier fulfillment
+ * (wb_inode->op_ret < 0) is reported to the caller and
+ * wb_inode->op_ret is reset to 0.
+ *
+ * With conf->flush_behind set, the flush is wound on a copy of @frame
+ * (cleaned up in wb_flush_bg_cbk) and @frame is unwound right away with
+ * success, or with ENOMEM if the frame could not be copied. Otherwise
+ * the flush is wound on @frame itself with default_flush_cbk.
+ *
+ * Always returns 0.
+ */
+int
+wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- wb_request_t *request = NULL, *prev_request = NULL;
- wb_file_t *wb_file = NULL, *prev_wb_file = NULL;
- wb_file_t *last_wb_file = NULL;
- size_t size = 0;
- char first_request = 1, overlap = 0;
- wb_conf_t *conf = NULL;
- int count = 0;
- enum _gf_boolean dont_wind_set = 0;
-
- GF_VALIDATE_OR_GOTO ("write-behind", wb_inode, out);
- GF_VALIDATE_OR_GOTO (wb_inode->this->name, list, out);
- GF_VALIDATE_OR_GOTO (wb_inode->this->name, winds, out);
-
- conf = wb_inode->this->private;
-
- list_for_each_entry (request, list, list)
- {
- if ((request->stub == NULL)
- || (request->stub->fop != GF_FOP_WRITE)) {
- break;
- }
-
- wb_file = wb_fd_ctx_get (wb_inode->this,
- request->stub->args.writev.fd);
- if (wb_file == NULL) {
- gf_log (wb_inode->this->name, GF_LOG_WARNING,
- "write behind wb_file pointer is"
- " not stored in context of fd(%p)",
- request->stub->args.writev.fd);
- goto out;
- }
-
- /* If write requests from two fds are interleaved, for
- * each of them, we can only send first set of adjacent
- * requests that are on same fd. This is because, fds
- * with O_APPEND cannot have more than one write fop in
- * progress while syncing, so that order is not messed
- * up. Since we group adjacent requests with same fd into
- * single write call whenever possible, we need the above said
- * measure.
- */
- if ((prev_wb_file != NULL) && (prev_wb_file->flags & O_APPEND)
- && (prev_request->stub->args.writev.fd
- != request->stub->args.writev.fd)
- && (!prev_wb_file->dont_wind)) {
- prev_wb_file->dont_wind = 1;
- dont_wind_set = 1;
- last_wb_file = prev_wb_file;
- }
-
- prev_request = request;
- prev_wb_file = wb_file;
-
- if (!request->flags.write_request.stack_wound) {
- if (first_request) {
- first_request = 0;
- } else {
- overlap = wb_overlap (list, request);
- if (overlap) {
- continue;
- }
- }
-
- if ((wb_file->flags & O_APPEND)
- && (((size + request->write_size)
- > conf->aggregate_size)
- || ((count + request->stub->args.writev.count)
- > MAX_VECTOR_COUNT)
- || (wb_file->dont_wind))) {
- continue;
- }
-
- size += request->write_size;
-
- wb_inode->aggregate_current -= request->write_size;
-
- count += request->stub->args.writev.count;
-
- request->flags.write_request.stack_wound = 1;
- list_add_tail (&request->winds, winds);
- }
- }
-
-out:
- if (wb_inode != NULL) {
- wb_inode->aggregate_current -= size;
- }
-
- if (dont_wind_set && (list != NULL)) {
- list_for_each_entry (request, list, list) {
- wb_file = wb_fd_ctx_get (wb_inode->this,
- request->stub->args.writev.fd);
- if (wb_file != NULL) {
- wb_file->dont_wind = 0;
- }
-
- if (wb_file == last_wb_file) {
- break;
- }
- }
- }
-
- return size;
-}
-
+ wb_conf_t *conf = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_frame_t *bg_frame = NULL;
+ int op_errno = 0;
+ int op_ret = 0;
-int32_t
-__wb_can_wind (list_head_t *list, char *other_fop_in_queue,
- char *overlapping_writes, char *incomplete_writes,
- char *wind_all)
-{
- wb_request_t *request = NULL;
- char first_request = 1;
- int32_t ret = -1;
- char overlap = 0;
+ conf = this->private;
- GF_VALIDATE_OR_GOTO ("write-behind", list, out);
+ wb_inode = wb_inode_ctx_get (this, fd->inode);
+ if (!wb_inode) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto unwind;
+ }
- list_for_each_entry (request, list, list)
+ LOCK (&wb_inode->lock);
{
- if ((request->stub == NULL)
- || (request->stub->fop != GF_FOP_WRITE)) {
- if (request->stub && other_fop_in_queue) {
- *other_fop_in_queue = 1;
- }
- break;
- }
-
- if (request->flags.write_request.stack_wound
- && !request->flags.write_request.got_reply
- && (incomplete_writes != NULL)) {
- *incomplete_writes = 1;
- break;
- }
+ if (wb_inode->op_ret < 0) {
+ op_ret = -1;
+ op_errno = wb_inode->op_errno;
+ }
- if (!request->flags.write_request.stack_wound) {
- if (first_request) {
- char flush = 0;
- first_request = 0;
-
- flush = request->flags.write_request.flush_all;
- if (wind_all != NULL) {
- *wind_all = flush;
- }
- }
-
- overlap = wb_overlap (list, request);
- if (overlap) {
- if (overlapping_writes != NULL) {
- *overlapping_writes = 1;
- }
-
- break;
- }
- }
+ wb_inode->op_ret = 0;
}
+ UNLOCK (&wb_inode->lock);
- ret = 0;
-out:
- return ret;
-}
+ if (op_errno)
+ goto unwind;
+ if (conf->flush_behind)
+ goto flushbehind;
-ssize_t
-__wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_conf,
- char enable_trickling_writes)
-{
- size_t size = 0;
- char other_fop_in_queue = 0;
- char incomplete_writes = 0;
- char overlapping_writes = 0;
- wb_request_t *request = NULL;
- wb_inode_t *wb_inode = NULL;
- char wind_all = 0;
- int32_t ret = 0;
-
- GF_VALIDATE_OR_GOTO ("write-behind", list, out);
- GF_VALIDATE_OR_GOTO ("write-behind", winds, out);
-
- if (list_empty (list)) {
- goto out;
- }
+ STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
- request = list_entry (list->next, typeof (*request), list);
- wb_inode = request->wb_inode;
+flushbehind:
+ bg_frame = copy_frame (frame);
+ if (!bg_frame) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- ret = __wb_can_wind (list, &other_fop_in_queue,
- &overlapping_writes, &incomplete_writes,
- &wind_all);
- if (ret == -1) {
- gf_log (wb_inode->this->name, GF_LOG_WARNING,
- "cannot decide whether to wind or not");
- goto out;
- }
-
- if (!incomplete_writes && ((enable_trickling_writes)
- || (wind_all) || (overlapping_writes)
- || (other_fop_in_queue)
- || (wb_inode->aggregate_current
- >= aggregate_conf))) {
- size = __wb_mark_wind_all (wb_inode, list, winds);
- }
+ STACK_WIND (bg_frame, wb_flush_bg_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ /* fall through */
+unwind:
+ STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, NULL);
-out:
- return size;
+ return 0;
}
-size_t
-__wb_mark_unwind_till (list_head_t *list, list_head_t *unwinds, size_t size)
+/*
+ * wb_flush: entry point for the flush fop.
+ *
+ * If the inode has no write-behind context the flush is wound straight to
+ * the first child.  Otherwise the call is wrapped in a stub targeting
+ * wb_flush_helper, enqueued on the inode and the queue is processed.
+ * Failure to create or enqueue the stub unwinds the fop with ENOMEM.
+ * Always returns 0.
+ */
+int
+wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- size_t written_behind = 0;
- wb_request_t *request = NULL;
- wb_inode_t *wb_inode = NULL;
-
- if (list_empty (list)) {
- goto out;
- }
-
- request = list_entry (list->next, typeof (*request), list);
- wb_inode = request->wb_inode;
-
- list_for_each_entry (request, list, list)
- {
- if ((request->stub == NULL)
- || (request->stub->fop != GF_FOP_WRITE)) {
- continue;
- }
-
- if (written_behind <= size) {
- if (!request->flags.write_request.write_behind) {
- written_behind += request->write_size;
- request->flags.write_request.write_behind = 1;
- list_add_tail (&request->unwinds, unwinds);
-
- if (!request->flags.write_request.got_reply) {
- wb_inode->window_current
- += request->write_size;
- }
- }
- } else {
- break;
- }
- }
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
-out:
- return written_behind;
-}
+ wb_inode = wb_inode_ctx_get (this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
+ stub = fop_flush_stub (frame, wb_flush_helper, fd, xdata);
+ if (!stub)
+ goto unwind;
-void
-__wb_mark_unwinds (list_head_t *list, list_head_t *unwinds)
-{
- wb_request_t *request = NULL;
- wb_inode_t *wb_inode = NULL;
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
- GF_VALIDATE_OR_GOTO ("write-behind", list, out);
- GF_VALIDATE_OR_GOTO ("write-behind", unwinds, out);
+ wb_process_queue (wb_inode);
- if (list_empty (list)) {
- goto out;
- }
+ return 0;
- request = list_entry (list->next, typeof (*request), list);
- wb_inode = request->wb_inode;
+unwind:
+ STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, NULL);
+ /* stub is non-NULL here only when wb_enqueue failed; release it the
+ * same way the stat/fstat/truncate/setattr entry points do */
+ if (stub)
+ call_stub_destroy (stub);
- if (wb_inode->window_current <= wb_inode->window_conf) {
- __wb_mark_unwind_till (list, unwinds,
- wb_inode->window_conf
- - wb_inode->window_current);
- }
+ return 0;
-out:
- return;
+noqueue:
+ STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
}
-uint32_t
-__wb_get_other_requests (list_head_t *list, list_head_t *other_requests)
-{
- wb_request_t *request = NULL;
- uint32_t count = 0;
-
- GF_VALIDATE_OR_GOTO ("write-behind", list, out);
- GF_VALIDATE_OR_GOTO ("write-behind", other_requests, out);
-
- list_for_each_entry (request, list, list) {
- if ((request->stub == NULL)
- || (request->stub->fop == GF_FOP_WRITE)) {
- break;
- }
-
- if (!request->flags.other_requests.marked_for_resume) {
- request->flags.other_requests.marked_for_resume = 1;
- list_add_tail (&request->other_requests,
- other_requests);
- count++;
- }
- }
-out:
- return count;
-}
-
-
-int32_t
-wb_stack_unwind (list_head_t *unwinds)
+int
+wb_fsync_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
{
- struct iatt buf = {0,};
- wb_request_t *request = NULL, *dummy = NULL;
- call_frame_t *frame = NULL;
- wb_local_t *local = NULL;
- int ret = 0, write_requests_removed = 0;
-
- GF_VALIDATE_OR_GOTO ("write-behind", unwinds, out);
-
- list_for_each_entry_safe (request, dummy, unwinds, unwinds) {
- frame = request->stub->frame;
- local = frame->local;
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &buf, &buf, NULL, NULL);
-
- ret = wb_request_unref (request);
- if (ret == 0) {
- write_requests_removed++;
- }
- }
-
-out:
- return write_requests_removed;
+ STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ return 0;
}
-int32_t
-wb_resume_other_requests (call_frame_t *frame, wb_inode_t *wb_inode,
- list_head_t *other_requests)
+/*
+ * wb_fsync: entry point for the fsync fop.
+ *
+ * If the inode has no write-behind context the fsync is wound straight to
+ * the first child.  Otherwise the call is wrapped in a stub targeting
+ * wb_fsync_helper, enqueued on the inode and the queue is processed.
+ * Failure to create or enqueue the stub unwinds the fop with ENOMEM.
+ * Always returns 0.
+ */
+int
+wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- int32_t ret = -1;
- wb_request_t *request = NULL, *dummy = NULL;
- int32_t fops_removed = 0;
- char wind = 0;
+ wb_inode_t *wb_inode = NULL;
call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ((wb_inode ? wb_inode->this->name : "write-behind"),
- frame, out);
- GF_VALIDATE_OR_GOTO (frame->this->name, wb_inode, out);
- GF_VALIDATE_OR_GOTO (frame->this->name, other_requests, out);
-
- if (list_empty (other_requests)) {
- ret = 0;
- goto out;
- }
-
- list_for_each_entry_safe (request, dummy, other_requests,
- other_requests) {
- wind = request->stub->wind;
- stub = request->stub;
-
- LOCK (&wb_inode->lock);
- {
- request->stub = NULL;
- }
- UNLOCK (&wb_inode->lock);
-
- if (!wind) {
- wb_request_unref (request);
- fops_removed++;
- }
-
- call_resume (stub);
- }
-
- ret = 0;
-
- if (fops_removed > 0) {
- ret = wb_process_queue (frame, wb_inode);
- if (ret == -1) {
- gf_log (frame->this->name, GF_LOG_WARNING,
- "request queue processing failed");
- }
- }
-
-out:
- return ret;
-}
-
-
-int32_t
-wb_do_ops (call_frame_t *frame, wb_inode_t *wb_inode, list_head_t *winds,
- list_head_t *unwinds, list_head_t *other_requests)
-{
- int32_t ret = -1, write_requests_removed = 0;
+ wb_inode = wb_inode_ctx_get (this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- GF_VALIDATE_OR_GOTO ((wb_inode ? wb_inode->this->name : "write-behind"),
- frame, out);
- GF_VALIDATE_OR_GOTO (frame->this->name, wb_inode, out);
+ stub = fop_fsync_stub (frame, wb_fsync_helper, fd, datasync, xdata);
+ if (!stub)
+ goto unwind;
- ret = wb_stack_unwind (unwinds);
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
- write_requests_removed = ret;
+ wb_process_queue (wb_inode);
- ret = wb_sync (frame, wb_inode, winds);
- if (ret == -1) {
- gf_log (frame->this->name, GF_LOG_WARNING,
- "syncing of write requests failed");
- }
+ return 0;
- ret = wb_resume_other_requests (frame, wb_inode, other_requests);
- if (ret == -1) {
- gf_log (frame->this->name, GF_LOG_WARNING,
- "cannot resume non-write requests in request queue");
- }
+unwind:
+ STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
+ /* stub is non-NULL here only when wb_enqueue failed; release it the
+ * same way the stat/fstat/truncate/setattr entry points do */
+ if (stub)
+ call_stub_destroy (stub);
- /* wb_stack_unwind does wb_request_unref after unwinding a write
- * request. Hence if a write-request was just freed in wb_stack_unwind,
- * we have to process request queue once again to unblock requests
- * blocked on the writes just unwound.
- */
- if (write_requests_removed > 0) {
- ret = wb_process_queue (frame, wb_inode);
- if (ret == -1) {
- gf_log (frame->this->name, GF_LOG_WARNING,
- "request queue processing failed");
- }
- }
+ return 0;
-out:
- return ret;
+noqueue:
+ STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ return 0;
}
-inline int
-__wb_copy_into_holder (wb_request_t *holder, wb_request_t *request)
+int
+wb_stat_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- char *ptr = NULL;
- struct iobuf *iobuf = NULL;
- struct iobref *iobref = NULL;
- int ret = -1;
-
- if (holder->flags.write_request.virgin) {
- /* TODO: check the required size */
- iobuf = iobuf_get (request->wb_inode->this->ctx->iobuf_pool);
- if (iobuf == NULL) {
- goto out;
- }
-
- iobref = iobref_new ();
- if (iobref == NULL) {
- iobuf_unref (iobuf);
- goto out;
- }
-
- ret = iobref_add (iobref, iobuf);
- if (ret != 0) {
- iobuf_unref (iobuf);
- iobref_unref (iobref);
- gf_log (request->wb_inode->this->name, GF_LOG_WARNING,
- "cannot add iobuf (%p) into iobref (%p)",
- iobuf, iobref);
- goto out;
- }
-
- iov_unload (iobuf->ptr, holder->stub->args.writev.vector,
- holder->stub->args.writev.count);
- holder->stub->args.writev.vector[0].iov_base = iobuf->ptr;
-
- iobref_unref (holder->stub->args.writev.iobref);
- holder->stub->args.writev.iobref = iobref;
-
- iobuf_unref (iobuf);
-
- holder->flags.write_request.virgin = 0;
- }
-
- ptr = holder->stub->args.writev.vector[0].iov_base + holder->write_size;
-
- iov_unload (ptr, request->stub->args.writev.vector,
- request->stub->args.writev.count);
-
- holder->stub->args.writev.vector[0].iov_len += request->write_size;
- holder->write_size += request->write_size;
-
- request->flags.write_request.stack_wound = 1;
- list_move_tail (&request->list, &request->wb_inode->passive_requests);
-
- ret = 0;
-out:
- return ret;
+ STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
}
-/* this procedure assumes that write requests have only one vector to write */
-void
-__wb_collapse_write_bufs (list_head_t *requests, size_t page_size)
+int
+wb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- off_t offset_expected = 0;
- size_t space_left = 0;
- wb_request_t *request = NULL, *tmp = NULL, *holder = NULL;
- int ret = 0;
-
- GF_VALIDATE_OR_GOTO ("write-behind", requests, out);
-
- list_for_each_entry_safe (request, tmp, requests, list) {
- if ((request->stub == NULL)
- || (request->stub->fop != GF_FOP_WRITE)
- || (request->flags.write_request.stack_wound)) {
- holder = NULL;
- continue;
- }
-
- if (request->flags.write_request.write_behind) {
- if (holder == NULL) {
- holder = request;
- continue;
- }
-
- offset_expected = holder->stub->args.writev.off
- + holder->write_size;
-
- if ((request->stub->args.writev.off != offset_expected)
- || (!is_same_lkowner (&request->lk_owner,
- &holder->lk_owner))
- || (holder->stub->args.writev.fd
- != request->stub->args.writev.fd)) {
- holder = request;
- continue;
- }
-
- space_left = page_size - holder->write_size;
-
- if (space_left >= request->write_size) {
- ret = __wb_copy_into_holder (holder, request);
- if (ret != 0) {
- break;
- }
-
- __wb_request_unref (request);
- } else {
- holder = request;
- }
- } else {
- break;
- }
- }
-
-out:
- return;
-}
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
-int32_t
-wb_process_queue (call_frame_t *frame, wb_inode_t *wb_inode)
-{
- list_head_t winds = {0, }, unwinds = {0, }, other_requests = {0, };
- size_t size = 0;
- wb_conf_t *conf = NULL;
- uint32_t count = 0;
- int32_t ret = -1;
+ wb_inode = wb_inode_ctx_get (this, loc->inode);
+ if (!wb_inode)
+ goto noqueue;
- INIT_LIST_HEAD (&winds);
- INIT_LIST_HEAD (&unwinds);
- INIT_LIST_HEAD (&other_requests);
+ stub = fop_stat_stub (frame, wb_stat_helper, loc, xdata);
+ if (!stub)
+ goto unwind;
- GF_VALIDATE_OR_GOTO ((wb_inode ? wb_inode->this->name : "write-behind"),
- frame, out);
- GF_VALIDATE_OR_GOTO (wb_inode->this->name, frame, out);
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
- conf = wb_inode->this->private;
- GF_VALIDATE_OR_GOTO (wb_inode->this->name, conf, out);
+ wb_process_queue (wb_inode);
- size = conf->aggregate_size;
- LOCK (&wb_inode->lock);
- {
- /*
- * make sure requests are marked for unwinding and adjacent
- * contiguous write buffers (each of size less than that of
- * an iobuf) are packed properly so that iobufs are filled to
- * their maximum capacity, before calling __wb_mark_winds.
- */
- __wb_mark_unwinds (&wb_inode->request, &unwinds);
-
- __wb_collapse_write_bufs (&wb_inode->request,
- wb_inode->this->ctx->page_size);
-
- count = __wb_get_other_requests (&wb_inode->request,
- &other_requests);
-
- if (count == 0) {
- __wb_mark_winds (&wb_inode->request, &winds, size,
- conf->enable_trickling_writes);
- }
+ return 0;
- }
- UNLOCK (&wb_inode->lock);
+unwind:
+ STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM, NULL, NULL);
- ret = wb_do_ops (frame, wb_inode, &winds, &unwinds, &other_requests);
+ if (stub)
+ call_stub_destroy (stub);
+ return 0;
-out:
- return ret;
+noqueue:
+ STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
}
-int32_t
-wb_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+int
+wb_fstat_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- GF_ASSERT (frame);
-
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
return 0;
}
-int32_t
-wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
- dict_t *xdata)
+int
+wb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- wb_inode_t *wb_inode = NULL;
- wb_file_t *wb_file = NULL;
- char wb_disabled = 0;
- call_frame_t *process_frame = NULL;
- call_stub_t *stub = NULL;
- wb_local_t *local = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
- size_t size = 0;
- int32_t op_ret = -1, op_errno = EINVAL;
-
- GF_ASSERT (frame);
-
- GF_VALIDATE_OR_GOTO_WITH_ERROR ("write-behind", this, unwind, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, fd, unwind, op_errno,
- EINVAL);
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- if (vector != NULL) {
- size = iov_length (vector, count);
- }
wb_inode = wb_inode_ctx_get (this, fd->inode);
- if ((!IA_ISDIR (fd->inode->ia_type)) && (wb_inode == NULL)) {
- gf_log (this->name, GF_LOG_WARNING,
- "write behind wb_inode pointer is"
- " not stored in context of inode(%p), returning EBADFD",
- fd->inode);
- op_errno = EBADFD;
- goto unwind;
- }
-
- if (wb_file != NULL) {
- if (wb_file->disabled || wb_file->disable_till) {
- if (size > wb_file->disable_till) {
- wb_file->disable_till = 0;
- } else {
- wb_file->disable_till -= size;
- }
- wb_disabled = 1;
- }
- } else {
- wb_disabled = 1;
- }
-
- if (wb_inode != NULL) {
- LOCK (&wb_inode->lock);
- {
- op_ret = wb_inode->op_ret;
- op_errno = wb_inode->op_errno;
- }
- UNLOCK (&wb_inode->lock);
- }
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- if (wb_disabled) {
- STACK_WIND (frame, wb_writev_cbk, FIRST_CHILD (frame->this),
- FIRST_CHILD (frame->this)->fops->writev,
- fd, vector, count, offset, flags, iobref, xdata);
- return 0;
- }
-
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- local = mem_get0 (this->local_pool);
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ if (!wb_inode)
+ goto noqueue;
- frame->local = local;
- local->wb_inode = wb_inode;
+ stub = fop_fstat_stub (frame, wb_fstat_helper, fd, xdata);
+ if (!stub)
+ goto unwind;
- stub = fop_writev_stub (frame, NULL, fd, vector, count, offset, flags,
- iobref, xdata);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
- request = wb_enqueue (wb_inode, stub);
- if (request == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- ret = wb_process_queue (process_frame, wb_inode);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "request queue processing failed");
- }
-
- STACK_DESTROY (process_frame->root);
+ wb_process_queue (wb_inode);
return 0;
unwind:
- local = frame->local;
- frame->local = NULL;
- mem_put (local);
-
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, NULL);
+ STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, NULL, NULL);
- if (process_frame) {
- STACK_DESTROY (process_frame->root);
- }
-
- if (stub) {
+ if (stub)
call_stub_destroy (stub);
- }
-
return 0;
-}
-
-
-int32_t
-wb_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
-{
- wb_local_t *local = NULL;
- wb_inode_t *wb_inode = NULL;
- wb_request_t *request = NULL;
- int32_t ret = 0;
-
- GF_ASSERT (frame);
-
- local = frame->local;
- wb_inode = local->wb_inode;
- request = local->request;
-
- if ((request != NULL) && (wb_inode != NULL)) {
- wb_request_unref (request);
-
- ret = wb_process_queue (frame, wb_inode);
- if (ret == -1) {
- if (errno == ENOMEM) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
-
- gf_log (frame->this->name, GF_LOG_WARNING,
- "request queue processing failed");
- }
- }
-
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref, xdata);
+noqueue:
+ STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
return 0;
}
-static int32_t
-wb_readv_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
+int
+wb_truncate_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
{
- STACK_WIND (frame, wb_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
- xdata);
-
+ STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
return 0;
}
-int32_t
-wb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
+int
+wb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
wb_inode_t *wb_inode = NULL;
- wb_local_t *local = NULL;
- call_stub_t *stub = NULL;
- int32_t ret = -1, op_errno = 0;
- wb_request_t *request = NULL;
-
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, this, unwind,
- op_errno, EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, fd, unwind, op_errno,
- EINVAL);
-
- wb_inode = wb_inode_ctx_get (this, fd->inode);
- if ((!IA_ISDIR (fd->inode->ia_type)) && (wb_inode == NULL)) {
- gf_log (this->name, GF_LOG_WARNING,
- "write behind wb_inode pointer is"
- " not stored in context of inode(%p), returning "
- "EBADFD", fd->inode);
- op_errno = EBADFD;
- goto unwind;
- }
-
- local = mem_get0 (this->local_pool);
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ call_stub_t *stub = NULL;
- local->wb_inode = wb_inode;
+ wb_inode = wb_inode_create (this, loc->inode);
+ if (!wb_inode)
+ goto unwind;
- frame->local = local;
- if (wb_inode) {
- stub = fop_readv_stub (frame, wb_readv_helper, fd, size,
- offset, flags, xdata);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ stub = fop_truncate_stub (frame, wb_truncate_helper, loc,
+ offset, xdata);
+ if (!stub)
+ goto unwind;
- request = wb_enqueue (wb_inode, stub);
- if (request == NULL) {
- call_stub_destroy (stub);
- op_errno = ENOMEM;
- goto unwind;
- }
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
- ret = wb_process_queue (frame, wb_inode);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "request queue processing failed");
- }
- } else {
- STACK_WIND (frame, wb_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd, size, offset, flags, xdata);
- }
+ wb_process_queue (wb_inode);
return 0;
unwind:
- STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0, NULL, NULL,
- NULL);
- return 0;
-}
-
-
-int32_t
-wb_ffr_bg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- STACK_DESTROY (frame->root);
- return 0;
-}
-
+ STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
-int32_t
-wb_ffr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- wb_local_t *local = NULL;
- wb_inode_t *wb_inode = NULL;
-
- GF_ASSERT (frame);
-
- local = frame->local;
- wb_inode = local->wb_inode;
-
- if (wb_inode != NULL) {
- LOCK (&wb_inode->lock);
- {
- if (wb_inode->op_ret == -1) {
- op_ret = wb_inode->op_ret;
- op_errno = wb_inode->op_errno;
-
- wb_inode->op_ret = 0;
- }
- }
- UNLOCK (&wb_inode->lock);
- }
-
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
+ if (stub)
+ call_stub_destroy (stub);
return 0;
}
-int32_t
-wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+int
+wb_ftruncate_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, dict_t *xdata)
{
- wb_conf_t *conf = NULL;
- wb_local_t *local = NULL;
- wb_inode_t *wb_inode = NULL;
- call_frame_t *flush_frame = NULL, *process_frame = NULL;
- int32_t op_ret = -1, op_errno = -1, ret = -1;
-
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, this, unwind,
- op_errno, EINVAL);
-
- conf = this->private;
-
- local = frame->local;
- wb_inode = local->wb_inode;
-
- LOCK (&wb_inode->lock);
- {
- op_ret = wb_inode->op_ret;
- op_errno = wb_inode->op_errno;
- }
- UNLOCK (&wb_inode->lock);
-
- if (local && local->request) {
- process_frame = copy_frame (frame);
- if (process_frame == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- wb_request_unref (local->request);
- }
-
- if (conf->flush_behind) {
- flush_frame = copy_frame (frame);
- if (flush_frame == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- STACK_WIND (flush_frame, wb_ffr_bg_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd, xdata);
- } else {
- STACK_WIND (frame, wb_ffr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd, xdata);
- }
-
- if (process_frame != NULL) {
- ret = wb_process_queue (process_frame, wb_inode);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "request queue processing failed");
- }
-
- STACK_DESTROY (process_frame->root);
- }
-
- if (conf->flush_behind) {
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, NULL);
- }
-
- return 0;
-
-unwind:
- STACK_UNWIND_STRICT (flush, frame, -1, op_errno, NULL);
+ STACK_WIND (frame, default_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
return 0;
}
-int32_t
-wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+int
+wb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- wb_conf_t *conf = NULL;
wb_inode_t *wb_inode = NULL;
- wb_local_t *local = NULL;
call_stub_t *stub = NULL;
- call_frame_t *flush_frame = NULL;
- wb_request_t *request = NULL;
- int32_t ret = 0, op_errno = 0;
-
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, this, unwind,
- op_errno, EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, fd, unwind, op_errno,
- EINVAL);
-
- conf = this->private;
-
- wb_inode = wb_inode_ctx_get (this, fd->inode);
- if ((!IA_ISDIR (fd->inode->ia_type)) && (wb_inode == NULL)) {
- gf_log (this->name, GF_LOG_WARNING,
- "write behind wb_inode pointer is"
- " not stored in context of inode(%p), "
- "returning EBADFD", fd->inode);
- op_errno = EBADFD;
- goto unwind;
- }
-
- if (wb_inode != NULL) {
- local = mem_get0 (this->local_pool);
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- local->wb_inode = wb_inode;
- frame->local = local;
+ wb_inode = wb_inode_create (this, fd->inode);
+ if (!wb_inode)
+ goto unwind;
- stub = fop_flush_stub (frame, wb_flush_helper, fd, xdata);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ stub = fop_ftruncate_stub (frame, wb_ftruncate_helper, fd,
+ offset, xdata);
+ if (!stub)
+ goto unwind;
- request = wb_enqueue (wb_inode, stub);
- if (request == NULL) {
- call_stub_destroy (stub);
- op_errno = ENOMEM;
- goto unwind;
- }
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
- ret = wb_process_queue (frame, wb_inode);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "request queue processing failed");
- }
- } else {
- if (conf->flush_behind) {
- flush_frame = copy_frame (frame);
- if (flush_frame == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- STACK_UNWIND_STRICT (flush, frame, 0, 0, NULL);
-
- STACK_WIND (flush_frame, wb_ffr_bg_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd, xdata);
- } else {
- STACK_WIND (frame, wb_ffr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd, xdata);
- }
- }
+ wb_process_queue (wb_inode);
return 0;
unwind:
- STACK_UNWIND_STRICT (flush, frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-static int32_t
-wb_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
- dict_t *xdata)
-{
- wb_local_t *local = NULL;
- wb_inode_t *wb_inode = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1;
-
- GF_ASSERT (frame);
-
- local = frame->local;
- wb_inode = local->wb_inode;
- request = local->request;
-
- if (wb_inode != NULL) {
- LOCK (&wb_inode->lock);
- {
- if (wb_inode->op_ret == -1) {
- op_ret = wb_inode->op_ret;
- op_errno = wb_inode->op_errno;
-
- wb_inode->op_ret = 0;
- }
- }
- UNLOCK (&wb_inode->lock);
-
- if (request) {
- wb_request_unref (request);
- ret = wb_process_queue (frame, wb_inode);
- if (ret == -1) {
- if (errno == ENOMEM) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
-
- gf_log (this->name, GF_LOG_WARNING,
- "request queue processing failed");
- }
- }
-
- }
-
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+ if (stub)
+ call_stub_destroy (stub);
return 0;
}
-static int32_t
-wb_fsync_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync, dict_t *xdata)
+int
+wb_setattr_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- STACK_WIND (frame, wb_fsync_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
return 0;
}
-int32_t
-wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
- dict_t *xdata)
+int
+wb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
wb_inode_t *wb_inode = NULL;
- wb_local_t *local = NULL;
call_stub_t *stub = NULL;
- wb_request_t *request = NULL;
- int32_t ret = -1, op_errno = 0;
-
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, this, unwind,
- op_errno, EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, fd, unwind,
- op_errno, EINVAL);
- wb_inode = wb_inode_ctx_get (this, fd->inode);
- if (wb_inode == NULL && (!IA_ISDIR (fd->inode->ia_type))) {
- gf_log (this->name, GF_LOG_WARNING,
- "write behind wb_inode pointer is"
- " not stored in context of inode(%p), "
- "returning EBADFD", fd->inode);
- op_errno = EBADFD;
- goto unwind;
- }
+ wb_inode = wb_inode_ctx_get (this, loc->inode);
+ if (!wb_inode)
+ goto noqueue;
- local = mem_get0 (this->local_pool);
- if (local == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- frame->local = local;
- local->wb_inode = wb_inode;
-
- if (wb_inode) {
- stub = fop_fsync_stub (frame, wb_fsync_helper, fd, datasync,
- xdata);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ stub = fop_setattr_stub (frame, wb_setattr_helper, loc, stbuf,
+ valid, xdata);
+ if (!stub)
+ goto unwind;
- request = wb_enqueue (wb_inode, stub);
- if (request == NULL) {
- op_errno = ENOMEM;
- call_stub_destroy (stub);
- goto unwind;
- }
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
- ret = wb_process_queue (frame, wb_inode);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "request queue processing failed");
- }
- } else {
- STACK_WIND (frame, wb_fsync_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, datasync,
- xdata);
- }
+ wb_process_queue (wb_inode);
return 0;
-
unwind:
- STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-wb_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- gf_dirent_t *entry = NULL;
-
- if (op_ret <= 0) {
- goto unwind;
- }
+ STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
- list_for_each_entry (entry, &entries->list, list) {
- if (!entry->inode)
- continue;
- wb_inode_create (this, entry->inode);
- }
+ if (stub)
+ call_stub_destroy (stub);
+ return 0;
-unwind:
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+noqueue:
+ STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
return 0;
}
-int32_t
-wb_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t off, dict_t *xdata)
+int
+wb_fsetattr_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- STACK_WIND (frame, wb_readdirp_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+ STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
return 0;
}
-int32_t
-wb_release (xlator_t *this, fd_t *fd)
+int
+wb_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- uint64_t wb_file_ptr = 0;
- wb_file_t *wb_file = NULL;
+ wb_inode_t *wb_inode = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("write-behind", this, out);
- GF_VALIDATE_OR_GOTO (this->name, fd, out);
+ wb_inode = wb_inode_ctx_get (this, fd->inode);
+ if (!wb_inode)
+ goto noqueue;
- fd_ctx_del (fd, this, &wb_file_ptr);
- wb_file = (wb_file_t *)(long) wb_file_ptr;
+ stub = fop_fsetattr_stub (frame, wb_fsetattr_helper, fd, stbuf,
+ valid, xdata);
+ if (!stub)
+ goto unwind;
- GF_FREE (wb_file);
+ if (!wb_enqueue (wb_inode, stub))
+ goto unwind;
-out:
+ wb_process_queue (wb_inode);
+
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+
+ if (stub)
+ call_stub_destroy (stub);
+ return 0;
+
+noqueue:
+ STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
return 0;
}
-int32_t
+int
wb_forget (xlator_t *this, inode_t *inode)
{
uint64_t tmp = 0;
@@ -2917,15 +1639,16 @@ wb_forget (xlator_t *this, inode_t *inode)
wb_inode = (wb_inode_t *)(long)tmp;
- if (wb_inode != NULL) {
- LOCK (&wb_inode->lock);
- {
- GF_ASSERT (list_empty (&wb_inode->request));
- }
- UNLOCK (&wb_inode->lock);
+ if (!wb_inode)
+ return 0;
- wb_inode_destroy (wb_inode);
- }
+ LOCK (&wb_inode->lock);
+ {
+ GF_ASSERT (list_empty (&wb_inode->todo));
+ GF_ASSERT (list_empty (&wb_inode->liability));
+ GF_ASSERT (list_empty (&wb_inode->temptation));
+ }
+ UNLOCK (&wb_inode->lock);
return 0;
}
@@ -2950,10 +1673,8 @@ wb_priv_dump (xlator_t *this)
gf_proc_dump_write ("aggregate_size", "%d", conf->aggregate_size);
gf_proc_dump_write ("window_size", "%d", conf->window_size);
- gf_proc_dump_write ("enable_O_SYNC", "%d", conf->enable_O_SYNC);
gf_proc_dump_write ("flush_behind", "%d", conf->flush_behind);
- gf_proc_dump_write ("enable_trickling_writes", "%d",
- conf->enable_trickling_writes);
+ gf_proc_dump_write ("trickling_writes", "%d", conf->trickling_writes);
ret = 0;
out:
@@ -2962,48 +1683,45 @@ out:
void
-__wb_dump_requests (struct list_head *head, char *prefix, char passive)
+__wb_dump_requests (struct list_head *head, char *prefix)
{
char key[GF_DUMP_MAX_BUF_LEN] = {0, };
char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }, flag = 0;
- wb_request_t *request = NULL;
+ wb_request_t *req = NULL;
- list_for_each_entry (request, head, list) {
- gf_proc_dump_build_key (key, prefix, passive ? "passive-request"
- : "active-request");
+ list_for_each_entry (req, head, all) {
gf_proc_dump_build_key (key_prefix, key,
- (char *)gf_fop_list[request->fop]);
+ (char *)gf_fop_list[req->fop]);
gf_proc_dump_add_section(key_prefix);
- gf_proc_dump_write ("request-ptr", "%p", request);
+ gf_proc_dump_write ("request-ptr", "%p", req);
- gf_proc_dump_write ("refcount", "%d", request->refcount);
+ gf_proc_dump_write ("refcount", "%d", req->refcount);
- if (request->fop == GF_FOP_WRITE) {
- flag = request->flags.write_request.stack_wound;
- gf_proc_dump_write ("stack_wound", "%d", flag);
+ if (list_empty (&req->todo))
+ gf_proc_dump_write ("wound", "yes");
+ else
+ gf_proc_dump_write ("wound", "no");
+ if (req->fop == GF_FOP_WRITE) {
gf_proc_dump_write ("size", "%"GF_PRI_SIZET,
- request->write_size);
+ req->write_size);
gf_proc_dump_write ("offset", "%"PRId64,
- request->stub->args.writev.off);
+ req->stub->args.writev.off);
- flag = request->flags.write_request.write_behind;
- gf_proc_dump_write ("write_behind", "%d", flag);
+ flag = req->ordering.lied;
+ gf_proc_dump_write ("lied", "%d", flag);
- flag = request->flags.write_request.got_reply;
- gf_proc_dump_write ("got_reply", "%d", flag);
+ flag = req->ordering.append;
+ gf_proc_dump_write ("append", "%d", flag);
- flag = request->flags.write_request.virgin;
- gf_proc_dump_write ("virgin", "%d", flag);
+ flag = req->ordering.fulfilled;
+ gf_proc_dump_write ("fulfilled", "%d", flag);
- flag = request->flags.write_request.flush_all;
- gf_proc_dump_write ("flush_all", "%d", flag);
- } else {
- flag = request->flags.other_requests.marked_for_resume;
- gf_proc_dump_write ("marked_for_resume", "%d", flag);
+ flag = req->ordering.go;
+ gf_proc_dump_write ("go", "%d", flag);
}
}
}
@@ -3047,22 +1765,14 @@ wb_inode_dump (xlator_t *this, inode_t *inode)
gf_proc_dump_write ("window_current", "%"GF_PRI_SIZET,
wb_inode->window_current);
- gf_proc_dump_write ("aggregate_current", "%"GF_PRI_SIZET,
- wb_inode->aggregate_current);
-
gf_proc_dump_write ("op_ret", "%d", wb_inode->op_ret);
gf_proc_dump_write ("op_errno", "%d", wb_inode->op_errno);
LOCK (&wb_inode->lock);
{
- if (!list_empty (&wb_inode->request)) {
- __wb_dump_requests (&wb_inode->request, key_prefix, 0);
- }
-
- if (!list_empty (&wb_inode->passive_requests)) {
- __wb_dump_requests (&wb_inode->passive_requests,
- key_prefix, 1);
+ if (!list_empty (&wb_inode->all)) {
+ __wb_dump_requests (&wb_inode->all, key_prefix);
}
}
UNLOCK (&wb_inode->lock);
@@ -3074,65 +1784,6 @@ out:
int
-wb_fd_dump (xlator_t *this, fd_t *fd)
-{
- wb_file_t *wb_file = NULL;
- char *path = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, };
- int ret = -1;
- gf_boolean_t section_added = _gf_false;
-
- gf_proc_dump_build_key (key_prefix, "xlator.performance.write-behind",
- "wb_file");
-
- if ((fd == NULL) || (this == NULL)) {
- goto out;
- }
-
- ret = TRY_LOCK(&fd->lock);
- if (ret)
- goto out;
- {
- wb_file = __wb_fd_ctx_get (this, fd);
- }
- UNLOCK(&fd->lock);
-
- if (wb_file == NULL) {
- goto out;
- }
-
- gf_proc_dump_add_section (key_prefix);
- section_added = _gf_true;
-
- __inode_path (fd->inode, NULL, &path);
- if (path != NULL) {
- gf_proc_dump_write ("path", "%s", path);
- GF_FREE (path);
- }
-
- gf_proc_dump_write ("fd", "%p", fd);
-
- gf_proc_dump_write ("flags", "%d", wb_file->flags);
-
- gf_proc_dump_write ("flags", "%s",
- (wb_file->flags & O_APPEND) ? "O_APPEND"
- : "!O_APPEND");
-
- gf_proc_dump_write ("disabled", "%d", wb_file->disabled);
-
-out:
- if (ret && fd && this) {
- if (_gf_false == section_added)
- gf_proc_dump_add_section (key_prefix);
- gf_proc_dump_write ("Unable to dump the fd",
- "(Lock acquisition failed) %s",
- uuid_utoa (fd->inode->gfid));
- }
- return 0;
-}
-
-
-int32_t
mem_acct_init (xlator_t *this)
{
int ret = -1;
@@ -3166,6 +1817,14 @@ reconfigure (xlator_t *this, dict_t *options)
GF_OPTION_RECONF ("flush-behind", conf->flush_behind, options, bool,
out);
+ GF_OPTION_RECONF ("trickling-writes", conf->trickling_writes, options,
+ bool, out);
+
+ GF_OPTION_RECONF ("strict-O_DIRECT", conf->strict_O_DIRECT, options,
+ bool, out);
+
+ GF_OPTION_RECONF ("strict-write-ordering", conf->strict_write_ordering,
+ options, bool, out);
ret = 0;
out:
return ret;
@@ -3196,8 +1855,6 @@ init (xlator_t *this)
goto out;
}
- GF_OPTION_INIT("enable-O_SYNC", conf->enable_O_SYNC, bool, out);
-
/* configure 'options aggregate-size <size>' */
conf->aggregate_size = WB_AGGREGATE_SIZE;
@@ -3223,16 +1880,12 @@ init (xlator_t *this)
/* configure 'option flush-behind <on/off>' */
GF_OPTION_INIT ("flush-behind", conf->flush_behind, bool, out);
- GF_OPTION_INIT ("enable-trickling-writes",
- conf->enable_trickling_writes, bool, out);
+ GF_OPTION_INIT ("trickling-writes", conf->trickling_writes, bool, out);
- this->local_pool = mem_pool_new (wb_local_t, 64);
- if (!this->local_pool) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto out;
- }
+ GF_OPTION_INIT ("strict-O_DIRECT", conf->strict_O_DIRECT, bool, out);
+
+ GF_OPTION_INIT ("strict-write-ordering", conf->strict_write_ordering,
+ bool, out);
this->private = conf;
ret = 0;
@@ -3266,10 +1919,7 @@ out:
struct xlator_fops fops = {
- .lookup = wb_lookup,
.writev = wb_writev,
- .open = wb_open,
- .create = wb_create,
.readv = wb_readv,
.flush = wb_flush,
.fsync = wb_fsync,
@@ -3278,20 +1928,21 @@ struct xlator_fops fops = {
.truncate = wb_truncate,
.ftruncate = wb_ftruncate,
.setattr = wb_setattr,
- .readdirp = wb_readdirp,
+ .fsetattr = wb_fsetattr,
};
+
struct xlator_cbks cbks = {
.forget = wb_forget,
- .release = wb_release,
};
+
struct xlator_dumpops dumpops = {
.priv = wb_priv_dump,
.inodectx = wb_inode_dump,
- .fdctx = wb_fd_dump,
};
+
struct volume_options options[] = {
{ .key = {"flush-behind"},
.type = GF_OPTION_TYPE_BOOL,
@@ -3300,7 +1951,7 @@ struct volume_options options[] = {
"translator to perform flush in background, by "
"returning success (or any errors, if any of "
"previous writes were failed) to application even "
- "before flush is sent to backend filesystem. "
+ "before flush FOP is sent to backend filesystem. "
},
{ .key = {"cache-size", "window-size"},
.type = GF_OPTION_TYPE_SIZET,
@@ -3309,21 +1960,20 @@ struct volume_options options[] = {
.default_value = "1MB",
.description = "Size of the write-behind buffer for a single file "
"(inode)."
-
- },
- { .key = {"disable-for-first-nbytes"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = 0,
- .max = 1 * GF_UNIT_MB,
- .default_value = "0",
},
- { .key = {"enable-O_SYNC"},
+ { .key = {"trickling-writes"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
},
- { .key = {"enable-trickling-writes"},
+ { .key = {"strict-O_DIRECT"},
.type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
+ .default_value = "off",
+ },
+ { .key = {"strict-write-ordering"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Do not let later writes overtake earlier writes even "
+ "if they do not overlap",
},
{ .key = {NULL} },
};