diff options
author | Raghavendra G <raghavendra@gluster.com> | 2012-07-31 12:41:13 +0530 |
---|---|---|
committer | Anand Avati <avati@redhat.com> | 2012-08-02 12:38:27 -0700 |
commit | a89871f9c7a4c42e87f2d856b51f33ceaf1cf163 (patch) | |
tree | bdb8bef6d8b50e3ef3465e7dee8cc12ca45e9f74 /xlators/performance/write-behind/src | |
parent | b25eba06d5d50543975d24a0dc0a3a1934faf40b (diff) |
performance/write-behind: maintain a per-inode request queue instead of maintaining per-fd
Path-based operations such as stat, whose results are affected by
pending writes, have to be ordered with respect to those writes. This
ordering falls out naturally when the request queue is maintained per
inode, rather than per open fd.
Change-Id: Ibdde3b81366f642d07531632fc9062cb44fad2e7
BUG: 765443
Signed-off-by: Raghavendra G <raghavendra@gluster.com>
Reviewed-on: http://review.gluster.com/712
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Anand Avati <avati@redhat.com>
Diffstat (limited to 'xlators/performance/write-behind/src')
-rw-r--r-- | xlators/performance/write-behind/src/write-behind-mem-types.h | 1 | ||||
-rw-r--r-- | xlators/performance/write-behind/src/write-behind.c | 1364 |
2 files changed, 769 insertions, 596 deletions
diff --git a/xlators/performance/write-behind/src/write-behind-mem-types.h b/xlators/performance/write-behind/src/write-behind-mem-types.h index 33b8d93c672..f64f429ce22 100644 --- a/xlators/performance/write-behind/src/write-behind-mem-types.h +++ b/xlators/performance/write-behind/src/write-behind-mem-types.h @@ -19,6 +19,7 @@ enum gf_wb_mem_types_ { gf_wb_mt_wb_request_t, gf_wb_mt_iovec, gf_wb_mt_wb_conf_t, + gf_wb_mt_wb_inode_t, gf_wb_mt_end }; #endif diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c index ddd8a6ba244..2cec7132431 100644 --- a/xlators/performance/write-behind/src/write-behind.c +++ b/xlators/performance/write-behind/src/write-behind.c @@ -35,24 +35,28 @@ typedef struct list_head list_head_t; struct wb_conf; struct wb_page; -struct wb_file; +struct wb_inode; -typedef struct wb_file { - int disabled; - uint64_t disable_till; +typedef struct wb_inode { size_t window_conf; size_t window_current; - int32_t flags; size_t aggregate_current; - int32_t refcount; int32_t op_ret; int32_t op_errno; list_head_t request; list_head_t passive_requests; - fd_t *fd; gf_lock_t lock; xlator_t *this; -}wb_file_t; +}wb_inode_t; + +typedef struct wb_file { + int32_t flags; + int disabled; + fd_t *fd; + size_t disable_till; + enum _gf_boolean dont_wind; +} wb_file_t; + typedef struct wb_request { list_head_t list; @@ -62,7 +66,7 @@ typedef struct wb_request { call_stub_t *stub; size_t write_size; int32_t refcount; - wb_file_t *file; + wb_inode_t *wb_inode; glusterfs_fop_t fop; gf_lkowner_t lk_owner; union { @@ -99,27 +103,90 @@ struct wb_conf { typedef struct wb_local { list_head_t winds; int32_t flags; - struct wb_file *file; + fd_t *fd; wb_request_t *request; int op_ret; int op_errno; call_frame_t *frame; int32_t reply_count; + wb_inode_t *wb_inode; } wb_local_t; typedef struct wb_conf wb_conf_t; typedef struct wb_page wb_page_t; int32_t -wb_process_queue (call_frame_t *frame, wb_file_t *file); 
+wb_process_queue (call_frame_t *frame, wb_inode_t *wb_inode); ssize_t -wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds); +wb_sync (call_frame_t *frame, wb_inode_t *wb_inode, list_head_t *winds); ssize_t __wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_size, char enable_trickling_writes); +wb_inode_t * +__wb_inode_ctx_get (xlator_t *this, inode_t *inode) +{ + uint64_t value = 0; + wb_inode_t *wb_inode = NULL; + + __inode_ctx_get (inode, this, &value); + wb_inode = (wb_inode_t *)(unsigned long) value; + + return wb_inode; +} + + +wb_inode_t * +wb_inode_ctx_get (xlator_t *this, inode_t *inode) +{ + wb_inode_t *wb_inode = NULL; + + GF_VALIDATE_OR_GOTO ("write-behind", this, out); + GF_VALIDATE_OR_GOTO (this->name, inode, out); + + LOCK (&inode->lock); + { + wb_inode = __wb_inode_ctx_get (this, inode); + } + UNLOCK (&inode->lock); +out: + return wb_inode; +} + + +wb_file_t * +__wb_fd_ctx_get (xlator_t *this, fd_t *fd) +{ + wb_file_t *wb_file = NULL; + uint64_t value = 0; + + fd_ctx_get (fd, this, &value); + wb_file = (wb_file_t *)(unsigned long)value; + + return wb_file; +} + + +wb_file_t * +wb_fd_ctx_get (xlator_t *this, fd_t *fd) +{ + wb_file_t *wb_file = NULL; + + GF_VALIDATE_OR_GOTO ("write-behind", this, out); + GF_VALIDATE_OR_GOTO (this->name, fd, out); + + LOCK (&fd->lock); + { + wb_file = __wb_fd_ctx_get (this, fd); + } + UNLOCK (&fd->lock); + +out: + return wb_file; +} + /* Below is a succinct explanation of the code deciding whether two regions overlap, from Pavan <tcp@gluster.com>. 
@@ -147,7 +214,8 @@ __wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_size, static inline char wb_requests_overlap (wb_request_t *request1, wb_request_t *request2) { - off_t r1_start = 0, r1_end = 0, r2_start = 0, r2_end = 0; + off_t r1_start = 0, r1_end = 0, r2_start = 0, r2_end = 0; + enum _gf_boolean do_overlap = 0; r1_start = request1->stub->args.writev.off; r1_end = r1_start + iov_length (request1->stub->args.writev.vector, @@ -157,7 +225,9 @@ wb_requests_overlap (wb_request_t *request1, wb_request_t *request2) r2_end = r2_start + iov_length (request2->stub->args.writev.vector, request2->stub->args.writev.count); - return ((r1_end >= r2_start) && (r2_end >= r1_start)); + do_overlap = ((r1_end >= r2_start) && (r2_end >= r1_start)); + + return do_overlap; } @@ -217,18 +287,18 @@ out: static int wb_request_unref (wb_request_t *this) { - wb_file_t *file = NULL; - int ret = -1; + wb_inode_t *wb_inode = NULL; + int ret = -1; GF_VALIDATE_OR_GOTO ("write-behind", this, out); - file = this->file; + wb_inode = this->wb_inode; - LOCK (&file->lock); + LOCK (&wb_inode->lock); { ret = __wb_request_unref (this); } - UNLOCK (&file->lock); + UNLOCK (&wb_inode->lock); out: return ret; @@ -257,16 +327,16 @@ out: wb_request_t * wb_request_ref (wb_request_t *this) { - wb_file_t *file = NULL; + wb_inode_t *wb_inode = NULL; GF_VALIDATE_OR_GOTO ("write-behind", this, out); - file = this->file; - LOCK (&file->lock); + wb_inode = this->wb_inode; + LOCK (&wb_inode->lock); { this = __wb_request_ref (this); } - UNLOCK (&file->lock); + UNLOCK (&wb_inode->lock); out: return this; @@ -274,7 +344,7 @@ out: wb_request_t * -wb_enqueue (wb_file_t *file, call_stub_t *stub) +wb_enqueue (wb_inode_t *wb_inode, call_stub_t *stub) { wb_request_t *request = NULL, *tmp = NULL; call_frame_t *frame = NULL; @@ -282,8 +352,8 @@ wb_enqueue (wb_file_t *file, call_stub_t *stub) struct iovec *vector = NULL; int32_t count = 0; - GF_VALIDATE_OR_GOTO ("write-behind", file, out); - 
GF_VALIDATE_OR_GOTO (file->this->name, stub, out); + GF_VALIDATE_OR_GOTO ("write-behind", wb_inode, out); + GF_VALIDATE_OR_GOTO (wb_inode->this->name, stub, out); request = GF_CALLOC (1, sizeof (*request), gf_wb_mt_wb_request_t); if (request == NULL) { @@ -296,7 +366,7 @@ wb_enqueue (wb_file_t *file, call_stub_t *stub) INIT_LIST_HEAD (&request->other_requests); request->stub = stub; - request->file = file; + request->wb_inode = wb_inode; request->fop = stub->fop; frame = stub->frame; @@ -320,9 +390,9 @@ wb_enqueue (wb_file_t *file, call_stub_t *stub) request->lk_owner = frame->root->lk_owner; - LOCK (&file->lock); + LOCK (&wb_inode->lock); { - list_add_tail (&request->list, &file->request); + list_add_tail (&request->list, &wb_inode->request); if (stub->fop == GF_FOP_WRITE) { /* reference for stack winding */ __wb_request_ref (request); @@ -330,9 +400,9 @@ wb_enqueue (wb_file_t *file, call_stub_t *stub) /* reference for stack unwinding */ __wb_request_ref (request); - file->aggregate_current += request->write_size; + wb_inode->aggregate_current += request->write_size; } else { - list_for_each_entry (tmp, &file->request, list) { + list_for_each_entry (tmp, &wb_inode->request, list) { if (tmp->stub && tmp->stub->fop == GF_FOP_WRITE) { tmp->flags.write_request.flush_all = 1; @@ -343,13 +413,45 @@ wb_enqueue (wb_file_t *file, call_stub_t *stub) __wb_request_ref (request); } } - UNLOCK (&file->lock); + UNLOCK (&wb_inode->lock); out: return request; } +wb_inode_t * +__wb_inode_create (xlator_t *this, inode_t *inode) +{ + wb_inode_t *wb_inode = NULL; + wb_conf_t *conf = NULL; + + GF_VALIDATE_OR_GOTO ("write-behind", this, out); + GF_VALIDATE_OR_GOTO (this->name, inode, out); + + conf = this->private; + + wb_inode = GF_CALLOC (1, sizeof (*wb_inode), gf_wb_mt_wb_inode_t); + if (wb_inode == NULL) { + goto out; + } + + INIT_LIST_HEAD (&wb_inode->request); + INIT_LIST_HEAD (&wb_inode->passive_requests); + + wb_inode->this = this; + + wb_inode->window_conf = conf->window_size; 
+ + LOCK_INIT (&wb_inode->lock); + + __inode_ctx_put (inode, this, (uint64_t)(unsigned long)wb_inode); + +out: + return wb_inode; +} + + wb_file_t * wb_file_create (xlator_t *this, fd_t *fd, int32_t flags) { @@ -366,60 +468,105 @@ wb_file_create (xlator_t *this, fd_t *fd, int32_t flags) goto out; } - INIT_LIST_HEAD (&file->request); - INIT_LIST_HEAD (&file->passive_requests); - /* fd_ref() not required, file should never decide the existence of an fd */ file->fd= fd; - file->disable_till = conf->disable_till; - file->this = this; - file->refcount = 1; - file->window_conf = conf->window_size; - file->flags = flags; + /* If O_DIRECT then, we disable chaching */ + if (((flags & O_DIRECT) == O_DIRECT) + || ((flags & O_ACCMODE) == O_RDONLY) + || (((flags & O_SYNC) == O_SYNC) + && conf->enable_O_SYNC == _gf_true)) { + file->disabled = 1; + } - LOCK_INIT (&file->lock); + file->flags = flags; - fd_ctx_set (fd, this, (uint64_t)(long)file); + fd_ctx_set (fd, this, (uint64_t)(unsigned long)file); out: return file; } -void -wb_file_destroy (wb_file_t *file) +wb_inode_t * +wb_inode_create (xlator_t *this, inode_t *inode) { - int32_t refcount = 0; + wb_inode_t *wb_inode = NULL; - GF_VALIDATE_OR_GOTO ("write-behind", file, out); + GF_VALIDATE_OR_GOTO ("write-behind", this, out); + GF_VALIDATE_OR_GOTO (this->name, inode, out); - LOCK (&file->lock); + LOCK (&inode->lock); { - refcount = --file->refcount; + wb_inode = __wb_inode_ctx_get (this, inode); + if (wb_inode == NULL) { + wb_inode = __wb_inode_create (this, inode); + } } - UNLOCK (&file->lock); + UNLOCK (&inode->lock); - if (!refcount){ - LOCK_DESTROY (&file->lock); - GF_FREE (file); - } +out: + return wb_inode; +} + +void +wb_inode_destroy (wb_inode_t *wb_inode) +{ + GF_VALIDATE_OR_GOTO ("write-behind", wb_inode, out); + + LOCK_DESTROY (&wb_inode->lock); + GF_FREE (wb_inode); out: return; } int32_t +wb_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct 
iatt *buf, dict_t *dict, struct iatt *postparent) +{ + wb_inode_t *wb_inode = NULL; + + if (op_ret < 0) { + goto unwind; + } + + wb_inode = wb_inode_create (this, inode); + if (wb_inode == NULL) { + op_ret = -1; + op_errno = ENOMEM; + } + +unwind: + STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, + dict, postparent); + + return 0; +} + + +int32_t +wb_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xdata) +{ + STACK_WIND (frame, wb_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + return 0; +} + + +int32_t wb_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { wb_local_t *local = NULL; list_head_t *winds = NULL; - wb_file_t *file = NULL; + wb_inode_t *wb_inode = NULL; wb_request_t *request = NULL, *dummy = NULL; wb_local_t *per_request_local = NULL; int32_t ret = -1; @@ -432,10 +579,12 @@ wb_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, local = frame->local; winds = &local->winds; - file = local->file; - GF_VALIDATE_OR_GOTO (this->name, file, out); + fd = local->fd; + + wb_inode = wb_inode_ctx_get (this, fd->inode); + GF_VALIDATE_OR_GOTO (this->name, wb_inode, out); - LOCK (&file->lock); + LOCK (&wb_inode->lock); { list_for_each_entry_safe (request, dummy, winds, winds) { request->flags.write_request.got_reply = 1; @@ -448,7 +597,7 @@ wb_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, } if (request->flags.write_request.write_behind) { - file->window_current -= request->write_size; + wb_inode->window_current -= request->write_size; total_write_size += request->write_size; } @@ -456,8 +605,8 @@ wb_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, } if (op_ret == -1) { - file->op_ret = op_ret; - file->op_errno = op_errno; + wb_inode->op_ret = op_ret; + wb_inode->op_errno = op_errno; } else if (op_ret < 
total_write_size) { /* * We've encountered a short write, for whatever reason. @@ -467,23 +616,21 @@ wb_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, * TODO: Retry the write so we can potentially capture * a real error condition (i.e., ENOSPC). */ - file->op_ret = -1; - file->op_errno = EIO; + wb_inode->op_ret = -1; + wb_inode->op_errno = EIO; } - - fd = file->fd; } - UNLOCK (&file->lock); + UNLOCK (&wb_inode->lock); - ret = wb_process_queue (frame, file); + ret = wb_process_queue (frame, wb_inode); if (ret == -1) { if (errno == ENOMEM) { - LOCK (&file->lock); + LOCK (&wb_inode->lock); { - file->op_ret = -1; - file->op_errno = ENOMEM; + wb_inode->op_ret = -1; + wb_inode->op_errno = ENOMEM; } - UNLOCK (&file->lock); + UNLOCK (&wb_inode->lock); } gf_log (this->name, GF_LOG_WARNING, @@ -493,6 +640,12 @@ wb_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, /* safe place to do fd_unref */ fd_unref (fd); + frame->local = NULL; + + if (local != NULL) { + mem_put (frame->local); + } + STACK_DESTROY (frame->root); out: @@ -501,7 +654,7 @@ out: ssize_t -wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds) +wb_sync (call_frame_t *frame, wb_inode_t *wb_inode, list_head_t *winds) { wb_request_t *dummy = NULL, *request = NULL; wb_request_t *first_request = NULL, *next = NULL; @@ -519,15 +672,15 @@ wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds) off_t next_offset_expected = 0; gf_lkowner_t lk_owner = {0, }; - GF_VALIDATE_OR_GOTO_WITH_ERROR ((file ? file->this->name + GF_VALIDATE_OR_GOTO_WITH_ERROR ((wb_inode ? 
wb_inode->this->name : "write-behind"), frame, out, bytes, -1); - GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, file, out, bytes, + GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, wb_inode, out, bytes, -1); GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, winds, out, bytes, -1); - conf = file->this->private; + conf = wb_inode->this->private; list_for_each_entry (request, winds, winds) { total_count += request->stub->args.writev.count; if (total_count > 0) { @@ -536,8 +689,8 @@ wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds) } if (total_count == 0) { - gf_log (file->this->name, GF_LOG_TRACE, "no vectors are to be" - "synced"); + gf_log (wb_inode->this->name, GF_LOG_TRACE, + "no vectors are to be synced"); goto out; } @@ -604,7 +757,10 @@ wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds) || ((current_size + next->write_size) > conf->aggregate_size) || (next_offset_expected != next->stub->args.writev.off) - || (!is_same_lkowner (&lk_owner, &next->lk_owner))) { + || (!is_same_lkowner (&lk_owner, &next->lk_owner)) + || (request->stub->args.writev.fd + != next->stub->args.writev.fd)) { + sync_frame = copy_frame (frame); if (sync_frame == NULL) { bytes = -1; @@ -615,15 +771,8 @@ wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds) frame->root->lk_owner = lk_owner; sync_frame->local = local; - local->file = file; - LOCK (&file->lock); - { - fd = file->fd; - } - UNLOCK (&file->lock); - - fd_ref (fd); + local->fd = fd = fd_ref (request->stub->args.writev.fd); bytes += current_size; STACK_WIND (sync_frame, wb_sync_cbk, @@ -682,13 +831,13 @@ out: } } - if (file != NULL) { - LOCK (&file->lock); + if (wb_inode != NULL) { + LOCK (&wb_inode->lock); { - file->op_ret = -1; - file->op_errno = op_errno; + wb_inode->op_ret = -1; + wb_inode->op_errno = op_errno; } - UNLOCK (&file->lock); + UNLOCK (&wb_inode->lock); } } @@ -703,15 +852,14 @@ wb_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, 
wb_local_t *local = NULL; wb_request_t *request = NULL; call_frame_t *process_frame = NULL; - wb_file_t *file = NULL; + wb_inode_t *wb_inode = NULL; int32_t ret = -1; - fd_t *fd = NULL; GF_ASSERT (frame); GF_ASSERT (this); local = frame->local; - file = local->file; + wb_inode = local->wb_inode; request = local->request; if (request) { @@ -729,15 +877,15 @@ wb_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, } if (process_frame != NULL) { - ret = wb_process_queue (process_frame, file); + ret = wb_process_queue (process_frame, wb_inode); if (ret == -1) { - if ((errno == ENOMEM) && (file != NULL)) { - LOCK (&file->lock); + if ((errno == ENOMEM) && (wb_inode != NULL)) { + LOCK (&wb_inode->lock); { - file->op_ret = -1; - file->op_errno = ENOMEM; + wb_inode->op_ret = -1; + wb_inode->op_errno = ENOMEM; } - UNLOCK (&file->lock); + UNLOCK (&wb_inode->lock); } gf_log (this->name, GF_LOG_WARNING, @@ -747,16 +895,6 @@ wb_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, STACK_DESTROY (process_frame->root); } - if (file) { - LOCK (&file->lock); - { - fd = file->fd; - } - UNLOCK (&file->lock); - - fd_unref (fd); - } - return 0; } @@ -776,29 +914,18 @@ wb_stat_helper (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) int32_t wb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - wb_file_t *file = NULL; - fd_t *iter_fd = NULL; - wb_local_t *local = NULL; - uint64_t tmp_file = 0; - call_stub_t *stub = NULL; - wb_request_t *request = NULL; - int32_t ret = -1, op_errno = EINVAL; + wb_inode_t *wb_inode = NULL; + wb_local_t *local = NULL; + call_stub_t *stub = NULL; + wb_request_t *request = NULL; + int32_t ret = -1, op_errno = EINVAL; GF_ASSERT (frame); GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); GF_VALIDATE_OR_GOTO (frame->this->name, loc, unwind); if (loc->inode) { - /* FIXME: fd_lookup extends life of fd till stat returns */ - iter_fd = fd_lookup (loc->inode, frame->root->pid); - 
if (iter_fd) { - if (!fd_ctx_get (iter_fd, this, &tmp_file)) { - file = (wb_file_t *)(long)tmp_file; - } else { - fd_unref (iter_fd); - iter_fd = NULL; - } - } + wb_inode = wb_inode_ctx_get (this, loc->inode); } local = mem_get0 (this->local_pool); @@ -807,24 +934,24 @@ wb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) goto unwind; } - local->file = file; + local->wb_inode = wb_inode; frame->local = local; - if (file) { + if (wb_inode) { stub = fop_stat_stub (frame, wb_stat_helper, loc, xdata); if (stub == NULL) { op_errno = ENOMEM; goto unwind; } - request = wb_enqueue (file, stub); + request = wb_enqueue (wb_inode, stub); if (request == NULL) { op_errno = ENOMEM; goto unwind; } - ret = wb_process_queue (frame, file); + ret = wb_process_queue (frame, wb_inode); if (ret == -1) { gf_log (this->name, GF_LOG_WARNING, "request queue processing failed"); @@ -842,10 +969,6 @@ unwind: call_stub_destroy (stub); } - if (iter_fd != NULL) { - fd_unref (iter_fd); - } - return 0; } @@ -854,20 +977,20 @@ int32_t wb_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) { - wb_local_t *local = NULL; - wb_request_t *request = NULL; - wb_file_t *file = NULL; - int32_t ret = -1; + wb_local_t *local = NULL; + wb_request_t *request = NULL; + wb_inode_t *wb_inode = NULL; + int32_t ret = -1; GF_ASSERT (frame); local = frame->local; - file = local->file; + wb_inode = local->wb_inode; request = local->request; - if ((file != NULL) && (request != NULL)) { + if ((wb_inode != NULL) && (request != NULL)) { wb_request_unref (request); - ret = wb_process_queue (frame, file); + ret = wb_process_queue (frame, wb_inode); if (ret == -1) { if (errno == ENOMEM) { op_ret = -1; @@ -900,30 +1023,23 @@ wb_fstat_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) int32_t wb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - wb_file_t *file = NULL; - wb_local_t *local = 
NULL; - uint64_t tmp_file = 0; - call_stub_t *stub = NULL; - wb_request_t *request = NULL; - int32_t ret = -1; - int op_errno = EINVAL; + wb_inode_t *wb_inode = NULL; + wb_local_t *local = NULL; + call_stub_t *stub = NULL; + wb_request_t *request = NULL; + int32_t ret = -1; + int op_errno = EINVAL; GF_ASSERT (frame); GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if ((!IA_ISDIR (fd->inode->ia_type)) - && fd_ctx_get (fd, this, &tmp_file)) { - file = wb_file_create (this, fd, 0); - } else { - file = (wb_file_t *)(long)tmp_file; - if ((!IA_ISDIR (fd->inode->ia_type)) && (file == NULL)) { - gf_log (this->name, GF_LOG_WARNING, - "wb_file not found for fd %p", fd); - op_errno = EBADFD; - goto unwind; - } + wb_inode = wb_inode_ctx_get (this, fd->inode); + if ((!IA_ISDIR (fd->inode->ia_type)) && (wb_inode == NULL)) { + gf_log (this->name, GF_LOG_WARNING, + "wb_inode not found for fd %p", fd); + op_errno = EBADFD; + goto unwind; } local = mem_get0 (this->local_pool); @@ -932,18 +1048,18 @@ wb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) goto unwind; } - local->file = file; + local->wb_inode = wb_inode; frame->local = local; - if (file) { + if (wb_inode) { stub = fop_fstat_stub (frame, wb_fstat_helper, fd, xdata); if (stub == NULL) { op_errno = ENOMEM; goto unwind; } - request = wb_enqueue (file, stub); + request = wb_enqueue (wb_inode, stub); if (request == NULL) { op_errno = ENOMEM; goto unwind; @@ -952,7 +1068,7 @@ wb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) /* FIXME:should the request queue be emptied in case of error? 
*/ - ret = wb_process_queue (frame, file); + ret = wb_process_queue (frame, wb_inode); if (ret == -1) { gf_log (this->name, GF_LOG_WARNING, "request queue processing failed"); @@ -982,18 +1098,17 @@ wb_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, { wb_local_t *local = NULL; wb_request_t *request = NULL; - wb_file_t *file = NULL; + wb_inode_t *wb_inode = NULL; call_frame_t *process_frame = NULL; int32_t ret = -1; - fd_t *fd = NULL; GF_ASSERT (frame); local = frame->local; - file = local->file; + wb_inode = local->wb_inode; request = local->request; - if ((request != NULL) && (file != NULL)) { + if ((request != NULL) && (wb_inode != NULL)) { process_frame = copy_frame (frame); if (process_frame == NULL) { op_ret = -1; @@ -1009,15 +1124,15 @@ wb_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } if (process_frame != NULL) { - ret = wb_process_queue (process_frame, file); + ret = wb_process_queue (process_frame, wb_inode); if (ret == -1) { - if ((errno == ENOMEM) && (file != NULL)) { - LOCK (&file->lock); + if ((errno == ENOMEM) && (wb_inode != NULL)) { + LOCK (&wb_inode->lock); { - file->op_ret = -1; - file->op_errno = ENOMEM; + wb_inode->op_ret = -1; + wb_inode->op_errno = ENOMEM; } - UNLOCK (&file->lock); + UNLOCK (&wb_inode->lock); } gf_log (this->name, GF_LOG_WARNING, @@ -1027,16 +1142,6 @@ wb_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, STACK_DESTROY (process_frame->root); } - if (file) { - LOCK (&file->lock); - { - fd = file->fd; - } - UNLOCK (&file->lock); - - fd_unref (fd); - } - return 0; } @@ -1059,31 +1164,18 @@ int32_t wb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata) { - wb_file_t *file = NULL; - fd_t *iter_fd = NULL; - wb_local_t *local = NULL; - uint64_t tmp_file = 0; - call_stub_t *stub = NULL; - wb_request_t *request = NULL; - int32_t ret = -1, op_errno = EINVAL; + wb_inode_t *wb_inode = NULL; + wb_local_t *local = NULL; + call_stub_t *stub = NULL; + 
wb_request_t *request = NULL; + int32_t ret = -1, op_errno = EINVAL; GF_ASSERT (frame); GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); GF_VALIDATE_OR_GOTO (frame->this->name, loc, unwind); if (loc->inode) { - /* - FIXME: fd_lookup extends life of fd till the execution of - truncate_cbk - */ - iter_fd = fd_lookup (loc->inode, frame->root->pid); - if (iter_fd) { - if (!fd_ctx_get (iter_fd, this, &tmp_file)){ - file = (wb_file_t *)(long)tmp_file; - } else { - fd_unref (iter_fd); - } - } + wb_inode = wb_inode_ctx_get (this, loc->inode); } local = mem_get0 (this->local_pool); @@ -1092,10 +1184,10 @@ wb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, goto unwind; } - local->file = file; + local->wb_inode = wb_inode; frame->local = local; - if (file) { + if (wb_inode) { stub = fop_truncate_stub (frame, wb_truncate_helper, loc, offset, xdata); if (stub == NULL) { @@ -1103,13 +1195,13 @@ wb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, goto unwind; } - request = wb_enqueue (file, stub); + request = wb_enqueue (wb_inode, stub); if (request == NULL) { op_errno = ENOMEM; goto unwind; } - ret = wb_process_queue (frame, file); + ret = wb_process_queue (frame, wb_inode); if (ret == -1) { gf_log (this->name, GF_LOG_WARNING, "request queue processing failed"); @@ -1138,20 +1230,20 @@ wb_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - wb_local_t *local = NULL; - wb_request_t *request = NULL; - wb_file_t *file = NULL; - int32_t ret = -1; + wb_local_t *local = NULL; + wb_request_t *request = NULL; + wb_inode_t *wb_inode = NULL; + int32_t ret = -1; GF_ASSERT (frame); local = frame->local; - file = local->file; + wb_inode = local->wb_inode; request = local->request; - if ((request != NULL) && (file != NULL)) { + if ((request != NULL) && (wb_inode != NULL)) { wb_request_unref (request); - ret = wb_process_queue 
(frame, file); + ret = wb_process_queue (frame, wb_inode); if (ret == -1) { if (errno == ENOMEM) { op_ret = -1; @@ -1187,30 +1279,23 @@ int32_t wb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata) { - wb_file_t *file = NULL; - wb_local_t *local = NULL; - uint64_t tmp_file = 0; - call_stub_t *stub = NULL; - wb_request_t *request = NULL; - int32_t ret = -1; - int op_errno = EINVAL; + wb_inode_t *wb_inode = NULL; + wb_local_t *local = NULL; + call_stub_t *stub = NULL; + wb_request_t *request = NULL; + int32_t ret = -1; + int op_errno = EINVAL; GF_ASSERT (frame); GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); - - if ((!IA_ISDIR (fd->inode->ia_type)) - && fd_ctx_get (fd, this, &tmp_file)) { - file = wb_file_create (this, fd, 0); - } else { - file = (wb_file_t *)(long)tmp_file; - if ((!IA_ISDIR (fd->inode->ia_type)) && (file == NULL)) { - gf_log (this->name, GF_LOG_WARNING, - "wb_file not found for fd %p", fd); - op_errno = EBADFD; - goto unwind; - } + wb_inode = wb_inode_ctx_get (this, fd->inode); + if ((!IA_ISDIR (fd->inode->ia_type)) && (wb_inode == NULL)) { + gf_log (this->name, GF_LOG_WARNING, + "wb_inode not found for fd %p", fd); + op_errno = EBADFD; + goto unwind; } local = mem_get0 (this->local_pool); @@ -1219,11 +1304,11 @@ wb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, goto unwind; } - local->file = file; + local->wb_inode = wb_inode; frame->local = local; - if (file) { + if (wb_inode) { stub = fop_ftruncate_stub (frame, wb_ftruncate_helper, fd, offset, xdata); if (stub == NULL) { @@ -1231,13 +1316,13 @@ wb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, goto unwind; } - request = wb_enqueue (file, stub); + request = wb_enqueue (wb_inode, stub); if (request == NULL) { op_errno = ENOMEM; goto unwind; } - ret = wb_process_queue (frame, file); + ret = wb_process_queue (frame, wb_inode); if (ret == -1) { gf_log 
(this->name, GF_LOG_WARNING, "request queue processing failed"); @@ -1268,14 +1353,13 @@ wb_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, wb_local_t *local = NULL; wb_request_t *request = NULL; call_frame_t *process_frame = NULL; - wb_file_t *file = NULL; + wb_inode_t *wb_inode = NULL; int32_t ret = -1; - fd_t *fd = NULL; GF_ASSERT (frame); local = frame->local; - file = local->file; + wb_inode = local->wb_inode; request = local->request; if (request) { @@ -1294,15 +1378,15 @@ wb_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } if (request && (process_frame != NULL)) { - ret = wb_process_queue (process_frame, file); + ret = wb_process_queue (process_frame, wb_inode); if (ret == -1) { - if ((errno == ENOMEM) && (file != NULL)) { - LOCK (&file->lock); + if ((errno == ENOMEM) && (wb_inode != NULL)) { + LOCK (&wb_inode->lock); { - file->op_ret = -1; - file->op_errno = ENOMEM; + wb_inode->op_ret = -1; + wb_inode->op_errno = ENOMEM; } - UNLOCK (&file->lock); + UNLOCK (&wb_inode->lock); } gf_log (this->name, GF_LOG_WARNING, @@ -1312,16 +1396,6 @@ wb_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, STACK_DESTROY (process_frame->root); } - if (file) { - LOCK (&file->lock); - { - fd = file->fd; - } - UNLOCK (&file->lock); - - fd_unref (fd); - } - return 0; } @@ -1343,13 +1417,11 @@ int32_t wb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata) { - wb_file_t *file = NULL; - fd_t *iter_fd = NULL; - wb_local_t *local = NULL; - uint64_t tmp_file = 0; - call_stub_t *stub = NULL; - wb_request_t *request = NULL; - int32_t ret = -1, op_errno = EINVAL; + wb_inode_t *wb_inode = NULL; + wb_local_t *local = NULL; + call_stub_t *stub = NULL; + wb_request_t *request = NULL; + int32_t ret = -1, op_errno = EINVAL; GF_ASSERT (frame); GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); @@ -1364,24 +1436,12 @@ wb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, frame->local = 
local; if (loc->inode) { - /* - FIXME: fd_lookup extends life of fd till the execution - of wb_utimens_cbk - */ - iter_fd = fd_lookup (loc->inode, frame->root->pid); - if (iter_fd) { - if (!fd_ctx_get (iter_fd, this, &tmp_file)) { - file = (wb_file_t *)(long)tmp_file; - } else { - fd_unref (iter_fd); - } - } - + wb_inode = wb_inode_ctx_get (this, loc->inode); } - local->file = file; + local->wb_inode = wb_inode; - if (file) { + if (wb_inode) { stub = fop_setattr_stub (frame, wb_setattr_helper, loc, stbuf, valid, xdata); if (stub == NULL) { @@ -1389,13 +1449,13 @@ wb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, goto unwind; } - request = wb_enqueue (file, stub); + request = wb_enqueue (wb_inode, stub); if (request == NULL) { op_errno = ENOMEM; goto unwind; } - ret = wb_process_queue (frame, file); + ret = wb_process_queue (frame, wb_inode); if (ret == -1) { gf_log (this->name, GF_LOG_WARNING, "request queue processing failed"); @@ -1420,23 +1480,29 @@ unwind: void wb_disable_all (xlator_t *this, fd_t *origfd) { - inode_t *inode = NULL; - fd_t *otherfd = NULL; - uint64_t tmp_file = 0; + inode_t *inode = NULL; + fd_t *otherfd = NULL; + wb_file_t *wb_file = NULL; inode = origfd->inode; + LOCK(&inode->lock); - list_for_each_entry (otherfd, &inode->fd_list, inode_list) { - if (otherfd == origfd) { - continue; - } - if (fd_ctx_get(otherfd,this,&tmp_file)) { - continue; + { + list_for_each_entry (otherfd, &inode->fd_list, inode_list) { + if (otherfd == origfd) { + continue; + } + + wb_file = wb_fd_ctx_get (this, otherfd); + if (wb_file == NULL) { + continue; + } + + gf_log(this->name,GF_LOG_DEBUG, + "disabling wb on %p because %p is O_SYNC", + otherfd, origfd); + wb_file->disabled = 1; } - gf_log(this->name,GF_LOG_DEBUG, - "disabling wb on %p because %p is O_SYNC", - otherfd, origfd); - ((wb_file_t *)(long)tmp_file)->disabled = 1; } UNLOCK(&inode->lock); } @@ -1447,15 +1513,11 @@ wb_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, { 
int32_t flags = 0; wb_file_t *file = NULL; - wb_conf_t *conf = NULL; wb_local_t *local = NULL; GF_ASSERT (frame); GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, this, out, op_errno, EINVAL); - - conf = this->private; - local = frame->local; GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, local, out, op_errno, EINVAL); @@ -1469,30 +1531,14 @@ wb_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, op_errno = ENOMEM; goto out; } - - LOCK (&file->lock); - { - /* If O_DIRECT then, we disable caching */ - if (((flags & O_DIRECT) == O_DIRECT) - || ((flags & O_ACCMODE) == O_RDONLY) - || (((flags & O_SYNC) == O_SYNC) - && conf->enable_O_SYNC == _gf_true)) { - gf_log(this->name,GF_LOG_DEBUG, - "disabling wb on %p", fd); - file->disabled = 1; - wb_disable_all(this,fd); - } - /* TODO: decide about wbflags's fate */ - /* - else if (wbflags & GF_OPEN_NOWB) { - file->disabled = 1; - } - */ - } - UNLOCK (&file->lock); } out: + frame->local = NULL; + if (local != NULL) { + mem_put (local); + } + STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata); return 0; } @@ -1531,15 +1577,15 @@ wb_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *buf, struct iatt *preparent, struct iatt *postparent, dict_t *xdata) { - long flags = 0; - wb_file_t *file = NULL; - wb_conf_t *conf = NULL; + long flags = 0; + wb_inode_t *wb_inode = NULL; + wb_file_t *file = NULL; + wb_local_t *local = NULL; GF_ASSERT (frame); GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, this, out, op_errno, EINVAL); - conf = this->private; if (op_ret != -1) { if (frame->local) { flags = (long) frame->local; @@ -1552,19 +1598,15 @@ wb_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto out; } - LOCK (&file->lock); + LOCK (&inode->lock); { - /* If O_DIRECT then, we disable caching */ - if (frame->local) { - if (((flags & O_DIRECT) == O_DIRECT) - || ((flags & O_ACCMODE) == O_RDONLY) - || (((flags & O_SYNC) == O_SYNC) - && (conf->enable_O_SYNC == 
_gf_true))) { - file->window_conf = 0; - } + wb_inode = __wb_inode_create (this, inode); + if (wb_inode == NULL) { + op_ret = -1; + op_errno = ENOMEM; } } - UNLOCK (&file->lock); + UNLOCK (&inode->lock); } frame->local = NULL; @@ -1572,6 +1614,11 @@ wb_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, out: STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, preparent, postparent, xdata); + + if (local != NULL) { + mem_put (local); + } + return 0; } @@ -1580,14 +1627,23 @@ int32_t wb_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { - int32_t op_errno = EINVAL; + int32_t op_errno = EINVAL; + wb_local_t *local = NULL; GF_ASSERT (frame); GF_VALIDATE_OR_GOTO (frame->this->name, this, unwind); GF_VALIDATE_OR_GOTO (frame->this->name, fd, unwind); GF_VALIDATE_OR_GOTO (frame->this->name, loc, unwind); - frame->local = (void *)(long)flags; + local = mem_get0 (this->local_pool); + if (local == NULL) { + op_errno = ENOMEM; + goto unwind; + } + + local->flags = flags; + + frame->local = local; STACK_WIND (frame, wb_create_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->create, @@ -1607,19 +1663,22 @@ unwind: * will fit into a single write call to server. 
*/ size_t -__wb_mark_wind_all (wb_file_t *file, list_head_t *list, list_head_t *winds) +__wb_mark_wind_all (wb_inode_t *wb_inode, list_head_t *list, list_head_t *winds) { - wb_request_t *request = NULL; - size_t size = 0; - char first_request = 1, overlap = 0; - wb_conf_t *conf = NULL; - int count = 0; + wb_request_t *request = NULL, *prev_request = NULL; + wb_file_t *wb_file = NULL, *prev_wb_file = NULL; + wb_file_t *last_wb_file = NULL; + size_t size = 0; + char first_request = 1, overlap = 0; + wb_conf_t *conf = NULL; + int count = 0; + enum _gf_boolean dont_wind_set = 0; - GF_VALIDATE_OR_GOTO ("write-behind", file, out); - GF_VALIDATE_OR_GOTO (file->this->name, list, out); - GF_VALIDATE_OR_GOTO (file->this->name, winds, out); + GF_VALIDATE_OR_GOTO ("write-behind", wb_inode, out); + GF_VALIDATE_OR_GOTO (wb_inode->this->name, list, out); + GF_VALIDATE_OR_GOTO (wb_inode->this->name, winds, out); - conf = file->this->private; + conf = wb_inode->this->private; list_for_each_entry (request, list, list) { @@ -1628,25 +1687,60 @@ __wb_mark_wind_all (wb_file_t *file, list_head_t *list, list_head_t *winds) break; } + wb_file = wb_fd_ctx_get (wb_inode->this, + request->stub->args.writev.fd); + if (wb_file == NULL) { + gf_log (wb_inode->this->name, GF_LOG_WARNING, + "write behind wb_file pointer is" + " not stored in context of fd(%p)", + request->stub->args.writev.fd); + goto out; + } + + /* If write requests from two fds are interleaved, for + * each of them, we can only send first set of adjacent + * requests that are on same fd. This is because, fds + * with O_APPEND cannot have more than one write fop in + * progress while syncing, so that order is not messed + * up. Since we group adjacent requests with same fd into + * single write call whenever possible, we need the above said + * measure. 
+ */ + if ((prev_wb_file != NULL) && (prev_wb_file->flags & O_APPEND) + && (prev_request->stub->args.writev.fd + != request->stub->args.writev.fd) + && (!prev_wb_file->dont_wind)) { + prev_wb_file->dont_wind = 1; + dont_wind_set = 1; + last_wb_file = prev_wb_file; + } + + prev_request = request; + prev_wb_file = wb_file; + if (!request->flags.write_request.stack_wound) { if (first_request) { first_request = 0; } else { overlap = wb_overlap (list, request); if (overlap) { - goto out; + continue; } } - if ((file->flags & O_APPEND) + if ((wb_file->flags & O_APPEND) && (((size + request->write_size) > conf->aggregate_size) || ((count + request->stub->args.writev.count) - > MAX_VECTOR_COUNT))) { - goto out; + > MAX_VECTOR_COUNT) + || (wb_file->dont_wind))) { + continue; } size += request->write_size; + + wb_inode->aggregate_current -= request->write_size; + count += request->stub->args.writev.count; request->flags.write_request.stack_wound = 1; @@ -1655,8 +1749,22 @@ __wb_mark_wind_all (wb_file_t *file, list_head_t *list, list_head_t *winds) } out: - if (file != NULL) { - file->aggregate_current -= size; + if (wb_inode != NULL) { + wb_inode->aggregate_current -= size; + } + + if (dont_wind_set && (list != NULL)) { + list_for_each_entry (request, list, list) { + wb_file = wb_fd_ctx_get (wb_inode->this, + request->stub->args.writev.fd); + if (wb_file != NULL) { + wb_file->dont_wind = 0; + } + + if (wb_file == last_wb_file) { + break; + } + } } return size; @@ -1729,7 +1837,7 @@ __wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_conf, char incomplete_writes = 0; char overlapping_writes = 0; wb_request_t *request = NULL; - wb_file_t *file = NULL; + wb_inode_t *wb_inode = NULL; char wind_all = 0; int32_t ret = 0; @@ -1741,13 +1849,13 @@ __wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_conf, } request = list_entry (list->next, typeof (*request), list); - file = request->file; + wb_inode = request->wb_inode; ret = __wb_can_wind 
(list, &other_fop_in_queue, &overlapping_writes, &incomplete_writes, &wind_all); if (ret == -1) { - gf_log (file->this->name, GF_LOG_WARNING, + gf_log (wb_inode->this->name, GF_LOG_WARNING, "cannot decide whether to wind or not"); goto out; } @@ -1755,9 +1863,9 @@ __wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_conf, if (!incomplete_writes && ((enable_trickling_writes) || (wind_all) || (overlapping_writes) || (other_fop_in_queue) - || (file->aggregate_current + || (wb_inode->aggregate_current >= aggregate_conf))) { - size = __wb_mark_wind_all (file, list, winds); + size = __wb_mark_wind_all (wb_inode, list, winds); } out: @@ -1770,14 +1878,14 @@ __wb_mark_unwind_till (list_head_t *list, list_head_t *unwinds, size_t size) { size_t written_behind = 0; wb_request_t *request = NULL; - wb_file_t *file = NULL; + wb_inode_t *wb_inode = NULL; if (list_empty (list)) { goto out; } request = list_entry (list->next, typeof (*request), list); - file = request->file; + wb_inode = request->wb_inode; list_for_each_entry (request, list, list) { @@ -1793,7 +1901,7 @@ __wb_mark_unwind_till (list_head_t *list, list_head_t *unwinds, size_t size) list_add_tail (&request->unwinds, unwinds); if (!request->flags.write_request.got_reply) { - file->window_current + wb_inode->window_current += request->write_size; } } @@ -1810,8 +1918,8 @@ out: void __wb_mark_unwinds (list_head_t *list, list_head_t *unwinds) { - wb_request_t *request = NULL; - wb_file_t *file = NULL; + wb_request_t *request = NULL; + wb_inode_t *wb_inode = NULL; GF_VALIDATE_OR_GOTO ("write-behind", list, out); GF_VALIDATE_OR_GOTO ("write-behind", unwinds, out); @@ -1821,12 +1929,12 @@ __wb_mark_unwinds (list_head_t *list, list_head_t *unwinds) } request = list_entry (list->next, typeof (*request), list); - file = request->file; + wb_inode = request->wb_inode; - if (file->window_current <= file->window_conf) { + if (wb_inode->window_current <= wb_inode->window_conf) { __wb_mark_unwind_till (list, 
unwinds, - file->window_conf - - file->window_current); + wb_inode->window_conf + - wb_inode->window_current); } out: @@ -1892,7 +2000,7 @@ out: int32_t -wb_resume_other_requests (call_frame_t *frame, wb_file_t *file, +wb_resume_other_requests (call_frame_t *frame, wb_inode_t *wb_inode, list_head_t *other_requests) { int32_t ret = -1; @@ -1901,9 +2009,9 @@ wb_resume_other_requests (call_frame_t *frame, wb_file_t *file, char wind = 0; call_stub_t *stub = NULL; - GF_VALIDATE_OR_GOTO ((file ? file->this->name : "write-behind"), frame, - out); - GF_VALIDATE_OR_GOTO (frame->this->name, file, out); + GF_VALIDATE_OR_GOTO ((wb_inode ? wb_inode->this->name : "write-behind"), + frame, out); + GF_VALIDATE_OR_GOTO (frame->this->name, wb_inode, out); GF_VALIDATE_OR_GOTO (frame->this->name, other_requests, out); if (list_empty (other_requests)) { @@ -1916,11 +2024,11 @@ wb_resume_other_requests (call_frame_t *frame, wb_file_t *file, wind = request->stub->wind; stub = request->stub; - LOCK (&file->lock); + LOCK (&wb_inode->lock); { request->stub = NULL; } - UNLOCK (&file->lock); + UNLOCK (&wb_inode->lock); if (!wind) { wb_request_unref (request); @@ -1933,7 +2041,7 @@ wb_resume_other_requests (call_frame_t *frame, wb_file_t *file, ret = 0; if (fops_removed > 0) { - ret = wb_process_queue (frame, file); + ret = wb_process_queue (frame, wb_inode); if (ret == -1) { gf_log (frame->this->name, GF_LOG_WARNING, "request queue processing failed"); @@ -1946,26 +2054,26 @@ out: int32_t -wb_do_ops (call_frame_t *frame, wb_file_t *file, list_head_t *winds, +wb_do_ops (call_frame_t *frame, wb_inode_t *wb_inode, list_head_t *winds, list_head_t *unwinds, list_head_t *other_requests) { int32_t ret = -1, write_requests_removed = 0; - GF_VALIDATE_OR_GOTO ((file ? file->this->name : "write-behind"), + GF_VALIDATE_OR_GOTO ((wb_inode ? 
wb_inode->this->name : "write-behind"), frame, out); - GF_VALIDATE_OR_GOTO (frame->this->name, file, out); + GF_VALIDATE_OR_GOTO (frame->this->name, wb_inode, out); ret = wb_stack_unwind (unwinds); write_requests_removed = ret; - ret = wb_sync (frame, file, winds); + ret = wb_sync (frame, wb_inode, winds); if (ret == -1) { gf_log (frame->this->name, GF_LOG_WARNING, "syncing of write requests failed"); } - ret = wb_resume_other_requests (frame, file, other_requests); + ret = wb_resume_other_requests (frame, wb_inode, other_requests); if (ret == -1) { gf_log (frame->this->name, GF_LOG_WARNING, "cannot resume non-write requests in request queue"); @@ -1977,7 +2085,7 @@ wb_do_ops (call_frame_t *frame, wb_file_t *file, list_head_t *winds, * blocked on the writes just unwound. */ if (write_requests_removed > 0) { - ret = wb_process_queue (frame, file); + ret = wb_process_queue (frame, wb_inode); if (ret == -1) { gf_log (frame->this->name, GF_LOG_WARNING, "request queue processing failed"); @@ -1999,7 +2107,7 @@ __wb_copy_into_holder (wb_request_t *holder, wb_request_t *request) if (holder->flags.write_request.virgin) { /* TODO: check the required size */ - iobuf = iobuf_get (request->file->this->ctx->iobuf_pool); + iobuf = iobuf_get (request->wb_inode->this->ctx->iobuf_pool); if (iobuf == NULL) { goto out; } @@ -2014,7 +2122,7 @@ __wb_copy_into_holder (wb_request_t *holder, wb_request_t *request) if (ret != 0) { iobuf_unref (iobuf); iobref_unref (iobref); - gf_log (request->file->this->name, GF_LOG_WARNING, + gf_log (request->wb_inode->this->name, GF_LOG_WARNING, "cannot add iobuf (%p) into iobref (%p)", iobuf, iobref); goto out; @@ -2041,7 +2149,7 @@ __wb_copy_into_holder (wb_request_t *holder, wb_request_t *request) holder->write_size += request->write_size; request->flags.write_request.stack_wound = 1; - list_move_tail (&request->list, &request->file->passive_requests); + list_move_tail (&request->list, &request->wb_inode->passive_requests); ret = 0; out: @@ -2079,7 
+2187,9 @@ __wb_collapse_write_bufs (list_head_t *requests, size_t page_size) if ((request->stub->args.writev.off != offset_expected) || (!is_same_lkowner (&request->lk_owner, - &holder->lk_owner))) { + &holder->lk_owner)) + || (holder->stub->args.writev.fd + != request->stub->args.writev.fd)) { holder = request; continue; } @@ -2107,7 +2217,7 @@ out: int32_t -wb_process_queue (call_frame_t *frame, wb_file_t *file) +wb_process_queue (call_frame_t *frame, wb_inode_t *wb_inode) { list_head_t winds = {0, }, unwinds = {0, }, other_requests = {0, }; size_t size = 0; @@ -2119,15 +2229,15 @@ wb_process_queue (call_frame_t *frame, wb_file_t *file) INIT_LIST_HEAD (&unwinds); INIT_LIST_HEAD (&other_requests); - GF_VALIDATE_OR_GOTO ((file ? file->this->name : "write-behind"), frame, - out); - GF_VALIDATE_OR_GOTO (file->this->name, frame, out); + GF_VALIDATE_OR_GOTO ((wb_inode ? wb_inode->this->name : "write-behind"), + frame, out); + GF_VALIDATE_OR_GOTO (wb_inode->this->name, frame, out); - conf = file->this->private; - GF_VALIDATE_OR_GOTO (file->this->name, conf, out); + conf = wb_inode->this->private; + GF_VALIDATE_OR_GOTO (wb_inode->this->name, conf, out); size = conf->aggregate_size; - LOCK (&file->lock); + LOCK (&wb_inode->lock); { /* * make sure requests are marked for unwinding and adjacent @@ -2135,23 +2245,23 @@ wb_process_queue (call_frame_t *frame, wb_file_t *file) * an iobuf) are packed properly so that iobufs are filled to * their maximum capacity, before calling __wb_mark_winds. 
*/ - __wb_mark_unwinds (&file->request, &unwinds); + __wb_mark_unwinds (&wb_inode->request, &unwinds); - __wb_collapse_write_bufs (&file->request, - file->this->ctx->page_size); + __wb_collapse_write_bufs (&wb_inode->request, + wb_inode->this->ctx->page_size); - count = __wb_get_other_requests (&file->request, + count = __wb_get_other_requests (&wb_inode->request, &other_requests); if (count == 0) { - __wb_mark_winds (&file->request, &winds, size, + __wb_mark_winds (&wb_inode->request, &winds, size, conf->enable_trickling_writes); } } - UNLOCK (&file->lock); + UNLOCK (&wb_inode->lock); - ret = wb_do_ops (frame, file, &winds, &unwinds, &other_requests); + ret = wb_do_ops (frame, wb_inode, &winds, &unwinds, &other_requests); out: return ret; @@ -2176,15 +2286,15 @@ wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int32_t count, off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata) { - wb_file_t *file = NULL; + wb_inode_t *wb_inode = NULL; + wb_file_t *wb_file = NULL; char wb_disabled = 0; call_frame_t *process_frame = NULL; - size_t size = 0; - uint64_t tmp_file = 0; call_stub_t *stub = NULL; wb_local_t *local = NULL; wb_request_t *request = NULL; int32_t ret = -1; + size_t size = 0; int32_t op_ret = -1, op_errno = EINVAL; GF_ASSERT (frame); @@ -2194,49 +2304,44 @@ wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, fd, unwind, op_errno, EINVAL); - if (vector != NULL) + if (vector != NULL) { size = iov_length (vector, count); - - if ((!IA_ISDIR (fd->inode->ia_type)) - && fd_ctx_get (fd, this, &tmp_file)) { - file = wb_file_create (this, fd, 0); - } else { - file = (wb_file_t *)(long)tmp_file; - if ((!IA_ISDIR (fd->inode->ia_type)) && (file == NULL)) { - gf_log (this->name, GF_LOG_WARNING, - "wb_file not found for fd %p", fd); - op_errno = EBADFD; - goto unwind; - } } - if (file != NULL) { - LOCK (&file->lock); - { - op_ret = file->op_ret; - op_errno = 
file->op_errno; - - file->op_ret = 0; + wb_inode = wb_inode_ctx_get (this, fd->inode); + if ((!IA_ISDIR (fd->inode->ia_type)) && (wb_inode == NULL)) { + gf_log (this->name, GF_LOG_WARNING, + "write behind wb_inode pointer is" + " not stored in context of inode(%p), returning EBADFD", + fd->inode); + op_errno = EBADFD; + goto unwind; + } - if ((op_ret == 0) - && (file->disabled || file->disable_till)) { - if (size > file->disable_till) { - file->disable_till = 0; - } else { - file->disable_till -= size; - } - wb_disabled = 1; + if (wb_file != NULL) { + if (wb_file->disabled || wb_file->disable_till) { + if (size > wb_file->disable_till) { + wb_file->disable_till = 0; + } else { + wb_file->disable_till -= size; } + wb_disabled = 1; } - UNLOCK (&file->lock); } else { wb_disabled = 1; } + if (wb_inode != NULL) { + LOCK (&wb_inode->lock); + { + op_ret = wb_inode->op_ret; + op_errno = wb_inode->op_errno; + } + UNLOCK (&wb_inode->lock); + } + if (op_ret == -1) { - STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, NULL, - NULL, NULL); - return 0; + goto unwind; } if (wb_disabled) { @@ -2259,7 +2364,7 @@ wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, } frame->local = local; - local->file = file; + local->wb_inode = wb_inode; stub = fop_writev_stub (frame, NULL, fd, vector, count, offset, flags, iobref, xdata); @@ -2268,13 +2373,13 @@ wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, goto unwind; } - request = wb_enqueue (file, stub); + request = wb_enqueue (wb_inode, stub); if (request == NULL) { op_errno = ENOMEM; goto unwind; } - ret = wb_process_queue (process_frame, file); + ret = wb_process_queue (process_frame, wb_inode); if (ret == -1) { gf_log (this->name, GF_LOG_WARNING, "request queue processing failed"); @@ -2285,6 +2390,10 @@ wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, return 0; unwind: + local = frame->local; + frame->local = NULL; + mem_put (local); + 
STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, NULL); if (process_frame) { @@ -2304,21 +2413,21 @@ wb_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iovec *vector, int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) { - wb_local_t *local = NULL; - wb_file_t *file = NULL; - wb_request_t *request = NULL; - int32_t ret = 0; + wb_local_t *local = NULL; + wb_inode_t *wb_inode = NULL; + wb_request_t *request = NULL; + int32_t ret = 0; GF_ASSERT (frame); local = frame->local; - file = local->file; + wb_inode = local->wb_inode; request = local->request; - if ((request != NULL) && (file != NULL)) { + if ((request != NULL) && (wb_inode != NULL)) { wb_request_unref (request); - ret = wb_process_queue (frame, file); + ret = wb_process_queue (frame, wb_inode); if (ret == -1) { if (errno == ENOMEM) { op_ret = -1; @@ -2353,12 +2462,11 @@ int32_t wb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata) { - wb_file_t *file = NULL; - wb_local_t *local = NULL; - uint64_t tmp_file = 0; - call_stub_t *stub = NULL; - int32_t ret = -1, op_errno = 0; - wb_request_t *request = NULL; + wb_inode_t *wb_inode = NULL; + wb_local_t *local = NULL; + call_stub_t *stub = NULL; + int32_t ret = -1, op_errno = 0; + wb_request_t *request = NULL; GF_ASSERT (frame); GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, this, unwind, @@ -2366,17 +2474,14 @@ wb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, fd, unwind, op_errno, EINVAL); - if ((!IA_ISDIR (fd->inode->ia_type)) - && fd_ctx_get (fd, this, &tmp_file)) { - file = wb_file_create (this, fd, 0); - } else { - file = (wb_file_t *)(long)tmp_file; - if ((!IA_ISDIR (fd->inode->ia_type)) && (file == NULL)) { - gf_log (this->name, GF_LOG_WARNING, - "wb_file not found for fd %p", fd); - op_errno = EBADFD; - goto unwind; - } + wb_inode = 
wb_inode_ctx_get (this, fd->inode); + if ((!IA_ISDIR (fd->inode->ia_type)) && (wb_inode == NULL)) { + gf_log (this->name, GF_LOG_WARNING, + "write behind wb_inode pointer is" + " not stored in context of inode(%p), returning " + "EBADFD", fd->inode); + op_errno = EBADFD; + goto unwind; } local = mem_get0 (this->local_pool); @@ -2385,10 +2490,10 @@ wb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, goto unwind; } - local->file = file; + local->wb_inode = wb_inode; frame->local = local; - if (file) { + if (wb_inode) { stub = fop_readv_stub (frame, wb_readv_helper, fd, size, offset, flags, xdata); if (stub == NULL) { @@ -2396,14 +2501,14 @@ wb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, goto unwind; } - request = wb_enqueue (file, stub); + request = wb_enqueue (wb_inode, stub); if (request == NULL) { call_stub_destroy (stub); op_errno = ENOMEM; goto unwind; } - ret = wb_process_queue (frame, file); + ret = wb_process_queue (frame, wb_inode); if (ret == -1) { gf_log (this->name, GF_LOG_WARNING, "request queue processing failed"); @@ -2436,25 +2541,25 @@ int32_t wb_ffr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { - wb_local_t *local = NULL; - wb_file_t *file = NULL; + wb_local_t *local = NULL; + wb_inode_t *wb_inode = NULL; GF_ASSERT (frame); local = frame->local; - file = local->file; + wb_inode = local->wb_inode; - if (file != NULL) { - LOCK (&file->lock); + if (wb_inode != NULL) { + LOCK (&wb_inode->lock); { - if (file->op_ret == -1) { - op_ret = file->op_ret; - op_errno = file->op_errno; + if (wb_inode->op_ret == -1) { + op_ret = wb_inode->op_ret; + op_errno = wb_inode->op_errno; - file->op_ret = 0; + wb_inode->op_ret = 0; } } - UNLOCK (&file->lock); + UNLOCK (&wb_inode->lock); } STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata); @@ -2468,7 +2573,7 @@ wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { wb_conf_t *conf = NULL; 
wb_local_t *local = NULL; - wb_file_t *file = NULL; + wb_inode_t *wb_inode = NULL; call_frame_t *flush_frame = NULL, *process_frame = NULL; int32_t op_ret = -1, op_errno = -1, ret = -1; @@ -2479,14 +2584,14 @@ wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) conf = this->private; local = frame->local; - file = local->file; + wb_inode = local->wb_inode; - LOCK (&file->lock); + LOCK (&wb_inode->lock); { - op_ret = file->op_ret; - op_errno = file->op_errno; + op_ret = wb_inode->op_ret; + op_errno = wb_inode->op_errno; } - UNLOCK (&file->lock); + UNLOCK (&wb_inode->lock); if (local && local->request) { process_frame = copy_frame (frame); @@ -2513,7 +2618,7 @@ wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) } if (process_frame != NULL) { - ret = wb_process_queue (process_frame, file); + ret = wb_process_queue (process_frame, wb_inode); if (ret == -1) { gf_log (this->name, GF_LOG_WARNING, "request queue processing failed"); @@ -2537,14 +2642,13 @@ unwind: int32_t wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { - wb_conf_t *conf = NULL; - wb_file_t *file = NULL; - wb_local_t *local = NULL; - uint64_t tmp_file = 0; - call_stub_t *stub = NULL; - call_frame_t *flush_frame = NULL; - wb_request_t *request = NULL; - int32_t ret = 0, op_errno = 0; + wb_conf_t *conf = NULL; + wb_inode_t *wb_inode = NULL; + wb_local_t *local = NULL; + call_stub_t *stub = NULL; + call_frame_t *flush_frame = NULL; + wb_request_t *request = NULL; + int32_t ret = 0, op_errno = 0; GF_ASSERT (frame); GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, this, unwind, @@ -2554,28 +2658,24 @@ wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) conf = this->private; - - if ((!IA_ISDIR (fd->inode->ia_type)) - && fd_ctx_get (fd, this, &tmp_file)) { - file = wb_file_create (this, fd, 0); - } else { - file = (wb_file_t *)(long)tmp_file; - if ((!IA_ISDIR (fd->inode->ia_type)) && (file == NULL)) { - gf_log 
(this->name, GF_LOG_WARNING, - "wb_file not found for fd %p", fd); - op_errno = EBADFD; - goto unwind; - } + wb_inode = wb_inode_ctx_get (this, fd->inode); + if ((!IA_ISDIR (fd->inode->ia_type)) && (wb_inode == NULL)) { + gf_log (this->name, GF_LOG_WARNING, + "write behind wb_inode pointer is" + " not stored in context of inode(%p), " + "returning EBADFD", fd->inode); + op_errno = EBADFD; + goto unwind; } - if (file != NULL) { + if (wb_inode != NULL) { local = mem_get0 (this->local_pool); if (local == NULL) { op_errno = ENOMEM; goto unwind; } - local->file = file; + local->wb_inode = wb_inode; frame->local = local; @@ -2585,14 +2685,14 @@ wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) goto unwind; } - request = wb_enqueue (file, stub); + request = wb_enqueue (wb_inode, stub); if (request == NULL) { call_stub_destroy (stub); op_errno = ENOMEM; goto unwind; } - ret = wb_process_queue (frame, file); + ret = wb_process_queue (frame, wb_inode); if (ret == -1) { gf_log (this->name, GF_LOG_WARNING, "request queue processing failed"); @@ -2629,32 +2729,32 @@ wb_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - wb_local_t *local = NULL; - wb_file_t *file = NULL; - wb_request_t *request = NULL; - int32_t ret = -1; + wb_local_t *local = NULL; + wb_inode_t *wb_inode = NULL; + wb_request_t *request = NULL; + int32_t ret = -1; GF_ASSERT (frame); local = frame->local; - file = local->file; + wb_inode = local->wb_inode; request = local->request; - if (file != NULL) { - LOCK (&file->lock); + if (wb_inode != NULL) { + LOCK (&wb_inode->lock); { - if (file->op_ret == -1) { - op_ret = file->op_ret; - op_errno = file->op_errno; + if (wb_inode->op_ret == -1) { + op_ret = wb_inode->op_ret; + op_errno = wb_inode->op_errno; - file->op_ret = 0; + wb_inode->op_ret = 0; } } - UNLOCK (&file->lock); + UNLOCK (&wb_inode->lock); if (request) { wb_request_unref 
(request); - ret = wb_process_queue (frame, file); + ret = wb_process_queue (frame, wb_inode); if (ret == -1) { if (errno == ENOMEM) { op_ret = -1; @@ -2689,12 +2789,11 @@ int32_t wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, dict_t *xdata) { - wb_file_t *file = NULL; - wb_local_t *local = NULL; - uint64_t tmp_file = 0; - call_stub_t *stub = NULL; - wb_request_t *request = NULL; - int32_t ret = -1, op_errno = 0; + wb_inode_t *wb_inode = NULL; + wb_local_t *local = NULL; + call_stub_t *stub = NULL; + wb_request_t *request = NULL; + int32_t ret = -1, op_errno = 0; GF_ASSERT (frame); GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, this, unwind, @@ -2702,18 +2801,14 @@ wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, GF_VALIDATE_OR_GOTO_WITH_ERROR (frame->this->name, fd, unwind, op_errno, EINVAL); - - if ((!IA_ISDIR (fd->inode->ia_type)) - && fd_ctx_get (fd, this, &tmp_file)) { - file = wb_file_create (this, fd, 0); - } else { - file = (wb_file_t *)(long)tmp_file; - if ((!IA_ISDIR (fd->inode->ia_type)) && (file == NULL)) { - gf_log (this->name, GF_LOG_WARNING, - "wb_file not found for fd %p", fd); - op_errno = EBADFD; - goto unwind; - } + wb_inode = wb_inode_ctx_get (this, fd->inode); + if ((!IA_ISDIR (fd->inode->ia_type))) { + gf_log (this->name, GF_LOG_WARNING, + "write behind wb_inode pointer is" + " not stored in context of inode(%p), " + "returning EBADFD", fd->inode); + op_errno = EBADFD; + goto unwind; } local = mem_get0 (this->local_pool); @@ -2722,11 +2817,9 @@ wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, goto unwind; } - local->file = file; - frame->local = local; - if (file) { + if (wb_inode) { stub = fop_fsync_stub (frame, wb_fsync_helper, fd, datasync, xdata); if (stub == NULL) { @@ -2734,21 +2827,22 @@ wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync, goto unwind; } - request = wb_enqueue (file, stub); + request = wb_enqueue (wb_inode, stub); 
if (request == NULL) { op_errno = ENOMEM; call_stub_destroy (stub); goto unwind; } - ret = wb_process_queue (frame, file); + ret = wb_process_queue (frame, wb_inode); if (ret == -1) { gf_log (this->name, GF_LOG_WARNING, "request queue processing failed"); } } else { STACK_WIND (frame, wb_fsync_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata); + FIRST_CHILD(this)->fops->fsync, fd, datasync, + xdata); } return 0; @@ -2760,28 +2854,77 @@ unwind: int32_t +wb_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + gf_dirent_t *entry = NULL; + + if (op_ret <= 0) { + goto unwind; + } + + list_for_each_entry (entry, &entries->list, list) { + if (!entry->inode) + continue; + wb_inode_create (this, entry->inode); + } + +unwind: + STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata); + return 0; +} + + +int32_t +wb_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, + size_t size, off_t off, dict_t *xdata) +{ + STACK_WIND (frame, wb_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata); + return 0; +} + + +int32_t wb_release (xlator_t *this, fd_t *fd) { - uint64_t file_ptr = 0; - wb_file_t *file = NULL; + uint64_t wb_file_ptr = 0; + wb_file_t *wb_file = NULL; GF_VALIDATE_OR_GOTO ("write-behind", this, out); GF_VALIDATE_OR_GOTO (this->name, fd, out); - fd_ctx_get (fd, this, &file_ptr); - file = (wb_file_t *) (long) file_ptr; + fd_ctx_del (fd, this, &wb_file_ptr); + wb_file = (wb_file_t *)(long) wb_file_ptr; + + GF_FREE (wb_file); + +out: + return 0; +} + + +int32_t +wb_forget (xlator_t *this, inode_t *inode) +{ + uint64_t tmp = 0; + wb_inode_t *wb_inode = NULL; + + inode_ctx_del (inode, this, &tmp); + + wb_inode = (wb_inode_t *)(long)tmp; - if (file != NULL) { - LOCK (&file->lock); + if (wb_inode != NULL) { + LOCK (&wb_inode->lock); { - GF_ASSERT (list_empty (&file->request)); + GF_ASSERT 
(list_empty (&wb_inode->request)); } - UNLOCK (&file->lock); + UNLOCK (&wb_inode->lock); - wb_file_destroy (file); + wb_inode_destroy (wb_inode); } -out: return 0; } @@ -2805,7 +2948,6 @@ wb_priv_dump (xlator_t *this) gf_proc_dump_write ("aggregate_size", "%d", conf->aggregate_size); gf_proc_dump_write ("window_size", "%d", conf->window_size); - gf_proc_dump_write ("disable_till", "%d", conf->disable_till); gf_proc_dump_write ("enable_O_SYNC", "%d", conf->enable_O_SYNC); gf_proc_dump_write ("flush_behind", "%d", conf->flush_behind); gf_proc_dump_write ("enable_trickling_writes", "%d", @@ -2866,75 +3008,62 @@ __wb_dump_requests (struct list_head *head, char *prefix, char passive) int -wb_file_dump (xlator_t *this, fd_t *fd) +wb_inode_dump (xlator_t *this, inode_t *inode) { - wb_file_t *file = NULL; - uint64_t tmp_file = 0; - int32_t ret = -1; - char *path = NULL; + wb_inode_t *wb_inode = NULL; + int32_t ret = -1; + char *path = NULL; char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; - if ((fd == NULL) || (this == NULL)) { - ret = 0; - goto out; - } - - ret = fd_ctx_get (fd, this, &tmp_file); - if (ret == -1) { + if ((inode == NULL) || (this == NULL)) { ret = 0; goto out; } - file = (wb_file_t *)(long)tmp_file; - if (file == NULL) { + wb_inode = wb_inode_ctx_get (this, inode); + if (wb_inode == NULL) { ret = 0; goto out; } gf_proc_dump_build_key (key_prefix, "xlator.performance.write-behind", - "file"); + "wb_inode"); gf_proc_dump_add_section (key_prefix); - __inode_path (fd->inode, NULL, &path); + __inode_path (inode, NULL, &path); if (path != NULL) { gf_proc_dump_write ("path", "%s", path); GF_FREE (path); } - gf_proc_dump_write ("fd", "%p", fd); - - gf_proc_dump_write ("disabled", "%d", file->disabled); - - gf_proc_dump_write ("disable_till", "%lu", file->disable_till); + gf_proc_dump_write ("inode", "%p", inode); - gf_proc_dump_write ("window_conf", "%"GF_PRI_SIZET, file->window_conf); + gf_proc_dump_write ("window_conf", "%"GF_PRI_SIZET, + wb_inode->window_conf); - 
gf_proc_dump_write ("window_current", "%"GF_PRI_SIZET, file->window_current); + gf_proc_dump_write ("window_current", "%"GF_PRI_SIZET, + wb_inode->window_current); - gf_proc_dump_write ("flags", "%s", (file->flags & O_APPEND) ? "O_APPEND" - : "!O_APPEND"); - - gf_proc_dump_write ("aggregate_current", "%"GF_PRI_SIZET, file->aggregate_current); - - gf_proc_dump_write ("refcount", "%d", file->refcount); + gf_proc_dump_write ("aggregate_current", "%"GF_PRI_SIZET, + wb_inode->aggregate_current); - gf_proc_dump_write ("op_ret", "%d", file->op_ret); + gf_proc_dump_write ("op_ret", "%d", wb_inode->op_ret); - gf_proc_dump_write ("op_errno", "%d", file->op_errno); + gf_proc_dump_write ("op_errno", "%d", wb_inode->op_errno); - LOCK (&file->lock); + LOCK (&wb_inode->lock); { - if (!list_empty (&file->request)) { - __wb_dump_requests (&file->request, key_prefix, 0); + if (!list_empty (&wb_inode->request)) { + __wb_dump_requests (&wb_inode->request, key_prefix, 0); } - if (!list_empty (&file->passive_requests)) { - __wb_dump_requests (&file->passive_requests, key_prefix, - 1); + if (!list_empty (&wb_inode->passive_requests)) { + __wb_dump_requests (&wb_inode->passive_requests, + key_prefix, 1); } } - UNLOCK (&file->lock); + UNLOCK (&wb_inode->lock); ret = 0; out: @@ -2942,6 +3071,48 @@ out: } +int +wb_fd_dump (xlator_t *this, fd_t *fd) +{ + wb_file_t *wb_file = NULL; + char *path = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN] = {0, }; + + if ((fd == NULL) || (this == NULL)) { + goto out; + } + + wb_file = wb_fd_ctx_get (this, fd); + if (wb_file == NULL) { + goto out; + } + + gf_proc_dump_build_key (key_prefix, "xlator.performance.write-behind", + "wb_file"); + + gf_proc_dump_add_section (key_prefix); + + __inode_path (fd->inode, NULL, &path); + if (path != NULL) { + gf_proc_dump_write ("path", "%s", path); + GF_FREE (path); + } + + gf_proc_dump_write ("fd", "%p", fd); + + gf_proc_dump_write ("flags", "%d", wb_file->flags); + + gf_proc_dump_write ("flags", "%s", + (wb_file->flags 
& O_APPEND) ? "O_APPEND" + : "!O_APPEND"); + + gf_proc_dump_write ("disabled", "%d", wb_file->disabled); + +out: + return 0; +} + + int32_t mem_acct_init (xlator_t *this) { @@ -2998,7 +3169,7 @@ init (xlator_t *this) if (this->parents == NULL) { gf_log (this->name, GF_LOG_WARNING, - "dangling volume. check volfile"); + "dangling volume. check volfilex"); } conf = GF_CALLOC (1, sizeof (*conf), gf_wb_mt_wb_conf_t); @@ -3011,9 +3182,6 @@ init (xlator_t *this) /* configure 'options aggregate-size <size>' */ conf->aggregate_size = WB_AGGREGATE_SIZE; - GF_OPTION_INIT("disable-for-first-nbytes", conf->disable_till, size, - out); - /* configure 'option window-size <size>' */ GF_OPTION_INIT ("cache-size", conf->window_size, size, out); @@ -3036,8 +3204,8 @@ init (xlator_t *this) /* configure 'option flush-behind <on/off>' */ GF_OPTION_INIT ("flush-behind", conf->flush_behind, bool, out); - GF_OPTION_INIT ("enable-trickling-writes", conf->enable_trickling_writes, - bool, out); + GF_OPTION_INIT ("enable-trickling-writes", + conf->enable_trickling_writes, bool, out); this->local_pool = mem_pool_new (wb_local_t, 64); if (!this->local_pool) { @@ -3079,6 +3247,7 @@ out: struct xlator_fops fops = { + .lookup = wb_lookup, .writev = wb_writev, .open = wb_open, .create = wb_create, @@ -3090,6 +3259,7 @@ struct xlator_fops fops = { .truncate = wb_truncate, .ftruncate = wb_ftruncate, .setattr = wb_setattr, + .readdirp = wb_readdirp, }; struct xlator_cbks cbks = { @@ -3098,7 +3268,8 @@ struct xlator_cbks cbks = { struct xlator_dumpops dumpops = { .priv = wb_priv_dump, - .fdctx = wb_file_dump, + .inodectx = wb_inode_dump, + .fdctx = wb_fd_dump, }; struct volume_options options[] = { @@ -3116,7 +3287,8 @@ struct volume_options options[] = { .min = 512 * GF_UNIT_KB, .max = 1 * GF_UNIT_GB, .default_value = "1MB", - .description = "Size of the per-file write-behind buffer. " + .description = "Size of the write-behind buffer for a single file " + "(inode)." 
}, { .key = {"disable-for-first-nbytes"}, |