1 files changed, 268 insertions, 126 deletions
diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
index ffa333ce8..95c5921c6 100644
--- a/xlators/performance/write-behind/src/write-behind.c
+++ b/xlators/performance/write-behind/src/write-behind.c
@@ -43,13 +43,6 @@ typedef struct wb_inode {
 				 used for trickling_writes
 			      */
 
-        int32_t      op_ret;   /* Last found op_ret and op_errno
-				  while completing a liability
-				  operation. Will be picked by
-				  the next arriving writev/flush/fsync
-			       */
-        int32_t      op_errno;
-
         list_head_t  all;    /* All requests, from enqueue() till destroy().
 				Used only for resetting generation
 				number when empty.
@@ -89,6 +82,12 @@ typedef struct wb_inode {
 				     write-behind from this list, and therefore
 				     get "upgraded" to the "liability" list.
 			     */
+	list_head_t  wip; /* List of write calls in progress, SYNC or non-SYNC
+			     which are currently STACK_WIND'ed towards the server.
+			     This is for guaranteeing that no two overlapping
+			     writes are in progress at the same time. Modules
+			     like eager-lock in AFR depend on this behavior.
+			  */
 	uint64_t     gen;    /* Liability generation number. Represents
 				the current 'state' of liability. Every
 				new addition to the liability list bumps
@@ -120,10 +119,11 @@ typedef struct wb_request {
 	list_head_t           lie;  /* either in @liability or @temptation */
         list_head_t           winds;
         list_head_t           unwinds;
+        list_head_t           wip;
 
         call_stub_t          *stub;
 
-        size_t                write_size;  /* currently held size
+        ssize_t               write_size;  /* currently held size
 					      (after collapsing) */
 	size_t                orig_size;   /* size which arrived with the request.
 					      This is the size by which we grow
@@ -205,6 +205,26 @@ out:
 }
 
 
+gf_boolean_t
+wb_fd_err (fd_t *fd, xlator_t *this, int32_t *op_errno)
+{
+        gf_boolean_t err   = _gf_false;
+        uint64_t     value = 0;
+        int32_t      tmp   = 0;
+
+	if (fd_ctx_get (fd, this, &value) == 0) {
+                if (op_errno) {
+                        tmp = value;
+                        *op_errno = tmp;
+                }
+
+                err = _gf_true;
+        }
+
+        return err;
+}
+
+
 /*
   Below is a succinct explanation of the code deciding whether two regions
   overlap, from Pavan <tcp@gluster.com>.
@@ -302,6 +322,30 @@ wb_liability_has_conflict (wb_inode_t *wb_inode, wb_request_t *req)
 }
 
 
+gf_boolean_t
+wb_wip_has_conflict (wb_inode_t *wb_inode, wb_request_t *req)
+{
+        wb_request_t *each     = NULL;
+
+	if (req->stub->fop != GF_FOP_WRITE)
+		/* non-writes fundamentally never conflict with WIP requests */
+		return _gf_false;
+
+        list_for_each_entry (each, &wb_inode->wip, wip) {
+		if (each == req)
+			/* request never conflicts with itself,
+			   though this condition should never occur.
+			*/
+			continue;
+
+		if (wb_requests_overlap (each, req))
+			return _gf_true;
+        }
+
+        return _gf_false;
+}
+
+
 static int
 __wb_request_unref (wb_request_t *req)
 {
@@ -320,6 +364,7 @@ __wb_request_unref (wb_request_t *req)
         if (req->refcount == 0) {
                 list_del_init (&req->todo);
                 list_del_init (&req->lie);
+		list_del_init (&req->wip);
 
 		list_del_init (&req->all);
 		if (list_empty (&wb_inode->all)) {
@@ -425,6 +470,7 @@ wb_enqueue_common (wb_inode_t *wb_inode, call_stub_t *stub, int tempted)
         INIT_LIST_HEAD (&req->lie);
         INIT_LIST_HEAD (&req->winds);
         INIT_LIST_HEAD (&req->unwinds);
+        INIT_LIST_HEAD (&req->wip);
 
         req->stub = stub;
         req->wb_inode = wb_inode;
@@ -432,8 +478,8 @@ wb_enqueue_common (wb_inode_t *wb_inode, call_stub_t *stub, int tempted)
 	req->ordering.tempted = tempted;
 
         if (stub->fop == GF_FOP_WRITE) {
-                req->write_size = iov_length (stub->args.writev.vector,
-					      stub->args.writev.count);
+                req->write_size = iov_length (stub->args.vector,
+					      stub->args.count);
 
 		/* req->write_size can change as we collapse
 		   small writes. But the window needs to grow
@@ -449,7 +495,7 @@ wb_enqueue_common (wb_inode_t *wb_inode, call_stub_t *stub, int tempted)
 		req->op_ret = req->write_size;
 		req->op_errno = 0;
 
-		if (stub->args.writev.fd->flags & O_APPEND)
+		if (stub->args.fd->flags & O_APPEND)
 			req->ordering.append = 1;
         }
 
@@ -457,28 +503,28 @@ wb_enqueue_common (wb_inode_t *wb_inode, call_stub_t *stub, int tempted)
 
 	switch (stub->fop) {
 	case GF_FOP_WRITE:
-		req->ordering.off = stub->args.writev.off;
+		req->ordering.off = stub->args.offset;
 		req->ordering.size = req->write_size;
 
-		req->fd = fd_ref (stub->args.writev.fd);
+		req->fd = fd_ref (stub->args.fd);
 
 		break;
 	case GF_FOP_READ:
-		req->ordering.off = stub->args.readv.off;
-		req->ordering.size = stub->args.readv.size;
+		req->ordering.off = stub->args.offset;
+		req->ordering.size = stub->args.size;
 
-		req->fd = fd_ref (stub->args.readv.fd);
+		req->fd = fd_ref (stub->args.fd);
 
 		break;
 	case GF_FOP_TRUNCATE:
-		req->ordering.off = stub->args.truncate.off;
+		req->ordering.off = stub->args.offset;
 		req->ordering.size = 0; /* till infinity */
 		break;
 	case GF_FOP_FTRUNCATE:
-		req->ordering.off = stub->args.ftruncate.off;
+		req->ordering.off = stub->args.offset;
 		req->ordering.size = 0; /* till infinity */
 
-		req->fd = fd_ref (stub->args.ftruncate.fd);
+		req->fd = fd_ref (stub->args.fd);
 
 		break;
 	default:
@@ -541,6 +587,7 @@ __wb_inode_create (xlator_t *this, inode_t *inode)
         INIT_LIST_HEAD (&wb_inode->todo);
         INIT_LIST_HEAD (&wb_inode->liability);
         INIT_LIST_HEAD (&wb_inode->temptation);
+        INIT_LIST_HEAD (&wb_inode->wip);
 
         wb_inode->this = this;
 
@@ -629,12 +676,25 @@ wb_head_done (wb_request_t *head)
 
 
 void
-wb_inode_err (wb_inode_t *wb_inode, int op_errno)
+wb_fulfill_err (wb_request_t *head, int op_errno)
 {
+	wb_inode_t *wb_inode;
+	wb_request_t *req;
+
+	wb_inode = head->wb_inode;
+
+	/* for all future requests yet to arrive */
+	fd_ctx_set (head->fd, THIS, op_errno);
+
 	LOCK (&wb_inode->lock);
 	{
-		wb_inode->op_ret = -1;
-		wb_inode->op_errno = op_errno;
+		/* for all requests already arrived */
+		list_for_each_entry (req, &wb_inode->all, all) {
+			if (req->fd != head->fd)
+				continue;
+			req->op_ret = -1;
+			req->op_errno = op_errno;
+		}
 	}
 	UNLOCK (&wb_inode->lock);
 }
@@ -654,7 +714,7 @@ wb_fulfill_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
         wb_inode = head->wb_inode;
 
 	if (op_ret == -1) {
-		wb_inode_err (wb_inode, op_errno);
+		wb_fulfill_err (head, op_errno);
 	} else if (op_ret < head->total_size) {
 		/*
 		 * We've encountered a short write, for whatever reason.
@@ -664,7 +724,7 @@ wb_fulfill_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 		 * TODO: Retry the write so we can potentially capture
 		 * a real error condition (i.e., ENOSPC).
 		 */
-		wb_inode_err (wb_inode, EIO);
+		wb_fulfill_err (head, EIO);
 	}
 
 	wb_head_done (head);
@@ -678,34 +738,47 @@ wb_fulfill_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 
 
 #define WB_IOV_LOAD(vec, cnt, req, head) do {				\
-		memcpy (&vec[cnt], req->stub->args.writev.vector,	\
-			(req->stub->args.writev.count * sizeof(vec[0]))); \
-		cnt += req->stub->args.writev.count;			\
+		memcpy (&vec[cnt], req->stub->args.vector,		\
+			(req->stub->args.count * sizeof(vec[0])));	\
+		cnt += req->stub->args.count;				\
 		head->total_size += req->write_size;			\
 	} while (0)
 
 
-void
+int
 wb_fulfill_head (wb_inode_t *wb_inode, wb_request_t *head)
 {
-	struct iovec   vector[MAX_VECTOR_COUNT];
-	int            count = 0;
-	wb_request_t  *req = NULL;
-	call_frame_t  *frame = NULL;
+	struct iovec  vector[MAX_VECTOR_COUNT];
+	int           count    = 0;
+	wb_request_t *req      = NULL;
+	call_frame_t *frame    = NULL;
+        gf_boolean_t  fderr    = _gf_false;
+        xlator_t     *this     = NULL;
 
-	frame = create_frame (wb_inode->this, wb_inode->this->ctx->pool);
-	if (!frame)
-		goto enomem;
+        this = THIS;
+
+        /* make sure head->total_size is updated before we run into any
+         * errors
+         */
 
 	WB_IOV_LOAD (vector, count, head, head);
 
 	list_for_each_entry (req, &head->winds, winds) {
 		WB_IOV_LOAD (vector, count, req, head);
 
-		iobref_merge (head->stub->args.writev.iobref,
-			      req->stub->args.writev.iobref);
+		iobref_merge (head->stub->args.iobref,
+			      req->stub->args.iobref);
 	}
 
+        if (wb_fd_err (head->fd, this, NULL)) {
+                fderr = _gf_true;
+                goto err;
+        }
+
+	frame = create_frame (wb_inode->this, wb_inode->this->ctx->pool);
+	if (!frame)
+		goto err;
+
 	frame->root->lk_owner = head->lk_owner;
 	frame->local = head;
 
@@ -718,32 +791,36 @@ wb_fulfill_head (wb_inode_t *wb_inode, wb_request_t *head)
 	STACK_WIND (frame, wb_fulfill_cbk, FIRST_CHILD (frame->this),
 		    FIRST_CHILD (frame->this)->fops->writev,
 		    head->fd, vector, count,
-		    head->stub->args.writev.off,
-		    head->stub->args.writev.flags,
-		    head->stub->args.writev.iobref, NULL);
+		    head->stub->args.offset,
+		    head->stub->args.flags,
+		    head->stub->args.iobref, NULL);
 
-	return;
-enomem:
-	wb_inode_err (wb_inode, ENOMEM);
+	return 0;
+err:
+        if (!fderr) {
+                /* frame creation failure */
+                fderr = ENOMEM;
+                wb_fulfill_err (head, fderr);
+        }
 
 	wb_head_done (head);
 
-	return;
+	return fderr;
 }
 
 
 #define NEXT_HEAD(head, req) do {					\
 		if (head)						\
-			wb_fulfill_head (wb_inode, head);		\
+			ret |= wb_fulfill_head (wb_inode, head);	\
 		head = req;						\
-		expected_offset = req->stub->args.writev.off +		\
+		expected_offset = req->stub->args.offset +		\
 			req->write_size;				\
 		curr_aggregate = 0;					\
 		vector_count = 0;					\
 	} while (0)
 
 
-void
+int
 wb_fulfill (wb_inode_t *wb_inode, list_head_t *liabilities)
 {
 	wb_request_t  *req     = NULL;
@@ -753,6 +830,7 @@ wb_fulfill (wb_inode_t *wb_inode, list_head_t *liabilities)
 	off_t          expected_offset = 0;
 	size_t         curr_aggregate = 0;
 	size_t         vector_count = 0;
+        int            ret          = 0;
 
 	conf = wb_inode->this->private;
 
@@ -774,7 +852,7 @@ wb_fulfill (wb_inode_t *wb_inode, list_head_t *liabilities)
 			continue;
 		}
 
-		if (expected_offset != req->stub->args.writev.off) {
+		if (expected_offset != req->stub->args.offset) {
 			NEXT_HEAD (head, req);
 			continue;
 		}
@@ -784,7 +862,7 @@ wb_fulfill (wb_inode_t *wb_inode, list_head_t *liabilities)
 			continue;
 		}
 
-		if (vector_count + req->stub->args.writev.count >
+		if (vector_count + req->stub->args.count >
 		    MAX_VECTOR_COUNT) {
 			NEXT_HEAD (head, req);
 			continue;
@@ -792,12 +870,13 @@ wb_fulfill (wb_inode_t *wb_inode, list_head_t *liabilities)
 
 		list_add_tail (&req->winds, &head->winds);
 		curr_aggregate += req->write_size;
-		vector_count += req->stub->args.writev.count;
+		vector_count += req->stub->args.count;
 	}
 
 	if (head)
-		wb_fulfill_head (wb_inode, head);
-	return;
+		ret |= wb_fulfill_head (wb_inode, head);
+
+	return ret;
 }
 
 
@@ -861,10 +940,20 @@ __wb_collapse_small_writes (wb_request_t *holder, wb_request_t *req)
         struct iobuf  *iobuf  = NULL;
         struct iobref *iobref = NULL;
         int            ret    = -1;
+        ssize_t        required_size = 0;
+        size_t         holder_len = 0;
+        size_t         req_len = 0;
 
         if (!holder->iobref) {
-                /* TODO: check the required size */
-                iobuf = iobuf_get (req->wb_inode->this->ctx->iobuf_pool);
+                holder_len = iov_length (holder->stub->args.vector,
+                                         holder->stub->args.count);
+                req_len = iov_length (req->stub->args.vector,
+                                      req->stub->args.count);
+
+                required_size = max ((THIS->ctx->page_size),
+                                     (holder_len + req_len));
+                iobuf = iobuf_get2 (req->wb_inode->this->ctx->iobuf_pool,
+                                    required_size);
                 if (iobuf == NULL) {
                         goto out;
                 }
@@ -885,25 +974,25 @@ __wb_collapse_small_writes (wb_request_t *holder, wb_request_t *req)
                         goto out;
                 }
 
-                iov_unload (iobuf->ptr, holder->stub->args.writev.vector,
-                            holder->stub->args.writev.count);
-                holder->stub->args.writev.vector[0].iov_base = iobuf->ptr;
-		holder->stub->args.writev.count = 1;
+                iov_unload (iobuf->ptr, holder->stub->args.vector,
+                            holder->stub->args.count);
+                holder->stub->args.vector[0].iov_base = iobuf->ptr;
+		holder->stub->args.count = 1;
 
-                iobref_unref (holder->stub->args.writev.iobref);
-                holder->stub->args.writev.iobref = iobref;
+                iobref_unref (holder->stub->args.iobref);
+                holder->stub->args.iobref = iobref;
 
                 iobuf_unref (iobuf);
 
                 holder->iobref = iobref_ref (iobref);
         }
 
-        ptr = holder->stub->args.writev.vector[0].iov_base + holder->write_size;
+        ptr = holder->stub->args.vector[0].iov_base + holder->write_size;
 
-        iov_unload (ptr, req->stub->args.writev.vector,
-                    req->stub->args.writev.count);
+        iov_unload (ptr, req->stub->args.vector,
+                    req->stub->args.count);
 
-        holder->stub->args.writev.vector[0].iov_len += req->write_size;
+        holder->stub->args.vector[0].iov_len += req->write_size;
         holder->write_size += req->write_size;
         holder->ordering.size += req->write_size;
 
@@ -917,13 +1006,13 @@ void
 __wb_preprocess_winds (wb_inode_t *wb_inode)
 {
         off_t         offset_expected = 0;
-        size_t        space_left      = 0;
+        ssize_t       space_left      = 0;
 	wb_request_t *req             = NULL;
 	wb_request_t *tmp             = NULL;
 	wb_request_t *holder          = NULL;
 	wb_conf_t    *conf            = NULL;
         int           ret             = 0;
-	size_t        page_size       = 0;
+	ssize_t       page_size       = 0;
 
 	/* With asynchronous IO from a VM guest (as a file), there
 	   can be two sequential writes happening in two regions
@@ -953,10 +1042,10 @@ __wb_preprocess_winds (wb_inode_t *wb_inode)
 			continue;
 		}
 
-		offset_expected = holder->stub->args.writev.off
+		offset_expected = holder->stub->args.offset
 			+ holder->write_size;
 
-		if (req->stub->args.writev.off != offset_expected) {
+		if (req->stub->args.offset != offset_expected) {
 			holder->ordering.go = 1;
 			holder = req;
 			continue;
@@ -968,6 +1057,12 @@ __wb_preprocess_winds (wb_inode_t *wb_inode)
 			continue;
 		}
 
+                if (req->fd != holder->fd) {
+                        holder->ordering.go = 1;
+                        holder = req;
+                        continue;
+                }
+
 		space_left = page_size - holder->write_size;
 
 		if (space_left < req->write_size) {
@@ -1022,6 +1117,18 @@ __wb_pick_winds (wb_inode_t *wb_inode, list_head_t *tasks,
 			/* wait some more */
 			continue;
 
+		if (req->stub->fop == GF_FOP_WRITE) {
+			if (wb_wip_has_conflict (wb_inode, req))
+				continue;
+
+			list_add_tail (&req->wip, &wb_inode->wip);
+
+			if (!req->ordering.tempted)
+				/* unrefed in wb_writev_cbk */
+				req->stub->frame->local =
+					__wb_request_ref (req);
+		}
+
 		list_del_init (&req->todo);
 
 		if (req->ordering.tempted)
@@ -1054,38 +1161,69 @@ wb_process_queue (wb_inode_t *wb_inode)
         list_head_t tasks  = {0, };
 	list_head_t lies = {0, };
 	list_head_t liabilities = {0, };
+        int         retry       = 0;
 
         INIT_LIST_HEAD (&tasks);
         INIT_LIST_HEAD (&lies);
         INIT_LIST_HEAD (&liabilities);
 
-        LOCK (&wb_inode->lock);
-        {
-		__wb_preprocess_winds (wb_inode);
+        do {
+                LOCK (&wb_inode->lock);
+                {
+                        __wb_preprocess_winds (wb_inode);
 
-		__wb_pick_winds (wb_inode, &tasks, &liabilities);
+                        __wb_pick_winds (wb_inode, &tasks, &liabilities);
 
-                __wb_pick_unwinds (wb_inode, &lies);
+                        __wb_pick_unwinds (wb_inode, &lies);
 
-        }
-        UNLOCK (&wb_inode->lock);
+                }
+                UNLOCK (&wb_inode->lock);
 
-	wb_do_unwinds (wb_inode, &lies);
+                wb_do_unwinds (wb_inode, &lies);
 
-	wb_do_winds (wb_inode, &tasks);
+                wb_do_winds (wb_inode, &tasks);
 
-	wb_fulfill (wb_inode, &liabilities);
+                /* fd might've been marked bad due to previous errors.
+                 * Since, caller of wb_process_queue might be the last fop on
+                 * inode, make sure we keep processing request queue, till there
+                 * are no requests left.
+                 */
+                retry = wb_fulfill (wb_inode, &liabilities);
+        } while (retry);
 
         return;
 }
 
 
 int
+wb_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+	       int32_t op_ret, int32_t op_errno,
+	       struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+	wb_request_t *req = NULL;
+	wb_inode_t *wb_inode;
+
+	req = frame->local;
+	frame->local = NULL;
+	wb_inode = req->wb_inode;
+
+	wb_request_unref (req);
+
+	/* requests could be pending while this was in progress */
+	wb_process_queue(wb_inode);
+
+	STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
+			     xdata);
+	return 0;
+}
+
+
+int
 wb_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
 		  struct iovec *vector, int32_t count, off_t offset,
 		  uint32_t flags, struct iobref *iobref, dict_t *xdata)
 {
-	STACK_WIND (frame, default_writev_cbk,
+	STACK_WIND (frame, wb_writev_cbk,
 		    FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
 		    fd, vector, count, offset, flags, iobref, xdata);
 	return 0;
@@ -1102,10 +1240,15 @@ wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
         gf_boolean_t  wb_disabled   = 0;
         call_stub_t  *stub          = NULL;
         int           ret           = -1;
-	int           op_errno      = EINVAL;
+        int32_t       op_errno      = EINVAL;
 	int           o_direct      = O_DIRECT;
 
 	conf = this->private;
+
+        if (wb_fd_err (fd, this, &op_errno)) {
+                goto unwind;
+        }
+
         wb_inode = wb_inode_create (this, fd->inode);
 	if (!wb_inode) {
 		op_errno = ENOMEM;
@@ -1118,24 +1261,9 @@ wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
 	if (fd->flags & (O_SYNC|O_DSYNC|o_direct))
 		wb_disabled = 1;
 
-	if (flags & (O_SYNC|O_DSYNC|O_DIRECT))
-		/* O_DIRECT flag in params of writev must _always_ be honored */
+	if (flags & (O_SYNC|O_DSYNC|o_direct))
 		wb_disabled = 1;
 
-	op_errno = 0;
-	LOCK (&wb_inode->lock);
-	{
-		/* pick up a previous error in fulfillment */
-		if (wb_inode->op_ret < 0)
-			op_errno = wb_inode->op_errno;
-
-		wb_inode->op_ret = 0;
-	}
-	UNLOCK (&wb_inode->lock);
-
-        if (op_errno)
-                goto unwind;
-
 	if (wb_disabled)
 		stub = fop_writev_stub (frame, wb_writev_helper, fd, vector,
 					count, offset, flags, iobref, xdata);
@@ -1233,7 +1361,7 @@ wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
         wb_conf_t    *conf        = NULL;
         wb_inode_t   *wb_inode    = NULL;
         call_frame_t *bg_frame    = NULL;
-	int           op_errno    = 0;
+	int32_t       op_errno    = 0;
 	int           op_ret      = 0;
 
         conf = this->private;
@@ -1245,19 +1373,10 @@ wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
 		goto unwind;
 	}
 
-        LOCK (&wb_inode->lock);
-        {
-		if (wb_inode->op_ret < 0) {
-			op_ret = -1;
-			op_errno = wb_inode->op_errno;
-		}
-
-                wb_inode->op_ret = 0;
-        }
-        UNLOCK (&wb_inode->lock);
-
-	if (op_errno)
+	if (wb_fd_err (fd, this, &op_errno)) {
+		op_ret = -1;
 		goto unwind;
+	}
 
         if (conf->flush_behind)
 		goto flushbehind;
@@ -1301,7 +1420,7 @@ wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
 	if (!wb_enqueue (wb_inode, stub))
 		goto unwind;
 
-	wb_process_queue (wb_inode);
+        wb_process_queue (wb_inode);
 
         return 0;
 
@@ -1334,6 +1453,10 @@ wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
 {
         wb_inode_t   *wb_inode     = NULL;
         call_stub_t  *stub         = NULL;
+	int32_t       op_errno     = EINVAL;
+
+	if (wb_fd_err (fd, this, &op_errno))
+		goto unwind;
 
         wb_inode = wb_inode_ctx_get (this, fd->inode);
 	if (!wb_inode)
@@ -1351,7 +1474,7 @@ wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
         return 0;
 
 unwind:
-        STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
+        STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, NULL);
 
         return 0;
 
@@ -1511,25 +1634,35 @@ wb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
 {
         wb_inode_t   *wb_inode     = NULL;
         call_stub_t  *stub         = NULL;
+        int32_t       op_errno     = 0;
 
         wb_inode = wb_inode_create (this, fd->inode);
-	if (!wb_inode)
+	if (!wb_inode) {
+                op_errno = ENOMEM;
+		goto unwind;
+        }
+
+	if (wb_fd_err (fd, this, &op_errno))
 		goto unwind;
 
 	stub = fop_ftruncate_stub (frame, wb_ftruncate_helper, fd,
 				   offset, xdata);
-	if (!stub)
+	if (!stub) {
+                op_errno = ENOMEM;
 		goto unwind;
+        }
 
-	if (!wb_enqueue (wb_inode, stub))
+	if (!wb_enqueue (wb_inode, stub)) {
+                op_errno = ENOMEM;
 		goto unwind;
+        }
 
 	wb_process_queue (wb_inode);
 
         return 0;
 
 unwind:
-        STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+        STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
 
         if (stub)
                 call_stub_destroy (stub);
@@ -1642,13 +1775,22 @@ wb_forget (xlator_t *this, inode_t *inode)
 	if (!wb_inode)
 		return 0;
 
-	LOCK (&wb_inode->lock);
-	{
-		GF_ASSERT (list_empty (&wb_inode->todo));
-		GF_ASSERT (list_empty (&wb_inode->liability));
-		GF_ASSERT (list_empty (&wb_inode->temptation));
-	}
-	UNLOCK (&wb_inode->lock);
+        GF_ASSERT (list_empty (&wb_inode->todo));
+        GF_ASSERT (list_empty (&wb_inode->liability));
+        GF_ASSERT (list_empty (&wb_inode->temptation));
+
+        GF_FREE (wb_inode);
+
+        return 0;
+}
+
+
+int
+wb_release (xlator_t *this, fd_t *fd)
+{
+        uint64_t    tmp      = 0;
+
+        fd_ctx_del (fd, this, &tmp);
 
         return 0;
 }
@@ -1709,7 +1851,7 @@ __wb_dump_requests (struct list_head *head, char *prefix)
                                             req->write_size);
 
                         gf_proc_dump_write ("offset", "%"PRId64,
-                                            req->stub->args.writev.off);
+                                            req->stub->args.offset);
 
                         flag = req->ordering.lied;
                         gf_proc_dump_write ("lied", "%d", flag);
@@ -1766,9 +1908,6 @@ wb_inode_dump (xlator_t *this, inode_t *inode)
         gf_proc_dump_write ("window_current", "%"GF_PRI_SIZET,
                             wb_inode->window_current);
 
-        gf_proc_dump_write ("op_ret", "%d", wb_inode->op_ret);
-
-        gf_proc_dump_write ("op_errno", "%d", wb_inode->op_errno);
 
         ret = TRY_LOCK (&wb_inode->lock);
         if (!ret)
@@ -1941,6 +2080,7 @@ struct xlator_fops fops = {
 
 struct xlator_cbks cbks = {
         .forget   = wb_forget,
+        .release  = wb_release
 };
 
 
@@ -1975,6 +2115,8 @@ struct volume_options options[] = {
         { .key = {"strict-O_DIRECT"},
           .type = GF_OPTION_TYPE_BOOL,
           .default_value = "off",
+          .description = "This option when set to off, ignores the "
+          "O_DIRECT flag."
         },
         { .key = {"strict-write-ordering"},
           .type = GF_OPTION_TYPE_BOOL,