author     Raghavendra G <rgowdapp@redhat.com>    2015-11-17 12:57:54 +0530
committer  Raghavendra G <rgowdapp@redhat.com>    2015-12-22 01:55:57 -0800
commit     3fcead2de7bcdb4e1312f37e7e750abd8d9d9770 (patch)
tree       6df073e7faf75ed55e997692201494fed547ce84 /xlators/performance
parent     d73f5c9eae0f5c8796598fa1930844c15c2c37d4 (diff)
performance/write-behind: retry "failed syncs to backend"
1. When a sync fails, the cached-write is still preserved unless there
   is a flush/fsync waiting on it.
2. When a sync fails and there is a flush/fsync waiting on the
   cached-write, the cache is thrown away and no further retries are
   made. In other words, flush/fsync act as barriers for all the
   previous writes: all previous writes are either successfully synced
   to the backend or forgotten in case of an error. Whether fsync acts
   as a barrier is controlled by an option (see below for details).
   Without such a barrier fop (especially flush, which is issued prior
   to a close), we would end up retrying forever even after the fd is
   closed. The decision logic is sketched in the example after this
   list.
3. If a fop is waiting on a cached-write and syncing it to the backend
   fails, the waiting fop is failed.
4. Sync failures when no fop is waiting are ignored and are not
   propagated to the application. For example:
   a. the first attempt to sync a cached-write w1 fails
   b. the second attempt to sync w1 succeeds
   If no fops dependent on w1 are issued between (a) and (b), the
   application won't know about the failure encountered in (a).
5. The effect of repeated sync failures is that there is no cache for
   future writes, so they cannot be written behind.
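
To make the rules above concrete, here is a minimal, self-contained C
sketch of the per-request decision taken when a sync attempt fails. It
is a simplified model of __wb_fulfill_request_err() from the patch
below, not the translator code itself; the enum and function names are
invented for illustration:

    #include <stdbool.h>

    /* Possible outcomes for a cached-write whose sync failed. */
    enum wb_err_action {
            FAIL_PENDING_FOP, /* app still waiting: fail the write itself  */
            FORGET,           /* barrier reached: record error, drop cache */
            RETRY             /* keep the cache, try syncing again later   */
    };

    /* lied               : write already acknowledged (written behind)
     * waiting_flush      : a flush on the same fd is queued behind it
     * waiting_fsync      : an fsync on the same fd is queued behind it
     * resync_after_fsync : value of resync-failed-syncs-after-fsync
     */
    static enum wb_err_action
    on_sync_failure (bool lied, bool waiting_flush, bool waiting_fsync,
                     bool resync_after_fsync)
    {
            if (!lied)
                    return FAIL_PENDING_FOP;          /* point 3 above */

            /* The error is stored in the waiter, so the flush/fsync is
             * failed; flush always acts as a barrier, fsync only when
             * resync is off. */
            if (waiting_flush || (waiting_fsync && !resync_after_fsync))
                    return FORGET;                    /* point 2 above */

            /* No barrier yet: preserve the cache and retry (points 1
             * and 4); an intervening successful sync makes the failure
             * invisible to the application. */
            return RETRY;
    }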
fsync as a barrier and resync of cached writes post fsync failure:
==================================================================
Whether to keep retrying failed syncs post fsync is controlled by an
option "resync-failed-syncs-after-fsync". By default, this option is
set to "off".
If sync of "cached-writes issued before fsync" (to backend) fails,
this option configures whether to retry syncing them after fsync or
forget them. If set to on, cached-writes are retried till a "flush"
fop (or a successful sync) on sync failures. fsync itself is failed
irrespective of the value of this option, when there is a sync failure
of any cached-writes issued before fsync.
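
For illustration, the new key can be set like any other write-behind
xlator option, e.g. directly in a volfile. The fragment below is a
hypothetical sketch (the volume and subvolume names are invented);
whether the option is also exposed through the "gluster volume set"
CLI depends on glusterd's option table, which this patch does not
touch:

    volume test-write-behind
        type performance/write-behind
        option resync-failed-syncs-after-fsync on
        subvolumes test-client-0
    end-volume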
Change-Id: I6097c9257bfb9ee5b15616fbe6a0576ae9af369a
Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
BUG: 1279730
Reviewed-on: http://review.gluster.org/12594
Diffstat (limited to 'xlators/performance')
-rw-r--r--  xlators/performance/write-behind/src/write-behind.c | 480
1 file changed, 376 insertions(+), 104 deletions(-)
diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
index 1adda4eaff4..285420526f4 100644
--- a/xlators/performance/write-behind/src/write-behind.c
+++ b/xlators/performance/write-behind/src/write-behind.c
@@ -107,6 +107,14 @@ typedef struct wb_inode {
         size_t       size; /* Size of the file to catch write after EOF. */
         gf_lock_t    lock;
         xlator_t    *this;
+        int          dontsync; /* If positive, dont pick lies for
+                                * winding. This is needed to break infinite
+                                * recursion during invocation of
+                                * wb_process_queue from
+                                * wb_fulfill_cbk in case of an
+                                * error during fulfill.
+                                */
+
 } wb_inode_t;
 
 
@@ -144,6 +152,8 @@ typedef struct wb_request {
                                           request arrival */
 
         fd_t                  *fd;
+        int                    wind_count; /* number of sync-attempts. Only
+                                              for debug purposes */
         struct {
                 size_t         size; /* 0 size == till infinity */
                 off_t          off;
@@ -164,6 +174,7 @@ typedef struct wb_conf {
         gf_boolean_t trickling_writes;
         gf_boolean_t strict_write_ordering;
         gf_boolean_t strict_O_DIRECT;
+        gf_boolean_t resync_after_fsync;
 } wb_conf_t;
 
 
@@ -202,26 +213,6 @@ out:
 }
 
 
-gf_boolean_t
-wb_fd_err (fd_t *fd, xlator_t *this, int32_t *op_errno)
-{
-        gf_boolean_t err   = _gf_false;
-        uint64_t     value = 0;
-        int32_t      tmp   = 0;
-
-        if (fd_ctx_get (fd, this, &value) == 0) {
-                if (op_errno) {
-                        tmp = value;
-                        *op_errno = tmp;
-                }
-
-                err = _gf_true;
-        }
-
-        return err;
-}
-
-
 /*
   Below is a succinct explanation of the code deciding whether two regions
   overlap, from Pavan <tcp@gluster.com>.
@@ -305,17 +296,17 @@ wb_requests_conflict (wb_request_t *lie, wb_request_t *req)
 }
 
 
-gf_boolean_t
+wb_request_t *
 wb_liability_has_conflict (wb_inode_t *wb_inode, wb_request_t *req)
 {
         wb_request_t *each = NULL;
 
         list_for_each_entry (each, &wb_inode->liability, lie) {
                 if (wb_requests_conflict (each, req))
-                        return _gf_true;
+                        return each;
         }
 
-        return _gf_false;
+        return NULL;
 }
 
 
@@ -552,6 +543,9 @@ wb_enqueue_common (wb_inode_t *wb_inode, call_stub_t *stub, int tempted)
                 break;
 
         default:
+                if (stub && stub->args.fd)
+                        req->fd = fd_ref (stub->args.fd);
+
                 break;
         }
 
@@ -679,6 +673,88 @@ __wb_fulfill_request (wb_request_t *req)
 }
 
 
+/* get a flush/fsync waiting on req */
+wb_request_t *
+__wb_request_waiting_on (wb_request_t *req)
+{
+        wb_inode_t   *wb_inode = NULL;
+        wb_request_t *trav     = NULL;
+
+        wb_inode = req->wb_inode;
+
+        list_for_each_entry (trav, &wb_inode->todo, todo) {
+                if ((trav->fd == req->fd)
+                    && ((trav->stub->fop == GF_FOP_FLUSH)
+                        || (trav->stub->fop == GF_FOP_FSYNC))
+                    && (trav->gen >= req->gen))
+                        return trav;
+        }
+
+        return NULL;
+}
+
+
+void
+__wb_fulfill_request_err (wb_request_t *req, int32_t op_errno)
+{
+        wb_inode_t   *wb_inode = NULL;
+        wb_request_t *waiter   = NULL;
+        wb_conf_t    *conf     = NULL;
+
+        wb_inode = req->wb_inode;
+
+        conf = wb_inode->this->private;
+
+        req->op_ret = -1;
+        req->op_errno = op_errno;
+
+        if (req->ordering.lied)
+                waiter = __wb_request_waiting_on (req);
+
+        if (!req->ordering.lied || waiter) {
+                if (!req->ordering.lied) {
+                        /* response to app is still pending, send failure in
+                         * response.
+                         */
+                } else {
+                        /* response was sent, store the error in a
+                         * waiter (either an fsync or flush).
+                         */
+                        waiter->op_ret = -1;
+                        waiter->op_errno = op_errno;
+                }
+
+                if (!req->ordering.lied
+                    || (waiter->stub->fop == GF_FOP_FLUSH)
+                    || ((waiter->stub->fop == GF_FOP_FSYNC)
+                        && !conf->resync_after_fsync)) {
+                        /* No retry needed, forget the request */
+                        __wb_fulfill_request (req);
+                        return;
+                }
+        }
+
+        /* response was unwound and no waiter waiting on this request, retry
+           till a flush or fsync (subject to conf->resync_after_fsync).
+         */
+        wb_inode->transit -= req->total_size;
+
+        req->total_size = 0;
+
+        list_del_init (&req->winds);
+        list_del_init (&req->todo);
+        list_del_init (&req->wip);
+
+        /* sanitize ordering flags to retry */
+        req->ordering.go = 0;
+
+        /* Add back to todo list to retry */
+        list_add (&req->todo, &wb_inode->todo);
+
+        return;
+}
+
+
 void
 wb_head_done (wb_request_t *head)
 {
@@ -693,6 +769,7 @@ wb_head_done (wb_request_t *head)
                 list_for_each_entry_safe (req, tmp, &head->winds, winds) {
                         __wb_fulfill_request (req);
                 }
+
                 __wb_fulfill_request (head);
         }
         UNLOCK (&wb_inode->lock);
@@ -700,29 +777,130 @@ wb_head_done (wb_request_t *head)
 
 
 void
+__wb_fulfill_err (wb_request_t *head, int op_errno)
+{
+        wb_request_t *req = NULL, *tmp = NULL;
+
+        if (!head)
+                goto out;
+
+        head->wb_inode->dontsync++;
+
+        list_for_each_entry_safe_reverse (req, tmp, &head->winds,
+                                          winds) {
+                __wb_fulfill_request_err (req, op_errno);
+        }
+
+        __wb_fulfill_request_err (head, op_errno);
+
+out:
+        return;
+}
+
+
+void
 wb_fulfill_err (wb_request_t *head, int op_errno)
 {
-        wb_inode_t *wb_inode;
-        wb_request_t *req;
+        wb_inode_t *wb_inode = NULL;
 
         wb_inode = head->wb_inode;
 
-        /* for all future requests yet to arrive */
-        fd_ctx_set (head->fd, THIS, op_errno);
-
         LOCK (&wb_inode->lock);
         {
-                /* for all requests already arrived */
-                list_for_each_entry (req, &wb_inode->all, all) {
-                        if (req->fd != head->fd)
-                                continue;
-
-                        req->op_ret = -1;
-                        req->op_errno = op_errno;
-                }
+                __wb_fulfill_err (head, op_errno);
         }
         UNLOCK (&wb_inode->lock);
 }
 
 
+inline void
+__wb_modify_write_request (wb_request_t *req, int synced_size,
+                           int head_total_size)
+{
+        struct iovec *vector = NULL;
+        int           count  = 0;
+
+        if (!req || synced_size == 0)
+                goto out;
+
+        req->write_size -= synced_size;
+        req->stub->args.offset += synced_size;
+        req->total_size = head_total_size;
+
+        vector = req->stub->args.vector;
+        count = req->stub->args.count;
+
+        req->stub->args.count = iov_subset (vector, count, synced_size,
+                                            iov_length (vector, count),
+                                            vector);
+
+out:
+        return;
+}
+
+
+int
+__wb_fulfill_short_write (wb_request_t *req, int size, int total_size)
+{
+        int accounted_size = 0;
+
+        if (req == NULL)
+                goto out;
+
+        if (req->write_size <= size) {
+                accounted_size = req->write_size;
+                __wb_fulfill_request (req);
+        } else {
+                accounted_size = size;
+                __wb_modify_write_request (req, size, total_size);
+        }
+
+out:
+        return accounted_size;
+}
+
+
+void
+wb_fulfill_short_write (wb_request_t *head, int size)
+{
+        wb_inode_t   *wb_inode   = NULL;
+        wb_request_t *req        = NULL, *tmp = NULL;
+        int           total_size = 0, accounted_size = 0;
+
+        if (!head)
+                goto out;
+
+        wb_inode = head->wb_inode;
+
+        total_size = head->total_size - size;
+        head->total_size = size;
+
+        req = head;
+
+        LOCK (&wb_inode->lock);
+        {
+                accounted_size = __wb_fulfill_short_write (head, size,
+                                                           total_size);
+
+                size -= accounted_size;
+
+                if (size == 0)
+                        goto done;
+
+                list_for_each_entry_safe (req, tmp, &head->winds, winds) {
+                        accounted_size = __wb_fulfill_short_write (req, size,
                                                                    total_size);
+                        size -= accounted_size;
+
+                        if (size == 0)
+                                break;
+
+                }
+        }
done:
+        UNLOCK (&wb_inode->lock);
+
+        wb_fulfill_err (req, EIO);
+out:
+        return;
+}
+
 
 int
 wb_fulfill_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
@@ -740,18 +918,10 @@ wb_fulfill_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
 
         if (op_ret == -1) {
                 wb_fulfill_err (head, op_errno);
         } else if (op_ret < head->total_size) {
-                /*
-                 * We've encountered a short write, for whatever reason.
-                 * Set an EIO error for the next fop. This should be
-                 * valid for writev or flush (close).
-                 *
-                 * TODO: Retry the write so we can potentially capture
-                 * a real error condition (i.e., ENOSPC).
-                 */
-                wb_fulfill_err (head, EIO);
-        }
-
-        wb_head_done (head);
+                wb_fulfill_short_write (head, op_ret);
+        } else {
+                wb_head_done (head);
+        }
 
         wb_process_queue (wb_inode);
 
@@ -776,10 +946,6 @@ wb_fulfill_head (wb_inode_t *wb_inode, wb_request_t *head)
         int           count = 0;
         wb_request_t *req   = NULL;
         call_frame_t *frame = NULL;
-        gf_boolean_t  fderr = _gf_false;
-        xlator_t     *this  = NULL;
-
-        this = THIS;
 
         /* make sure head->total_size is updated before we run into any
          * errors
@@ -795,11 +961,6 @@ wb_fulfill_head (wb_inode_t *wb_inode, wb_request_t *head)
                 goto err;
         }
 
-        if (wb_fd_err (head->fd, this, NULL)) {
-                fderr = _gf_true;
-                goto err;
-        }
-
         frame = create_frame (wb_inode->this, wb_inode->this->ctx->pool);
         if (!frame)
                 goto err;
@@ -822,26 +983,21 @@ wb_fulfill_head (wb_inode_t *wb_inode, wb_request_t *head)
         return 0;
 
 err:
-        if (!fderr) {
-                /* frame creation failure */
-                fderr = ENOMEM;
-                wb_fulfill_err (head, fderr);
-        }
-
-        wb_head_done (head);
+        /* frame creation failure */
+        wb_fulfill_err (head, ENOMEM);
 
-        return fderr;
+        return ENOMEM;
 }
 
 
-#define NEXT_HEAD(head, req) do {                                      \
-        if (head)                                                      \
-                ret |= wb_fulfill_head (wb_inode, head);               \
-        head = req;                                                    \
-        expected_offset = req->stub->args.offset +                     \
-                req->write_size;                                       \
-        curr_aggregate = 0;                                            \
-        vector_count = 0;                                              \
+#define NEXT_HEAD(head, req) do {                                       \
+        if (head)                                                       \
+                ret |= wb_fulfill_head (wb_inode, head);                \
+        head = req;                                                     \
+        expected_offset = req->stub->args.offset +                      \
+                req->write_size;                                        \
+        curr_aggregate = 0;                                             \
+        vector_count = 0;                                               \
         } while (0)
 
 
@@ -1053,6 +1209,17 @@ __wb_preprocess_winds (wb_inode_t *wb_inode)
         conf = wb_inode->this->private;
 
         list_for_each_entry_safe (req, tmp, &wb_inode->todo, todo) {
+                if (wb_inode->dontsync && req->ordering.lied) {
+                        /* sync has failed. Don't pick lies _again_ for winding
+                         * as winding these lies again will trigger an infinite
+                         * recursion of wb_process_queue being called from a
+                         * failed fulfill. However, pick non-lied requests for
+                         * winding so that application wont block indefinitely
+                         * waiting for write result.
+                         */
+                        continue;
+                }
+
                 if (!req->ordering.tempted) {
                         if (holder) {
                                 if (wb_requests_conflict (holder, req))
@@ -1124,20 +1291,96 @@ __wb_preprocess_winds (wb_inode_t *wb_inode)
         if (conf->trickling_writes && !wb_inode->transit && holder)
                 holder->ordering.go = 1;
 
+        if (wb_inode->dontsync > 0)
+                wb_inode->dontsync--;
+
         return;
 }
 
 
+int
+__wb_handle_failed_conflict (wb_request_t *req, wb_request_t *conflict,
+                             list_head_t *tasks)
+{
+        wb_conf_t *conf = NULL;
+
+        conf = req->wb_inode->this->private;
+
+        if ((req->stub->fop != GF_FOP_FLUSH)
+            && ((req->stub->fop != GF_FOP_FSYNC)
+                || conf->resync_after_fsync)) {
+                if (!req->ordering.lied && list_empty (&conflict->wip)) {
+                        /* If request itself is in liability queue,
+                         * 1. We cannot unwind as the response has already been
+                         *    sent.
+                         * 2. We cannot wind till conflict clears up.
+                         * 3. So, skip the request for now.
+                         * 4. Otherwise, resume (unwind) it with error.
+                         */
+                        req->op_ret = -1;
+                        req->op_errno = conflict->op_errno;
+
+                        list_del_init (&req->todo);
+                        list_add_tail (&req->winds, tasks);
+                }
+        } else {
+                /* flush and fsync (without conf->resync_after_fsync) act as
+                   barriers. We cannot unwind them out of
+                   order, when there are earlier generation writes just because
+                   there is a conflicting liability with an error. So, wait for
+                   our turn till there are no conflicting liabilities.
+
+                   This situation can arise when there liabilities spread across
+                   multiple generations. For eg., consider two writes with
+                   following characterstics:
+
+                   1. they belong to different generations gen1, gen2 and
+                      (gen1 > gen2).
+                   2. they overlap.
+                   3. both are liabilities.
+                   4. gen1 write was attempted to sync, but the attempt failed.
+                   5. there was no attempt to sync gen2 write yet.
+                   6. A flush (as part of close) is issued and gets a gen no
+                      gen3.
+
+                   In the above scenario, if flush is unwound without waiting
+                   for gen1 and gen2 writes either to be successfully synced or
+                   purged, we end up with these two writes in wb_inode->todo
+                   list forever as there will be no attempt to process the queue
+                   as flush is the last operation.
+                */
+        }
+
+        return 0;
+}
+
+
-void
+int
 __wb_pick_winds (wb_inode_t *wb_inode, list_head_t *tasks,
                  list_head_t *liabilities)
 {
-        wb_request_t *req = NULL;
-        wb_request_t *tmp = NULL;
+        wb_request_t *req      = NULL;
+        wb_request_t *tmp      = NULL;
+        wb_request_t *conflict = NULL;
 
         list_for_each_entry_safe (req, tmp, &wb_inode->todo, todo) {
-                if (wb_liability_has_conflict (wb_inode, req))
-                        continue;
+                conflict = wb_liability_has_conflict (wb_inode, req);
+                if (conflict) {
+                        if (conflict->op_ret == -1) {
+                                /* There is a conflicting liability which failed
+                                 * to sync in previous attempts, resume the req
+                                 * and fail, unless its an fsync/flush.
+                                 */
+
+                                __wb_handle_failed_conflict (req, conflict,
+                                                             tasks);
+                        } else {
+                                /* There is a conflicting liability which was
+                                 * not attempted to sync even once. Wait till
+                                 * atleast one attempt to sync is made.
+                                 */
+                        }
+
+                        continue;
+                }
 
                 if (req->ordering.tempted && !req->ordering.go)
                         /* wait some more */
@@ -1148,6 +1391,7 @@ __wb_pick_winds (wb_inode_t *wb_inode, list_head_t *tasks,
                         continue;
 
                 list_add_tail (&req->wip, &wb_inode->wip);
+                req->wind_count++;
 
                 if (!req->ordering.tempted)
                         /* unrefed in wb_writev_cbk */
@@ -1162,6 +1406,8 @@ __wb_pick_winds (wb_inode_t *wb_inode, list_head_t *tasks,
                 else
                         list_add_tail (&req->winds, tasks);
         }
+
+        return 0;
 }
 
 
@@ -1174,7 +1420,12 @@ wb_do_winds (wb_inode_t *wb_inode, list_head_t *tasks)
         list_for_each_entry_safe (req, tmp, tasks, winds) {
                 list_del_init (&req->winds);
 
-                call_resume (req->stub);
+                if (req->op_ret == -1) {
+                        call_unwind_error (req->stub, req->op_ret,
+                                           req->op_errno);
+                } else {
+                        call_resume (req->stub);
+                }
 
                 wb_request_unref (req);
         }
@@ -1184,10 +1435,10 @@ wb_do_winds (wb_inode_t *wb_inode, list_head_t *tasks)
 void
 wb_process_queue (wb_inode_t *wb_inode)
 {
-        list_head_t tasks  = {0, };
-        list_head_t lies = {0, };
-        list_head_t liabilities = {0, };
-        int     retry = 0;
+        list_head_t tasks        = {0, };
+        list_head_t lies         = {0, };
+        list_head_t liabilities  = {0, };
+        int         wind_failure = 0;
 
         INIT_LIST_HEAD (&tasks);
         INIT_LIST_HEAD (&lies);
@@ -1209,13 +1460,12 @@ wb_process_queue (wb_inode_t *wb_inode)
 
                 wb_do_winds (wb_inode, &tasks);
 
-                /* fd might've been marked bad due to previous errors.
-                 * Since, caller of wb_process_queue might be the last fop on
-                 * inode, make sure we keep processing request queue, till there
-                 * are no requests left.
+                /* If there is an error in wb_fulfill before winding write
+                 * requests, we would miss invocation of wb_process_queue
+                 * from wb_fulfill_cbk. So, retry processing again.
                  */
-                retry = wb_fulfill (wb_inode, &liabilities);
-        } while (retry);
+                wind_failure = wb_fulfill (wb_inode, &liabilities);
+        } while (wind_failure);
 
         return;
 }
@@ -1285,10 +1535,6 @@ wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
 
         conf = this->private;
 
-        if (wb_fd_err (fd, this, &op_errno)) {
-                goto unwind;
-        }
-
         wb_inode = wb_inode_create (this, fd->inode);
         if (!wb_inode) {
                 op_errno = ENOMEM;
@@ -1309,7 +1555,7 @@ wb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
                                          count, offset, flags, iobref, xdata);
         else
                 stub = fop_writev_stub (frame, NULL, fd, vector, count, offset,
-                                       flags, iobref, xdata);
+                                        flags, iobref, xdata);
         if (!stub) {
                 op_errno = ENOMEM;
                 goto unwind;
@@ -1413,10 +1659,6 @@ wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
                 goto unwind;
         }
 
-        if (wb_fd_err (fd, this, &op_errno)) {
-                op_ret = -1;
-                goto unwind;
-        }
 
         if (conf->flush_behind)
                 goto flushbehind;
@@ -1495,9 +1737,6 @@ wb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
         call_stub_t *stub     = NULL;
         int32_t      op_errno = EINVAL;
 
-        if (wb_fd_err (fd, this, &op_errno))
-                goto unwind;
-
         wb_inode = wb_inode_ctx_get (this, fd->inode);
         if (!wb_inode)
                 goto noqueue;
@@ -1720,9 +1959,6 @@ wb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
                 goto unwind;
         }
 
-        if (wb_fd_err (fd, this, &op_errno))
-                goto unwind;
-
         frame->local = wb_inode;
 
         stub = fop_ftruncate_stub (frame, wb_ftruncate_helper, fd,
@@ -2003,7 +2239,18 @@ __wb_dump_requests (struct list_head *head, char *prefix)
                 else
                         gf_proc_dump_write ("wound", "no");
 
+                gf_proc_dump_write ("generation-number", "%d", req->gen);
+
+                gf_proc_dump_write ("req->op_ret", "%d", req->op_ret);
+                gf_proc_dump_write ("req->op_errno", "%d", req->op_errno);
+                gf_proc_dump_write ("sync-attempts", "%d", req->wind_count);
+
                 if (req->fop == GF_FOP_WRITE) {
+                        if (list_empty (&req->wip))
+                                gf_proc_dump_write ("sync-in-progress", "no");
+                        else
+                                gf_proc_dump_write ("sync-in-progress", "yes");
+
                         gf_proc_dump_write ("size", "%"GF_PRI_SIZET,
                                             req->write_size);
 
@@ -2021,6 +2268,7 @@ __wb_dump_requests (struct list_head *head, char *prefix)
 
                         flag = req->ordering.go;
                         gf_proc_dump_write ("go", "%d", flag);
+
                 }
         }
 }
@@ -2066,6 +2314,11 @@ wb_inode_dump (xlator_t *this, inode_t *inode)
                             wb_inode->window_current);
 
+        gf_proc_dump_write ("transit-size", "%"GF_PRI_SIZET,
+                            wb_inode->transit);
+
+        gf_proc_dump_write ("dontsync", "%d", wb_inode->dontsync);
+
         ret = TRY_LOCK (&wb_inode->lock);
         if (!ret) {
@@ -2117,7 +2370,8 @@ reconfigure (xlator_t *this, dict_t *options)
 
         conf = this->private;
 
-        GF_OPTION_RECONF ("cache-size", conf->window_size, options, size_uint64, out);
+        GF_OPTION_RECONF ("cache-size", conf->window_size, options,
+                          size_uint64, out);
 
         GF_OPTION_RECONF ("flush-behind", conf->flush_behind, options, bool,
                           out);
@@ -2130,6 +2384,9 @@ reconfigure (xlator_t *this, dict_t *options)
         GF_OPTION_RECONF ("strict-write-ordering", conf->strict_write_ordering,
                           options, bool, out);
 
+        GF_OPTION_RECONF ("resync-failed-syncs-after-fsync",
+                          conf->resync_after_fsync, options, bool, out);
+
         ret = 0;
 out:
         return ret;
@@ -2196,6 +2453,9 @@ init (xlator_t *this)
         GF_OPTION_INIT ("strict-write-ordering", conf->strict_write_ordering,
                         bool, out);
 
+        GF_OPTION_INIT ("resync-failed-syncs-after-fsync",
+                        conf->resync_after_fsync, bool, out);
+
         this->private = conf;
         ret = 0;
 
@@ -2287,5 +2547,17 @@ struct volume_options options[] = {
           .description = "Do not let later writes overtake earlier writes even "
                          "if they do not overlap",
         },
+        { .key = {"resync-failed-syncs-after-fsync"},
+          .type = GF_OPTION_TYPE_BOOL,
+          .default_value = "off",
+          .description = "If sync of \"cached-writes issued before fsync\" "
+                         "(to backend) fails, this option configures whether "
+                         "to retry syncing them after fsync or forget them. "
+                         "If set to on, cached-writes are retried "
+                         "till a \"flush\" fop (or a successful sync) on sync "
+                         "failures. "
+                         "fsync itself is failed irrespective of the value of "
+                         "this option. ",
+        },
         { .key = {NULL} },
 };
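
The short-write path above (wb_fulfill_short_write and its helpers) is
the subtlest part of the patch. The following self-contained C sketch
models only its accounting, with a plain array standing in for the
kernel-style request lists; the names are invented for illustration and
this is not the translator code:

    #include <stddef.h>

    /* A head request and its "winds" list were submitted as one
     * aggregated write; only `synced` bytes reached the backend.
     * Fully-synced requests are fulfilled and forgotten, the partially
     * synced one is trimmed so only its unsynced tail remains. */
    struct model_req {
            size_t write_size; /* bytes still owed to the backend */
            size_t offset;     /* file offset of the unsynced tail */
    };

    /* Returns the index of the first request with unsynced bytes, or
     * n if `synced` covered everything. */
    static size_t
    account_short_write (struct model_req *reqs, size_t n, size_t synced)
    {
            size_t i;

            for (i = 0; i < n && synced > 0; i++) {
                    if (reqs[i].write_size <= synced) {
                            /* fully synced: fulfil and forget */
                            synced -= reqs[i].write_size;
                            reqs[i].write_size = 0;
                    } else {
                            /* partially synced: trim, keep the tail */
                            reqs[i].write_size -= synced;
                            reqs[i].offset += synced;
                            synced = 0;
                    }
            }

            /* in the real code, the request holding the unsynced tail
             * is then failed with EIO via wb_fulfill_err() */
            for (i = 0; i < n; i++)
                    if (reqs[i].write_size > 0)
                            return i;

            return n;
    }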