summaryrefslogtreecommitdiffstats
path: root/xlators
diff options
context:
space:
mode:
authorRaghavendra G <raghavendra@gluster.com>2011-08-19 12:08:15 +0530
committerAnand Avati <avati@redhat.com>2012-07-25 16:22:45 -0700
commitfd2f9c0be1a5e170ca71079b2da2c9f3d64341ae (patch)
tree68e37c1a4d96a9108cd959f5f4e40375edee7b2b /xlators
parent34d395fc16449ec3fe709d77609677992752b61a (diff)
performance/write-behind: preserve lk-owner while syncing writes.
- This patch also makes syncing of non-overlapping but consecutive writes parallel. Till now only contiguous writes were synced parallely. Change-Id: Icf0d5ea373f30c79fcdc90ba44b7e7a1bc5f0111 BUG: 765141 Signed-off-by: Raghavendra G <raghavendra@gluster.com> Reviewed-on: http://review.gluster.com/269 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anand Avati <avati@redhat.com>
Diffstat (limited to 'xlators')
-rw-r--r--xlators/performance/write-behind/src/write-behind.c209
1 files changed, 143 insertions, 66 deletions
diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c
index 81a254df6a0..704bde416bf 100644
--- a/xlators/performance/write-behind/src/write-behind.c
+++ b/xlators/performance/write-behind/src/write-behind.c
@@ -28,9 +28,9 @@
#include "statedump.h"
#include "write-behind-mem-types.h"
-#define MAX_VECTOR_COUNT 8
-#define WB_AGGREGATE_SIZE 131072 /* 128 KB */
-#define WB_WINDOW_SIZE 1048576 /* 1MB */
+#define MAX_VECTOR_COUNT 8
+#define WB_AGGREGATE_SIZE 131072 /* 128 KB */
+#define WB_WINDOW_SIZE 1048576 /* 1MB */
typedef struct list_head list_head_t;
struct wb_conf;
@@ -55,22 +55,23 @@ typedef struct wb_file {
}wb_file_t;
typedef struct wb_request {
- list_head_t list;
- list_head_t winds;
- list_head_t unwinds;
- list_head_t other_requests;
- call_stub_t *stub;
- size_t write_size;
- int32_t refcount;
- wb_file_t *file;
- glusterfs_fop_t fop;
+ list_head_t list;
+ list_head_t winds;
+ list_head_t unwinds;
+ list_head_t other_requests;
+ call_stub_t *stub;
+ size_t write_size;
+ int32_t refcount;
+ wb_file_t *file;
+ glusterfs_fop_t fop;
+ gf_lkowner_t lk_owner;
union {
struct {
- char write_behind;
- char stack_wound;
- char got_reply;
- char virgin;
- char flush_all; /* while trying to sync to back-end,
+ char write_behind;
+ char stack_wound;
+ char got_reply;
+ char virgin;
+ char flush_all; /* while trying to sync to back-end,
* don't wait till a data of size
* equal to configured aggregate-size
* is accumulated, instead sync
@@ -87,12 +88,12 @@ typedef struct wb_request {
} wb_request_t;
struct wb_conf {
- uint64_t aggregate_size;
- uint64_t window_size;
- uint64_t disable_till;
- gf_boolean_t enable_O_SYNC;
- gf_boolean_t flush_behind;
- gf_boolean_t enable_trickling_writes;
+ uint64_t aggregate_size;
+ uint64_t window_size;
+ uint64_t disable_till;
+ gf_boolean_t enable_O_SYNC;
+ gf_boolean_t flush_behind;
+ gf_boolean_t enable_trickling_writes;
};
typedef struct wb_local {
@@ -119,6 +120,71 @@ ssize_t
__wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_size,
char enable_trickling_writes);
+/*
+ Below is a succinct explanation of the code deciding whether two regions
+ overlap, from Pavan <tcp@gluster.com>.
+
+ For any two ranges to be non-overlapping, either the end of the first
+ range is lesser than the start of the second, or vice versa. Example -
+
+ <---------> <-------------->
+ p q x y
+
+ ( q < x ) or (y < p) = > No overlap.
+
+ To check for *overlap*, we can negate this (using de morgan's laws), and
+ it becomes -
+
+ (q >= x ) and (y >= p)
+
+ Either that, or you write the negation using -
+
+ if (! ((q < x) or (y < p)) ) {
+ "Overlap"
+ }
+*/
+
+static inline char
+wb_requests_overlap (wb_request_t *request1, wb_request_t *request2)
+{
+ off_t r1_start = 0, r1_end = 0, r2_start = 0, r2_end = 0;
+
+ r1_start = request1->stub->args.writev.off;
+ r1_end = r1_start + iov_length (request1->stub->args.writev.vector,
+ request1->stub->args.writev.count);
+
+ r2_start = request2->stub->args.writev.off;
+ r2_end = r2_start + iov_length (request2->stub->args.writev.vector,
+ request2->stub->args.writev.count);
+
+ return ((r1_end >= r2_start) && (r2_end >= r1_start));
+}
+
+
+static inline char
+wb_overlap (list_head_t *list, wb_request_t *request)
+{
+ char overlap = 0;
+ wb_request_t *tmp = NULL;
+
+ GF_VALIDATE_OR_GOTO ("write-behind", list, out);
+ GF_VALIDATE_OR_GOTO ("write-behind", request, out);
+
+ list_for_each_entry (tmp, list, list) {
+ if (tmp == request) {
+ break;
+ }
+
+ overlap = wb_requests_overlap (tmp, request);
+ if (overlap) {
+ break;
+ }
+ }
+
+out:
+ return overlap;
+}
+
static int
__wb_request_unref (wb_request_t *this)
@@ -252,6 +318,8 @@ wb_enqueue (wb_file_t *file, call_stub_t *stub)
request->flags.write_request.virgin = 1;
}
+ request->lk_owner = frame->root->lk_owner;
+
LOCK (&file->lock);
{
list_add_tail (&request->list, &file->request);
@@ -421,19 +489,21 @@ out:
ssize_t
wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds)
{
- wb_request_t *dummy = NULL, *request = NULL;
- wb_request_t *first_request = NULL, *next = NULL;
- size_t total_count = 0, count = 0;
- size_t copied = 0;
- call_frame_t *sync_frame = NULL;
- struct iobref *iobref = NULL;
- wb_local_t *local = NULL;
- struct iovec *vector = NULL;
- ssize_t current_size = 0, bytes = 0;
- size_t bytecount = 0;
- wb_conf_t *conf = NULL;
- fd_t *fd = NULL;
- int32_t op_errno = -1;
+ wb_request_t *dummy = NULL, *request = NULL;
+ wb_request_t *first_request = NULL, *next = NULL;
+ size_t total_count = 0, count = 0;
+ size_t copied = 0;
+ call_frame_t *sync_frame = NULL;
+ struct iobref *iobref = NULL;
+ wb_local_t *local = NULL;
+ struct iovec *vector = NULL;
+ ssize_t current_size = 0, bytes = 0;
+ size_t bytecount = 0;
+ wb_conf_t *conf = NULL;
+ fd_t *fd = NULL;
+ int32_t op_errno = -1;
+ off_t next_offset_expected = 0;
+ gf_lkowner_t lk_owner = {0, };
GF_VALIDATE_OR_GOTO_WITH_ERROR ((file ? file->this->name
: "write-behind"), frame,
@@ -485,6 +555,10 @@ wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds)
first_request = request;
current_size = 0;
+
+ next_offset_expected = request->stub->args.writev.off
+ + request->write_size;
+ lk_owner = request->lk_owner;
}
count += request->stub->args.writev.count;
@@ -514,8 +588,9 @@ wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds)
|| ((count + next->stub->args.writev.count)
> MAX_VECTOR_COUNT)
|| ((current_size + next->write_size)
- > conf->aggregate_size)) {
-
+ > conf->aggregate_size)
+ || (next_offset_expected != next->stub->args.writev.off)
+ || (!is_same_lkowner (&lk_owner, &next->lk_owner))) {
sync_frame = copy_frame (frame);
if (sync_frame == NULL) {
bytes = -1;
@@ -523,6 +598,8 @@ wb_sync (call_frame_t *frame, wb_file_t *file, list_head_t *winds)
goto out;
}
+ frame->root->lk_owner = lk_owner;
+
sync_frame->local = local;
local->file = file;
@@ -1518,12 +1595,11 @@ unwind:
size_t
__wb_mark_wind_all (wb_file_t *file, list_head_t *list, list_head_t *winds)
{
- wb_request_t *request = NULL;
- size_t size = 0;
- char first_request = 1;
- off_t offset_expected = 0;
- wb_conf_t *conf = NULL;
- int count = 0;
+ wb_request_t *request = NULL;
+ size_t size = 0;
+ char first_request = 1, overlap = 0;
+ wb_conf_t *conf = NULL;
+ int count = 0;
GF_VALIDATE_OR_GOTO ("write-behind", file, out);
GF_VALIDATE_OR_GOTO (file->this->name, list, out);
@@ -1541,12 +1617,11 @@ __wb_mark_wind_all (wb_file_t *file, list_head_t *list, list_head_t *winds)
if (!request->flags.write_request.stack_wound) {
if (first_request) {
first_request = 0;
- offset_expected
- = request->stub->args.writev.off;
- }
-
- if (request->stub->args.writev.off != offset_expected) {
- break;
+ } else {
+ overlap = wb_overlap (list, request);
+ if (overlap) {
+ goto out;
+ }
}
if ((file->flags & O_APPEND)
@@ -1554,12 +1629,10 @@ __wb_mark_wind_all (wb_file_t *file, list_head_t *list, list_head_t *winds)
> conf->aggregate_size)
|| ((count + request->stub->args.writev.count)
> MAX_VECTOR_COUNT))) {
- break;
+ goto out;
}
size += request->write_size;
- offset_expected += request->write_size;
- file->aggregate_current -= request->write_size;
count += request->stub->args.writev.count;
request->flags.write_request.stack_wound = 1;
@@ -1568,19 +1641,23 @@ __wb_mark_wind_all (wb_file_t *file, list_head_t *list, list_head_t *winds)
}
out:
+ if (file != NULL) {
+ file->aggregate_current -= size;
+ }
+
return size;
}
int32_t
__wb_can_wind (list_head_t *list, char *other_fop_in_queue,
- char *non_contiguous_writes, char *incomplete_writes,
+ char *overlapping_writes, char *incomplete_writes,
char *wind_all)
{
wb_request_t *request = NULL;
char first_request = 1;
- off_t offset_expected = 0;
int32_t ret = -1;
+ char overlap = 0;
GF_VALIDATE_OR_GOTO ("write-behind", list, out);
@@ -1605,8 +1682,6 @@ __wb_can_wind (list_head_t *list, char *other_fop_in_queue,
if (first_request) {
char flush = 0;
first_request = 0;
- offset_expected
- = request->stub->args.writev.off;
flush = request->flags.write_request.flush_all;
if (wind_all != NULL) {
@@ -1614,14 +1689,14 @@ __wb_can_wind (list_head_t *list, char *other_fop_in_queue,
}
}
- if (offset_expected != request->stub->args.writev.off) {
- if (non_contiguous_writes) {
- *non_contiguous_writes = 1;
+ overlap = wb_overlap (list, request);
+ if (overlap) {
+ if (overlapping_writes != NULL) {
+ *overlapping_writes = 1;
}
+
break;
}
-
- offset_expected += request->write_size;
}
}
@@ -1638,7 +1713,7 @@ __wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_conf,
size_t size = 0;
char other_fop_in_queue = 0;
char incomplete_writes = 0;
- char non_contiguous_writes = 0;
+ char overlapping_writes = 0;
wb_request_t *request = NULL;
wb_file_t *file = NULL;
char wind_all = 0;
@@ -1655,7 +1730,7 @@ __wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_conf,
file = request->file;
ret = __wb_can_wind (list, &other_fop_in_queue,
- &non_contiguous_writes, &incomplete_writes,
+ &overlapping_writes, &incomplete_writes,
&wind_all);
if (ret == -1) {
gf_log (file->this->name, GF_LOG_WARNING,
@@ -1664,7 +1739,7 @@ __wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_conf,
}
if (!incomplete_writes && ((enable_trickling_writes)
- || (wind_all) || (non_contiguous_writes)
+ || (wind_all) || (overlapping_writes)
|| (other_fop_in_queue)
|| (file->aggregate_current
>= aggregate_conf))) {
@@ -1988,7 +2063,9 @@ __wb_collapse_write_bufs (list_head_t *requests, size_t page_size)
offset_expected = holder->stub->args.writev.off
+ holder->write_size;
- if (request->stub->args.writev.off != offset_expected) {
+ if ((request->stub->args.writev.off != offset_expected)
+ || (!is_same_lkowner (&request->lk_owner,
+ &holder->lk_owner))) {
holder = request;
continue;
}