diff options
author | Raghavendra G <raghavendra@gluster.com> | 2010-05-26 06:08:09 +0000 |
---|---|---|
committer | Anand V. Avati <avati@dev.gluster.com> | 2010-05-26 04:49:00 -0700 |
commit | 6d9b11dba63d86c48450aa956281114962289ef5 (patch) | |
tree | 62ef8d84b7b9cc61a8c686e2dd1feef60539c302 | |
parent | 1d363d4b7dd5b4dc25892053259ff43f1b4c52c7 (diff) |
performance/write-behind: explicitly enforce ordering of overlapping writes.v3.0.5rc2
- If there are non-contiguous offsets (offsets which do not start where
previous write ended), wait for completion of previous writes to server,
before sending new ones.
- Send flush call to server only when all writes are completed.
- If a file is opened with O_APPEND, at any point of time a maximum only one
write call to server should be in transit. This is to avoid reordering of
writes in the presence of afr which can result in data corruption.
See bug #934 for more details.
Signed-off-by: Raghavendra G <raghavendra@gluster.com>
Signed-off-by: Anand V. Avati <avati@dev.gluster.com>
BUG: 934 (md5sum mismatch when files are transferred using vsftpd)
URL: http://bugs.gluster.com/cgi-bin/bugzilla3/show_bug.cgi?id=934
-rw-r--r-- | xlators/performance/write-behind/src/write-behind.c | 247 |
1 files changed, 125 insertions, 122 deletions
diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c index 883d25e9666..064e0b717c7 100644 --- a/xlators/performance/write-behind/src/write-behind.c +++ b/xlators/performance/write-behind/src/write-behind.c @@ -55,6 +55,7 @@ typedef struct wb_file { int32_t refcount; int32_t op_ret; int32_t op_errno; + int32_t flags; list_head_t request; list_head_t passive_requests; fd_t *fd; @@ -265,7 +266,7 @@ out: wb_file_t * -wb_file_create (xlator_t *this, fd_t *fd) +wb_file_create (xlator_t *this, fd_t *fd, int32_t flags) { wb_file_t *file = NULL; wb_conf_t *conf = this->private; @@ -287,6 +288,7 @@ wb_file_create (xlator_t *this, fd_t *fd) file->this = this; file->refcount = 1; file->window_conf = conf->window_size; + file->flags = flags; fd_ctx_set (fd, this, (uint64_t)(long)file); @@ -1228,7 +1230,7 @@ wb_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, wbflags = local->wbflags; if (op_ret != -1) { - file = wb_file_create (this, fd); + file = wb_file_create (this, fd, flags); if (file == NULL) { op_ret = -1; op_errno = ENOMEM; @@ -1307,7 +1309,7 @@ wb_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, wb_conf_t *conf = this->private; if (op_ret != -1) { - file = wb_file_create (this, fd); + file = wb_file_create (this, fd, flags); if (file == NULL) { op_ret = -1; op_errno = ENOMEM; @@ -1338,8 +1340,8 @@ wb_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, frame->local = NULL; out: - STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, preparent, - postparent); + STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf, + preparent, postparent); return 0; } @@ -1358,14 +1360,22 @@ wb_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags, return 0; } - +/* Mark all the contiguous write requests for winding starting from head of + * request list. Stops marking at the first non-write request found. If + * file is opened with O_APPEND, make sure all the writes marked for winding + * will fit into a single write call to server. + */ size_t __wb_mark_wind_all (wb_file_t *file, list_head_t *list, list_head_t *winds) { - wb_request_t *request = NULL; - size_t size = 0; - char first_request = 1; + wb_request_t *request = NULL; + size_t size = 0; + char first_request = 1; off_t offset_expected = 0; + wb_conf_t *conf = NULL; + int count = 0; + + conf = file->this->private; list_for_each_entry (request, list, list) { @@ -1384,9 +1394,18 @@ __wb_mark_wind_all (wb_file_t *file, list_head_t *list, list_head_t *winds) break; } + if ((file->flags & O_APPEND) + && (((size + request->write_size) + > conf->aggregate_size) + || ((count + request->stub->args.writev.count) + > conf->aggregate_size))) { + break; + } + size += request->write_size; offset_expected += request->write_size; file->aggregate_current -= request->write_size; + count += request->stub->args.writev.count; request->flags.write_request.stack_wound = 1; list_add_tail (&request->winds, winds); @@ -1461,15 +1480,14 @@ __wb_mark_winds (list_head_t *list, list_head_t *winds, size_t aggregate_conf, request = list_entry (list->next, typeof (*request), list); file = request->file; - if (!wind_all && (file->aggregate_current < aggregate_conf)) { - __wb_can_wind (list, &other_fop_in_queue, - &non_contiguous_writes, &incomplete_writes); - } + __wb_can_wind (list, &other_fop_in_queue, + &non_contiguous_writes, &incomplete_writes); - if ((enable_trickling_writes && !incomplete_writes) - || (wind_all) || (non_contiguous_writes) - || (other_fop_in_queue) - || (file->aggregate_current >= aggregate_conf)) { + if (!incomplete_writes && ((enable_trickling_writes) + || (wind_all) || (non_contiguous_writes) + || (other_fop_in_queue) + || (file->aggregate_current + >= aggregate_conf))) { size = __wb_mark_wind_all (file, list, winds); } @@ -2087,64 +2105,66 @@ wb_ffr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno) { wb_local_t *local = NULL; - wb_file_t *file = NULL; - wb_conf_t *conf = NULL; - char unwind = 0; - int32_t ret = -1; - int disabled = 0; - int64_t disable_till = 0; + wb_file_t *file = NULL; + wb_conf_t *conf = NULL; + int32_t ret = -1; conf = this->private; local = frame->local; + file = local->file; - if ((local != NULL) && (local->file != NULL)) { - file = local->file; - + if (file != NULL) { LOCK (&file->lock); { - disabled = file->disabled; - disable_till = file->disable_till; + if (file->op_ret == -1) { + op_ret = file->op_ret; + op_errno = file->op_errno; + + file->op_ret = 0; + } } UNLOCK (&file->lock); - if (conf->flush_behind - && (!disabled) && (disable_till == 0)) { - unwind = 1; - } else { - local->reply_count++; - /* - * without flush-behind, unwind should wait for replies - * of writes queued before and the flush - */ - if (local->reply_count == 2) { - unwind = 1; - } + ret = wb_process_queue (frame, file, 0); + if ((ret == -1) && (errno == ENOMEM)) { + op_ret = -1; + op_errno = ENOMEM; } - } else { - unwind = 1; } + + STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno); - if (unwind) { - if (file != NULL) { - LOCK (&file->lock); - { - if (file->op_ret == -1) { - op_ret = file->op_ret; - op_errno = file->op_errno; + return 0; +} - file->op_ret = 0; - } - } - UNLOCK (&file->lock); - ret = wb_process_queue (frame, file, 0); - if ((ret == -1) && (errno == ENOMEM)) { - op_ret = -1; - op_errno = ENOMEM; - } - } +int32_t +wb_flush_helper (call_frame_t *frame, xlator_t *this, fd_t *fd) +{ + wb_conf_t *conf = NULL; + wb_local_t *local = NULL; + + conf = this->private; + + local = frame->local; + + if (local && local->request) { + local->request->stub = NULL; + wb_request_unref (local->request); + } - STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno); + if (conf->flush_behind) { + STACK_WIND (frame, + wb_ffr_bg_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, + fd); + } else { + STACK_WIND (frame, + wb_ffr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, + fd); } return 0; @@ -2159,12 +2179,9 @@ wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) wb_local_t *local = NULL; uint64_t tmp_file = 0; call_stub_t *stub = NULL; - call_frame_t *process_frame = NULL; - wb_local_t *tmp_local = NULL; + call_frame_t *flush_frame = NULL; wb_request_t *request = NULL; int32_t ret = 0; - int disabled = 0; - int64_t disable_till = 0; conf = this->private; @@ -2183,93 +2200,79 @@ wb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) if (file != NULL) { local = CALLOC (1, sizeof (*local)); if (local == NULL) { - STACK_UNWIND (frame, -1, ENOMEM, NULL); + STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); return 0; } local->file = file; - frame->local = local; - stub = fop_flush_cbk_stub (frame, wb_ffr_cbk, 0, 0); - if (stub == NULL) { - STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); - return 0; + if (conf->flush_behind) { + flush_frame = copy_frame (frame); + if (flush_frame == NULL) { + STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); + return 0; + } + } else { + flush_frame = frame; } - process_frame = copy_frame (frame); - if (process_frame == NULL) { - STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); - call_stub_destroy (stub); - return 0; - } + flush_frame->local = local; - LOCK (&file->lock); - { - disabled = file->disabled; - disable_till = file->disable_till; - } - UNLOCK (&file->lock); - - if (conf->flush_behind - && (!disabled) && (disable_till == 0)) { - tmp_local = CALLOC (1, sizeof (*local)); - if (tmp_local == NULL) { - STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); - - STACK_DESTROY (process_frame->root); - call_stub_destroy (stub); - return 0; + stub = fop_flush_stub (flush_frame, wb_flush_helper, fd); + if (stub == NULL) { + if (flush_frame != frame) { + STACK_DESTROY (flush_frame->root); } - tmp_local->file = file; - process_frame->local = tmp_local; + STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); + return 0; } - fd_ref (fd); - request = wb_enqueue (file, stub); if (request == NULL) { - STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); + if (flush_frame != frame) { + STACK_DESTROY (flush_frame->root); + } - fd_unref (fd); + STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); call_stub_destroy (stub); - STACK_DESTROY (process_frame->root); return 0; } - ret = wb_process_queue (process_frame, file, 1); + ret = wb_process_queue (flush_frame, file, 1); if ((ret == -1) && (errno == ENOMEM)) { - STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); + if (flush_frame != frame) { + STACK_DESTROY (flush_frame->root); + } - fd_unref (fd); + STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); call_stub_destroy (stub); - STACK_DESTROY (process_frame->root); return 0; } - } - - if ((file != NULL) && conf->flush_behind - && (!disabled) && (disable_till == 0)) { - STACK_WIND (process_frame, - wb_ffr_bg_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, - fd); } else { - STACK_WIND (frame, - wb_ffr_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->flush, - fd); + if (conf->flush_behind) { + flush_frame = copy_frame (frame); + if (flush_frame == NULL) { + STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM); + return 0; + } - if (process_frame != NULL) { - STACK_DESTROY (process_frame->root); + STACK_WIND (flush_frame, + wb_ffr_bg_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, + fd); + } else { + STACK_WIND (frame, + wb_ffr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, + fd); } } - - if (file != NULL) { - fd_unref (fd); + if (conf->flush_behind) { + STACK_UNWIND_STRICT (flush, frame, 0, 0); } return 0; |