diff options
-rw-r--r-- | tests/bugs/write-behind/issue-884.c | 267 | ||||
-rwxr-xr-x | tests/bugs/write-behind/issue-884.t | 40 | ||||
-rw-r--r-- | xlators/performance/write-behind/src/write-behind.c | 4 |
3 files changed, 309 insertions, 2 deletions
diff --git a/tests/bugs/write-behind/issue-884.c b/tests/bugs/write-behind/issue-884.c new file mode 100644 index 00000000000..e9c33b351ad --- /dev/null +++ b/tests/bugs/write-behind/issue-884.c @@ -0,0 +1,267 @@ + +#define _GNU_SOURCE + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <time.h> +#include <assert.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <pthread.h> + +#include <glusterfs/api/glfs.h> + +/* Based on a reproducer by Stefan Ring. It seems to be quite sensible to any + * timing modification, so the code has been maintained as is, only with minor + * changes. */ + +struct glfs *glfs; + +pthread_mutex_t the_mutex = PTHREAD_MUTEX_INITIALIZER; +pthread_cond_t the_cond = PTHREAD_COND_INITIALIZER; + +typedef struct _my_aiocb { + int64_t size; + volatile int64_t seq; + int which; +} my_aiocb; + +typedef struct _worker_data { + my_aiocb cb; + struct iovec iov; + int64_t offset; +} worker_data; + +typedef struct { + worker_data wdata[2]; + + volatile unsigned busy; +} all_data_t; + +all_data_t all_data; + +static void +completion_fnc(struct glfs_fd *fd, ssize_t ret, struct glfs_stat *pre, + struct glfs_stat *post, void *arg) +{ + void *the_thread; + my_aiocb *cb = (my_aiocb *)arg; + long seq = cb->seq; + + assert(ret == cb->size); + + pthread_mutex_lock(&the_mutex); + pthread_cond_broadcast(&the_cond); + + all_data.busy &= ~(1 << cb->which); + cb->seq = -1; + + the_thread = (void *)pthread_self(); + printf("worker %d is done from thread %p, seq %ld!\n", cb->which, + the_thread, seq); + + pthread_mutex_unlock(&the_mutex); +} + +static void +init_wdata(worker_data *data, int which) +{ + data->cb.which = which; + data->cb.seq = -1; + + data->iov.iov_base = malloc(1024 * 1024); + memset(data->iov.iov_base, 6, + 1024 * 1024); /* tail part never overwritten */ +} + +static void +init() +{ + all_data.busy = 0; + + init_wdata(&all_data.wdata[0], 0); + init_wdata(&all_data.wdata[1], 1); +} + +static void +do_write(struct glfs_fd *fd, int content, int size, int64_t seq, + worker_data *wdata, const char *name) +{ + int ret; + + wdata->cb.size = size; + wdata->cb.seq = seq; + + if (content >= 0) + memset(wdata->iov.iov_base, content, size); + wdata->iov.iov_len = size; + + pthread_mutex_lock(&the_mutex); + printf("(%d) dispatching write \"%s\", offset %lx, len %x, seq %ld\n", + wdata->cb.which, name, (long)wdata->offset, size, (long)seq); + pthread_mutex_unlock(&the_mutex); + ret = glfs_pwritev_async(fd, &wdata->iov, 1, wdata->offset, 0, + completion_fnc, &wdata->cb); + assert(ret >= 0); +} + +#define IDLE 0 // both workers must be idle +#define ANY 1 // use any worker, other one may be busy + +int +get_worker(int waitfor, int64_t excl_seq) +{ + int which; + + pthread_mutex_lock(&the_mutex); + + while (waitfor == IDLE && (all_data.busy & 3) != 0 || + waitfor == ANY && + ((all_data.busy & 3) == 3 || + excl_seq >= 0 && (all_data.wdata[0].cb.seq == excl_seq || + all_data.wdata[1].cb.seq == excl_seq))) + pthread_cond_wait(&the_cond, &the_mutex); + + if (!(all_data.busy & 1)) + which = 0; + else + which = 1; + + all_data.busy |= (1 << which); + + pthread_mutex_unlock(&the_mutex); + + return which; +} + +static int +doit(struct glfs_fd *fd) +{ + int ret; + int64_t seq = 0; + int64_t offset = 0; // position in file, in blocks + int64_t base = 0x1000; // where to place the data, in blocks + + int async_mode = ANY; + + init(); + + for (;;) { + int which; + worker_data *wdata; + + // for growing to the first offset + for (;;) { + int gap = base + 0x42 - offset; + if (!gap) + break; + if (gap > 80) + gap = 80; + + which = get_worker(IDLE, -1); + wdata = &all_data.wdata[which]; + + wdata->offset = offset << 9; + do_write(fd, 0, gap << 9, seq++, wdata, "gap-filling"); + + offset += gap; + } + + // 8700 + which = get_worker(IDLE, -1); + wdata = &all_data.wdata[which]; + + wdata->offset = (base + 0x42) << 9; + do_write(fd, 1, 62 << 9, seq++, wdata, "!8700"); + + // 8701 + which = get_worker(IDLE, -1); + wdata = &all_data.wdata[which]; + + wdata->offset = (base + 0x42) << 9; + do_write(fd, 2, 55 << 9, seq++, wdata, "!8701"); + + // 8702 + which = get_worker(async_mode, -1); + wdata = &all_data.wdata[which]; + + wdata->offset = (base + 0x79) << 9; + do_write(fd, 3, 54 << 9, seq++, wdata, "!8702"); + + // 8703 + which = get_worker(async_mode, -1); + wdata = &all_data.wdata[which]; + + wdata->offset = (base + 0xaf) << 9; + do_write(fd, 4, 81 << 9, seq++, wdata, "!8703"); + + // 8704 + // this writes both 5s and 6s + // the range of 5s is the one that overwrites 8703 + + which = get_worker(async_mode, seq - 1); + wdata = &all_data.wdata[which]; + + memset(wdata->iov.iov_base, 5, 81 << 9); + wdata->offset = (base + 0xaf) << 9; + do_write(fd, -1, 1623 << 9, seq++, wdata, "!8704"); + + offset = base + 0x706; + base += 0x1000; + if (base >= 0x100000) + break; + } + + printf("done!\n"); + fflush(stdout); + + pthread_mutex_lock(&the_mutex); + + while ((all_data.busy & 3) != 0) + pthread_cond_wait(&the_cond, &the_mutex); + + pthread_mutex_unlock(&the_mutex); + + ret = glfs_close(fd); + assert(ret >= 0); + /* + ret = glfs_fini(glfs); + assert(ret >= 0); + */ + return 0; +} + +int +main(int argc, char *argv[]) +{ + int ret; + int open_flags = O_RDWR | O_DIRECT | O_TRUNC; + struct glfs_fd *fd; + + glfs = glfs_new(argv[1]); + if (!glfs) { + printf("glfs_new!\n"); + goto out; + } + ret = glfs_set_volfile_server(glfs, "tcp", "localhost", 24007); + if (ret < 0) { + printf("set_volfile!\n"); + goto out; + } + ret = glfs_init(glfs); + if (ret) { + printf("init!\n"); + goto out; + } + fd = glfs_open(glfs, argv[2], open_flags); + if (!fd) { + printf("open!\n"); + goto out; + } + srand(time(NULL)); + return doit(fd); +out: + return 1; +} diff --git a/tests/bugs/write-behind/issue-884.t b/tests/bugs/write-behind/issue-884.t new file mode 100755 index 00000000000..2bcf7d15265 --- /dev/null +++ b/tests/bugs/write-behind/issue-884.t @@ -0,0 +1,40 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +# This test tries to detect a race condition in write-behind. It's based on a +# reproducer written by Stefan Ring that is able to hit it sometimes. On my +# system, it happened around 10% of the runs. This means that if this bug +# appears again, this test will fail once every 10 runs. Most probably this +# failure will be hidden by the automatic test retry of the testing framework. +# +# Please, if this test fails, it needs to be analyzed in detail. + +function run() { + "${@}" >/dev/null +} + +cleanup + +TEST glusterd +TEST pidof glusterd + +TEST $CLI volume create $V0 $H0:$B0/$V0 +# This makes it easier to hit the issue +TEST $CLI volume set $V0 client-log-level TRACE +TEST $CLI volume start $V0 + +TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0 + +build_tester $(dirname $0)/issue-884.c -lgfapi + +TEST touch $M0/testfile + +# This program generates a file of 535694336 bytes with a fixed pattern +TEST run $(dirname $0)/issue-884 $V0 testfile + +# This is the md5sum of the expected pattern without corruption +EXPECT "ad105f9349345a70fc697632cbb5eec8" echo "$(md5sum $B0/$V0/testfile | awk '{ print $1; }')" + +cleanup diff --git a/xlators/performance/write-behind/src/write-behind.c b/xlators/performance/write-behind/src/write-behind.c index ab6b76cace8..e1d0e9aaf00 100644 --- a/xlators/performance/write-behind/src/write-behind.c +++ b/xlators/performance/write-behind/src/write-behind.c @@ -1283,14 +1283,14 @@ __wb_pick_unwinds(wb_inode_t *wb_inode, list_head_t *lies) wb_inode->window_current += req->orig_size; + wb_inode->gen++; + if (!req->ordering.fulfilled) { /* burden increased */ list_add_tail(&req->lie, &wb_inode->liability); req->ordering.lied = 1; - wb_inode->gen++; - uuid_utoa_r(req->gfid, gfid); gf_msg_debug(wb_inode->this->name, 0, "(unique=%" PRIu64 |