diff options

 extras/group-gluster-block           |   1
 tests/bugs/shard/shard-append-test.c | 179
 tests/bugs/shard/shard-append-test.t |  32
 xlators/features/shard/src/shard.c   | 109
 4 files changed, 278 insertions(+), 43 deletions(-)
diff --git a/extras/group-gluster-block b/extras/group-gluster-block index 0753d26b3ca..a4a6367920b 100644 --- a/extras/group-gluster-block +++ b/extras/group-gluster-block @@ -2,7 +2,6 @@ performance.quick-read=off  performance.read-ahead=off  performance.io-cache=off  performance.stat-prefetch=off -performance.write-behind=off  performance.open-behind=off  performance.readdir-ahead=off  network.remote-dio=enable diff --git a/tests/bugs/shard/shard-append-test.c b/tests/bugs/shard/shard-append-test.c new file mode 100644 index 00000000000..92dff3d078d --- /dev/null +++ b/tests/bugs/shard/shard-append-test.c @@ -0,0 +1,179 @@ +#include <fcntl.h> +#include <unistd.h> +#include <time.h> +#include <limits.h> +#include <string.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <glusterfs/api/glfs.h> +#include <glusterfs/api/glfs-handles.h> + +#define LOG_ERR(msg) do { \ +        fprintf (stderr, "%s : Error (%s)\n", msg, strerror (errno)); \ +        } while (0) + +/*This test tests that shard xlator handles offset in appending writes + * correctly. This test performs writes of 1025 bytes 1025 times, in 5 threads + * with different threads. The buffer to be written is same character repeated + * 1025 times in the buffer for a thread. At the end it reads the buffer till + * end of file and tests that the read of 1025 bytes is always same character + * and the content read is 5*1025*1025 size. 
1025 bytes is chosen because it + * will lead to write on more than one shard at some point when the size is + * going over the initial shard*/ +pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; +int thread_data = '1'; + +glfs_t * +init_glfs (const char *hostname, const char *volname, +           const char *logfile) +{ +        int     ret     = -1; +        glfs_t *fs      = NULL; + +        fs = glfs_new (volname); +        if (!fs) { +                LOG_ERR ("glfs_new failed"); +                return NULL; +        } + +        ret = glfs_set_volfile_server (fs, "tcp", hostname, 24007); +        if (ret < 0) { +                LOG_ERR ("glfs_set_volfile_server failed"); +                goto out; +        } + +        ret = glfs_set_logging (fs, logfile, 7); +        if (ret < 0) { +                LOG_ERR ("glfs_set_logging failed"); +                goto out; +        } + +        ret = glfs_init (fs); +        if (ret < 0) { +                LOG_ERR ("glfs_init failed"); +                goto out; +        } + +        ret = 0; +out: +        if (ret) { +                glfs_fini (fs); +                fs = NULL; +        } + +        return fs; +} + +void* +write_data (void *data) +{ +        char           buf[1025] = {0}; +        glfs_fd_t      *glfd = NULL; +        glfs_t         *fs       = data; +        int            i     = 0; + +        pthread_mutex_lock (&lock); +        { +                memset(buf, thread_data, sizeof(buf)); +                thread_data++; +        } +        pthread_mutex_unlock (&lock); + +        for (i = 0; i < 1025; i++) { +                glfd = glfs_creat(fs, "parallel-write.txt", O_WRONLY | O_APPEND, +                                   S_IRUSR | S_IWUSR | O_SYNC); +                if (!glfd) { +                        LOG_ERR ("Failed to create file"); +                        exit(1); +                } + +                if (glfs_write (glfd, buf, sizeof(buf), 0) < 0) { +                        LOG_ERR ("Failed 
to write to file"); +                        exit(1); +                } +                if (glfs_close(glfd) != 0) { +                        LOG_ERR ("Failed to close file"); +                        exit(1); +                } +        } +        return NULL; +} + +int +main (int argc, char *argv[]) +{ +        pthread_t  tid[5] = {0}; +        char       buf[1025] = {0}; +        char       cmp_buf[1025] = {0}; +        int         ret      = 0; +        char       *hostname = NULL; +        char       *volname  = NULL; +        char       *logfile  = NULL; +        glfs_t     *fs       = NULL; +        glfs_fd_t  *glfd     = NULL; +        ssize_t     bytes_read = 0; +        ssize_t     total_bytes_read = 0; +        int i = 0; + +        if (argc != 4) { +                fprintf (stderr, "Invalid argument\n"); +                exit(1); +        } + +        hostname = argv[1]; +        volname = argv[2]; +        logfile = argv[3]; + +        fs = init_glfs (hostname, volname, logfile); +        if (fs == NULL) { +                LOG_ERR ("init_glfs failed"); +                return -1; +        } + +        for (i = 0; i < 5; i++) { +                pthread_create(&tid[i], NULL, write_data, fs); +        } + +        for (i = 0; i < 5; i++) { +                pthread_join(tid[i], NULL); +        } +        glfd = glfs_open(fs, "parallel-write.txt", O_RDONLY); +        if (!glfd) { +                LOG_ERR ("Failed to open file for reading"); +                exit(1); +        } + +        while ((bytes_read = glfs_read (glfd, buf, sizeof(buf), 0)) > 0) { +                if (bytes_read != sizeof(buf)) { +                        fprintf (stderr, "Didn't read complete data read: %zd " +                                 "expected: %lu", bytes_read, sizeof(buf)); +                        exit(1); +                } + +                total_bytes_read += bytes_read; +                if (buf[0] < '1' || buf[0] >= thread_data) { +                        
fprintf(stderr, "Invalid character found: %c", buf[0]); +                        exit(1); +                } +                memset(cmp_buf, buf[0], sizeof(cmp_buf)); +                if (memcmp(cmp_buf, buf, sizeof(cmp_buf))) { +                        LOG_ERR ("Data corrupted"); +                        exit(1); +                } +                memset(cmp_buf, 0, sizeof(cmp_buf)); +        } + +        if (total_bytes_read != 5*1025*1025) { +                fprintf(stderr, "Failed to read what is written, read; %zd, " +                        "expected %zu", total_bytes_read, 5*1025*1025); +                exit(1); +        } + +        if (glfs_close(glfd) != 0) { +                LOG_ERR ("Failed to close"); +                exit(1); +        } +        return 0; +} diff --git a/tests/bugs/shard/shard-append-test.t b/tests/bugs/shard/shard-append-test.t new file mode 100644 index 00000000000..f8719f2a2c1 --- /dev/null +++ b/tests/bugs/shard/shard-append-test.t @@ -0,0 +1,32 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. 
$(dirname $0)/../../volume.rc + +cleanup; + +TEST glusterd + +TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{1,2,3}; +TEST $CLI volume set $V0 features.shard on +TEST $CLI volume set $V0 features.shard-block-size 4MB +TEST $CLI volume set $V0 performance.quick-read off +TEST $CLI volume set $V0 performance.io-cache off + +#Uncomment the following line after shard-queuing is implemented +#TEST $CLI volume set $V0 performance.write-behind off + +TEST $CLI volume set $V0 performance.strict-o-direct on +TEST $CLI volume set $V0 performance.stat-prefetch off +TEST $CLI volume set $V0 performance.read-ahead off +TEST $CLI volume start $V0; + +logdir=`gluster --print-logdir` + +TEST build_tester $(dirname $0)/shard-append-test.c -lgfapi -lpthread + +TEST ./$(dirname $0)/shard-append-test ${H0} $V0 $logdir/shard-append-test.log + +cleanup_tester $(dirname $0)/shard-append-test + +cleanup; diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index eaeb840e86d..d7526339591 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -3631,6 +3631,18 @@ shard_common_inode_write_post_update_size_handler (call_frame_t *frame,          return 0;  } +static gf_boolean_t +shard_is_appending_write (shard_local_t *local) +{ +        if (local->fop != GF_FOP_WRITE) +                return _gf_false; +        if (local->flags & O_APPEND) +                return _gf_true; +        if (local->fd->flags & O_APPEND) +                return _gf_true; +        return _gf_false; +} +  int  __shard_get_delta_size_from_inode_ctx (shard_local_t *local, inode_t *inode,                                         xlator_t *this) @@ -3645,13 +3657,15 @@ __shard_get_delta_size_from_inode_ctx (shard_local_t *local, inode_t *inode,          ctx = (shard_inode_ctx_t *) ctx_uint; -        if (local->offset + local->total_size > ctx->stat.ia_size) { +        if (shard_is_appending_write (local)) { +                local->delta_size = 
local->total_size; +        } else if (local->offset + local->total_size > ctx->stat.ia_size) {                  local->delta_size = (local->offset + local->total_size) -                                      ctx->stat.ia_size; -                ctx->stat.ia_size += (local->delta_size);          } else {                  local->delta_size = 0;          } +        ctx->stat.ia_size += (local->delta_size);          local->postbuf = ctx->stat;          return 0; @@ -3957,8 +3971,11 @@ shard_common_inode_write_post_mknod_handler (call_frame_t *frame,  }  int -shard_common_inode_write_post_lookup_handler (call_frame_t *frame, -                                              xlator_t *this) +shard_mkdir_dot_shard (call_frame_t *frame, xlator_t *this, +                       shard_post_resolve_fop_handler_t handler); +int +shard_common_inode_write_post_resolve_handler (call_frame_t *frame, +                                               xlator_t *this)  {          shard_local_t *local = NULL; @@ -3971,8 +3988,6 @@ shard_common_inode_write_post_lookup_handler (call_frame_t *frame,                  return 0;          } -        local->postbuf = local->prebuf; -          if (local->call_count) {                  shard_common_lookup_shards (frame, this,                                              local->resolver_base_inode, @@ -3985,12 +4000,11 @@ shard_common_inode_write_post_lookup_handler (call_frame_t *frame,  }  int -shard_common_inode_write_post_resolve_handler (call_frame_t *frame, -                                               xlator_t *this) +shard_common_inode_write_post_lookup_handler (call_frame_t *frame, +                                              xlator_t *this)  { -        shard_local_t *local = NULL; - -        local = frame->local; +        shard_local_t *local = frame->local; +        shard_priv_t  *priv  = this->private;          if (local->op_ret < 0) {                  shard_common_inode_write_failure_unwind (local->fop, frame, @@ -3999,8 +4013,46 @@ 
shard_common_inode_write_post_resolve_handler (call_frame_t *frame,                  return 0;          } -        shard_lookup_base_file (frame, this, &local->loc, -                                shard_common_inode_write_post_lookup_handler); +        local->postbuf = local->prebuf; + +        /*Adjust offset to EOF so that correct shard is chosen for append*/ +        if (shard_is_appending_write (local)) +                local->offset = local->prebuf.ia_size; + +        local->first_block = get_lowest_block (local->offset, +                                               local->block_size); +        local->last_block = get_highest_block (local->offset, local->total_size, +                                               local->block_size); +        local->num_blocks = local->last_block - local->first_block + 1; +        local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *), +                                       gf_shard_mt_inode_list); +        if (!local->inode_list) { +                shard_common_inode_write_failure_unwind (local->fop, frame, +                                                         -1, ENOMEM); +                return 0; +        } + +        gf_msg_trace (this->name, 0, "%s: gfid=%s first_block=%"PRIu32" " +                      "last_block=%"PRIu32" num_blocks=%"PRIu32" offset=%"PRId64 +                      " total_size=%zu flags=%"PRId32"", +                      gf_fop_list[local->fop], +                      uuid_utoa (local->resolver_base_inode->gfid), +                      local->first_block, local->last_block, local->num_blocks, +                      local->offset, local->total_size, local->flags); + +        local->dot_shard_loc.inode = inode_find (this->itable, +                                                 priv->dot_shard_gfid); + +        if (!local->dot_shard_loc.inode) { +                /*change handler*/ +                shard_mkdir_dot_shard (frame, this, +                                 
shard_common_inode_write_post_resolve_handler); +        } else { +                /*change handler*/ +                local->post_res_handler = +                                shard_common_inode_write_post_resolve_handler; +                shard_refresh_dot_shard (frame, this); +        }          return 0;  } @@ -4699,9 +4751,6 @@ shard_common_inode_write_begin (call_frame_t *frame, xlator_t *this,          int             i              = 0;          uint64_t        block_size     = 0;          shard_local_t  *local          = NULL; -        shard_priv_t   *priv           = NULL; - -        priv = this->private;          ret = shard_inode_ctx_get_block_size (fd->inode, this, &block_size);          if (ret) { @@ -4777,37 +4826,13 @@ shard_common_inode_write_begin (call_frame_t *frame, xlator_t *this,                  local->iobref = iobref_ref (iobref);          local->fd = fd_ref (fd);          local->block_size = block_size; -        local->first_block = get_lowest_block (offset, local->block_size); -        local->last_block = get_highest_block (offset, local->total_size, -                                               local->block_size); -        local->num_blocks = local->last_block - local->first_block + 1;          local->resolver_base_inode = local->fd->inode; -        local->inode_list = GF_CALLOC (local->num_blocks, sizeof (inode_t *), -                                       gf_shard_mt_inode_list); -        if (!local->inode_list) -                goto out;          local->loc.inode = inode_ref (fd->inode);          gf_uuid_copy (local->loc.gfid, fd->inode->gfid); -        gf_msg_trace (this->name, 0, "%s: gfid=%s first_block=%"PRIu32" " -                      "last_block=%"PRIu32" num_blocks=%"PRIu32" offset=%"PRId64"" -                      " total_size=%zu flags=%"PRId32"", gf_fop_list[fop], -                      uuid_utoa (fd->inode->gfid), local->first_block, -                      local->last_block, local->num_blocks, offset, -                  
    local->total_size, local->flags); - -        local->dot_shard_loc.inode = inode_find (this->itable, -                                                 priv->dot_shard_gfid); - -        if (!local->dot_shard_loc.inode) { -                shard_mkdir_dot_shard (frame, this, -                                 shard_common_inode_write_post_resolve_handler); -        } else { -                local->post_res_handler = shard_common_inode_write_post_resolve_handler; -                shard_refresh_dot_shard (frame, this); -        } - +        shard_lookup_base_file (frame, this, &local->loc, +                                shard_common_inode_write_post_lookup_handler);          return 0;  out:          shard_common_inode_write_failure_unwind (fop, frame, -1, ENOMEM);  | 
