diff options
author | Brian Foster <bfoster@redhat.com> | 2012-05-07 13:53:31 -0400 |
---|---|---|
committer | Anand Avati <avati@redhat.com> | 2012-06-07 19:07:52 -0700 |
commit | cd439e79ca7b3b26b11fb894220550156936c354 (patch) | |
tree | 45a4db5abd538348e21502ff828f0fe0b6dac916 | |
parent | ed648c3b393ec06d0da7c1a9af42286fb3cc978e (diff) |
cluster/stripe: implement the coalesce stripe file format
The coalesce file format for cluster/stripe condenses the striped
files to a contiguous layout. The elimination of holes in striped
files eliminates space wasted via local filesystem preallocation
heuristics and significantly improves read performance.
Coalesce mode is implemented with a new 'coalesce' xlator option,
which is user-configurable and disabled by default. The format of
newly created files is marked with a new 'stripe-coalesce' xattr.
Cluster/stripe handles/preserves the format of files regardless
of the current mode of operation (i.e., a volume can
simultaneously consist of coalesced and non-coalesced files).
Files without the stripe-coalesce attribute are assumed to have
the traditional format to provide backward compatibility.
extras/stripe-merge: support traditional and coalesce stripe formats
Update the stripe-merge recovery tool to handle the traditional
and coalesced file formats. The format of the file is detected
automatically (and verified) via the stripe-coalesce attributes.
BUG: 801887
Change-Id: I682f0b4e819f496ddb68c9a01c4de4688280fdf8
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-on: http://review.gluster.com/3282
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Amar Tumballi <amarts@redhat.com>
Reviewed-by: Anand Avati <avati@redhat.com>
-rw-r--r-- | extras/stripe-merge.c | 496 | ||||
-rw-r--r-- | xlators/cluster/stripe/src/stripe-helpers.c | 92 | ||||
-rw-r--r-- | xlators/cluster/stripe/src/stripe.c | 339 | ||||
-rw-r--r-- | xlators/cluster/stripe/src/stripe.h | 54 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volgen.c | 1 | ||||
-rw-r--r-- | xlators/storage/posix/src/posix.c | 2 |
6 files changed, 882 insertions, 102 deletions
diff --git a/extras/stripe-merge.c b/extras/stripe-merge.c index 3f8e4b1244d..32768badd36 100644 --- a/extras/stripe-merge.c +++ b/extras/stripe-merge.c @@ -1,48 +1,498 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +/* + * stripe-merge.c + * + * This program recovers an original file based on the striped files stored on + * the individual bricks of a striped volume. The file format and stripe + * geometry is validated through the extended attributes stored in the file. + * + * TODO: Support optional xattr recovery (i.e., user xattrs). Perhaps provide a + * command-line flag to toggle this behavior. + */ + #include <stdio.h> -#include <unistd.h> -#include <fcntl.h> #include <sys/types.h> #include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <errno.h> +#include <string.h> +#include <attr/xattr.h> +#include <fnmatch.h> -int -main (int argc, char *argv[]) +#define ATTRNAME_STRIPE_INDEX "trusted.*.stripe-index" +#define ATTRNAME_STRIPE_COUNT "trusted.*.stripe-count" +#define ATTRNAME_STRIPE_SIZE "trusted.*.stripe-size" +#define ATTRNAME_STRIPE_COALESCE "trusted.*.stripe-coalesce" + +#define INVALID_FD -1 +#define INVALID_MODE UINT32_MAX + +struct file_stripe_info { + int stripe_count; + int stripe_size; + int coalesce; + mode_t mode; + int fd[0]; +}; + +static int close_files(struct file_stripe_info *); + +static struct +file_stripe_info *alloc_file_stripe_info(int count) { - int fds[argc-1]; - char buf[argc-1][4096]; int i; - int max_ret, ret; + struct file_stripe_info *finfo; - if (argc < 2) { - printf ("Usage: %s file1 file2 ... >file\n", argv[0]); - return 1; + finfo = calloc(1, sizeof(struct file_stripe_info) + + (sizeof(int) * count)); + if (!finfo) + return NULL; + + for (i = 0; i < count; i++) + finfo->fd[i] = INVALID_FD; + + finfo->mode = INVALID_MODE; + finfo->coalesce = INVALID_FD; + + return finfo; +} + +/* + * Search for an attribute matching the provided pattern. Return a count for + * the total number of matching entries (including 0). Allocate a buffer for + * the first matching entry found. + */ +static int +get_stripe_attr_name(const char *path, const char *pattern, char **attrname) +{ + char attrbuf[4096]; + char *ptr, *match = NULL; + int len, r, match_count = 0; + + if (!path || !pattern || !attrname) + return -1; + + len = listxattr(path, attrbuf, sizeof(attrbuf)); + if (len < 0) + return len; + + ptr = attrbuf; + while (ptr) { + r = fnmatch(pattern, ptr, 0); + if (!r) { + if (!match) + match = ptr; + match_count++; + } else if (r != FNM_NOMATCH) { + return -1; + } + + len -= strlen(ptr) + 1; + if (len > 0) + ptr += strlen(ptr) + 1; + else + ptr = NULL; } - for (i=0; i<argc-1; i++) { - fds[i] = open (argv[i+1], O_RDONLY); - if (fds[i] == -1) { - perror (argv[i+1]); - return 1; + if (match) + *attrname = strdup(match); + + return match_count; +} + +/* + * Get the integer representation of a named attribute. + */ +static int +get_stripe_attr_val(const char *path, const char *attr, int *val) +{ + char attrbuf[4096]; + int len; + + if (!path || !attr || !val) + return -1; + + len = getxattr(path, attr, attrbuf, sizeof(attrbuf)); + if (len < 0) + return len; + + *val = atoi(attrbuf); + + return 0; +} + +/* + * Get an attribute name/value (assumed to be an integer) pair based on a + * specified search pattern. A buffer is allocated for the exact attr name + * returned. Optionally, skip the pattern search if a buffer is provided + * (which should contain an attribute name). + * + * Returns the attribute count or -1 on error. The value parameter is set only + * when a single attribute is found. + */ +static int +get_attr(const char *path, const char *pattern, char **buf, int *val) +{ + int count = 1; + + if (!buf) + return -1; + + if (!*buf) { + count = get_stripe_attr_name(path, pattern, buf); + if (count > 1) { + /* pattern isn't good enough */ + fprintf(stderr, "ERROR: duplicate attributes found " + "matching pattern: %s\n", pattern); + free(*buf); + *buf = NULL; + return count; + } else if (count < 1) { + return count; } } - max_ret = 0; + if (get_stripe_attr_val(path, *buf, val) < 0) + return -1; + + return count; +} + +/* + * validate_and_open_files() + * + * Open the provided source files and validate the extended attributes. Verify + * that the geometric attributes are consistent across all of the files and + * print a warning if any files are missing. We proceed without error in the + * latter case to support partial recovery. + */ +static struct +file_stripe_info *validate_and_open_files(char *paths[], int count) +{ + int i, val, tmp; + struct stat sbuf; + char *stripe_count_attr = NULL; + char *stripe_size_attr = NULL; + char *stripe_index_attr = NULL; + char *stripe_coalesce_attr = NULL; + struct file_stripe_info *finfo = NULL; + + for (i = 0; i < count; i++) { + if (!paths[i]) + goto err; + + /* + * Check the stripe count first so we can allocate the info + * struct with the appropriate number of fds. + */ + if (get_attr(paths[i], ATTRNAME_STRIPE_COUNT, + &stripe_count_attr, &val) != 1) { + fprintf(stderr, "ERROR: %s: attribute: '%s'\n", + paths[i], ATTRNAME_STRIPE_COUNT); + goto err; + } + if (!finfo) { + finfo = alloc_file_stripe_info(val); + if (!finfo) + goto err; + + if (val != count) + fprintf(stderr, "WARNING: %s: stripe-count " + "(%d) != file count (%d). Result may " + "be incomplete.\n", paths[i], val, + count); + + finfo->stripe_count = val; + } else if (val != finfo->stripe_count) { + fprintf(stderr, "ERROR %s: invalid stripe count: %d " + "(expected %d)\n", paths[i], val, + finfo->stripe_count); + goto err; + } + + /* + * Get and validate the chunk size. + */ + if (get_attr(paths[i], ATTRNAME_STRIPE_SIZE, &stripe_size_attr, + &val) != 1) { + fprintf(stderr, "ERROR: %s: attribute: '%s'\n", + paths[i], ATTRNAME_STRIPE_SIZE); + goto err; + } + + if (!finfo->stripe_size) { + finfo->stripe_size = val; + } else if (val != finfo->stripe_size) { + fprintf(stderr, "ERROR: %s: invalid stripe size: %d " + "(expected %d)\n", paths[i], val, + finfo->stripe_size); + goto err; + } + + /* + * stripe-coalesce is a backward compatible attribute. If the + * attribute does not exist, assume a value of zero for the + * traditional stripe format. + */ + tmp = get_attr(paths[i], ATTRNAME_STRIPE_COALESCE, + &stripe_coalesce_attr, &val); + if (!tmp) { + val = 0; + } else if (tmp != 1) { + fprintf(stderr, "ERROR: %s: attribute: '%s'\n", + paths[i], ATTRNAME_STRIPE_COALESCE); + goto err; + } + + if (finfo->coalesce == INVALID_FD) { + finfo->coalesce = val; + } else if (val != finfo->coalesce) { + fprintf(stderr, "ERROR: %s: invalid coalesce flag\n", + paths[i]); + goto err; + } + + /* + * Get/validate the stripe index and open the file in the + * appropriate fd slot. + */ + if (get_attr(paths[i], ATTRNAME_STRIPE_INDEX, + &stripe_index_attr, &val) != 1) { + fprintf(stderr, "ERROR: %s: attribute: '%s'\n", + paths[i], ATTRNAME_STRIPE_INDEX); + goto err; + } + if (finfo->fd[val] != INVALID_FD) { + fprintf(stderr, "ERROR: %s: duplicate stripe index: " + "%d\n", paths[i], val); + goto err; + } + + finfo->fd[val] = open(paths[i], O_RDONLY); + if (finfo->fd[val] < 0) + goto err; + + /* + * Get the creation mode for the file. + */ + if (fstat(finfo->fd[val], &sbuf) < 0) + goto err; + if (finfo->mode == INVALID_MODE) { + finfo->mode = sbuf.st_mode; + } else if (sbuf.st_mode != finfo->mode) { + fprintf(stderr, "ERROR: %s: invalid mode\n", paths[i]); + goto err; + } + } + + free(stripe_count_attr); + free(stripe_size_attr); + free(stripe_index_attr); + free(stripe_coalesce_attr); + + return finfo; +err: + + if (stripe_count_attr) + free(stripe_count_attr); + if (stripe_size_attr) + free(stripe_size_attr); + if (stripe_index_attr) + free(stripe_index_attr); + if (stripe_coalesce_attr) + free(stripe_coalesce_attr); + + if (finfo) { + close_files(finfo); + free(finfo); + } + + return NULL; +} + +static int +close_files(struct file_stripe_info *finfo) +{ + int i, ret; + + if (!finfo) + return -1; + + for (i = 0; i < finfo->stripe_count; i++) { + if (finfo->fd[i] == INVALID_FD) + continue; + + ret = close(finfo->fd[i]); + if (ret < 0) + return ret; + } + + return ret; +} + +/* + * Generate the original file using files striped in the coalesced format. + * Data in the striped files is stored at a coalesced offset based on the + * stripe number. + * + * Walk through the finfo fds (which are already ordered) and and iteratively + * copy stripe_size bytes from the source files to the target file. If a source + * file is missing, seek past the associated stripe_size bytes in the target + * file. + */ +static int +generate_file_coalesce(int target, struct file_stripe_info *finfo) +{ + char *buf; + int ret = 0; + int r, w, i; + + buf = malloc(finfo->stripe_size); + if (!buf) + return -1; + + i = 0; + while (1) { + if (finfo->fd[i] == INVALID_FD) { + if (lseek(target, finfo->stripe_size, SEEK_CUR) < 0) + break; + + i = (i + 1) % finfo->stripe_count; + continue; + } + + r = read(finfo->fd[i], buf, finfo->stripe_size); + if (r < 0) { + ret = r; + break; + } + if (!r) + break; + + w = write(target, buf, r); + if (w < 0) { + ret = w; + break; + } + + i = (i + 1) % finfo->stripe_count; + } + + free(buf); + return ret; +} + +/* + * Generate the original file using files striped with the traditional stripe + * format. Data in the striped files is stored at the equivalent offset from + * the source file. + */ +static int +generate_file_traditional(int target, struct file_stripe_info *finfo) +{ + int i, j, max_ret, ret; + char buf[finfo->stripe_count][4096]; + do { char newbuf[4096] = {0, }; - int j; max_ret = 0; - for (i=0; i<argc-1; i++) { - memset (buf[i], 0, 4096); - ret = read (fds[i], buf[i], 4096); + for (i = 0; i < finfo->stripe_count; i++) { + memset(buf[i], 0, 4096); + ret = read(finfo->fd[i], buf[i], 4096); if (ret > max_ret) max_ret = ret; } - for (i=0; i<max_ret;i++) - for (j=0; j<argc-1; j++) + for (i = 0; i < max_ret; i++) + for (j = 0; j < finfo->stripe_count; j++) newbuf[i] |= buf[j][i]; - write (1, newbuf, max_ret); + write(target, newbuf, max_ret); } while (max_ret); return 0; } +static int +generate_file(int target, struct file_stripe_info *finfo) +{ + if (finfo->coalesce) + return generate_file_coalesce(target, finfo); + + return generate_file_traditional(target, finfo); +} + +static void +usage(char *name) +{ + fprintf(stderr, "Usage: %s [-o <outputfile>] <inputfile1> " + "<inputfile2> ...\n", name); +} + +int +main(int argc, char *argv[]) +{ + int file_count, opt; + char *opath = NULL; + int targetfd; + struct file_stripe_info *finfo; + + while ((opt = getopt(argc, argv, "o:")) != -1) { + switch (opt) { + case 'o': + opath = optarg; + break; + default: + usage(argv[0]); + return -1; + } + } + + file_count = argc - optind; + + if (!opath || !file_count) { + usage(argv[0]); + return -1; + } + + finfo = validate_and_open_files(&argv[optind], file_count); + if (!finfo) + goto err; + + targetfd = open(opath, O_RDWR|O_CREAT, finfo->mode); + if (targetfd < 0) + goto err; + + if (generate_file(targetfd, finfo) < 0) + goto err; + + if (fsync(targetfd) < 0) + fprintf(stderr, "ERROR: %s\n", strerror(errno)); + if (close(targetfd) < 0) + fprintf(stderr, "ERROR: %s\n", strerror(errno)); + + close_files(finfo); + free(finfo); + + return 0; + +err: + if (finfo) { + close_files(finfo); + free(finfo); + } + + return -1; +} + diff --git a/xlators/cluster/stripe/src/stripe-helpers.c b/xlators/cluster/stripe/src/stripe-helpers.c index 1821832c20e..336da793e55 100644 --- a/xlators/cluster/stripe/src/stripe-helpers.c +++ b/xlators/cluster/stripe/src/stripe-helpers.c @@ -236,8 +236,6 @@ out: return block_size; } - - int32_t stripe_ctx_handle (xlator_t *this, call_frame_t *prev, stripe_local_t *local, dict_t *dict) @@ -246,7 +244,6 @@ stripe_ctx_handle (xlator_t *this, call_frame_t *prev, stripe_local_t *local, data_t *data = NULL; int32_t index = 0; stripe_private_t *priv = NULL; - int32_t ret = -1; priv = this->private; @@ -343,14 +340,31 @@ stripe_ctx_handle (xlator_t *this, call_frame_t *prev, stripe_local_t *local, if (!local->fctx->xl_array[index]) local->fctx->xl_array[index] = prev->this; } - ret = 0; + + sprintf(key, "trusted.%s.stripe-coalesce", this->name); + data = dict_get(dict, key); + if (!data) { + /* + * The file was probably created prior to coalesce support. + * Assume non-coalesce mode for this file to maintain backwards + * compatibility. + */ + gf_log(this->name, GF_LOG_DEBUG, "missing stripe-coalesce " + "attr, assume non-coalesce mode"); + local->fctx->stripe_coalesce = 0; + } else { + local->fctx->stripe_coalesce = data_to_int32(data); + } + + out: - return ret; + return 0; } int32_t stripe_xattr_request_build (xlator_t *this, dict_t *dict, uint64_t stripe_size, - uint32_t stripe_count, uint32_t stripe_index) + uint32_t stripe_count, uint32_t stripe_index, + uint32_t stripe_coalesce) { char key[256] = {0,}; int32_t ret = -1; @@ -378,6 +392,14 @@ stripe_xattr_request_build (xlator_t *this, dict_t *dict, uint64_t stripe_size, "failed to set %s in xattr_req dict", key); goto out; } + + sprintf(key, "trusted.%s.stripe-coalesce", this->name); + ret = dict_set_int32(dict, key, stripe_coalesce); + if (ret) { + gf_log(this->name, GF_LOG_WARNING, + "failed to set %s in xattr_req_dict", key); + goto out; + } out: return ret; } @@ -508,3 +530,61 @@ stripe_iatt_merge (struct iatt *from, struct iatt *to) to->ia_atime = from->ia_atime; return 0; } + +off_t +coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count) +{ + size_t line_size = 0; + uint64_t stripe_num = 0; + off_t coalesced_offset = 0; + + line_size = stripe_size * stripe_count; + stripe_num = offset / line_size; + + coalesced_offset = (stripe_num * stripe_size) + + (offset % stripe_size); + + return coalesced_offset; +} + +off_t +uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count, + int stripe_index) +{ + uint64_t nr_full_stripe_chunks = 0, mod = 0; + + if (!size) + return size; + + /* + * Estimate the number of fully written stripes from the + * local file size. Each stripe_size chunk corresponds to + * a stripe. + */ + nr_full_stripe_chunks = (size / stripe_size) * stripe_count; + mod = size % stripe_size; + + if (!mod) { + /* + * There is no remainder, thus we could have overestimated + * the size of the file in terms of chunks. Trim the number + * of chunks by the following stripe members and leave it + * up to those nodes to respond with a larger size (if + * necessary). + */ + nr_full_stripe_chunks -= stripe_count - + (stripe_index + 1); + size = nr_full_stripe_chunks * stripe_size; + } else { + /* + * There is a remainder and thus we own the last chunk of the + * file. Add the preceding stripe members of the final stripe + * along with the remainder to calculate the exact size. + */ + nr_full_stripe_chunks += stripe_index; + size = nr_full_stripe_chunks * stripe_size + mod; + } + + return size; +} + diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c index a98e14e9508..efee9444e9e 100644 --- a/xlators/cluster/stripe/src/stripe.c +++ b/xlators/cluster/stripe/src/stripe.c @@ -32,7 +32,6 @@ struct volume_options options[]; - int32_t stripe_sh_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, @@ -237,6 +236,8 @@ stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->stbuf_blocks += buf->ia_blocks; local->postparent_blocks += postparent->ia_blocks; + correct_file_size(buf, local->fctx, prev); + if (local->stbuf_size < buf->ia_size) local->stbuf_size = buf->ia_size; if (local->postparent_size < postparent->ia_size) @@ -326,9 +327,19 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, /* get stripe-size xattr on lookup. This would be required for * open/read/write/pathinfo calls. Hence we send down the request * even when type == IA_INVAL */ + + /* + * We aren't guaranteed to have xdata here. We need the format info for + * the file, so allocate xdata if necessary. + */ + if (!xdata) + xdata = dict_new(); + else + xdata = dict_ref(xdata); + if (xdata && (IA_ISREG (loc->inode->ia_type) || (loc->inode->ia_type == IA_INVAL))) { - ret = stripe_xattr_request_build (this, xdata, 8, 4, 4); + ret = stripe_xattr_request_build (this, xdata, 8, 4, 4, 0); if (ret) gf_log (this->name , GF_LOG_ERROR, "Failed to build" " xattr request for %s", loc->path); @@ -344,6 +355,8 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, trav = trav->next; } + dict_unref(xdata); + return 0; err: STRIPE_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); @@ -388,6 +401,9 @@ stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } local->stbuf_blocks += buf->ia_blocks; + + correct_file_size(buf, local->fctx, prev); + if (local->stbuf_size < buf->ia_size) local->stbuf_size = buf->ia_size; } @@ -416,6 +432,7 @@ stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) xlator_list_t *trav = NULL; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; + stripe_fd_ctx_t *fctx = NULL; int32_t op_errno = EINVAL; VALIDATE_OR_GOTO (frame, err); @@ -442,6 +459,13 @@ stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) frame->local = local; local->call_count = priv->child_count; + if (IA_ISREG(loc->inode->ia_type)) { + inode_ctx_get(loc->inode, this, (uint64_t *) &fctx); + if (!fctx) + goto err; + local->fctx = fctx; + } + while (trav) { STACK_WIND (frame, stripe_stat_cbk, trav->xlator, trav->xlator->fops->stat, loc, NULL); @@ -583,6 +607,9 @@ stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->prebuf_blocks += prebuf->ia_blocks; local->postbuf_blocks += postbuf->ia_blocks; + correct_file_size(prebuf, local->fctx, prev); + correct_file_size(postbuf, local->fctx, prev); + if (local->prebuf_size < prebuf->ia_size) local->prebuf_size = prebuf->ia_size; @@ -614,10 +641,12 @@ out: int32_t stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata) { - xlator_list_t *trav = NULL; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; + stripe_fd_ctx_t *fctx = NULL; int32_t op_errno = EINVAL; + int i, eof_idx; + off_t dest_offset, tmp_offset; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -626,7 +655,6 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, VALIDATE_OR_GOTO (loc->inode, err); priv = this->private; - trav = this->children; if (priv->first_child_down) { op_errno = ENOTCONN; @@ -643,11 +671,51 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, frame->local = local; local->call_count = priv->child_count; - while (trav) { - STACK_WIND (frame, stripe_truncate_cbk, trav->xlator, - trav->xlator->fops->truncate, loc, offset, NULL); - trav = trav->next; - } + inode_ctx_get(loc->inode, this, (uint64_t *) &fctx); + if (!fctx) { + gf_log(this->name, GF_LOG_ERROR, "no stripe context"); + op_errno = EINVAL; + goto err; + } + + local->fctx = fctx; + eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count; + + for (i = 0; i < fctx->stripe_count; i++) { + if (!fctx->xl_array[i]) { + gf_log(this->name, GF_LOG_ERROR, + "no xlator at index %d", i); + op_errno = EINVAL; + goto err; + } + + if (fctx->stripe_coalesce) { + /* + * The node that owns EOF is truncated to the exact + * coalesced offset. Nodes prior to this index should + * be rounded up to the size of the complete stripe, + * while nodes after this index should be rounded down + * to the size of the previous stripe. + */ + if (i < eof_idx) + tmp_offset = roof(offset, fctx->stripe_size * + fctx->stripe_count); + else if (i > eof_idx) + tmp_offset = floor(offset, fctx->stripe_size * + fctx->stripe_count); + else + tmp_offset = offset; + + dest_offset = coalesced_offset(tmp_offset, + fctx->stripe_size, fctx->stripe_count); + } else { + dest_offset = offset; + } + + STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i], + fctx->xl_array[i]->fops->truncate, loc, dest_offset, + NULL); + } return 0; err: @@ -698,6 +766,9 @@ stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->prebuf_blocks += preop->ia_blocks; local->postbuf_blocks += postop->ia_blocks; + correct_file_size(preop, local->fctx, prev); + correct_file_size(postop, local->fctx, prev); + if (local->prebuf_size < preop->ia_size) local->prebuf_size = preop->ia_size; if (local->postbuf_size < postop->ia_size) @@ -733,6 +804,7 @@ stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, xlator_list_t *trav = NULL; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; + stripe_fd_ctx_t *fctx = NULL; int32_t op_errno = EINVAL; VALIDATE_OR_GOTO (frame, err); @@ -766,6 +838,13 @@ stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, return 0; } + if (IA_ISREG(loc->inode->ia_type)) { + inode_ctx_get(loc->inode, this, (uint64_t *) &fctx); + if (!fctx) + goto err; + local->fctx = fctx; + } + local->call_count = priv->child_count; while (trav) { STACK_WIND (frame, stripe_setattr_cbk, @@ -862,6 +941,8 @@ stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->pre_buf.ia_blocks += prenewparent->ia_blocks; local->post_buf.ia_blocks += postnewparent->ia_blocks; + correct_file_size(buf, local->fctx, prev); + if (local->stbuf.ia_size < buf->ia_size) local->stbuf.ia_size = buf->ia_size; @@ -947,6 +1028,7 @@ stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, stripe_private_t *priv = NULL; stripe_local_t *local = NULL; xlator_list_t *trav = NULL; + stripe_fd_ctx_t *fctx = NULL; int32_t op_errno = EINVAL; VALIDATE_OR_GOTO (frame, err); @@ -977,6 +1059,11 @@ stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, local->call_count = priv->child_count; + inode_ctx_get(oldloc->inode, this, (uint64_t *) &fctx); + if (!fctx) + goto err; + local->fctx = fctx; + frame->local = local; STACK_WIND (frame, stripe_first_rename_cbk, trav->xlator, @@ -1367,7 +1454,6 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this, stripe_private_t *priv = NULL; call_frame_t *prev = NULL; xlator_list_t *trav = NULL; - stripe_fd_ctx_t *fctx = NULL; if (!this || !frame || !frame->local || !cookie) { gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); @@ -1399,10 +1485,16 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (uuid_is_null (local->ia_gfid)) uuid_copy (local->ia_gfid, buf->ia_gfid); + if (stripe_ctx_handle(this, prev, local, xdata)) + gf_log(this->name, GF_LOG_ERROR, + "Error getting fctx info from dict"); + local->stbuf_blocks += buf->ia_blocks; local->preparent_blocks += preparent->ia_blocks; local->postparent_blocks += postparent->ia_blocks; + correct_file_size(buf, local->fctx, prev); + if (local->stbuf_size < buf->ia_size) local->stbuf_size = buf->ia_size; if (local->preparent_size < preparent->ia_size) @@ -1441,23 +1533,10 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->postparent.ia_size = local->postparent_size; local->stbuf.ia_size = local->stbuf_size; local->stbuf.ia_blocks = local->stbuf_blocks; - fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t), - gf_stripe_mt_stripe_fd_ctx_t); - if (!fctx) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto unwind; - } - - fctx->stripe_size = local->stripe_size; - fctx->stripe_count = priv->child_count; - fctx->static_array = 1; - fctx->xl_array = priv->xl_array; inode_ctx_put (local->inode, this, - (uint64_t)(long)fctx); + (uint64_t)(long) local->fctx); } -unwind: STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno, local->inode, &local->stbuf, &local->preparent, &local->postparent, NULL); @@ -1531,7 +1610,8 @@ stripe_mknod_first_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ret = stripe_xattr_request_build (this, dict, local->stripe_size, - priv->child_count, i); + priv->child_count, i, + priv->coalesce); if (ret) gf_log (this->name, GF_LOG_ERROR, "Failed to build xattr request"); @@ -1579,9 +1659,6 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, stripe_local_t *local = NULL; int32_t op_errno = EINVAL; int32_t i = 0; - char size_key[256] = {0,}; - char index_key[256] = {0,}; - char count_key[256] = {0,}; dict_t *dict = NULL; int ret = 0; int need_unref = 0; @@ -1631,15 +1708,6 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, be looked up */ local->call_count = priv->child_count; - /* Send a setxattr request to nodes where the - files are created */ - sprintf (size_key, - "trusted.%s.stripe-size", this->name); - sprintf (count_key, - "trusted.%s.stripe-count", this->name); - sprintf (index_key, - "trusted.%s.stripe-index", this->name); - if (priv->xattr_supported) { dict = dict_new (); if (!dict) { @@ -1653,7 +1721,7 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, ret = stripe_xattr_request_build (this, dict, local->stripe_size, priv->child_count, - i); + i, priv->coalesce); if (ret) gf_log (this->name, GF_LOG_ERROR, "failed to build xattr request"); @@ -1867,6 +1935,7 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t callcnt = 0; stripe_local_t *local = NULL; call_frame_t *prev = NULL; + stripe_fd_ctx_t *fctx = NULL; if (!this || !frame || !frame->local || !cookie) { gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); @@ -1880,6 +1949,14 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, { callcnt = --local->call_count; + inode_ctx_get(inode, this, (uint64_t *) &fctx); + if (!fctx) { + gf_log(this->name, GF_LOG_ERROR, "failed to get stripe " + "context"); + op_ret = -1; + op_errno = EINVAL; + } + if (op_ret == -1) { gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s", @@ -1903,6 +1980,8 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->preparent_blocks += preparent->ia_blocks; local->postparent_blocks += postparent->ia_blocks; + correct_file_size(buf, fctx, prev); + if (local->stbuf_size < buf->ia_size) local->stbuf_size = buf->ia_size; if (local->preparent_size < preparent->ia_size) @@ -2023,7 +2102,6 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t callcnt = 0; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; - stripe_fd_ctx_t *fctx = NULL; call_frame_t *prev = NULL; xlator_list_t *trav = NULL; @@ -2049,12 +2127,21 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } if (op_ret >= 0) { + if (IA_ISREG(buf->ia_type)) { + if (stripe_ctx_handle(this, prev, local, xdata)) + gf_log(this->name, GF_LOG_ERROR, + "Error getting fctx info from " + "dict"); + } + local->op_ret = op_ret; local->stbuf_blocks += buf->ia_blocks; local->preparent_blocks += preparent->ia_blocks; local->postparent_blocks += postparent->ia_blocks; + correct_file_size(buf, local->fctx, prev); + if (local->stbuf_size < buf->ia_size) local->stbuf_size = buf->ia_size; if (local->preparent_size < preparent->ia_size) @@ -2092,23 +2179,13 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->stbuf.ia_size = local->stbuf_size; local->stbuf.ia_blocks = local->stbuf_blocks; - fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t), - gf_stripe_mt_stripe_fd_ctx_t); - if (!fctx) { - local->op_ret = -1; - local->op_errno = ENOMEM; - goto unwind; - } - - fctx->stripe_size = local->stripe_size; - fctx->stripe_count = priv->child_count; - fctx->static_array = 1; - fctx->xl_array = priv->xl_array; - inode_ctx_put (local->inode, this, - (uint64_t)(long)fctx); + stripe_copy_xl_array(local->fctx->xl_array, + priv->xl_array, + local->fctx->stripe_count); + inode_ctx_put(local->inode, this, + (uint64_t) local->fctx); } - unwind: /* Create itself has failed.. so return without setxattring */ STRIPE_STACK_UNWIND (create, frame, local->op_ret, @@ -2214,14 +2291,14 @@ stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, ret = stripe_xattr_request_build (this, dict, local->stripe_size, priv->child_count, - i); + i, priv->coalesce); if (ret) gf_log (this->name, GF_LOG_ERROR, "failed to build xattr request"); } else { dict = local->xattr; } - + STACK_WIND (frame, stripe_create_cbk, trav->xlator, trav->xlator->fops->create, &local->loc, local->flags, local->mode, local->umask, local->fd, @@ -2310,7 +2387,7 @@ stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc, ret = stripe_xattr_request_build (this, dict, local->stripe_size, priv->child_count, - i); + i, priv->coalesce); if (ret) gf_log (this->name, GF_LOG_ERROR, "failed to build xattr request"); @@ -2743,6 +2820,9 @@ stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->prebuf_blocks += prebuf->ia_blocks; local->postbuf_blocks += postbuf->ia_blocks; + correct_file_size(prebuf, local->fctx, prev); + correct_file_size(postbuf, local->fctx, prev); + if (local->prebuf_size < prebuf->ia_size) local->prebuf_size = prebuf->ia_size; @@ -2777,6 +2857,7 @@ stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict stripe_local_t *local = NULL; stripe_private_t *priv = NULL; xlator_list_t *trav = NULL; + stripe_fd_ctx_t *fctx = NULL; int32_t op_errno = 1; VALIDATE_OR_GOTO (frame, err); @@ -2793,6 +2874,14 @@ stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict op_errno = ENOMEM; goto err; } + + inode_ctx_get(fd->inode, this, (uint64_t *) &fctx); + if (!fctx) { + op_errno = EINVAL; + goto err; + } + local->fctx = fctx; + local->op_ret = -1; frame->local = local; local->call_count = priv->child_count; @@ -2846,6 +2935,9 @@ stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->stbuf = *buf; local->stbuf_blocks += buf->ia_blocks; + + correct_file_size(buf, local->fctx, prev); + if (local->stbuf_size < buf->ia_size) local->stbuf_size = buf->ia_size; } @@ -2877,6 +2969,7 @@ stripe_fstat (call_frame_t *frame, stripe_local_t *local = NULL; stripe_private_t *priv = NULL; xlator_list_t *trav = NULL; + stripe_fd_ctx_t *fctx = NULL; int32_t op_errno = 1; VALIDATE_OR_GOTO (frame, err); @@ -2897,6 +2990,13 @@ stripe_fstat (call_frame_t *frame, frame->local = local; local->call_count = priv->child_count; + if (IA_ISREG(fd->inode->ia_type)) { + inode_ctx_get(fd->inode, this, (uint64_t *) &fctx); + if (!fctx) + goto err; + local->fctx = fctx; + } + while (trav) { STACK_WIND (frame, stripe_fstat_cbk, trav->xlator, trav->xlator->fops->fstat, fd, NULL); @@ -2915,8 +3015,10 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, d { stripe_local_t *local = NULL; stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - int32_t op_errno = 1; + stripe_fd_ctx_t *fctx = NULL; + int i, eof_idx; + off_t dest_offset, tmp_offset; + int32_t op_errno = 1; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -2924,7 +3026,6 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, d VALIDATE_OR_GOTO (fd->inode, err); priv = this->private; - trav = this->children; /* Initialization */ local = mem_get0 (this->local_pool); @@ -2936,11 +3037,49 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, d frame->local = local; local->call_count = priv->child_count; - while (trav) { - STACK_WIND (frame, stripe_truncate_cbk, trav->xlator, - trav->xlator->fops->ftruncate, fd, offset, NULL); - trav = trav->next; - } + inode_ctx_get(fd->inode, this, (uint64_t *) &fctx); + if (!fctx) { + gf_log(this->name, GF_LOG_ERROR, "no stripe context"); + op_errno = EINVAL; + goto err; + } + if (!fctx->stripe_count) { + gf_log(this->name, GF_LOG_ERROR, "no stripe count"); + op_errno = EINVAL; + goto err; + } + + local->fctx = fctx; + eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count; + + for (i = 0; i < fctx->stripe_count; i++) { + if (!fctx->xl_array[i]) { + gf_log(this->name, GF_LOG_ERROR, "no xlator at index " + "%d", i); + op_errno = EINVAL; + goto err; + } + + if (fctx->stripe_coalesce) { + if (i < eof_idx) + tmp_offset = roof(offset, fctx->stripe_size * + fctx->stripe_count); + else if (i > eof_idx) + tmp_offset = floor(offset, fctx->stripe_size * + fctx->stripe_count); + else + tmp_offset = offset; + + dest_offset = coalesced_offset(tmp_offset, + fctx->stripe_size, fctx->stripe_count); + } else { + dest_offset = offset; + } + + STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i], + fctx->xl_array[i]->fops->ftruncate, fd, dest_offset, + NULL); + } return 0; err: @@ -3045,6 +3184,7 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, struct iatt tmp_stbuf = {0,}; struct iobref *tmp_iobref = NULL; struct iobuf *iobuf = NULL; + call_frame_t *prev = NULL; if (!this || !frame || !frame->local) { gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); @@ -3052,13 +3192,16 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } local = frame->local; + prev = cookie; LOCK (&frame->lock); { callcnt = --local->call_count; - if (op_ret != -1) + if (op_ret != -1) { + correct_file_size(buf, local->fctx, prev); if (local->stbuf_size < buf->ia_size) local->stbuf_size = buf->ia_size; + } } UNLOCK (&frame->lock); @@ -3150,6 +3293,7 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *tmp_stbuf_p = NULL; //need it for a warning struct iobref *tmp_iobref = NULL; stripe_fd_ctx_t *fctx = NULL; + call_frame_t *prev = NULL; if (!this || !frame || !frame->local || !cookie) { gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); @@ -3158,6 +3302,7 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; index = local->node_index; + prev = cookie; mframe = local->orig_frame; if (!mframe) goto out; @@ -3177,6 +3322,9 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, mlocal->replies[index].stbuf = *stbuf; mlocal->replies[index].count = count; mlocal->replies[index].vector = iov_dup (vector, count); + + correct_file_size(stbuf, fctx, prev); + if (local->stbuf_size < stbuf->ia_size) local->stbuf_size = stbuf->ia_size; local->stbuf_blocks += stbuf->ia_blocks; @@ -3289,6 +3437,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, uint64_t stripe_size = 0; off_t rounded_start = 0; off_t frame_offset = offset; + off_t dest_offset = 0; stripe_local_t *local = NULL; call_frame_t *rframe = NULL; stripe_local_t *rlocal = NULL; @@ -3361,9 +3510,16 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, rlocal->readv_size = frame_size; rframe->local = rlocal; idx = (index % fctx->stripe_count); + + if (fctx->stripe_coalesce) + dest_offset = coalesced_offset(frame_offset, + stripe_size, fctx->stripe_count); + else + dest_offset = frame_offset; + STACK_WIND (rframe, stripe_readv_cbk, fctx->xl_array[idx], fctx->xl_array[idx]->fops->readv, - fd, frame_size, frame_offset, flags, xdata); + fd, frame_size, dest_offset, flags, xdata); frame_offset += frame_size; } @@ -3410,11 +3566,27 @@ stripe_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->op_ret += op_ret; local->post_buf = *postbuf; local->pre_buf = *prebuf; + + local->prebuf_blocks += prebuf->ia_blocks; + local->postbuf_blocks += postbuf->ia_blocks; + + correct_file_size(prebuf, local->fctx, prev); + correct_file_size(postbuf, local->fctx, prev); + + if (local->prebuf_size < prebuf->ia_size) + local->prebuf_size = prebuf->ia_size; + if (local->postbuf_size < postbuf->ia_size) + local->postbuf_size = postbuf->ia_size; } } UNLOCK (&frame->lock); if ((callcnt == local->wind_count) && local->unwind) { + local->pre_buf.ia_size = local->prebuf_size; + local->pre_buf.ia_blocks = local->prebuf_blocks; + local->post_buf.ia_size = local->postbuf_size; + local->post_buf.ia_blocks = local->postbuf_blocks; + STRIPE_STACK_UNWIND (writev, frame, local->op_ret, local->op_errno, &local->pre_buf, &local->post_buf, NULL); @@ -3440,6 +3612,7 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t fill_size = 0; uint64_t stripe_size = 0; uint64_t tmp_fctx = 0; + off_t dest_offset = 0; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -3469,6 +3642,7 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, } frame->local = local; local->stripe_size = stripe_size; + local->fctx = fctx; if (!stripe_size) { gf_log (this->name, GF_LOG_DEBUG, @@ -3505,9 +3679,15 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, if (remaining_size == 0) local->unwind = 1; + if (fctx->stripe_coalesce) + dest_offset = coalesced_offset(offset + offset_offset, + local->stripe_size, fctx->stripe_count); + else + dest_offset = offset + offset_offset; + STACK_WIND (frame, stripe_writev_cbk, fctx->xl_array[idx], fctx->xl_array[idx]->fops->writev, fd, tmp_vec, - tmp_count, offset + offset_offset, flags, iobref, + tmp_count, dest_offset, flags, iobref, xdata); GF_FREE (tmp_vec); @@ -3859,10 +4039,15 @@ stripe_readdirp_lookup_cbk (call_frame_t *frame, void *cookie, local->op_ret = op_ret; goto unlock; } + + if (stripe_ctx_handle(this, prev, local, xattr)) + gf_log(this->name, GF_LOG_ERROR, + "Error getting fctx info from dict."); + + correct_file_size(stbuf, local->fctx, prev); + stripe_iatt_merge (stbuf, &entry->d_stat); local->stbuf_blocks += stbuf->ia_blocks; - - stripe_ctx_handle (this, prev, local, xattr); } unlock: UNLOCK(&frame->lock); @@ -3957,7 +4142,7 @@ unlock: xattrs = dict_new (); if (xattrs) - (void) stripe_xattr_request_build (this, xattrs, 0, 0, 0); + (void) stripe_xattr_request_build (this, xattrs, 0, 0, 0, 0); count = op_ret; list_for_each_entry_safe (local_entry, tmp_entry, (&local->entries.list), list) { @@ -4165,6 +4350,9 @@ reconfigure (xlator_t *this, dict_t *options) goto unlock; } } + + GF_OPTION_RECONF("coalesce", priv->coalesce, options, bool, + unlock); } unlock: UNLOCK (&priv->lock); @@ -4285,6 +4473,8 @@ init (xlator_t *this) /* notify related */ priv->nodes_down = priv->child_count; + GF_OPTION_INIT("coalesce", priv->coalesce, bool, out); + this->local_pool = mem_pool_new (stripe_local_t, 128); if (!this->local_pool) { ret = -1; @@ -4768,5 +4958,12 @@ struct volume_options options[] = { .type = GF_OPTION_TYPE_BOOL, .default_value = "true" }, + { .key = {"coalesce"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .description = "Enable coalesce mode to flatten striped files as " + "stored on the server (i.e., eliminate holes caused " + "by the traditional format)." + }, { .key = {NULL} }, }; diff --git a/xlators/cluster/stripe/src/stripe.h b/xlators/cluster/stripe/src/stripe.h index cb05eb56fc0..1b9e660c126 100644 --- a/xlators/cluster/stripe/src/stripe.h +++ b/xlators/cluster/stripe/src/stripe.h @@ -101,6 +101,7 @@ struct stripe_private { int8_t child_count; int8_t *state; /* Current state of child node */ gf_boolean_t xattr_supported; /* default yes */ + gf_boolean_t coalesce; char vol_uuid[UUID_SIZE + 1]; }; @@ -119,6 +120,7 @@ struct readv_replies { typedef struct _stripe_fd_ctx { off_t stripe_size; int stripe_count; + int stripe_coalesce; int static_array; xlator_t **xl_array; } stripe_fd_ctx_t; @@ -214,13 +216,41 @@ struct stripe_local { typedef struct stripe_local stripe_local_t; typedef struct stripe_private stripe_private_t; +/* + * Determine the stripe index of a particular frame based on the translator. + */ +static inline int32_t stripe_get_frame_index(stripe_fd_ctx_t *fctx, + call_frame_t *prev) +{ + int32_t i, idx = -1; + + for (i = 0; i < fctx->stripe_count; i++) { + if (fctx->xl_array[i] == prev->this) { + idx = i; + break; + } + } + + return idx; +} + +static inline void stripe_copy_xl_array(xlator_t **dst, xlator_t **src, + int count) +{ + int i; + + for (i = 0; i < count; i++) + dst[i] = src[i]; +} + void stripe_local_wipe (stripe_local_t *local); int32_t stripe_ctx_handle (xlator_t *this, call_frame_t *prev, stripe_local_t *local, dict_t *dict); void stripe_aggregate_xattr (dict_t *dst, dict_t *src); int32_t stripe_xattr_request_build (xlator_t *this, dict_t *dict, uint64_t stripe_size, uint32_t stripe_count, - uint32_t stripe_index); + uint32_t stripe_index, + uint32_t stripe_coalesce); int32_t stripe_get_matching_bs (const char *path, stripe_private_t *priv); int set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data); int32_t stripe_iatt_merge (struct iatt *from, struct iatt *to); @@ -229,5 +259,27 @@ int32_t stripe_fill_pathinfo_xattr (xlator_t *this, stripe_local_t *local, int32_t stripe_free_xattr_str (stripe_local_t *local); int32_t stripe_xattr_aggregate (char *buffer, stripe_local_t *local, int32_t *total); +off_t coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count); +off_t uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count, + int stripe_index); + +/* + * Adjust the size attribute for files if coalesce is enabled. + */ +static inline void correct_file_size(struct iatt *buf, stripe_fd_ctx_t *fctx, + call_frame_t *prev) +{ + int index; + + if (!IA_ISREG(buf->ia_type)) + return; + + if (!fctx || !fctx->stripe_coalesce) + return; + + index = stripe_get_frame_index(fctx, prev); + buf->ia_size = uncoalesced_size(buf->ia_size, fctx->stripe_size, + fctx->stripe_count, index); +} #endif /* _STRIPE_H_ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 2a97e40522a..a01d2a773eb 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -141,6 +141,7 @@ static struct volopt_map_entry glusterd_volopt_map[] = { {"cluster.choose-local", "cluster/replicate", NULL, NULL, DOC, 0}, {"cluster.stripe-block-size", "cluster/stripe", "block-size", NULL, DOC, 0}, + {"cluster.stripe-coalesce", "cluster/stripe", "coalesce", NULL, DOC, 0}, {VKEY_DIAG_LAT_MEASUREMENT, "debug/io-stats", "latency-measurement", "off", NO_DOC, 0}, {"diagnostics.dump-fd-stats", "debug/io-stats", NULL, NULL, NO_DOC, 0}, diff --git a/xlators/storage/posix/src/posix.c b/xlators/storage/posix/src/posix.c index e18654cc728..46a83ea2f85 100644 --- a/xlators/storage/posix/src/posix.c +++ b/xlators/storage/posix/src/posix.c @@ -1810,7 +1810,7 @@ out: STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, (loc)?loc->inode:NULL, &stbuf, &preparent, - &postparent, NULL); + &postparent, xdata); return 0; } |