summaryrefslogtreecommitdiffstats
path: root/xlators/cluster
diff options
context:
space:
mode:
authorBrian Foster <bfoster@redhat.com>2012-05-07 13:53:31 -0400
committerAnand Avati <avati@redhat.com>2012-06-07 19:07:52 -0700
commitcd439e79ca7b3b26b11fb894220550156936c354 (patch)
tree45a4db5abd538348e21502ff828f0fe0b6dac916 /xlators/cluster
parented648c3b393ec06d0da7c1a9af42286fb3cc978e (diff)
cluster/stripe: implement the coalesce stripe file format
The coalesce file format for cluster/stripe condenses the striped files to a contiguous layout. The elimination of holes in striped files eliminates space wasted via local filesystem preallocation heuristics and significantly improves read performance. Coalesce mode is implemented with a new 'coalesce' xlator option, which is user-configurable and disabled by default. The format of newly created files is marked with a new 'stripe-coalesce' xattr. Cluster/stripe handles/preserves the format of files regardless of the current mode of operation (i.e., a volume can simultaneously consist of coalesced and non-coalesced files). Files without the stripe-coalesce attribute are assumed to have the traditional format to provide backward compatibility.

extras/stripe-merge: support traditional and coalesce stripe formats

Update the stripe-merge recovery tool to handle the traditional and coalesced file formats. The format of the file is detected automatically (and verified) via the stripe-coalesce attributes.

BUG: 801887
Change-Id: I682f0b4e819f496ddb68c9a01c4de4688280fdf8
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-on: http://review.gluster.com/3282
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Amar Tumballi <amarts@redhat.com>
Reviewed-by: Anand Avati <avati@redhat.com>
Diffstat (limited to 'xlators/cluster')
-rw-r--r--xlators/cluster/stripe/src/stripe-helpers.c92
-rw-r--r--xlators/cluster/stripe/src/stripe.c339
-rw-r--r--xlators/cluster/stripe/src/stripe.h54
3 files changed, 407 insertions, 78 deletions
diff --git a/xlators/cluster/stripe/src/stripe-helpers.c b/xlators/cluster/stripe/src/stripe-helpers.c
index 1821832c2..336da793e 100644
--- a/xlators/cluster/stripe/src/stripe-helpers.c
+++ b/xlators/cluster/stripe/src/stripe-helpers.c
@@ -236,8 +236,6 @@ out:
return block_size;
}
-
-
int32_t
stripe_ctx_handle (xlator_t *this, call_frame_t *prev, stripe_local_t *local,
dict_t *dict)
@@ -246,7 +244,6 @@ stripe_ctx_handle (xlator_t *this, call_frame_t *prev, stripe_local_t *local,
data_t *data = NULL;
int32_t index = 0;
stripe_private_t *priv = NULL;
- int32_t ret = -1;
priv = this->private;
@@ -343,14 +340,31 @@ stripe_ctx_handle (xlator_t *this, call_frame_t *prev, stripe_local_t *local,
if (!local->fctx->xl_array[index])
local->fctx->xl_array[index] = prev->this;
}
- ret = 0;
+
+ sprintf(key, "trusted.%s.stripe-coalesce", this->name);
+ data = dict_get(dict, key);
+ if (!data) {
+ /*
+ * The file was probably created prior to coalesce support.
+ * Assume non-coalesce mode for this file to maintain backwards
+ * compatibility.
+ */
+ gf_log(this->name, GF_LOG_DEBUG, "missing stripe-coalesce "
+ "attr, assume non-coalesce mode");
+ local->fctx->stripe_coalesce = 0;
+ } else {
+ local->fctx->stripe_coalesce = data_to_int32(data);
+ }
+
+
out:
- return ret;
+ return 0;
}
int32_t
stripe_xattr_request_build (xlator_t *this, dict_t *dict, uint64_t stripe_size,
- uint32_t stripe_count, uint32_t stripe_index)
+ uint32_t stripe_count, uint32_t stripe_index,
+ uint32_t stripe_coalesce)
{
char key[256] = {0,};
int32_t ret = -1;
@@ -378,6 +392,14 @@ stripe_xattr_request_build (xlator_t *this, dict_t *dict, uint64_t stripe_size,
"failed to set %s in xattr_req dict", key);
goto out;
}
+
+ sprintf(key, "trusted.%s.stripe-coalesce", this->name);
+ ret = dict_set_int32(dict, key, stripe_coalesce);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "failed to set %s in xattr_req_dict", key);
+ goto out;
+ }
out:
return ret;
}
@@ -508,3 +530,61 @@ stripe_iatt_merge (struct iatt *from, struct iatt *to)
to->ia_atime = from->ia_atime;
return 0;
}
+
+/*
+ * Translate a logical offset in a striped file into the offset inside the
+ * backing file of the stripe member that stores it, for files kept in the
+ * coalesced (hole-free) format.
+ *
+ * One "line" is a full pass over every member: stripe_size * stripe_count
+ * bytes. Each complete line that precedes 'offset' contributes exactly one
+ * stripe_size chunk to a member's backing file, so the result is
+ * (number of preceding lines * stripe_size) + position inside the chunk.
+ *
+ * offset:       logical offset in the striped file. The arithmetic is done
+ *               unsigned, so negative offsets are not meaningful here.
+ * stripe_size:  size of one stripe chunk, in bytes.
+ * stripe_count: number of stripe members.
+ *
+ * Returns the coalesced offset. When the geometry is unusable (stripe_size
+ * of zero or a non-positive stripe_count) the offset is handed back
+ * unchanged instead of dividing by zero.
+ */
+off_t
+coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count)
+{
+	uint64_t line_size = 0;
+	uint64_t stripe_num = 0;
+
+	if (!stripe_size || stripe_count <= 0)
+		return offset;
+
+	/*
+	 * Keep the line size in 64 bits: a size_t would truncate the
+	 * product on hosts where size_t is 32 bits wide.
+	 */
+	line_size = stripe_size * (uint64_t) stripe_count;
+	stripe_num = offset / line_size;
+
+	return (stripe_num * stripe_size) + (offset % stripe_size);
+}
+
+/*
+ * Convert the size of one stripe member's coalesced backing file into the
+ * logical size of the striped file as implied by that member alone.
+ *
+ * size:         size of the member's backing file, in bytes.
+ * stripe_size:  size of one stripe chunk, in bytes.
+ * stripe_count: number of stripe members.
+ * stripe_index: position of this member within the stripe
+ *               (0 .. stripe_count - 1).
+ *
+ * Returns the estimated logical size. A size of zero is returned as is.
+ * If the geometry is unusable (zero stripe_size, non-positive
+ * stripe_count) or stripe_index falls outside the stripe (for instance the
+ * -1 "not found" value), the size is returned unchanged: converting it
+ * would either divide by zero or wrap the unsigned chunk counter.
+ */
+off_t
+uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
+		int stripe_index)
+{
+	uint64_t nr_full_stripe_chunks = 0, mod = 0;
+
+	if (!size)
+		return size;
+
+	if (!stripe_size || stripe_count <= 0 ||
+	    stripe_index < 0 || stripe_index >= stripe_count)
+		return size;
+
+	/*
+	 * Estimate the number of fully written stripes from the
+	 * local file size. Each stripe_size chunk corresponds to
+	 * a stripe.
+	 */
+	nr_full_stripe_chunks = (size / stripe_size) * stripe_count;
+	mod = size % stripe_size;
+
+	if (!mod) {
+		/*
+		 * There is no remainder, thus we could have overestimated
+		 * the size of the file in terms of chunks. Trim the number
+		 * of chunks by the following stripe members and leave it
+		 * up to those nodes to respond with a larger size (if
+		 * necessary).
+		 */
+		nr_full_stripe_chunks -= stripe_count -
+			(stripe_index + 1);
+		size = nr_full_stripe_chunks * stripe_size;
+	} else {
+		/*
+		 * There is a remainder and thus we own the last chunk of the
+		 * file. Add the preceding stripe members of the final stripe
+		 * along with the remainder to calculate the exact size.
+		 */
+		nr_full_stripe_chunks += stripe_index;
+		size = nr_full_stripe_chunks * stripe_size + mod;
+	}
+
+	return size;
+}
+
diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c
index a98e14e95..efee9444e 100644
--- a/xlators/cluster/stripe/src/stripe.c
+++ b/xlators/cluster/stripe/src/stripe.c
@@ -32,7 +32,6 @@
struct volume_options options[];
-
int32_t
stripe_sh_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno,
@@ -237,6 +236,8 @@ stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf_blocks += buf->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
if (local->postparent_size < postparent->ia_size)
@@ -326,9 +327,19 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
/* get stripe-size xattr on lookup. This would be required for
* open/read/write/pathinfo calls. Hence we send down the request
* even when type == IA_INVAL */
+
+ /*
+ * We aren't guaranteed to have xdata here. We need the format info for
+ * the file, so allocate xdata if necessary.
+ */
+ if (!xdata)
+ xdata = dict_new();
+ else
+ xdata = dict_ref(xdata);
+
if (xdata && (IA_ISREG (loc->inode->ia_type) ||
(loc->inode->ia_type == IA_INVAL))) {
- ret = stripe_xattr_request_build (this, xdata, 8, 4, 4);
+ ret = stripe_xattr_request_build (this, xdata, 8, 4, 4, 0);
if (ret)
gf_log (this->name , GF_LOG_ERROR, "Failed to build"
" xattr request for %s", loc->path);
@@ -344,6 +355,8 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
trav = trav->next;
}
+ dict_unref(xdata);
+
return 0;
err:
STRIPE_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
@@ -388,6 +401,9 @@ stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
local->stbuf_blocks += buf->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
}
@@ -416,6 +432,7 @@ stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
@@ -442,6 +459,13 @@ stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
frame->local = local;
local->call_count = priv->child_count;
+ if (IA_ISREG(loc->inode->ia_type)) {
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
while (trav) {
STACK_WIND (frame, stripe_stat_cbk, trav->xlator,
trav->xlator->fops->stat, loc, NULL);
@@ -583,6 +607,9 @@ stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->prebuf_blocks += prebuf->ia_blocks;
local->postbuf_blocks += postbuf->ia_blocks;
+ correct_file_size(prebuf, local->fctx, prev);
+ correct_file_size(postbuf, local->fctx, prev);
+
if (local->prebuf_size < prebuf->ia_size)
local->prebuf_size = prebuf->ia_size;
@@ -614,10 +641,12 @@ out:
int32_t
stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata)
{
- xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
+ int i, eof_idx;
+ off_t dest_offset, tmp_offset;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -626,7 +655,6 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
VALIDATE_OR_GOTO (loc->inode, err);
priv = this->private;
- trav = this->children;
if (priv->first_child_down) {
op_errno = ENOTCONN;
@@ -643,11 +671,51 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
frame->local = local;
local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND (frame, stripe_truncate_cbk, trav->xlator,
- trav->xlator->fops->truncate, loc, offset, NULL);
- trav = trav->next;
- }
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe context");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->fctx = fctx;
+ eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
+
+ for (i = 0; i < fctx->stripe_count; i++) {
+ if (!fctx->xl_array[i]) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "no xlator at index %d", i);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (fctx->stripe_coalesce) {
+ /*
+ * The node that owns EOF is truncated to the exact
+ * coalesced offset. Nodes prior to this index should
+ * be rounded up to the size of the complete stripe,
+ * while nodes after this index should be rounded down
+ * to the size of the previous stripe.
+ */
+ if (i < eof_idx)
+ tmp_offset = roof(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else if (i > eof_idx)
+ tmp_offset = floor(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else
+ tmp_offset = offset;
+
+ dest_offset = coalesced_offset(tmp_offset,
+ fctx->stripe_size, fctx->stripe_count);
+ } else {
+ dest_offset = offset;
+ }
+
+ STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
+ fctx->xl_array[i]->fops->truncate, loc, dest_offset,
+ NULL);
+ }
return 0;
err:
@@ -698,6 +766,9 @@ stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->prebuf_blocks += preop->ia_blocks;
local->postbuf_blocks += postop->ia_blocks;
+ correct_file_size(preop, local->fctx, prev);
+ correct_file_size(postop, local->fctx, prev);
+
if (local->prebuf_size < preop->ia_size)
local->prebuf_size = preop->ia_size;
if (local->postbuf_size < postop->ia_size)
@@ -733,6 +804,7 @@ stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
xlator_list_t *trav = NULL;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
@@ -766,6 +838,13 @@ stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
return 0;
}
+ if (IA_ISREG(loc->inode->ia_type)) {
+ inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
local->call_count = priv->child_count;
while (trav) {
STACK_WIND (frame, stripe_setattr_cbk,
@@ -862,6 +941,8 @@ stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->pre_buf.ia_blocks += prenewparent->ia_blocks;
local->post_buf.ia_blocks += postnewparent->ia_blocks;
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf.ia_size < buf->ia_size)
local->stbuf.ia_size = buf->ia_size;
@@ -947,6 +1028,7 @@ stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
stripe_private_t *priv = NULL;
stripe_local_t *local = NULL;
xlator_list_t *trav = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = EINVAL;
VALIDATE_OR_GOTO (frame, err);
@@ -977,6 +1059,11 @@ stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
local->call_count = priv->child_count;
+ inode_ctx_get(oldloc->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+
frame->local = local;
STACK_WIND (frame, stripe_first_rename_cbk, trav->xlator,
@@ -1367,7 +1454,6 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
stripe_private_t *priv = NULL;
call_frame_t *prev = NULL;
xlator_list_t *trav = NULL;
- stripe_fd_ctx_t *fctx = NULL;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -1399,10 +1485,16 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (uuid_is_null (local->ia_gfid))
uuid_copy (local->ia_gfid, buf->ia_gfid);
+ if (stripe_ctx_handle(this, prev, local, xdata))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from dict");
+
local->stbuf_blocks += buf->ia_blocks;
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
if (local->preparent_size < preparent->ia_size)
@@ -1441,23 +1533,10 @@ stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->postparent.ia_size = local->postparent_size;
local->stbuf.ia_size = local->stbuf_size;
local->stbuf.ia_blocks = local->stbuf_blocks;
- fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
- gf_stripe_mt_stripe_fd_ctx_t);
- if (!fctx) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto unwind;
- }
-
- fctx->stripe_size = local->stripe_size;
- fctx->stripe_count = priv->child_count;
- fctx->static_array = 1;
- fctx->xl_array = priv->xl_array;
inode_ctx_put (local->inode, this,
- (uint64_t)(long)fctx);
+ (uint64_t)(long) local->fctx);
}
-unwind:
STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
local->inode, &local->stbuf,
&local->preparent, &local->postparent, NULL);
@@ -1531,7 +1610,8 @@ stripe_mknod_first_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
ret = stripe_xattr_request_build (this, dict,
local->stripe_size,
- priv->child_count, i);
+ priv->child_count, i,
+ priv->coalesce);
if (ret)
gf_log (this->name, GF_LOG_ERROR,
"Failed to build xattr request");
@@ -1579,9 +1659,6 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
stripe_local_t *local = NULL;
int32_t op_errno = EINVAL;
int32_t i = 0;
- char size_key[256] = {0,};
- char index_key[256] = {0,};
- char count_key[256] = {0,};
dict_t *dict = NULL;
int ret = 0;
int need_unref = 0;
@@ -1631,15 +1708,6 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
be looked up */
local->call_count = priv->child_count;
- /* Send a setxattr request to nodes where the
- files are created */
- sprintf (size_key,
- "trusted.%s.stripe-size", this->name);
- sprintf (count_key,
- "trusted.%s.stripe-count", this->name);
- sprintf (index_key,
- "trusted.%s.stripe-index", this->name);
-
if (priv->xattr_supported) {
dict = dict_new ();
if (!dict) {
@@ -1653,7 +1721,7 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
ret = stripe_xattr_request_build (this, dict,
local->stripe_size,
priv->child_count,
- i);
+ i, priv->coalesce);
if (ret)
gf_log (this->name, GF_LOG_ERROR,
"failed to build xattr request");
@@ -1867,6 +1935,7 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t callcnt = 0;
stripe_local_t *local = NULL;
call_frame_t *prev = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -1880,6 +1949,14 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
{
callcnt = --local->call_count;
+ inode_ctx_get(inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR, "failed to get stripe "
+ "context");
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
if (op_ret == -1) {
gf_log (this->name, GF_LOG_DEBUG,
"%s returned error %s",
@@ -1903,6 +1980,8 @@ stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
+ correct_file_size(buf, fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
if (local->preparent_size < preparent->ia_size)
@@ -2023,7 +2102,6 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t callcnt = 0;
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- stripe_fd_ctx_t *fctx = NULL;
call_frame_t *prev = NULL;
xlator_list_t *trav = NULL;
@@ -2049,12 +2127,21 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
if (op_ret >= 0) {
+ if (IA_ISREG(buf->ia_type)) {
+ if (stripe_ctx_handle(this, prev, local, xdata))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from "
+ "dict");
+ }
+
local->op_ret = op_ret;
local->stbuf_blocks += buf->ia_blocks;
local->preparent_blocks += preparent->ia_blocks;
local->postparent_blocks += postparent->ia_blocks;
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
if (local->preparent_size < preparent->ia_size)
@@ -2092,23 +2179,13 @@ stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf.ia_size = local->stbuf_size;
local->stbuf.ia_blocks = local->stbuf_blocks;
- fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
- gf_stripe_mt_stripe_fd_ctx_t);
- if (!fctx) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto unwind;
- }
-
- fctx->stripe_size = local->stripe_size;
- fctx->stripe_count = priv->child_count;
- fctx->static_array = 1;
- fctx->xl_array = priv->xl_array;
- inode_ctx_put (local->inode, this,
- (uint64_t)(long)fctx);
+ stripe_copy_xl_array(local->fctx->xl_array,
+ priv->xl_array,
+ local->fctx->stripe_count);
+ inode_ctx_put(local->inode, this,
+ (uint64_t) local->fctx);
}
- unwind:
/* Create itself has failed.. so return
without setxattring */
STRIPE_STACK_UNWIND (create, frame, local->op_ret,
@@ -2214,14 +2291,14 @@ stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
ret = stripe_xattr_request_build (this, dict,
local->stripe_size,
priv->child_count,
- i);
+ i, priv->coalesce);
if (ret)
gf_log (this->name, GF_LOG_ERROR,
"failed to build xattr request");
} else {
dict = local->xattr;
}
-
+
STACK_WIND (frame, stripe_create_cbk, trav->xlator,
trav->xlator->fops->create, &local->loc,
local->flags, local->mode, local->umask, local->fd,
@@ -2310,7 +2387,7 @@ stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
ret = stripe_xattr_request_build (this, dict,
local->stripe_size,
priv->child_count,
- i);
+ i, priv->coalesce);
if (ret)
gf_log (this->name, GF_LOG_ERROR,
"failed to build xattr request");
@@ -2743,6 +2820,9 @@ stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->prebuf_blocks += prebuf->ia_blocks;
local->postbuf_blocks += postbuf->ia_blocks;
+ correct_file_size(prebuf, local->fctx, prev);
+ correct_file_size(postbuf, local->fctx, prev);
+
if (local->prebuf_size < prebuf->ia_size)
local->prebuf_size = prebuf->ia_size;
@@ -2777,6 +2857,7 @@ stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
@@ -2793,6 +2874,14 @@ stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict
op_errno = ENOMEM;
goto err;
}
+
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ op_errno = EINVAL;
+ goto err;
+ }
+ local->fctx = fctx;
+
local->op_ret = -1;
frame->local = local;
local->call_count = priv->child_count;
@@ -2846,6 +2935,9 @@ stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->stbuf = *buf;
local->stbuf_blocks += buf->ia_blocks;
+
+ correct_file_size(buf, local->fctx, prev);
+
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
}
@@ -2877,6 +2969,7 @@ stripe_fstat (call_frame_t *frame,
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
xlator_list_t *trav = NULL;
+ stripe_fd_ctx_t *fctx = NULL;
int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
@@ -2897,6 +2990,13 @@ stripe_fstat (call_frame_t *frame,
frame->local = local;
local->call_count = priv->child_count;
+ if (IA_ISREG(fd->inode->ia_type)) {
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx)
+ goto err;
+ local->fctx = fctx;
+ }
+
while (trav) {
STACK_WIND (frame, stripe_fstat_cbk, trav->xlator,
trav->xlator->fops->fstat, fd, NULL);
@@ -2915,8 +3015,10 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, d
{
stripe_local_t *local = NULL;
stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
+ stripe_fd_ctx_t *fctx = NULL;
+ int i, eof_idx;
+ off_t dest_offset, tmp_offset;
+ int32_t op_errno = 1;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -2924,7 +3026,6 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, d
VALIDATE_OR_GOTO (fd->inode, err);
priv = this->private;
- trav = this->children;
/* Initialization */
local = mem_get0 (this->local_pool);
@@ -2936,11 +3037,49 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, d
frame->local = local;
local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND (frame, stripe_truncate_cbk, trav->xlator,
- trav->xlator->fops->ftruncate, fd, offset, NULL);
- trav = trav->next;
- }
+ inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
+ if (!fctx) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe context");
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (!fctx->stripe_count) {
+ gf_log(this->name, GF_LOG_ERROR, "no stripe count");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->fctx = fctx;
+ eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
+
+ for (i = 0; i < fctx->stripe_count; i++) {
+ if (!fctx->xl_array[i]) {
+ gf_log(this->name, GF_LOG_ERROR, "no xlator at index "
+ "%d", i);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (fctx->stripe_coalesce) {
+ if (i < eof_idx)
+ tmp_offset = roof(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else if (i > eof_idx)
+ tmp_offset = floor(offset, fctx->stripe_size *
+ fctx->stripe_count);
+ else
+ tmp_offset = offset;
+
+ dest_offset = coalesced_offset(tmp_offset,
+ fctx->stripe_size, fctx->stripe_count);
+ } else {
+ dest_offset = offset;
+ }
+
+ STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
+ fctx->xl_array[i]->fops->ftruncate, fd, dest_offset,
+ NULL);
+ }
return 0;
err:
@@ -3045,6 +3184,7 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt tmp_stbuf = {0,};
struct iobref *tmp_iobref = NULL;
struct iobuf *iobuf = NULL;
+ call_frame_t *prev = NULL;
if (!this || !frame || !frame->local) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -3052,13 +3192,16 @@ stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
}
local = frame->local;
+ prev = cookie;
LOCK (&frame->lock);
{
callcnt = --local->call_count;
- if (op_ret != -1)
+ if (op_ret != -1) {
+ correct_file_size(buf, local->fctx, prev);
if (local->stbuf_size < buf->ia_size)
local->stbuf_size = buf->ia_size;
+ }
}
UNLOCK (&frame->lock);
@@ -3150,6 +3293,7 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *tmp_stbuf_p = NULL; //need it for a warning
struct iobref *tmp_iobref = NULL;
stripe_fd_ctx_t *fctx = NULL;
+ call_frame_t *prev = NULL;
if (!this || !frame || !frame->local || !cookie) {
gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
@@ -3158,6 +3302,7 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local = frame->local;
index = local->node_index;
+ prev = cookie;
mframe = local->orig_frame;
if (!mframe)
goto out;
@@ -3177,6 +3322,9 @@ stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
mlocal->replies[index].stbuf = *stbuf;
mlocal->replies[index].count = count;
mlocal->replies[index].vector = iov_dup (vector, count);
+
+ correct_file_size(stbuf, fctx, prev);
+
if (local->stbuf_size < stbuf->ia_size)
local->stbuf_size = stbuf->ia_size;
local->stbuf_blocks += stbuf->ia_blocks;
@@ -3289,6 +3437,7 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
uint64_t stripe_size = 0;
off_t rounded_start = 0;
off_t frame_offset = offset;
+ off_t dest_offset = 0;
stripe_local_t *local = NULL;
call_frame_t *rframe = NULL;
stripe_local_t *rlocal = NULL;
@@ -3361,9 +3510,16 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
rlocal->readv_size = frame_size;
rframe->local = rlocal;
idx = (index % fctx->stripe_count);
+
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(frame_offset,
+ stripe_size, fctx->stripe_count);
+ else
+ dest_offset = frame_offset;
+
STACK_WIND (rframe, stripe_readv_cbk, fctx->xl_array[idx],
fctx->xl_array[idx]->fops->readv,
- fd, frame_size, frame_offset, flags, xdata);
+ fd, frame_size, dest_offset, flags, xdata);
frame_offset += frame_size;
}
@@ -3410,11 +3566,27 @@ stripe_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret += op_ret;
local->post_buf = *postbuf;
local->pre_buf = *prebuf;
+
+ local->prebuf_blocks += prebuf->ia_blocks;
+ local->postbuf_blocks += postbuf->ia_blocks;
+
+ correct_file_size(prebuf, local->fctx, prev);
+ correct_file_size(postbuf, local->fctx, prev);
+
+ if (local->prebuf_size < prebuf->ia_size)
+ local->prebuf_size = prebuf->ia_size;
+ if (local->postbuf_size < postbuf->ia_size)
+ local->postbuf_size = postbuf->ia_size;
}
}
UNLOCK (&frame->lock);
if ((callcnt == local->wind_count) && local->unwind) {
+ local->pre_buf.ia_size = local->prebuf_size;
+ local->pre_buf.ia_blocks = local->prebuf_blocks;
+ local->post_buf.ia_size = local->postbuf_size;
+ local->post_buf.ia_blocks = local->postbuf_blocks;
+
STRIPE_STACK_UNWIND (writev, frame, local->op_ret,
local->op_errno, &local->pre_buf,
&local->post_buf, NULL);
@@ -3440,6 +3612,7 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
off_t fill_size = 0;
uint64_t stripe_size = 0;
uint64_t tmp_fctx = 0;
+ off_t dest_offset = 0;
VALIDATE_OR_GOTO (frame, err);
VALIDATE_OR_GOTO (this, err);
@@ -3469,6 +3642,7 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
}
frame->local = local;
local->stripe_size = stripe_size;
+ local->fctx = fctx;
if (!stripe_size) {
gf_log (this->name, GF_LOG_DEBUG,
@@ -3505,9 +3679,15 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
if (remaining_size == 0)
local->unwind = 1;
+ if (fctx->stripe_coalesce)
+ dest_offset = coalesced_offset(offset + offset_offset,
+ local->stripe_size, fctx->stripe_count);
+ else
+ dest_offset = offset + offset_offset;
+
STACK_WIND (frame, stripe_writev_cbk, fctx->xl_array[idx],
fctx->xl_array[idx]->fops->writev, fd, tmp_vec,
- tmp_count, offset + offset_offset, flags, iobref,
+ tmp_count, dest_offset, flags, iobref,
xdata);
GF_FREE (tmp_vec);
@@ -3859,10 +4039,15 @@ stripe_readdirp_lookup_cbk (call_frame_t *frame, void *cookie,
local->op_ret = op_ret;
goto unlock;
}
+
+ if (stripe_ctx_handle(this, prev, local, xattr))
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error getting fctx info from dict.");
+
+ correct_file_size(stbuf, local->fctx, prev);
+
stripe_iatt_merge (stbuf, &entry->d_stat);
local->stbuf_blocks += stbuf->ia_blocks;
-
- stripe_ctx_handle (this, prev, local, xattr);
}
unlock:
UNLOCK(&frame->lock);
@@ -3957,7 +4142,7 @@ unlock:
xattrs = dict_new ();
if (xattrs)
- (void) stripe_xattr_request_build (this, xattrs, 0, 0, 0);
+ (void) stripe_xattr_request_build (this, xattrs, 0, 0, 0, 0);
count = op_ret;
list_for_each_entry_safe (local_entry, tmp_entry,
(&local->entries.list), list) {
@@ -4165,6 +4350,9 @@ reconfigure (xlator_t *this, dict_t *options)
goto unlock;
}
}
+
+ GF_OPTION_RECONF("coalesce", priv->coalesce, options, bool,
+ unlock);
}
unlock:
UNLOCK (&priv->lock);
@@ -4285,6 +4473,8 @@ init (xlator_t *this)
/* notify related */
priv->nodes_down = priv->child_count;
+ GF_OPTION_INIT("coalesce", priv->coalesce, bool, out);
+
this->local_pool = mem_pool_new (stripe_local_t, 128);
if (!this->local_pool) {
ret = -1;
@@ -4768,5 +4958,12 @@ struct volume_options options[] = {
.type = GF_OPTION_TYPE_BOOL,
.default_value = "true"
},
+ { .key = {"coalesce"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .description = "Enable coalesce mode to flatten striped files as "
+ "stored on the server (i.e., eliminate holes caused "
+ "by the traditional format)."
+ },
{ .key = {NULL} },
};
diff --git a/xlators/cluster/stripe/src/stripe.h b/xlators/cluster/stripe/src/stripe.h
index cb05eb56f..1b9e660c1 100644
--- a/xlators/cluster/stripe/src/stripe.h
+++ b/xlators/cluster/stripe/src/stripe.h
@@ -101,6 +101,7 @@ struct stripe_private {
int8_t child_count;
int8_t *state; /* Current state of child node */
gf_boolean_t xattr_supported; /* default yes */
+ gf_boolean_t coalesce;
char vol_uuid[UUID_SIZE + 1];
};
@@ -119,6 +120,7 @@ struct readv_replies {
typedef struct _stripe_fd_ctx {
off_t stripe_size;
int stripe_count;
+ int stripe_coalesce;
int static_array;
xlator_t **xl_array;
} stripe_fd_ctx_t;
@@ -214,13 +216,41 @@ struct stripe_local {
typedef struct stripe_local stripe_local_t;
typedef struct stripe_private stripe_private_t;
+/*
+ * Find which stripe member a reply came from: scan the first
+ * fctx->stripe_count entries of fctx->xl_array for the translator recorded
+ * in prev->this and return its position. Returns -1 when no entry matches.
+ */
+static inline int32_t stripe_get_frame_index(stripe_fd_ctx_t *fctx,
+					     call_frame_t *prev)
+{
+	int32_t pos;
+
+	for (pos = 0; pos < fctx->stripe_count; pos++)
+		if (fctx->xl_array[pos] == prev->this)
+			return pos;
+
+	return -1;
+}
+
+/*
+ * Copy the first 'count' translator pointers from src into dst, in
+ * ascending order. Nothing is copied when count is zero or negative.
+ */
+static inline void stripe_copy_xl_array(xlator_t **dst, xlator_t **src,
+					int count)
+{
+	int left;
+
+	for (left = count; left > 0; left--)
+		*dst++ = *src++;
+}
+
void stripe_local_wipe (stripe_local_t *local);
int32_t stripe_ctx_handle (xlator_t *this, call_frame_t *prev,
stripe_local_t *local, dict_t *dict);
void stripe_aggregate_xattr (dict_t *dst, dict_t *src);
int32_t stripe_xattr_request_build (xlator_t *this, dict_t *dict,
uint64_t stripe_size, uint32_t stripe_count,
- uint32_t stripe_index);
+ uint32_t stripe_index,
+ uint32_t stripe_coalesce);
int32_t stripe_get_matching_bs (const char *path, stripe_private_t *priv);
int set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data);
int32_t stripe_iatt_merge (struct iatt *from, struct iatt *to);
@@ -229,5 +259,27 @@ int32_t stripe_fill_pathinfo_xattr (xlator_t *this, stripe_local_t *local,
int32_t stripe_free_xattr_str (stripe_local_t *local);
int32_t stripe_xattr_aggregate (char *buffer, stripe_local_t *local,
int32_t *total);
+off_t coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count);
+off_t uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
+ int stripe_index);
+
+/*
+ * Adjust the size attribute for files if coalesce is enabled.
+ *
+ * buf:  attributes returned by one stripe member; buf->ia_size is rewritten
+ *       in place from the member's backing-file size to the logical size
+ *       computed by uncoalesced_size().
+ * fctx: stripe context of the file; may be NULL.
+ * prev: frame of the member that replied, used to find its stripe index.
+ *
+ * buf is left untouched when it does not describe a regular file, when
+ * there is no stripe context, when the file is not in coalesced format, or
+ * when the replying translator is not found in fctx->xl_array (the lookup
+ * returns -1, which is not a valid index to convert with).
+ */
+static inline void correct_file_size(struct iatt *buf, stripe_fd_ctx_t *fctx,
+				     call_frame_t *prev)
+{
+	int index;
+
+	if (!IA_ISREG(buf->ia_type))
+		return;
+
+	if (!fctx || !fctx->stripe_coalesce)
+		return;
+
+	index = stripe_get_frame_index(fctx, prev);
+	if (index < 0)
+		return;
+
+	buf->ia_size = uncoalesced_size(buf->ia_size, fctx->stripe_size,
+					fctx->stripe_count, index);
+}
#endif /* _STRIPE_H_ */