diff options
Diffstat (limited to 'xlators/cluster/stripe/src/stripe.c')
| -rw-r--r-- | xlators/cluster/stripe/src/stripe.c | 6099 |
1 files changed, 4220 insertions, 1879 deletions
diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c index ee230a9c3..69b510e23 100644 --- a/xlators/cluster/stripe/src/stripe.c +++ b/xlators/cluster/stripe/src/stripe.c @@ -1,305 +1,182 @@ /* - Copyright (c) 2007-2009 Z RESEARCH, Inc. <http://www.zresearch.com> + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> This file is part of GlusterFS. - GlusterFS is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published - by the Free Software Foundation; either version 3 of the License, - or (at your option) any later version. - - GlusterFS is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program. If not, see - <http://www.gnu.org/licenses/>. + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. */ /** * xlators/cluster/stripe: - * Stripe translator, stripes the data accross its child nodes, - * as per the options given in the volfile. The striping works - * fairly simple. It writes files at different offset as per - * calculation. So, 'ls -l' output at the real posix level will - * show file size bigger than the actual size. But when one does + * Stripe translator, stripes the data across its child nodes, + * as per the options given in the volfile. The striping works + * fairly simple. It writes files at different offset as per + * calculation. So, 'ls -l' output at the real posix level will + * show file size bigger than the actual size. But when one does * 'df' or 'du <file>', real size of the file on the server is shown. * * WARNING: * Stripe translator can't regenerate data if a child node gets disconnected. - * So, no 'self-heal' for stripe. Hence the advice, use stripe only when its - * very much necessary, or else, use it in combination with AFR, to have a - * backup copy. - */ - -/* TODO: - * 1. Implement basic self-heal ability to manage the basic backend - * layout missmatch. - * + * So, no 'self-heal' for stripe. Hence the advice, use stripe only when its + * very much necessary, or else, use it in combination with AFR, to have a + * backup copy. */ +#include <fnmatch.h> #include "stripe.h" +#include "libxlator.h" +#include "byte-order.h" +#include "statedump.h" -/** - * stripe_get_matching_bs - Get the matching block size for the given path. - */ -int32_t -stripe_get_matching_bs (const char *path, struct stripe_options *opts, - uint64_t default_bs) -{ - struct stripe_options *trav = NULL; - char *pathname = NULL; - uint64_t block_size = 0; - - block_size = default_bs; - pathname = strdup (path); - trav = opts; - - while (trav) { - if (!fnmatch (trav->path_pattern, pathname, FNM_NOESCAPE)) { - block_size = trav->block_size; - break; - } - trav = trav->next; - } - free (pathname); - - return block_size; -} - +struct volume_options options[]; -/* - * stripe_common_cbk - - */ int32_t -stripe_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) -{ - STACK_UNWIND (frame, op_ret, op_errno); - return 0; -} - -/** - * stripe_stack_unwind_cbk - This function is used for all the _cbk without - * any extra arguments (other than the minimum given) - * This is called from functions like fsync,unlink,rmdir etc. - * - */ -int32_t -stripe_stack_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +stripe_sh_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preop, struct iatt *postop, dict_t *xdata) { - int32_t callcnt = 0; + int callcnt = -1; stripe_local_t *local = NULL; + if (!this || !frame || !frame->local) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + local = frame->local; LOCK (&frame->lock); { callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned %s", - ((call_frame_t *)cookie)->this->name, - strerror (op_errno)); - local->op_errno = op_errno; - if (op_errno == ENOTCONN) - local->failed = 1; - } - if (op_ret >= 0) - local->op_ret = op_ret; } UNLOCK (&frame->lock); if (!callcnt) { - if (local->failed) - local->op_ret = -1; - - if (local->loc.path) - loc_wipe (&local->loc); - if (local->loc2.path) - loc_wipe (&local->loc2); - - STACK_UNWIND (frame, local->op_ret, local->op_errno); + STRIPE_STACK_DESTROY (frame); } +out: return 0; } -int32_t -stripe_common_buf_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct stat *buf) -{ - STACK_UNWIND (frame, op_ret, op_errno, buf); - return 0; -} - -/** - * stripe_stack_unwind_buf_cbk - This function is used for all the _cbk with - * 'struct stat *buf' as extra argument (other than minimum) - * This is called from functions like, chmod, fchmod, chown, fchown, - * truncate, ftruncate, utimens etc. - * - * @cookie - this argument should be always 'xlator_t *' of child node - */ -int32_t -stripe_stack_unwind_buf_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct stat *buf) +int32_t +stripe_sh_make_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int32_t callcnt = 0; stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - local = frame->local; - - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - ((call_frame_t *)cookie)->this->name, - strerror (op_errno)); - local->op_errno = op_errno; - if (op_errno == ENOTCONN) - local->failed = 1; - } - - if (op_ret == 0) { - local->op_ret = 0; - if (local->stbuf.st_blksize == 0) { - local->stbuf = *buf; - /* Because st_blocks gets added again */ - local->stbuf.st_blocks = 0; - } - - if (FIRST_CHILD(this) == - ((call_frame_t *)cookie)->this) { - /* Always, pass the inode number of - first child to the above layer */ - local->stbuf.st_ino = buf->st_ino; - local->stbuf.st_mtime = buf->st_mtime; - } - - local->stbuf.st_blocks += buf->st_blocks; - if (local->stbuf.st_size < buf->st_size) - local->stbuf.st_size = buf->st_size; - if (local->stbuf.st_blksize != buf->st_blksize) { - /* TODO: add to blocks in terms of - original block size */ - } - } + if (!frame || !frame->local || !cookie || !this) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; } - UNLOCK (&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - if (local->loc.path) - loc_wipe (&local->loc); - if (local->loc2.path) - loc_wipe (&local->loc2); + prev = cookie; + local = frame->local; - STACK_UNWIND (frame, local->op_ret, local->op_errno, - &local->stbuf); - } + STACK_WIND (frame, stripe_sh_chown_cbk, prev->this, + prev->this->fops->setattr, &local->loc, + &local->stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL); +out: return 0; } -/* In case of symlink, mknod, the file is created on just first node */ -int32_t -stripe_common_inode_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, inode_t *inode, - struct stat *buf) +int32_t +stripe_entry_self_heal (call_frame_t *frame, xlator_t *this, + stripe_local_t *local) { - STACK_UNWIND (frame, op_ret, op_errno, inode, buf); - return 0; -} + xlator_list_t *trav = NULL; + call_frame_t *rframe = NULL; + stripe_local_t *rlocal = NULL; + stripe_private_t *priv = NULL; + dict_t *xdata = NULL; + int ret = 0; -/** - * stripe_stack_unwind_inode_cbk - This is called by the function like, - * link (), symlink (), mkdir (), mknod () - * This creates a inode for new inode. It keeps a list of all - * the inodes received from the child nodes. It is used while - * forwarding any fops to child nodes. - * - */ -int32_t -stripe_stack_unwind_inode_cbk (call_frame_t *frame, void *cookie, - xlator_t *this, int32_t op_ret, - int32_t op_errno, inode_t *inode, - struct stat *buf) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; + if (!local || !this || !frame) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - local = frame->local; + if (!(IA_ISREG (local->stbuf.ia_type) || + IA_ISDIR (local->stbuf.ia_type))) + return 0; - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - ((call_frame_t *)cookie)->this->name, - strerror (op_errno)); - local->op_errno = op_errno; - if (op_errno == ENOTCONN) - local->failed = 1; - } - - if (op_ret >= 0) { - local->op_ret = 0; + priv = this->private; + trav = this->children; + rframe = copy_frame (frame); + if (!rframe) { + goto out; + } + rlocal = mem_get0 (this->local_pool); + if (!rlocal) { + goto out; + } + rframe->local = rlocal; + rlocal->call_count = priv->child_count; + loc_copy (&rlocal->loc, &local->loc); + memcpy (&rlocal->stbuf, &local->stbuf, sizeof (struct iatt)); - if (local->stbuf.st_blksize == 0) { - local->inode = inode; - local->stbuf = *buf; - /* Because st_blocks gets added again */ - local->stbuf.st_blocks = 0; - } - if (FIRST_CHILD(this) == - ((call_frame_t *)cookie)->this) { - local->stbuf.st_ino = buf->st_ino; - local->stbuf.st_mtime = buf->st_mtime; - } + xdata = dict_new (); + if (!xdata) + goto out; - local->stbuf.st_blocks += buf->st_blocks; - if (local->stbuf.st_size < buf->st_size) - local->stbuf.st_size = buf->st_size; - if (local->stbuf.st_blksize != buf->st_blksize) { - /* TODO: add to blocks in terms of - original block size */ - } + ret = dict_set_static_bin (xdata, "gfid-req", local->stbuf.ia_gfid, 16); + if (ret) + gf_log (this->name, GF_LOG_WARNING, + "%s: failed to set gfid-req", local->loc.path); + + while (trav) { + if (IA_ISREG (local->stbuf.ia_type)) { + STACK_WIND (rframe, stripe_sh_make_entry_cbk, + trav->xlator, trav->xlator->fops->mknod, + &local->loc, + st_mode_from_ia (local->stbuf.ia_prot, + local->stbuf.ia_type), + 0, 0, xdata); } + if (IA_ISDIR (local->stbuf.ia_type)) { + STACK_WIND (rframe, stripe_sh_make_entry_cbk, + trav->xlator, trav->xlator->fops->mkdir, + &local->loc, + st_mode_from_ia (local->stbuf.ia_prot, + local->stbuf.ia_type), + 0, xdata); + } + trav = trav->next; } - UNLOCK (&frame->lock); - if (!callcnt) { - if (local->failed) - local->op_ret = -1; + if (xdata) + dict_unref (xdata); + return 0; - STACK_UNWIND (frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf); - } +out: + if (rframe) + STRIPE_STACK_DESTROY (rframe); + if (xdata) + dict_unref (xdata); return 0; } -int32_t -stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + +int32_t +stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, - struct stat *buf, dict_t *dict) + struct iatt *buf, dict_t *xdata, struct iatt *postparent) { - int32_t callcnt = 0; - dict_t *tmp_dict = NULL; - inode_t *tmp_inode = NULL; - stripe_local_t *local = NULL; - call_frame_t *prev = NULL; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + int ret = 0; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } prev = cookie; local = frame->local; @@ -307,89 +184,115 @@ stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, LOCK (&frame->lock); { callcnt = --local->call_count; - + if (op_ret == -1) { if (op_errno != ENOENT) - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s", prev->this->name, strerror (op_errno)); if (local->op_errno != ESTALE) local->op_errno = op_errno; - if ((op_errno == ENOTCONN) || (op_errno == ESTALE)) + if (((op_errno != ENOENT) && (op_errno != ENOTCONN)) || + (prev->this == FIRST_CHILD (this))) local->failed = 1; - /* TODO: bring in self-heal ability */ - /* - * if (local->op_ret == 0) { - * if (S_ISREG (local->stbuf.st_mode) || - * S_ISDIR (local->stbuf.st_mode)) - * local->entry_self_heal_needed = 1; - * } - */ - } - + if (op_errno == ENOENT) + local->entry_self_heal_needed = 1; + } + if (op_ret >= 0) { local->op_ret = 0; + if (IA_ISREG (buf->ia_type)) { + ret = stripe_ctx_handle (this, prev, local, + xdata); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Error getting fctx info from" + " dict"); + } - if (local->stbuf.st_blksize == 0) { + if (FIRST_CHILD(this) == prev->this) { + local->stbuf = *buf; + local->postparent = *postparent; local->inode = inode_ref (inode); - local->stbuf = *buf; - /* Because st_blocks gets added again */ - local->stbuf.st_blocks = 0; + if (xdata) + local->xdata = dict_ref (xdata); + if (local->xattr) { + stripe_aggregate_xattr (local->xdata, + local->xattr); + dict_unref (local->xattr); + local->xattr = NULL; + } } - if (FIRST_CHILD(this) == prev->this) { - local->stbuf.st_ino = buf->st_ino; - local->stbuf.st_mtime = buf->st_mtime; - if (local->dict) - dict_unref (local->dict); - local->dict = dict_ref (dict); - } else { - if (!local->dict) - local->dict = dict_ref (dict); + + if (!local->xdata && !local->xattr) { + local->xattr = dict_ref (xdata); + } else if (local->xdata) { + stripe_aggregate_xattr (local->xdata, xdata); + } else if (local->xattr) { + stripe_aggregate_xattr (local->xattr, xdata); } - local->stbuf.st_blocks += buf->st_blocks; - if (local->stbuf.st_size < buf->st_size) - local->stbuf.st_size = buf->st_size; - if (local->stbuf.st_blksize != buf->st_blksize) { - /* TODO: add to blocks in terms of - original block size */ + + local->stbuf_blocks += buf->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; + + correct_file_size(buf, local->fctx, prev); + + if (local->stbuf_size < buf->ia_size) + local->stbuf_size = buf->ia_size; + if (local->postparent_size < postparent->ia_size) + local->postparent_size = postparent->ia_size; + + if (uuid_is_null (local->ia_gfid)) + uuid_copy (local->ia_gfid, buf->ia_gfid); + + /* Make sure the gfid on all the nodes are same */ + if (uuid_compare (local->ia_gfid, buf->ia_gfid)) { + gf_log (this->name, GF_LOG_WARNING, + "%s: gfid different on subvolume %s", + local->loc.path, prev->this->name); } } } UNLOCK (&frame->lock); if (!callcnt) { + if (local->op_ret == 0 && local->entry_self_heal_needed && + !uuid_is_null (local->loc.inode->gfid)) + stripe_entry_self_heal (frame, this, local); + if (local->failed) local->op_ret = -1; - tmp_dict = local->dict; - tmp_inode = local->inode; - - STACK_UNWIND (frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf, local->dict); + if (local->op_ret != -1) { + local->stbuf.ia_blocks = local->stbuf_blocks; + local->stbuf.ia_size = local->stbuf_size; + local->postparent.ia_blocks = local->postparent_blocks; + local->postparent.ia_size = local->postparent_size; + inode_ctx_put (local->inode, this, + (uint64_t) (long)local->fctx); + } - if (tmp_inode) - inode_unref (tmp_inode); - if (tmp_dict) - dict_unref (tmp_dict); + STRIPE_STACK_UNWIND (lookup, frame, local->op_ret, + local->op_errno, local->inode, + &local->stbuf, local->xdata, + &local->postparent); } - +out: return 0; } - -/** - * stripe_lookup - - */ -int32_t -stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *xattr_req) +int32_t +stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, + dict_t *xdata) { - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - stripe_private_t *priv = NULL; - char send_lookup_to_all = 0; - int32_t op_errno = 1; + stripe_local_t *local = NULL; + xlator_list_t *trav = NULL; + stripe_private_t *priv = NULL; + int32_t op_errno = EINVAL; + int64_t filesize = 0; + int ret = 0; + uint64_t tmpctx = 0; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -401,172 +304,136 @@ stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, trav = this->children; /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } local->op_ret = -1; frame->local = local; + loc_copy (&local->loc, loc); - if ((!loc->inode->st_mode) || S_ISDIR (loc->inode->st_mode) || - S_ISREG (loc->inode->st_mode)) { - send_lookup_to_all = 1; - } + inode_ctx_get (local->inode, this, &tmpctx); + if (tmpctx) + local->fctx = (stripe_fd_ctx_t*) (long)tmpctx; - if (send_lookup_to_all) { - /* Everytime in stripe lookup, all child nodes - should be looked up */ - local->call_count = priv->child_count; - while (trav) { - STACK_WIND (frame, stripe_lookup_cbk, trav->xlator, - trav->xlator->fops->lookup, - loc, xattr_req); - trav = trav->next; - } - } else { - local->call_count = 1; - - STACK_WIND (frame, stripe_lookup_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->lookup, - loc, xattr_req); + /* quick-read friendly changes */ + if (xdata && dict_get (xdata, GF_CONTENT_KEY)) { + ret = dict_get_int64 (xdata, GF_CONTENT_KEY, &filesize); + if (!ret && (filesize > priv->block_size)) + dict_del (xdata, GF_CONTENT_KEY); } - - return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL, NULL); - return 0; -} - -/** - * stripe_stat - - */ -int32_t -stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc) -{ - int send_lookup_to_all = 0; - xlator_list_t *trav = NULL; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - int32_t op_errno = 1; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (loc->inode, err); - - priv = this->private; - trav = this->children; + /* get stripe-size xattr on lookup. This would be required for + * open/read/write/pathinfo calls. Hence we send down the request + * even when type == IA_INVAL */ + + /* + * We aren't guaranteed to have xdata here. We need the format info for + * the file, so allocate xdata if necessary. + */ + if (!xdata) + xdata = dict_new(); + else + xdata = dict_ref(xdata); + + if (xdata && (IA_ISREG (loc->inode->ia_type) || + (loc->inode->ia_type == IA_INVAL))) { + ret = stripe_xattr_request_build (this, xdata, 8, 4, 4, 0); + if (ret) + gf_log (this->name , GF_LOG_ERROR, "Failed to build" + " xattr request for %s", loc->path); - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; } - if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode)) - send_lookup_to_all = 1; - - if (!send_lookup_to_all) { - STACK_WIND (frame, stripe_common_buf_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->stat, loc); - } else { - /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->inode = loc->inode; - local->call_count = priv->child_count; - - while (trav) { - STACK_WIND (frame, stripe_stack_unwind_buf_cbk, - trav->xlator, trav->xlator->fops->stat, - loc); - trav = trav->next; - } + /* Everytime in stripe lookup, all child nodes + should be looked up */ + local->call_count = priv->child_count; + while (trav) { + STACK_WIND (frame, stripe_lookup_cbk, trav->xlator, + trav->xlator->fops->lookup, loc, xdata); + trav = trav->next; } + dict_unref(xdata); + return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); +err: + STRIPE_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL); return 0; } -/** - * stripe_chmod - - */ int32_t -stripe_chmod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode) +stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) { - int send_fop_to_all = 0; - xlator_list_t *trav = NULL; - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - int32_t op_errno = 1; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (loc->inode, err); + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + prev = cookie; + local = frame->local; - priv = this->private; - trav = this->children; + LOCK (&frame->lock); + { + callcnt = --local->call_count; - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; - } + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "%s returned error %s", + prev->this->name, strerror (op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || + (prev->this == FIRST_CHILD (this))) + local->failed = 1; + } - if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode)) - send_fop_to_all = 1; + if (op_ret == 0) { + local->op_ret = 0; - if (!send_fop_to_all) { - STACK_WIND (frame, stripe_common_buf_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->chmod, loc, mode); - } else { - /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); - if (!local) { - op_errno = ENOMEM; - goto err; + if (FIRST_CHILD(this) == prev->this) { + local->stbuf = *buf; + } + + local->stbuf_blocks += buf->ia_blocks; + + correct_file_size(buf, local->fctx, prev); + + if (local->stbuf_size < buf->ia_size) + local->stbuf_size = buf->ia_size; } - local->op_ret = -1; - frame->local = local; - local->inode = loc->inode; - local->call_count = priv->child_count; + } + UNLOCK (&frame->lock); - while (trav) { - STACK_WIND (frame, stripe_stack_unwind_buf_cbk, - trav->xlator, trav->xlator->fops->chmod, - loc, mode); - trav = trav->next; + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + + if (local->op_ret != -1) { + local->stbuf.ia_size = local->stbuf_size; + local->stbuf.ia_blocks = local->stbuf_blocks; } + + STRIPE_STACK_UNWIND (stat, frame, local->op_ret, + local->op_errno, &local->stbuf, NULL); } - return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); +out: return 0; } - -/** - * stripe_chown - - */ int32_t -stripe_chown (call_frame_t *frame, xlator_t *this, loc_t *loc, uid_t uid, - gid_t gid) +stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { - int send_fop_to_all = 0; xlator_list_t *trav = NULL; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; - int32_t op_errno = 1; + stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = EINVAL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -582,49 +449,48 @@ stripe_chown (call_frame_t *frame, xlator_t *this, loc_t *loc, uid_t uid, goto err; } - if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode)) - send_fop_to_all = 1; + /* Initialization */ + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + local->call_count = priv->child_count; - if (!send_fop_to_all) { - STACK_WIND (frame, stripe_common_buf_cbk, trav->xlator, - trav->xlator->fops->chown, loc, uid, gid); - } else { - /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->inode = loc->inode; - local->call_count = priv->child_count; + if (IA_ISREG(loc->inode->ia_type)) { + inode_ctx_get(loc->inode, this, (uint64_t *) &fctx); + if (!fctx) + goto err; + local->fctx = fctx; + } - while (trav) { - STACK_WIND (frame, stripe_stack_unwind_buf_cbk, - trav->xlator, trav->xlator->fops->chown, - loc, uid, gid); - trav = trav->next; - } + while (trav) { + STACK_WIND (frame, stripe_stat_cbk, trav->xlator, + trav->xlator->fops->stat, loc, NULL); + trav = trav->next; } return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); + +err: + STRIPE_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL); return 0; } -/** - * stripe_statfs_cbk - - */ int32_t stripe_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct statvfs *stbuf) + int32_t op_ret, int32_t op_errno, struct statvfs *stbuf, dict_t *xdata) { stripe_local_t *local = NULL; int32_t callcnt = 0; + if (!this || !frame || !frame->local) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } local = frame->local; LOCK(&frame->lock); @@ -651,32 +517,32 @@ stripe_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this, } } UNLOCK (&frame->lock); - + if (!callcnt) { - STACK_UNWIND (frame, local->op_ret, - local->op_errno, &local->statvfs_buf); + STRIPE_STACK_UNWIND (statfs, frame, local->op_ret, + local->op_errno, &local->statvfs_buf, NULL); } - +out: return 0; } - -/** - * stripe_statfs - - */ int32_t -stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc) +stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) { stripe_local_t *local = NULL; xlator_list_t *trav = NULL; stripe_private_t *priv = NULL; - int32_t op_errno = 1; + int32_t op_errno = EINVAL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); trav = this->children; priv = this->private; /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -688,28 +554,99 @@ stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc) local->call_count = priv->child_count; while (trav) { STACK_WIND (frame, stripe_statfs_cbk, trav->xlator, - trav->xlator->fops->statfs, loc); + trav->xlator->fops->statfs, loc, NULL); trav = trav->next; } return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); +err: + STRIPE_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL); return 0; } -/** - * stripe_truncate - - */ + int32_t -stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) +stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "%s returned error %s", + prev->this->name, strerror (op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || + (prev->this == FIRST_CHILD (this))) + local->failed = 1; + } + + if (op_ret == 0) { + local->op_ret = 0; + if (FIRST_CHILD(this) == prev->this) { + local->pre_buf = *prebuf; + local->post_buf = *postbuf; + } + + local->prebuf_blocks += prebuf->ia_blocks; + local->postbuf_blocks += postbuf->ia_blocks; + + correct_file_size(prebuf, local->fctx, prev); + correct_file_size(postbuf, local->fctx, prev); + + if (local->prebuf_size < prebuf->ia_size) + local->prebuf_size = prebuf->ia_size; + + if (local->postbuf_size < postbuf->ia_size) + local->postbuf_size = postbuf->ia_size; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + + if (local->op_ret != -1) { + local->pre_buf.ia_blocks = local->prebuf_blocks; + local->pre_buf.ia_size = local->prebuf_size; + local->post_buf.ia_blocks = local->postbuf_blocks; + local->post_buf.ia_size = local->postbuf_size; + } + + STRIPE_STACK_UNWIND (truncate, frame, local->op_ret, + local->op_errno, &local->pre_buf, + &local->post_buf, NULL); + } +out: + return 0; +} + +int32_t +stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata) { - int send_fop_to_all = 0; - xlator_list_t *trav = NULL; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; - int32_t op_errno = 1; + stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = EINVAL; + int i, eof_idx; + off_t dest_offset, tmp_offset; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -718,58 +655,157 @@ stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset) VALIDATE_OR_GOTO (loc->inode, err); priv = this->private; - trav = this->children; if (priv->first_child_down) { op_errno = ENOTCONN; goto err; } - if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode)) - send_fop_to_all = 1; + /* Initialization */ + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + local->call_count = priv->child_count; + + inode_ctx_get(loc->inode, this, (uint64_t *) &fctx); + if (!fctx) { + gf_log(this->name, GF_LOG_ERROR, "no stripe context"); + op_errno = EINVAL; + goto err; + } + + local->fctx = fctx; + eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count; + + for (i = 0; i < fctx->stripe_count; i++) { + if (!fctx->xl_array[i]) { + gf_log(this->name, GF_LOG_ERROR, + "no xlator at index %d", i); + op_errno = EINVAL; + goto err; + } + + if (fctx->stripe_coalesce) { + /* + * The node that owns EOF is truncated to the exact + * coalesced offset. Nodes prior to this index should + * be rounded up to the size of the complete stripe, + * while nodes after this index should be rounded down + * to the size of the previous stripe. + */ + if (i < eof_idx) + tmp_offset = roof(offset, fctx->stripe_size * + fctx->stripe_count); + else if (i > eof_idx) + tmp_offset = floor(offset, fctx->stripe_size * + fctx->stripe_count); + else + tmp_offset = offset; + + dest_offset = coalesced_offset(tmp_offset, + fctx->stripe_size, fctx->stripe_count); + } else { + dest_offset = offset; + } + + STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i], + fctx->xl_array[i]->fops->truncate, loc, dest_offset, + NULL); + } - if (!send_fop_to_all) { - STACK_WIND (frame, stripe_common_buf_cbk, trav->xlator, - trav->xlator->fops->truncate, loc, offset); - } else { - /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); - if (!local) { - op_errno = ENOMEM; - goto err; + return 0; +err: + STRIPE_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + + +int32_t +stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + struct iatt *preop, struct iatt *postop, dict_t *xdata) +{ + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "%s returned error %s", + prev->this->name, strerror (op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || + (prev->this == FIRST_CHILD (this))) + local->failed = 1; } - local->op_ret = -1; - frame->local = local; - local->inode = loc->inode; - local->call_count = priv->child_count; - - while (trav) { - STACK_WIND (frame, stripe_stack_unwind_buf_cbk, - trav->xlator, trav->xlator->fops->truncate, - loc, offset); - trav = trav->next; + + if (op_ret == 0) { + local->op_ret = 0; + + if (FIRST_CHILD(this) == prev->this) { + local->pre_buf = *preop; + local->post_buf = *postop; + } + + local->prebuf_blocks += preop->ia_blocks; + local->postbuf_blocks += postop->ia_blocks; + + correct_file_size(preop, local->fctx, prev); + correct_file_size(postop, local->fctx, prev); + + if (local->prebuf_size < preop->ia_size) + local->prebuf_size = preop->ia_size; + if (local->postbuf_size < postop->ia_size) + local->postbuf_size = postop->ia_size; } } + UNLOCK (&frame->lock); - return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + + if (local->op_ret != -1) { + local->pre_buf.ia_blocks = local->prebuf_blocks; + local->pre_buf.ia_size = local->prebuf_size; + local->post_buf.ia_blocks = local->postbuf_blocks; + local->post_buf.ia_size = local->postbuf_size; + } + + STRIPE_STACK_UNWIND (setattr, frame, local->op_ret, + local->op_errno, &local->pre_buf, + &local->post_buf, NULL); + } +out: return 0; } -/** - * stripe_utimens - - */ -int32_t -stripe_utimens (call_frame_t *frame, xlator_t *this, loc_t *loc, - struct timespec tv[2]) +int32_t +stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, + struct iatt *stbuf, int32_t valid, dict_t *xdata) { - int send_fop_to_all = 0; xlator_list_t *trav = NULL; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; - int32_t op_errno = 1; + stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = EINVAL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -785,46 +821,175 @@ stripe_utimens (call_frame_t *frame, xlator_t *this, loc_t *loc, goto err; } - if (S_ISDIR (loc->inode->st_mode) || S_ISREG (loc->inode->st_mode)) - send_fop_to_all = 1; + /* Initialization */ + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + if (!IA_ISDIR (loc->inode->ia_type) && + !IA_ISREG (loc->inode->ia_type)) { + local->call_count = 1; + STACK_WIND (frame, stripe_setattr_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->setattr, + loc, stbuf, valid, NULL); + return 0; + } - if (!send_fop_to_all) { - STACK_WIND (frame, stripe_common_buf_cbk, trav->xlator, - trav->xlator->fops->utimens, loc, tv); - } else { - /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->inode = loc->inode; - local->call_count = priv->child_count; - - while (trav) { - STACK_WIND (frame, stripe_stack_unwind_buf_cbk, - trav->xlator, trav->xlator->fops->utimens, - loc, tv); - trav = trav->next; - } + if (IA_ISREG(loc->inode->ia_type)) { + inode_ctx_get(loc->inode, this, (uint64_t *) &fctx); + if (!fctx) + goto err; + local->fctx = fctx; + } + + local->call_count = priv->child_count; + while (trav) { + STACK_WIND (frame, stripe_setattr_cbk, + trav->xlator, trav->xlator->fops->setattr, + loc, stbuf, valid, NULL); + trav = trav->next; } return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); +err: + STRIPE_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL); return 0; } -int32_t +int32_t +stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + struct iatt *stbuf, int32_t valid, dict_t *xdata) +{ + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + xlator_list_t *trav = NULL; + int32_t op_errno = EINVAL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + priv = this->private; + trav = this->children; + + /* Initialization */ + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + local->call_count = priv->child_count; + + while (trav) { + STACK_WIND (frame, stripe_setattr_cbk, trav->xlator, + trav->xlator->fops->fsetattr, fd, stbuf, valid, NULL); + trav = trav->next; + } + + return 0; +err: + STRIPE_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + +int32_t +stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) +{ + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "%s returned error %s", + prev->this->name, strerror (op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || + (prev->this == FIRST_CHILD (this))) + local->failed = 1; + } + + if (op_ret == 0) { + local->op_ret = 0; + + local->stbuf.ia_blocks += buf->ia_blocks; + local->preparent.ia_blocks += preoldparent->ia_blocks; + local->postparent.ia_blocks += postoldparent->ia_blocks; + local->pre_buf.ia_blocks += prenewparent->ia_blocks; + local->post_buf.ia_blocks += postnewparent->ia_blocks; + + correct_file_size(buf, local->fctx, prev); + + if (local->stbuf.ia_size < buf->ia_size) + local->stbuf.ia_size = buf->ia_size; + + if (local->preparent.ia_size < preoldparent->ia_size) + local->preparent.ia_size = preoldparent->ia_size; + + if (local->postparent.ia_size < postoldparent->ia_size) + local->postparent.ia_size = postoldparent->ia_size; + + if (local->pre_buf.ia_size < prenewparent->ia_size) + local->pre_buf.ia_size = prenewparent->ia_size; + + if (local->post_buf.ia_size < postnewparent->ia_size) + local->post_buf.ia_size = postnewparent->ia_size; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + + STRIPE_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, + &local->stbuf, &local->preparent, + &local->postparent, &local->pre_buf, + &local->post_buf, NULL); + } +out: + return 0; +} + +int32_t stripe_first_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct stat *buf) + int32_t op_ret, int32_t op_errno, struct iatt *buf, + struct iatt *preoldparent, struct iatt *postoldparent, + struct iatt *prenewparent, struct iatt *postnewparent, + dict_t *xdata) { stripe_local_t *local = NULL; xlator_list_t *trav = NULL; + if (!this || !frame || !frame->local) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + op_errno = EINVAL; + goto unwind; + } + if (op_ret == -1) { goto unwind; } @@ -832,34 +997,39 @@ stripe_first_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local = frame->local; trav = this->children; + local->stbuf = *buf; + local->preparent = *preoldparent; + local->postparent = *postoldparent; + local->pre_buf = *prenewparent; + local->post_buf = *postnewparent; + local->op_ret = 0; - local->stbuf = *buf; local->call_count--; - trav = trav->next; /* Skip first child */ + trav = trav->next; /* Skip first child */ while (trav) { - STACK_WIND (frame, stripe_stack_unwind_buf_cbk, + STACK_WIND (frame, stripe_stack_rename_cbk, trav->xlator, trav->xlator->fops->rename, - &local->loc, &local->loc2); + &local->loc, &local->loc2, NULL); trav = trav->next; } return 0; - unwind: - STACK_UNWIND (frame, op_ret, op_errno, buf); +unwind: + STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, buf, preoldparent, + postoldparent, prenewparent, postnewparent, NULL); return 0; } -/** - * stripe_rename - - */ + int32_t -stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, - loc_t *newloc) +stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, + loc_t *newloc, dict_t *xdata) { stripe_private_t *priv = NULL; stripe_local_t *local = NULL; xlator_list_t *trav = NULL; - int32_t op_errno = 1; + stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = EINVAL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -878,101 +1048,131 @@ stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, } /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } local->op_ret = -1; - local->inode = oldloc->inode; loc_copy (&local->loc, oldloc); loc_copy (&local->loc2, newloc); local->call_count = priv->child_count; - + + if (IA_ISREG(oldloc->inode->ia_type)) { + inode_ctx_get(oldloc->inode, this, (uint64_t *) &fctx); + if (!fctx) + goto err; + local->fctx = fctx; + } + frame->local = local; STACK_WIND (frame, stripe_first_rename_cbk, trav->xlator, - trav->xlator->fops->rename, oldloc, newloc); + trav->xlator->fops->rename, oldloc, newloc, NULL); return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); +err: + STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, + NULL, NULL, NULL); return 0; } - - -/** - * stripe_access - - */ int32_t -stripe_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask) +stripe_first_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int32_t op_errno = 1; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (loc->inode, err); + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - STACK_WIND (frame, stripe_common_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->access, loc, mask); + prev = cookie; + local = frame->local; + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, "%s returned %s", + prev->this->name, strerror (op_errno)); + goto out; + } + local->op_ret = 0; + local->preparent = *preparent; + local->postparent = *postparent; + local->preparent_blocks += preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; + + STRIPE_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, xdata); return 0; - err: - STACK_UNWIND (frame, -1, op_errno); +out: + STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } -/** - * stripe_readlink_cbk - - */ -int32_t -stripe_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, const char *path) -{ - STACK_UNWIND (frame, op_ret, op_errno, path); - return 0; -} -/** - * stripe_readlink - - */ int32_t -stripe_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size) +stripe_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int32_t op_errno = 1; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (loc->inode, err); + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + local = frame->local; - STACK_WIND (frame, stripe_readlink_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->readlink, loc, size); + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, "%s returned %s", + prev->this->name, strerror (op_errno)); + local->op_errno = op_errno; + if (op_errno != ENOENT) { + local->failed = 1; + local->op_ret = op_ret; + } + } + } + UNLOCK (&frame->lock); + if (callcnt == 1) { + if (local->failed) { + op_errno = local->op_errno; + goto out; + } + STACK_WIND(frame, stripe_first_unlink_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->unlink, &local->loc, + local->xflag, local->xdata); + } return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); +out: + STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL); + return 0; } - -/** - * stripe_unlink - - */ int32_t -stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) +stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, + int xflag, dict_t *xdata) { - int send_fop_to_all = 0; xlator_list_t *trav = NULL; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; - int32_t op_errno = 1; + int32_t op_errno = EINVAL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -987,82 +1187,132 @@ stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc) op_errno = ENOTCONN; goto err; } - - if (S_ISREG (loc->inode->st_mode)) - send_fop_to_all = 1; - if (!send_fop_to_all) { - STACK_WIND (frame, stripe_common_cbk, trav->xlator, - trav->xlator->fops->unlink, loc); - } else { - /* Don't unlink a file if a node is down */ - if (priv->nodes_down) { - op_errno = ENOTCONN; - goto err; - } + /* Don't unlink a file if a node is down */ + if (priv->nodes_down) { + op_errno = ENOTCONN; + goto err; + } - /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - - while (trav) { - STACK_WIND (frame, stripe_stack_unwind_cbk, - trav->xlator, trav->xlator->fops->unlink, - loc); - trav = trav->next; - } + /* Initialization */ + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + loc_copy (&local->loc, loc); + local->xflag = xflag; + + if (xdata) + local->xdata = dict_ref (xdata); + + frame->local = local; + local->call_count = priv->child_count; + trav = trav->next; /* Skip the first child */ + + while (trav) { + STACK_WIND (frame, stripe_unlink_cbk, + trav->xlator, trav->xlator->fops->unlink, + loc, xflag, xdata); + trav = trav->next; } return 0; - err: - STACK_UNWIND (frame, -1, op_errno); +err: + STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL); return 0; } -int32_t +int32_t stripe_first_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) + int32_t op_ret, int32_t op_errno,struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - xlator_list_t *trav = NULL; stripe_local_t *local = NULL; + if (!this || !frame || !frame->local) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + op_errno = EINVAL; + goto err; + } + if (op_ret == -1) { - STACK_UNWIND (frame, op_ret, op_errno); - return 0; + goto err; } - trav = this->children; local = frame->local; + local->op_ret = 0; local->call_count--; /* First child successful */ - trav = trav->next; /* Skip first child */ - while (trav) { - STACK_WIND (frame, stripe_stack_unwind_cbk, trav->xlator, - trav->xlator->fops->rmdir, &local->loc); - trav = trav->next; + local->preparent = *preparent; + local->postparent = *postparent; + local->preparent_size = preparent->ia_size; + local->postparent_size = postparent->ia_size; + local->preparent_blocks += preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; + + STRIPE_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno, + &local->preparent, &local->postparent, xdata); + return 0; +err: + STRIPE_STACK_UNWIND (rmdir, frame, op_ret, op_errno, NULL, NULL, NULL); + return 0; + +} + +int32_t +stripe_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, "%s returned %s", + prev->this->name, strerror (op_errno)); + if (op_errno != ENOENT) + local->failed = 1; + } } + UNLOCK (&frame->lock); + if (callcnt == 1) { + if (local->failed) + goto out; + STACK_WIND (frame, stripe_first_rmdir_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->rmdir, &local->loc, + local->flags, NULL); + } + return 0; +out: + STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL); return 0; } -/** - * stripe_rmdir - - */ int32_t -stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc) +stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata) { xlator_list_t *trav = NULL; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; - int32_t op_errno = 1; + int32_t op_errno = EINVAL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -1080,68 +1330,45 @@ stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc) } /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } local->op_ret = -1; frame->local = local; - local->inode = loc->inode; loc_copy (&local->loc, loc); + local->flags = flags; local->call_count = priv->child_count; - - STACK_WIND (frame, stripe_first_rmdir_cbk, trav->xlator, - trav->xlator->fops->rmdir, loc); - - return 0; - err: - STACK_UNWIND (frame, -1, op_errno); - return 0; -} - - -/** - * stripe_setxattr - - */ -int32_t -stripe_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - dict_t *dict, int32_t flags) -{ - stripe_private_t *priv = NULL; - int32_t op_errno = 1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (loc->inode, err); + trav = trav->next; /* skip the first child */ - priv = this->private; - - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; + while (trav) { + STACK_WIND (frame, stripe_rmdir_cbk, trav->xlator, + trav->xlator->fops->rmdir, loc, flags, NULL); + trav = trav->next; } - STACK_WIND (frame, stripe_common_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->setxattr, loc, dict, flags); - return 0; - err: - STACK_UNWIND (frame, -1, op_errno); +err: + STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL); return 0; } -int32_t +int32_t stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno) + int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; + if (!this || !frame || !frame->local) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + local = frame->local; LOCK (&frame->lock); @@ -1151,11 +1378,11 @@ stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie, UNLOCK (&frame->lock); if (!callcnt) { - loc_wipe (&local->loc); - STACK_UNWIND (frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf); + STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, + &local->preparent, &local->postparent, NULL); } - +out: return 0; } @@ -1165,23 +1392,31 @@ stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie, int32_t stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno) + int32_t op_errno, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; xlator_list_t *trav = NULL; + call_frame_t *prev = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + priv = this->private; + local = frame->local; - priv = this->private; LOCK (&frame->lock); { callcnt = --local->call_count; - + if (op_ret == -1) { gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s", - ((call_frame_t *)cookie)->this->name, - strerror (op_errno)); + prev->this->name, strerror (op_errno)); local->op_ret = -1; local->op_errno = op_errno; } @@ -1196,142 +1431,239 @@ stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie, stripe_mknod_ifreg_fail_unlink_cbk, trav->xlator, trav->xlator->fops->unlink, - &local->loc); + &local->loc, 0, NULL); trav = trav->next; } return 0; } - loc_wipe (&local->loc); - STACK_UNWIND (frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf); + STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, + &local->preparent, &local->postparent, NULL); } +out: return 0; } -/** - */ int32_t stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, inode_t *inode, - struct stat *buf) + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int ret = 0; int32_t callcnt = 0; stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; stripe_private_t *priv = NULL; + call_frame_t *prev = NULL; + xlator_list_t *trav = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - priv = this->private; + prev = cookie; + priv = this->private; local = frame->local; LOCK (&frame->lock); { callcnt = --local->call_count; - + if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s", - ((call_frame_t *)cookie)->this->name, - strerror (op_errno)); - local->failed = 1; + prev->this->name, strerror (op_errno)); + if ((op_errno != ENOENT) || + (prev->this == FIRST_CHILD (this))) + local->failed = 1; local->op_errno = op_errno; } - if (op_ret >= 0) { local->op_ret = op_ret; - /* Get the mapping in inode private */ - /* Get the stat buf right */ - if (local->stbuf.st_blksize == 0) { - local->stbuf = *buf; - /* Because st_blocks gets added again */ - local->stbuf.st_blocks = 0; - } - /* Always, pass the inode number of first child - to the above layer */ - if (FIRST_CHILD(this) == - ((call_frame_t *)cookie)->this) - local->stbuf.st_ino = buf->st_ino; - - local->stbuf.st_blocks += buf->st_blocks; - if (local->stbuf.st_size < buf->st_size) - local->stbuf.st_size = buf->st_size; - if (local->stbuf.st_blksize != buf->st_blksize) { - /* TODO: add to blocks in terms of - original block size */ - } + /* Can be used as a mechanism to understand if mknod + was successful in at least one place */ + if (uuid_is_null (local->ia_gfid)) + uuid_copy (local->ia_gfid, buf->ia_gfid); + + if (stripe_ctx_handle(this, prev, local, xdata)) + gf_log(this->name, GF_LOG_ERROR, + "Error getting fctx info from dict"); + + local->stbuf_blocks += buf->ia_blocks; + local->preparent_blocks += preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; + + correct_file_size(buf, local->fctx, prev); + + if (local->stbuf_size < buf->ia_size) + local->stbuf_size = buf->ia_size; + if (local->preparent_size < preparent->ia_size) + local->preparent_size = preparent->ia_size; + if (local->postparent_size < postparent->ia_size) + local->postparent_size = postparent->ia_size; } } UNLOCK (&frame->lock); if (!callcnt) { - if (local->failed) + if (local->failed) local->op_ret = -1; - if ((local->op_ret != -1) && priv->xattr_supported) { - /* Send a setxattr request to nodes where the - files are created */ - int32_t index = 0; - char size_key[256] = {0,}; - char index_key[256] = {0,}; - char count_key[256] = {0,}; - dict_t *dict = NULL; - - trav = this->children; - sprintf (size_key, - "trusted.%s.stripe-size", this->name); - sprintf (count_key, - "trusted.%s.stripe-count", this->name); - sprintf (index_key, - "trusted.%s.stripe-index", this->name); - + if ((local->op_ret == -1) && !uuid_is_null (local->ia_gfid)) { + /* ia_gfid set means, at least on one node 'mknod' + is successful */ local->call_count = priv->child_count; - + trav = this->children; while (trav) { - dict = get_new_dict (); - dict_ref (dict); - /* TODO: check return value */ - ret = dict_set_int64 (dict, size_key, - local->stripe_size); - ret = dict_set_int32 (dict, count_key, - priv->child_count); - ret = dict_set_int32 (dict, index_key, index); - STACK_WIND (frame, - stripe_mknod_ifreg_setxattr_cbk, + stripe_mknod_ifreg_fail_unlink_cbk, trav->xlator, - trav->xlator->fops->setxattr, - &local->loc, dict, 0); - - dict_unref (dict); - index++; + trav->xlator->fops->unlink, + &local->loc, 0, NULL); trav = trav->next; } + return 0; + } + + + if (local->op_ret != -1) { + local->preparent.ia_blocks = local->preparent_blocks; + local->preparent.ia_size = local->preparent_size; + local->postparent.ia_blocks = local->postparent_blocks; + local->postparent.ia_size = local->postparent_size; + local->stbuf.ia_size = local->stbuf_size; + local->stbuf.ia_blocks = local->stbuf_blocks; + inode_ctx_put (local->inode, this, + (uint64_t)(long) local->fctx); + + } + STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno, + local->inode, &local->stbuf, + &local->preparent, &local->postparent, NULL); + } +out: + return 0; +} + + +int32_t +stripe_mknod_first_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + call_frame_t *prev = NULL; + xlator_list_t *trav = NULL; + int i = 1; + dict_t *dict = NULL; + int ret = 0; + int need_unref = 0; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + priv = this->private; + local = frame->local; + trav = this->children; + + local->call_count--; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror (op_errno)); + local->failed = 1; + local->op_errno = op_errno; + goto out; + } + + local->op_ret = op_ret; + + local->stbuf = *buf; + local->preparent = *preparent; + local->postparent = *postparent; + + if (uuid_is_null (local->ia_gfid)) + uuid_copy (local->ia_gfid, buf->ia_gfid); + local->preparent.ia_blocks = local->preparent_blocks; + local->preparent.ia_size = local->preparent_size; + local->postparent.ia_blocks = local->postparent_blocks; + local->postparent.ia_size = local->postparent_size; + local->stbuf.ia_size = local->stbuf_size; + local->stbuf.ia_blocks = local->stbuf_blocks; + + trav = trav->next; + while (trav) { + if (priv->xattr_supported) { + dict = dict_new (); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, + "failed to allocate dict %s", local->loc.path); + } + need_unref = 1; + + dict_copy (local->xattr, dict); + + ret = stripe_xattr_request_build (this, dict, + local->stripe_size, + priv->child_count, i, + priv->coalesce); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Failed to build xattr request"); + } else { - /* Create itself has failed.. so return - without setxattring */ - loc_wipe (&local->loc); - STACK_UNWIND (frame, local->op_ret, local->op_errno, - local->inode, &local->stbuf); + dict = local->xattr; } + + STACK_WIND (frame, stripe_mknod_ifreg_cbk, + trav->xlator, trav->xlator->fops->mknod, + &local->loc, local->mode, local->rdev, 0, dict); + trav = trav->next; + i++; + + if (dict && need_unref) + dict_unref (dict); } - + return 0; + +out: + + STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, NULL); + return 0; } -/** - * stripe_mknod - - */ int32_t +stripe_single_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf, + preparent, postparent, xdata); + return 0; +} + + +int stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, - dev_t rdev) + dev_t rdev, mode_t umask, dict_t *xdata) { - stripe_private_t *priv = NULL; - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - int32_t op_errno = 1; + stripe_private_t *priv = NULL; + stripe_local_t *local = NULL; + int32_t op_errno = EINVAL; + int32_t i = 0; + dict_t *dict = NULL; + int ret = 0; + int need_unref = 0; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -1340,71 +1672,216 @@ stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, VALIDATE_OR_GOTO (loc->inode, err); priv = this->private; - trav = this->children; - + if (priv->first_child_down) { op_errno = ENOTCONN; goto err; } if (S_ISREG(mode)) { - /* NOTE: on older kernels (older than 2.6.9), - creat() fops is sent as mknod() + open(). Hence handling + /* NOTE: on older kernels (older than 2.6.9), + creat() fops is sent as mknod() + open(). Hence handling S_IFREG files is necessary */ if (priv->nodes_down) { - gf_log (this->name, GF_LOG_WARNING, + gf_log (this->name, GF_LOG_WARNING, "Some node down, returning EIO"); op_errno = EIO; goto err; } - + /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } local->op_ret = -1; local->op_errno = ENOTCONN; - local->stripe_size = stripe_get_matching_bs (loc->path, - priv->pattern, - priv->block_size); + local->stripe_size = stripe_get_matching_bs (loc->path, priv); frame->local = local; - local->inode = loc->inode; + local->inode = inode_ref (loc->inode); loc_copy (&local->loc, loc); + local->xattr = dict_copy_with_ref (xdata, NULL); + local->mode = mode; + local->umask = umask; + local->rdev = rdev; /* Everytime in stripe lookup, all child nodes should be looked up */ local->call_count = priv->child_count; - - while (trav) { - STACK_WIND (frame, stripe_mknod_ifreg_cbk, - trav->xlator, trav->xlator->fops->mknod, - loc, mode, rdev); - trav = trav->next; + + if (priv->xattr_supported) { + dict = dict_new (); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, + "failed to allocate dict %s", loc->path); + } + need_unref = 1; + + dict_copy (xdata, dict); + + ret = stripe_xattr_request_build (this, dict, + local->stripe_size, + priv->child_count, + i, priv->coalesce); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "failed to build xattr request"); + } else { + dict = xdata; } - /* This case is handled, no need to continue further. */ - return 0; - } + STACK_WIND (frame, stripe_mknod_first_ifreg_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->mknod, + loc, mode, rdev, umask, dict); + if (dict && need_unref) + dict_unref (dict); + return 0; + } - STACK_WIND (frame, stripe_common_inode_cbk, + STACK_WIND (frame, stripe_single_mknod_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod, - loc, mode, rdev); + loc, mode, rdev, umask, xdata); return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL, NULL); +err: + STRIPE_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); + return 0; +} + + +int32_t +stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "%s returned error %s", + prev->this->name, strerror (op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || + (prev->this == FIRST_CHILD (this))) + local->failed = 1; + } + + if (op_ret >= 0) { + local->op_ret = 0; + + local->stbuf_blocks += buf->ia_blocks; + local->preparent_blocks += preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; + + if (local->stbuf_size < buf->ia_size) + local->stbuf_size = buf->ia_size; + if (local->preparent_size < preparent->ia_size) + local->preparent_size = preparent->ia_size; + if (local->postparent_size < postparent->ia_size) + local->postparent_size = postparent->ia_size; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->failed != -1) { + local->preparent.ia_blocks = local->preparent_blocks; + local->preparent.ia_size = local->preparent_size; + local->postparent.ia_blocks = local->postparent_blocks; + local->postparent.ia_size = local->postparent_size; + local->stbuf.ia_size = local->stbuf_size; + local->stbuf.ia_blocks = local->stbuf_blocks; + } + STRIPE_STACK_UNWIND (mkdir, frame, local->op_ret, + local->op_errno, local->inode, + &local->stbuf, &local->preparent, + &local->postparent, NULL); + } +out: return 0; } -/** - * stripe_mkdir - - */ int32_t -stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode) +stripe_first_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) +{ + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + xlator_list_t *trav = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + local = frame->local; + trav = this->children; + + local->call_count--; /* first child is successful */ + trav = trav->next; /* skip first child */ + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror (op_errno)); + local->op_errno = op_errno; + goto out; + } + + local->op_ret = 0; + + local->inode = inode_ref (inode); + local->stbuf = *buf; + local->postparent = *postparent; + local->preparent = *preparent; + + local->stbuf_blocks += buf->ia_blocks; + local->preparent_blocks += preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; + + local->stbuf_size = buf->ia_size; + local->preparent_size = preparent->ia_size; + local->postparent_size = postparent->ia_size; + + while (trav) { + STACK_WIND (frame, stripe_mkdir_cbk, trav->xlator, + trav->xlator->fops->mkdir, &local->loc, local->mode, + local->umask, local->xdata); + trav = trav->next; + } + return 0; +out: + STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, + NULL, NULL); + + return 0; + +} + + +int +stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode, + mode_t umask, dict_t *xdata) { stripe_private_t *priv = NULL; stripe_local_t *local = NULL; @@ -1419,72 +1896,130 @@ stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode) priv = this->private; trav = this->children; - + if (priv->first_child_down) { op_errno = ENOTCONN; goto err; } /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } local->op_ret = -1; local->call_count = priv->child_count; + if (xdata) + local->xdata = dict_ref (xdata); + local->mode = mode; + local->umask = umask; + loc_copy (&local->loc, loc); frame->local = local; /* Everytime in stripe lookup, all child nodes should be looked up */ - while (trav) { - STACK_WIND (frame, stripe_stack_unwind_inode_cbk, - trav->xlator, trav->xlator->fops->mkdir, - loc, mode); - trav = trav->next; - } + STACK_WIND (frame, stripe_first_mkdir_cbk, trav->xlator, + trav->xlator->fops->mkdir, loc, mode, umask, xdata); return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL, NULL); +err: + STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); return 0; } -/** - * stripe_symlink - - */ int32_t -stripe_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath, - loc_t *loc) +stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, inode_t *inode, + struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int32_t op_errno = 1; - stripe_private_t *priv = NULL; - - priv = this->private; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + stripe_fd_ctx_t *fctx = NULL; - if (priv->first_child_down) { - op_errno = ENOTCONN; - goto err; + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; } - /* send symlink to only first node */ - STACK_WIND (frame, stripe_common_inode_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->symlink, linkpath, loc); + prev = cookie; + local = frame->local; - return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL, NULL); + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "%s returned error %s", + prev->this->name, strerror (op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || + (prev->this == FIRST_CHILD (this))) + local->failed = 1; + } + + if (op_ret >= 0) { + local->op_ret = 0; + + if (IA_ISREG(inode->ia_type)) { + inode_ctx_get(inode, this, (uint64_t *) &fctx); + if (!fctx) { + gf_log(this->name, GF_LOG_ERROR, + "failed to get stripe context"); + op_ret = -1; + op_errno = EINVAL; + } + } + + if (FIRST_CHILD(this) == prev->this) { + local->inode = inode_ref (inode); + local->stbuf = *buf; + local->postparent = *postparent; + local->preparent = *preparent; + } + local->stbuf_blocks += buf->ia_blocks; + local->preparent_blocks += preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; + + correct_file_size(buf, fctx, prev); + + if (local->stbuf_size < buf->ia_size) + local->stbuf_size = buf->ia_size; + if (local->preparent_size < preparent->ia_size) + local->preparent_size = preparent->ia_size; + if (local->postparent_size < postparent->ia_size) + local->postparent_size = postparent->ia_size; + } + } + UNLOCK (&frame->lock); + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + + if (local->op_ret != -1) { + local->preparent.ia_blocks = local->preparent_blocks; + local->preparent.ia_size = local->preparent_size; + local->postparent.ia_blocks = local->postparent_blocks; + local->postparent.ia_size = local->postparent_size; + local->stbuf.ia_size = local->stbuf_size; + local->stbuf.ia_blocks = local->stbuf_blocks; + } + STRIPE_STACK_UNWIND (link, frame, local->op_ret, + local->op_errno, local->inode, + &local->stbuf, &local->preparent, + &local->postparent, NULL); + } +out: return 0; } -/** - * stripe_link - - */ int32_t -stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc) +stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata) { - int send_fop_to_all = 0; xlator_list_t *trav = NULL; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; @@ -1505,49 +2040,45 @@ stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc) goto err; } - if (S_ISREG (oldloc->inode->st_mode)) - send_fop_to_all = 1; + /* Initialization */ + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + local->call_count = priv->child_count; - if (!send_fop_to_all) { - STACK_WIND (frame, stripe_common_inode_cbk, + /* Everytime in stripe lookup, all child + nodes should be looked up */ + while (trav) { + STACK_WIND (frame, stripe_link_cbk, trav->xlator, trav->xlator->fops->link, - oldloc, newloc); - } else { - /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - - /* Everytime in stripe lookup, all child - nodes should be looked up */ - while (trav) { - STACK_WIND (frame, stripe_stack_unwind_inode_cbk, - trav->xlator, trav->xlator->fops->link, - oldloc, newloc); - trav = trav->next; - } + oldloc, newloc, NULL); + trav = trav->next; } return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL, NULL); +err: + STRIPE_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL); return 0; } -int32_t +int32_t stripe_create_fail_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, - int32_t op_errno) + int32_t op_errno, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { int32_t callcnt = 0; - fd_t *lfd = NULL; stripe_local_t *local = NULL; + if (!this || !frame || !frame->local) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + local = frame->local; LOCK (&frame->lock); @@ -1557,48 +2088,78 @@ stripe_create_fail_unlink_cbk (call_frame_t *frame, void *cookie, UNLOCK (&frame->lock); if (!callcnt) { - lfd = local->fd; - loc_wipe (&local->loc); - STACK_UNWIND (frame, local->op_ret, local->op_errno, - local->fd, local->inode, &local->stbuf); - fd_unref (lfd); + STRIPE_STACK_UNWIND (create, frame, local->op_ret, local->op_errno, + local->fd, local->inode, &local->stbuf, + &local->preparent, &local->postparent, NULL); } +out: return 0; } -/** - * stripe_create_setxattr_cbk - - */ int32_t -stripe_create_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno) +stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, + inode_t *inode, struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - fd_t *lfd = NULL; + int32_t callcnt = 0; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; + call_frame_t *prev = NULL; xlator_list_t *trav = NULL; - int32_t callcnt = 0; - priv = this->private; + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + priv = this->private; local = frame->local; LOCK (&frame->lock); { callcnt = --local->call_count; - + if (op_ret == -1) { gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s", - ((call_frame_t *)cookie)->this->name, - strerror (op_errno)); - local->op_ret = -1; + prev->this->name, strerror (op_errno)); + local->failed = 1; local->op_errno = op_errno; } + + if (op_ret >= 0) { + if (IA_ISREG(buf->ia_type)) { + if (stripe_ctx_handle(this, prev, local, xdata)) + gf_log(this->name, GF_LOG_ERROR, + "Error getting fctx info from " + "dict"); + } + + local->op_ret = op_ret; + + local->stbuf_blocks += buf->ia_blocks; + local->preparent_blocks += preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; + + correct_file_size(buf, local->fctx, prev); + + if (local->stbuf_size < buf->ia_size) + local->stbuf_size = buf->ia_size; + if (local->preparent_size < preparent->ia_size) + local->preparent_size = preparent->ia_size; + if (local->postparent_size < postparent->ia_size) + local->postparent_size = postparent->ia_size; + } } UNLOCK (&frame->lock); if (!callcnt) { + if (local->failed) + local->op_ret = -1; + if (local->op_ret == -1) { local->call_count = priv->child_count; trav = this->children; @@ -1607,165 +2168,180 @@ stripe_create_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, stripe_create_fail_unlink_cbk, trav->xlator, trav->xlator->fops->unlink, - &local->loc); + &local->loc, 0, NULL); trav = trav->next; } - + return 0; } - lfd = local->fd; - loc_wipe (&local->loc); - STACK_UNWIND (frame, local->op_ret, local->op_errno, - local->fd, local->inode, &local->stbuf); - fd_unref (lfd); + if (local->op_ret >= 0) { + local->preparent.ia_blocks = local->preparent_blocks; + local->preparent.ia_size = local->preparent_size; + local->postparent.ia_blocks = local->postparent_blocks; + local->postparent.ia_size = local->postparent_size; + local->stbuf.ia_size = local->stbuf_size; + local->stbuf.ia_blocks = local->stbuf_blocks; + + stripe_copy_xl_array(local->fctx->xl_array, + priv->xl_array, + local->fctx->stripe_count); + inode_ctx_put(local->inode, this, + (uint64_t) local->fctx); + } + + /* Create itself has failed.. so return + without setxattring */ + STRIPE_STACK_UNWIND (create, frame, local->op_ret, + local->op_errno, local->fd, + local->inode, &local->stbuf, + &local->preparent, &local->postparent, NULL); } +out: return 0; } -/** - * stripe_create_cbk - - */ + + int32_t -stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, fd_t *fd, - inode_t *inode, struct stat *buf) + inode_t *inode, struct iatt *buf, struct iatt *preparent, + struct iatt *postparent, dict_t *xdata) { - int32_t callcnt = 0; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; - fd_t *lfd = NULL; - stripe_fd_ctx_t *fctx = NULL; + call_frame_t *prev = NULL; + xlator_list_t *trav = NULL; + int i = 1; + dict_t *dict = NULL; + loc_t *loc = NULL; + int32_t need_unref = 0; + int32_t ret = -1; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + prev = cookie; priv = this->private; local = frame->local; + trav = this->children; + loc = &local->loc; - LOCK (&frame->lock); - { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - ((call_frame_t *)cookie)->this->name, - strerror (op_errno)); - local->failed = 1; - local->op_errno = op_errno; - } - - if (op_ret >= 0) { - local->op_ret = op_ret; - /* Get the mapping in inode private */ - /* Get the stat buf right */ - if (local->stbuf.st_blksize == 0) { - local->stbuf = *buf; - /* Because st_blocks gets added again */ - local->stbuf.st_blocks = 0; - } - - /* Always, pass the inode number of first - child to the above layer */ - if (FIRST_CHILD(this) == - ((call_frame_t *)cookie)->this) - local->stbuf.st_ino = buf->st_ino; - - local->stbuf.st_blocks += buf->st_blocks; - if (local->stbuf.st_size < buf->st_size) - local->stbuf.st_size = buf->st_size; - if (local->stbuf.st_blksize != buf->st_blksize) { - /* TODO: add to blocks in terms of - original block size */ - } - } + --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s", + prev->this->name, strerror (op_errno)); + local->failed = 1; + local->op_errno = op_errno; } - UNLOCK (&frame->lock); - if (!callcnt) { - if (local->failed) - local->op_ret = -1; + local->op_ret = 0; + /* Get the mapping in inode private */ + /* Get the stat buf right */ + local->stbuf = *buf; + local->preparent = *preparent; + local->postparent = *postparent; + + local->stbuf_blocks += buf->ia_blocks; + local->preparent_blocks += preparent->ia_blocks; + local->postparent_blocks += postparent->ia_blocks; + + if (local->stbuf_size < buf->ia_size) + local->stbuf_size = buf->ia_size; + if (local->preparent_size < preparent->ia_size) + local->preparent_size = preparent->ia_size; + if (local->postparent_size < postparent->ia_size) + local->postparent_size = postparent->ia_size; + + if (local->failed) + local->op_ret = -1; - /* */ - if (local->op_ret >= 0) { - fctx = CALLOC (1, sizeof (stripe_fd_ctx_t)); - if (fctx) { - fctx->stripe_size = local->stripe_size; - fctx->stripe_count = priv->child_count; - fctx->static_array = 1; - fctx->xl_array = priv->xl_array; - fd_ctx_set (local->fd, this, - (uint64_t)(long)fctx); - } - } + if (local->op_ret == -1) { + local->call_count = 1; + STACK_WIND (frame, stripe_create_fail_unlink_cbk, + FIRST_CHILD (this), FIRST_CHILD (this)->fops->unlink, + &local->loc, 0, NULL); + return 0; + } - if ((local->op_ret != -1) && - local->stripe_size && priv->xattr_supported) { - /* Send a setxattr request to nodes where - the files are created */ - int ret = 0; - int32_t i = 0; - char size_key[256] = {0,}; - char index_key[256] = {0,}; - char count_key[256] = {0,}; - dict_t *dict = NULL; - - sprintf (size_key, - "trusted.%s.stripe-size", this->name); - sprintf (count_key, - "trusted.%s.stripe-count", this->name); - sprintf (index_key, - "trusted.%s.stripe-index", this->name); + if (local->op_ret >= 0) { + local->preparent.ia_blocks = local->preparent_blocks; + local->preparent.ia_size = local->preparent_size; + local->postparent.ia_blocks = local->postparent_blocks; + local->postparent.ia_size = local->postparent_size; + local->stbuf.ia_size = local->stbuf_size; + local->stbuf.ia_blocks = local->stbuf_blocks; + } - local->call_count = priv->child_count; - - for (i = 0; i < priv->child_count; i++) { - dict = get_new_dict (); - dict_ref (dict); - - /* TODO: check return values */ - ret = dict_set_int64 (dict, size_key, - local->stripe_size); - ret = dict_set_int32 (dict, count_key, - priv->child_count); - ret = dict_set_int32 (dict, index_key, i); - - STACK_WIND (frame, stripe_create_setxattr_cbk, - priv->xl_array[i], - priv->xl_array[i]->fops->setxattr, - &local->loc, dict, 0); - - dict_unref (dict); + /* Send a setxattr request to nodes where the + files are created */ + trav = trav->next; + while (trav) { + if (priv->xattr_supported) { + dict = dict_new (); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, + "failed to allocate dict %s", loc->path); } + need_unref = 1; + + dict_copy (local->xattr, dict); + + ret = stripe_xattr_request_build (this, dict, + local->stripe_size, + priv->child_count, + i, priv->coalesce); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "failed to build xattr request"); } else { - /* Create itself has failed.. so return - without setxattring */ - lfd = local->fd; - loc_wipe (&local->loc); - STACK_UNWIND (frame, local->op_ret, local->op_errno, - local->fd, local->inode, &local->stbuf); - - fd_unref (lfd); + dict = local->xattr; } + + STACK_WIND (frame, stripe_create_cbk, trav->xlator, + trav->xlator->fops->create, &local->loc, + local->flags, local->mode, local->umask, local->fd, + dict); + trav = trav->next; + if (need_unref && dict) + dict_unref (dict); + i++; } - + +out: return 0; } + /** - * stripe_create - If a block-size is specified for the 'name', create the + * stripe_create - If a block-size is specified for the 'name', create the * file in all the child nodes. If not, create it in only first child. * * @name- complete path of the file to be created. */ int32_t stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc, - int32_t flags, mode_t mode, fd_t *fd) + int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata) { stripe_private_t *priv = NULL; stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - int32_t op_errno = 1; + int32_t op_errno = EINVAL; + int ret = 0; + int need_unref = 0; + int i = 0; + dict_t *dict = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->path, err); + VALIDATE_OR_GOTO (loc->inode, err); priv = this->private; @@ -1773,121 +2349,86 @@ stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc, flags &= ~O_APPEND; if (priv->first_child_down || priv->nodes_down) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "First node down, returning EIO"); op_errno = EIO; goto err; } /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } local->op_ret = -1; local->op_errno = ENOTCONN; - local->stripe_size = stripe_get_matching_bs (loc->path, - priv->pattern, - priv->block_size); + local->stripe_size = stripe_get_matching_bs (loc->path, priv); frame->local = local; - local->inode = loc->inode; + local->inode = inode_ref (loc->inode); loc_copy (&local->loc, loc); local->fd = fd_ref (fd); + local->flags = flags; + local->mode = mode; + local->umask = umask; + if (xdata) + local->xattr = dict_ref (xdata); local->call_count = priv->child_count; - - trav = this->children; - while (trav) { - STACK_WIND (frame, stripe_create_cbk, trav->xlator, - trav->xlator->fops->create, loc, flags, mode, fd); - trav = trav->next; - } - - return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL); - return 0; -} - -/** - * stripe_open_cbk - - */ -int32_t -stripe_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd) -{ - int32_t callcnt = 0; - stripe_local_t *local = NULL; - fd_t *lfd = NULL; + /* Send a setxattr request to nodes where the + files are created */ - local = frame->local; + if (priv->xattr_supported) { + dict = dict_new (); + if (!dict) { + gf_log (this->name, GF_LOG_ERROR, + "failed to allocate dict %s", loc->path); + } + need_unref = 1; - LOCK (&frame->lock); - { - callcnt = --local->call_count; + dict_copy (xdata, dict); - if (op_ret == -1) { - local->failed = 1; - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - ((call_frame_t *)cookie)->this->name, - strerror (op_errno)); - local->op_ret = -1; - local->op_errno = op_errno; - } - - if (op_ret >= 0) - local->op_ret = op_ret; + ret = stripe_xattr_request_build (this, dict, + local->stripe_size, + priv->child_count, + i, priv->coalesce); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "failed to build xattr request"); + } else { + dict = xdata; } - UNLOCK (&frame->lock); - - if (!callcnt) { - if (local->failed) - local->op_ret = -1; - if (local->op_ret == -1) { - if (local->fctx) { - if (!local->fctx->static_array) - FREE (local->fctx->xl_array); - FREE (local->fctx); - } - } else { - fd_ctx_set (local->fd, this, - (uint64_t)(long)local->fctx); - } - lfd = local->fd; - loc_wipe (&local->loc); - STACK_UNWIND (frame, local->op_ret, local->op_errno, - local->fd); - fd_unref (lfd); + STACK_WIND (frame, stripe_first_create_cbk, FIRST_CHILD (this), + FIRST_CHILD (this)->fops->create, loc, flags, mode, + umask, fd, dict); + + if (need_unref && dict) + dict_unref (dict); - } return 0; +err: + STRIPE_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL, + NULL, NULL, xdata); + return 0; } - -/** - * stripe_getxattr_cbk - - */ int32_t -stripe_open_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *dict) +stripe_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { - int32_t index = 0; - int32_t callcnt = 0; - char key[256] = {0,}; - stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - stripe_private_t *priv = NULL; - data_t *data = NULL; - call_frame_t *prev = NULL; - fd_t *lfd = NULL; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - prev = (call_frame_t *)cookie; - priv = this->private; + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; local = frame->local; LOCK (&frame->lock); @@ -1895,145 +2436,39 @@ stripe_open_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, callcnt = --local->call_count; if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, + + gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s", - ((call_frame_t *)cookie)->this->name, - strerror (op_errno)); - local->op_ret = -1; - if (local->op_errno != EIO) - local->op_errno = op_errno; - if (op_errno == ENOTCONN) + prev->this->name, strerror (op_errno)); + if ((op_errno != ENOENT) || + (prev->this == FIRST_CHILD (this))) local->failed = 1; - goto unlock; - } - - if (!local->fctx) { - local->fctx = CALLOC (1, sizeof (stripe_fd_ctx_t)); - if (!local->fctx) { - local->op_errno = ENOMEM; - local->op_ret = -1; - goto unlock; - } - - local->fctx->static_array = 0; - } - /* Stripe block size */ - sprintf (key, "trusted.%s.stripe-size", this->name); - data = dict_get (dict, key); - if (!data) { - local->xattr_self_heal_needed = 1; - } else { - if (!local->fctx->stripe_size) { - local->fctx->stripe_size = - data_to_int64 (data); - } - - if (local->fctx->stripe_size != data_to_int64 (data)) { - gf_log (this->name, GF_LOG_DEBUG, - "stripe-size mismatch in blocks"); - local->xattr_self_heal_needed = 1; - } - } - /* Stripe count */ - sprintf (key, "trusted.%s.stripe-count", this->name); - data = dict_get (dict, key); - if (!data) { - local->xattr_self_heal_needed = 1; - goto unlock; - } - if (!local->fctx->xl_array) { - local->fctx->stripe_count = data_to_int32 (data); - if (!local->fctx->stripe_count) { - gf_log (this->name, GF_LOG_ERROR, - "error with stripe-count xattr"); - local->op_ret = -1; - local->op_errno = EIO; - goto unlock; - } - local->fctx->xl_array = - CALLOC (local->fctx->stripe_count, - sizeof (xlator_t *)); - } - if (local->fctx->stripe_count != data_to_int32 (data)) { - gf_log (this->name, GF_LOG_ERROR, - "error with stripe-count xattr"); - local->op_ret = -1; - local->op_errno = EIO; - goto unlock; + local->op_errno = op_errno; } - /* index */ - sprintf (key, "trusted.%s.stripe-index", this->name); - data = dict_get (dict, key); - if (!data) { - local->xattr_self_heal_needed = 1; - goto unlock; - } - index = data_to_int32 (data); - if (index > priv->child_count) { - gf_log (this->name, GF_LOG_ERROR, - "error with stripe-index xattr"); - local->op_ret = -1; - local->op_errno = EIO; - goto unlock; - } - if (local->fctx->xl_array) - local->fctx->xl_array[index] = prev->this; - local->entry_count++; - local->op_ret = 0; + if (op_ret >= 0) + local->op_ret = op_ret; } - unlock: UNLOCK (&frame->lock); - - if (!callcnt) { - /* TODO: if self-heal flag is set, do it */ - if (local->xattr_self_heal_needed) { - gf_log (this->name, GF_LOG_DEBUG, - "%s: stripe info need to be healed", - local->loc.path); - } - - if (local->op_ret) - goto err; - if (local->entry_count != local->fctx->stripe_count) { - local->op_ret = -1; - local->op_errno = EIO; - goto err; - } - if (!local->fctx->stripe_size) { + if (!callcnt) { + if (local->failed) local->op_ret = -1; - local->op_errno = EIO; - goto err; - } - - local->call_count = local->fctx->stripe_count; - trav = this->children; - while (trav) { - STACK_WIND (frame, stripe_open_cbk, trav->xlator, - trav->xlator->fops->open, &local->loc, - local->flags, local->fd); - trav = trav->next; - } + STRIPE_STACK_UNWIND (open, frame, local->op_ret, + local->op_errno, local->fd, xdata); } - - return 0; - err: - lfd = local->fd; - loc_wipe (&local->loc); - STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd); - fd_unref (lfd); - +out: return 0; } + /** - * stripe_open - + * stripe_open - */ int32_t stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc, - int32_t flags, fd_t *fd) + int32_t flags, fd_t *fd, dict_t *xdata) { stripe_local_t *local = NULL; stripe_private_t *priv = NULL; @@ -2055,7 +2490,7 @@ stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc, } /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -2066,97 +2501,76 @@ stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc, local->fd = fd_ref (fd); frame->local = local; - local->inode = loc->inode; loc_copy (&local->loc, loc); /* Striped files */ local->flags = flags; local->call_count = priv->child_count; - local->stripe_size = stripe_get_matching_bs (loc->path, - priv->pattern, - priv->block_size); - - if (priv->xattr_supported) { - while (trav) { - STACK_WIND (frame, stripe_open_getxattr_cbk, - trav->xlator, trav->xlator->fops->getxattr, - loc, NULL); - trav = trav->next; - } - } else { - local->fctx = CALLOC (1, sizeof (stripe_fd_ctx_t)); - if (!local->fctx) { - op_errno = ENOMEM; - goto err; - } - - local->fctx->static_array = 1; - local->fctx->stripe_size = local->stripe_size; - local->fctx->stripe_count = priv->child_count; - local->fctx->xl_array = priv->xl_array; - - while (trav) { - STACK_WIND (frame, stripe_open_cbk, trav->xlator, - trav->xlator->fops->open, - &local->loc, local->flags, local->fd); - trav = trav->next; - } - } + local->stripe_size = stripe_get_matching_bs (loc->path, priv); + while (trav) { + STACK_WIND (frame, stripe_open_cbk, trav->xlator, + trav->xlator->fops->open, + &local->loc, local->flags, local->fd, + xdata); + trav = trav->next; + } return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); +err: + STRIPE_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL); return 0; } -/** - * stripe_opendir_cbk - - */ + int32_t stripe_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, fd_t *fd) + int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata) { int32_t callcnt = 0; - stripe_local_t *local = frame->local; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + local = frame->local; LOCK (&frame->lock); { callcnt = --local->call_count; if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s", - ((call_frame_t *)cookie)->this->name, - strerror (op_errno)); + prev->this->name, strerror (op_errno)); local->op_ret = -1; - local->failed = 1; local->op_errno = op_errno; } - - if (op_ret >= 0) + + if (op_ret >= 0) local->op_ret = op_ret; } UNLOCK (&frame->lock); if (!callcnt) { - STACK_UNWIND (frame, local->op_ret, local->op_errno, - local->fd); + STRIPE_STACK_UNWIND (opendir, frame, local->op_ret, + local->op_errno, local->fd, NULL); } - +out: return 0; } -/** - * stripe_opendir - - */ int32_t -stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) +stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata) { xlator_list_t *trav = NULL; stripe_local_t *local = NULL; stripe_private_t *priv = NULL; - int32_t op_errno = 1; + int32_t op_errno = EINVAL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -2173,118 +2587,61 @@ stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd) } /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } frame->local = local; - local->inode = loc->inode; - local->fd = fd; local->call_count = priv->child_count; + local->fd = fd_ref (fd); while (trav) { STACK_WIND (frame, stripe_opendir_cbk, trav->xlator, - trav->xlator->fops->opendir, loc, fd); + trav->xlator->fops->opendir, loc, fd, NULL); trav = trav->next; } - - return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); - return 0; -} - - -/** - * stripe_getxattr_cbk - - */ -int32_t -stripe_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *value) -{ - STACK_UNWIND (frame, op_ret, op_errno, value); - return 0; -} - - -/** - * stripe_getxattr - - */ -int32_t -stripe_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name) -{ - int32_t op_errno = 1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (loc->inode, err); - - STACK_WIND (frame, stripe_getxattr_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, loc, name); return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); +err: + STRIPE_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL); return 0; } -/** - * stripe_removexattr - - */ -int32_t -stripe_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name) -{ - int32_t op_errno = 1; - - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (loc, err); - VALIDATE_OR_GOTO (loc->path, err); - VALIDATE_OR_GOTO (loc->inode, err); - - STACK_WIND (frame, stripe_common_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->removexattr, loc, name); - - return 0; - err: - STACK_UNWIND (frame, -1, op_errno); - return 0; -} - - -/** - * stripe_lk_cbk - - */ int32_t stripe_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct flock *lock) + int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; local = frame->local; LOCK (&frame->lock); { callcnt = --local->call_count; if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, + gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s", - ((call_frame_t *)cookie)->this->name, - strerror (op_errno)); + prev->this->name, strerror (op_errno)); local->op_errno = op_errno; - if (op_errno == ENOTCONN) + if ((op_errno != ENOENT) || + (prev->this == FIRST_CHILD (this))) local->failed = 1; } - if (op_ret == 0 && local->op_ret == -1) { - /* First successful call, copy the *lock */ - local->op_ret = 0; - local->lock = *lock; + if (op_ret >= 0) { + if (FIRST_CHILD(this) == prev->this) { + /* First successful call, copy the *lock */ + local->op_ret = op_ret; + local->lock = *lock; + } } } UNLOCK (&frame->lock); @@ -2292,24 +2649,21 @@ stripe_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!callcnt) { if (local->failed) local->op_ret = -1; - STACK_UNWIND (frame, local->op_ret, - local->op_errno, &local->lock); + STRIPE_STACK_UNWIND (lk, frame, local->op_ret, + local->op_errno, &local->lock, NULL); } +out: return 0; } - -/** - * stripe_lk - - */ int32_t stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, - struct flock *lock) + struct gf_flock *lock, dict_t *xdata) { stripe_local_t *local = NULL; xlator_list_t *trav = NULL; stripe_private_t *priv = NULL; - int32_t op_errno = 1; + int32_t op_errno = EINVAL; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -2320,77 +2674,75 @@ stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, priv = this->private; /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } local->op_ret = -1; frame->local = local; - local->call_count = priv->child_count; - + while (trav) { STACK_WIND (frame, stripe_lk_cbk, trav->xlator, - trav->xlator->fops->lk, fd, cmd, lock); + trav->xlator->fops->lk, fd, cmd, lock, NULL); trav = trav->next; } return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); +err: + STRIPE_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL); return 0; } -/** - * stripe_writedir - - */ + int32_t -stripe_setdents (call_frame_t *frame, xlator_t *this, fd_t *fd, - int32_t flags, dir_entry_t *entries, int32_t count) +stripe_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - int32_t op_errno = 1; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (fd->inode, err); + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - priv = this->private; - trav = this->children; + prev = cookie; + local = frame->local; - /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; + LOCK (&frame->lock); + { + callcnt = --local->call_count; - while (trav) { - STACK_WIND (frame, stripe_stack_unwind_cbk, trav->xlator, - trav->xlator->fops->setdents, fd, flags, entries, - count); - trav = trav->next; + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "%s returned %s", + prev->this->name, strerror (op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || + (prev->this == FIRST_CHILD (this))) + local->failed = 1; + } + if (op_ret >= 0) + local->op_ret = op_ret; } + UNLOCK (&frame->lock); - return 0; - err: - STACK_UNWIND (frame, -1, op_errno); + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + + STRIPE_STACK_UNWIND (flush, frame, local->op_ret, + local->op_errno, NULL); + } +out: return 0; } - -/** - * stripe_flush - - */ int32_t -stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) +stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) { stripe_local_t *local = NULL; stripe_private_t *priv = NULL; @@ -2410,7 +2762,7 @@ stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) goto err; } /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -2418,73 +2770,98 @@ stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd) local->op_ret = -1; frame->local = local; local->call_count = priv->child_count; - + while (trav) { - STACK_WIND (frame, stripe_stack_unwind_cbk, trav->xlator, - trav->xlator->fops->flush, fd); + STACK_WIND (frame, stripe_flush_cbk, trav->xlator, + trav->xlator->fops->flush, fd, NULL); trav = trav->next; } return 0; - err: - STACK_UNWIND (frame, -1, op_errno); +err: + STRIPE_STACK_UNWIND (flush, frame, -1, op_errno, NULL); return 0; } -/** - * stripe_fsync - - */ + int32_t -stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags) +stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - int32_t op_errno = 1; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (fd->inode, err); + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - priv = this->private; - trav = this->children; + prev = cookie; + local = frame->local; - /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); - if (!local) { - op_errno = ENOMEM; - goto err; - } - local->op_ret = -1; - frame->local = local; - local->call_count = priv->child_count; - - while (trav) { - STACK_WIND (frame, stripe_stack_unwind_cbk, trav->xlator, - trav->xlator->fops->fsync, fd, flags); - trav = trav->next; + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "%s returned %s", + prev->this->name, strerror (op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || + (prev->this == FIRST_CHILD (this))) + local->failed = 1; + } + if (op_ret >= 0) { + local->op_ret = op_ret; + if (FIRST_CHILD(this) == prev->this) { + local->pre_buf = *prebuf; + local->post_buf = *postbuf; + } + local->prebuf_blocks += prebuf->ia_blocks; + local->postbuf_blocks += postbuf->ia_blocks; + + correct_file_size(prebuf, local->fctx, prev); + correct_file_size(postbuf, local->fctx, prev); + + if (local->prebuf_size < prebuf->ia_size) + local->prebuf_size = prebuf->ia_size; + + if (local->postbuf_size < postbuf->ia_size) + local->postbuf_size = postbuf->ia_size; + } } + UNLOCK (&frame->lock); - return 0; - err: - STACK_UNWIND (frame, -1, op_errno); + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + + if (local->op_ret != -1) { + local->pre_buf.ia_blocks = local->prebuf_blocks; + local->pre_buf.ia_size = local->prebuf_size; + local->post_buf.ia_blocks = local->postbuf_blocks; + local->post_buf.ia_size = local->postbuf_size; + } + + STRIPE_STACK_UNWIND (fsync, frame, local->op_ret, + local->op_errno, &local->pre_buf, + &local->post_buf, NULL); + } +out: return 0; } - -/** - * stripe_fstat - - */ int32_t -stripe_fstat (call_frame_t *frame, - xlator_t *this, - fd_t *fd) +stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata) { stripe_local_t *local = NULL; stripe_private_t *priv = NULL; xlator_list_t *trav = NULL; + stripe_fd_ctx_t *fctx = NULL; int32_t op_errno = 1; VALIDATE_OR_GOTO (frame, err); @@ -2496,82 +2873,107 @@ stripe_fstat (call_frame_t *frame, trav = this->children; /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } + + inode_ctx_get(fd->inode, this, (uint64_t *) &fctx); + if (!fctx) { + op_errno = EINVAL; + goto err; + } + local->fctx = fctx; + local->op_ret = -1; frame->local = local; - local->inode = fd->inode; local->call_count = priv->child_count; - + while (trav) { - STACK_WIND (frame, stripe_stack_unwind_buf_cbk, trav->xlator, - trav->xlator->fops->fstat, fd); + STACK_WIND (frame, stripe_fsync_cbk, trav->xlator, + trav->xlator->fops->fsync, fd, flags, NULL); trav = trav->next; } return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); +err: + STRIPE_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL); return 0; } - -/** - * stripe_fchmod - - */ -int32_t -stripe_fchmod (call_frame_t *frame, xlator_t *this, fd_t *fd, mode_t mode) +int32_t +stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) { - stripe_local_t *local = NULL; - stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - int32_t op_errno = 1; + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; - VALIDATE_OR_GOTO (frame, err); - VALIDATE_OR_GOTO (this, err); - VALIDATE_OR_GOTO (fd, err); - VALIDATE_OR_GOTO (fd->inode, err); + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } - priv = this->private; - trav = this->children; + prev = cookie; + local = frame->local; - /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); - if (!local) { - op_errno = ENOMEM; - goto err; + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "%s returned error %s", + prev->this->name, strerror (op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || + (prev->this == FIRST_CHILD (this))) + local->failed = 1; + } + + if (op_ret == 0) { + local->op_ret = 0; + + if (FIRST_CHILD(this) == prev->this) + local->stbuf = *buf; + + local->stbuf_blocks += buf->ia_blocks; + + correct_file_size(buf, local->fctx, prev); + + if (local->stbuf_size < buf->ia_size) + local->stbuf_size = buf->ia_size; + } } - local->op_ret = -1; - frame->local = local; - local->inode = fd->inode; - local->call_count = priv->child_count; - - while (trav) { - STACK_WIND (frame, stripe_stack_unwind_buf_cbk, trav->xlator, - trav->xlator->fops->fchmod, fd, mode); - trav = trav->next; + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + + if (local->op_ret != -1) { + local->stbuf.ia_size = local->stbuf_size; + local->stbuf.ia_blocks = local->stbuf_blocks; + } + + STRIPE_STACK_UNWIND (fstat, frame, local->op_ret, + local->op_errno, &local->stbuf, NULL); } - return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); +out: return 0; } - -/** - * stripe_fchown - - */ -int32_t -stripe_fchown (call_frame_t *frame, xlator_t *this, fd_t *fd, uid_t uid, - gid_t gid) +int32_t +stripe_fstat (call_frame_t *frame, + xlator_t *this, + fd_t *fd, dict_t *xdata) { stripe_local_t *local = NULL; stripe_private_t *priv = NULL; xlator_list_t *trav = NULL; + stripe_fd_ctx_t *fctx = NULL; int32_t op_errno = 1; VALIDATE_OR_GOTO (frame, err); @@ -2583,39 +2985,44 @@ stripe_fchown (call_frame_t *frame, xlator_t *this, fd_t *fd, uid_t uid, trav = this->children; /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } local->op_ret = -1; frame->local = local; - local->inode = fd->inode; local->call_count = priv->child_count; - + + if (IA_ISREG(fd->inode->ia_type)) { + inode_ctx_get(fd->inode, this, (uint64_t *) &fctx); + if (!fctx) + goto err; + local->fctx = fctx; + } + while (trav) { - STACK_WIND (frame, stripe_stack_unwind_buf_cbk, trav->xlator, - trav->xlator->fops->fchown, fd, uid, gid); + STACK_WIND (frame, stripe_fstat_cbk, trav->xlator, + trav->xlator->fops->fstat, fd, NULL); trav = trav->next; } return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); +err: + STRIPE_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL); return 0; } -/** - * stripe_ftruncate - - */ int32_t -stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) +stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata) { stripe_local_t *local = NULL; stripe_private_t *priv = NULL; - xlator_list_t *trav = NULL; - int32_t op_errno = 1; + stripe_fd_ctx_t *fctx = NULL; + int i, eof_idx; + off_t dest_offset, tmp_offset; + int32_t op_errno = 1; VALIDATE_OR_GOTO (frame, err); VALIDATE_OR_GOTO (this, err); @@ -2623,37 +3030,115 @@ stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset) VALIDATE_OR_GOTO (fd->inode, err); priv = this->private; - trav = this->children; /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } local->op_ret = -1; frame->local = local; - local->inode = fd->inode; local->call_count = priv->child_count; - - while (trav) { - STACK_WIND (frame, stripe_stack_unwind_buf_cbk, trav->xlator, - trav->xlator->fops->ftruncate, fd, offset); - trav = trav->next; - } + + inode_ctx_get(fd->inode, this, (uint64_t *) &fctx); + if (!fctx) { + gf_log(this->name, GF_LOG_ERROR, "no stripe context"); + op_errno = EINVAL; + goto err; + } + if (!fctx->stripe_count) { + gf_log(this->name, GF_LOG_ERROR, "no stripe count"); + op_errno = EINVAL; + goto err; + } + + local->fctx = fctx; + eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count; + + for (i = 0; i < fctx->stripe_count; i++) { + if (!fctx->xl_array[i]) { + gf_log(this->name, GF_LOG_ERROR, "no xlator at index " + "%d", i); + op_errno = EINVAL; + goto err; + } + + if (fctx->stripe_coalesce) { + if (i < eof_idx) + tmp_offset = roof(offset, fctx->stripe_size * + fctx->stripe_count); + else if (i > eof_idx) + tmp_offset = floor(offset, fctx->stripe_size * + fctx->stripe_count); + else + tmp_offset = offset; + + dest_offset = coalesced_offset(tmp_offset, + fctx->stripe_size, fctx->stripe_count); + } else { + dest_offset = offset; + } + + STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i], + fctx->xl_array[i]->fops->ftruncate, fd, dest_offset, + NULL); + } return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); +err: + STRIPE_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL); return 0; } -/** - * stripe_fsyncdir - - */ int32_t -stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags) +stripe_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + int32_t callcnt = 0; + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + local = frame->local; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + + if (op_ret == -1) { + gf_log (this->name, GF_LOG_DEBUG, + "%s returned %s", + prev->this->name, strerror (op_errno)); + local->op_errno = op_errno; + if ((op_errno != ENOENT) || + (prev->this == FIRST_CHILD (this))) + local->failed = 1; + } + if (op_ret >= 0) + local->op_ret = op_ret; + } + UNLOCK (&frame->lock); + + if (!callcnt) { + if (local->failed) + local->op_ret = -1; + + STRIPE_STACK_UNWIND (fsyncdir, frame, local->op_ret, + local->op_errno, NULL); + } +out: + return 0; +} + +int32_t +stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata) { stripe_local_t *local = NULL; stripe_private_t *priv = NULL; @@ -2669,7 +3154,7 @@ stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags) trav = this->children; /* Initialization */ - local = CALLOC (1, sizeof (stripe_local_t)); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; @@ -2679,29 +3164,114 @@ stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags) local->call_count = priv->child_count; while (trav) { - STACK_WIND (frame, stripe_stack_unwind_cbk, trav->xlator, - trav->xlator->fops->fsyncdir, fd, flags); + STACK_WIND (frame, stripe_fsyncdir_cbk, trav->xlator, + trav->xlator->fops->fsyncdir, fd, flags, NULL); trav = trav->next; } return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); +err: + STRIPE_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL); return 0; } -/** - * stripe_single_readv_cbk - This function is used as return fn, when the - * file name doesn't match the pattern specified for striping. - */ int32_t -stripe_single_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct iovec *vector, int32_t count, - struct stat *stbuf, struct iobref *iobref) +stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata) { - STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref); + int32_t i = 0; + int32_t callcnt = 0; + int32_t count = 0; + stripe_local_t *local = NULL; + struct iovec *vec = NULL; + struct iatt tmp_stbuf = {0,}; + struct iobref *tmp_iobref = NULL; + struct iobuf *iobuf = NULL; + call_frame_t *prev = NULL; + + if (!this || !frame || !frame->local) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + local = frame->local; + prev = cookie; + + LOCK (&frame->lock); + { + callcnt = --local->call_count; + if (op_ret != -1) { + correct_file_size(buf, local->fctx, prev); + if (local->stbuf_size < buf->ia_size) + local->stbuf_size = buf->ia_size; + } + } + UNLOCK (&frame->lock); + + if (!callcnt) { + op_ret = 0; + + /* Keep extra space for filling in '\0's */ + vec = GF_CALLOC ((local->count * 2), sizeof (struct iovec), + gf_stripe_mt_iovec); + if (!vec) { + op_ret = -1; + goto done; + } + + for (i = 0; i < local->wind_count; i++) { + if (local->replies[i].op_ret) { + memcpy ((vec + count), local->replies[i].vector, + (local->replies[i].count * sizeof (struct iovec))); + count += local->replies[i].count; + op_ret += local->replies[i].op_ret; + } + if ((local->replies[i].op_ret < + local->replies[i].requested_size) && + (local->stbuf_size > (local->offset + op_ret))) { + /* Fill in 0s here */ + vec[count].iov_len = + (local->replies[i].requested_size - + local->replies[i].op_ret); + iobuf = iobuf_get2 (this->ctx->iobuf_pool, + vec[count].iov_len); + if (!iobuf) { + gf_log (this->name, GF_LOG_ERROR, + "Out of memory."); + op_ret = -1; + op_errno = ENOMEM; + goto done; + } + memset (iobuf->ptr, 0, vec[count].iov_len); + vec[count].iov_base = iobuf->ptr; + + iobref_add (local->iobref, iobuf); + iobuf_unref(iobuf); + + op_ret += vec[count].iov_len; + count++; + } + GF_FREE (local->replies[i].vector); + } + + /* FIXME: notice that st_ino, and st_dev (gen) will be + * different than what inode will have. Make sure this doesn't + * cause any bugs at higher levels */ + memcpy (&tmp_stbuf, &local->replies[0].stbuf, + sizeof (struct iatt)); + tmp_stbuf.ia_size = local->stbuf_size; + + done: + GF_FREE (local->replies); + tmp_iobref = local->iobref; + STRIPE_STACK_UNWIND (readv, frame, op_ret, op_errno, vec, + count, &tmp_stbuf, tmp_iobref, NULL); + + iobref_unref (tmp_iobref); + GF_FREE (vec); + } +out: return 0; } @@ -2710,116 +3280,155 @@ stripe_single_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, * to above layer after putting it in a single vector. */ int32_t -stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, struct iovec *vector, - int32_t count, struct stat *stbuf, struct iobref *iobref) + int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata) { int32_t index = 0; int32_t callcnt = 0; - call_frame_t *main_frame = NULL; - stripe_local_t *main_local = NULL; - stripe_local_t *local = frame->local; + int32_t final_count = 0; + int32_t need_to_check_proper_size = 0; + call_frame_t *mframe = NULL; + stripe_local_t *mlocal = NULL; + stripe_local_t *local = NULL; + struct iovec *final_vec = NULL; + struct iatt tmp_stbuf = {0,}; + struct iatt *tmp_stbuf_p = NULL; //need it for a warning + struct iobref *tmp_iobref = NULL; + stripe_fd_ctx_t *fctx = NULL; + call_frame_t *prev = NULL; - index = local->node_index; - main_frame = local->orig_frame; - main_local = main_frame->local; + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto end; + } + + local = frame->local; + index = local->node_index; + prev = cookie; + mframe = local->orig_frame; + if (!mframe) + goto out; - LOCK (&main_frame->lock); + mlocal = mframe->local; + if (!mlocal) + goto out; + + fctx = mlocal->fctx; + + LOCK (&mframe->lock); { - main_local->replies[index].op_ret = op_ret; - main_local->replies[index].op_errno = op_errno; + mlocal->replies[index].op_ret = op_ret; + mlocal->replies[index].op_errno = op_errno; + mlocal->replies[index].requested_size = local->readv_size; if (op_ret >= 0) { - main_local->replies[index].stbuf = *stbuf; - main_local->replies[index].count = count; - main_local->replies[index].vector = - iov_dup (vector, count); + mlocal->replies[index].stbuf = *stbuf; + mlocal->replies[index].count = count; + mlocal->replies[index].vector = iov_dup (vector, count); + + correct_file_size(stbuf, fctx, prev); + + if (local->stbuf_size < stbuf->ia_size) + local->stbuf_size = stbuf->ia_size; + local->stbuf_blocks += stbuf->ia_blocks; - if (!main_local->iobref) - main_local->iobref = iobref_new (); - iobref_merge (main_local->iobref, iobref); + if (!mlocal->iobref) + mlocal->iobref = iobref_new (); + iobref_merge (mlocal->iobref, iobref); } - callcnt = ++main_local->call_count; + callcnt = ++mlocal->call_count; } - UNLOCK(&main_frame->lock); - - if (callcnt == main_local->wind_count) { - int32_t final_count = 0; - struct iovec *final_vec = NULL; - struct stat tmp_stbuf = {0,}; - struct iobref *iobref = NULL; + UNLOCK(&mframe->lock); + if (callcnt == mlocal->wind_count) { op_ret = 0; - memcpy (&tmp_stbuf, &main_local->replies[0].stbuf, - sizeof (struct stat)); - for (index=0; index < main_local->wind_count; index++) { - /* TODO: check whether each stripe returned 'expected' - * number of bytes - */ - if (main_local->replies[index].op_ret == -1) { + + for (index=0; index < mlocal->wind_count; index++) { + /* check whether each stripe returned + * 'expected' number of bytes */ + if (mlocal->replies[index].op_ret == -1) { op_ret = -1; - op_errno = main_local->replies[index].op_errno; + op_errno = mlocal->replies[index].op_errno; break; } - op_ret += main_local->replies[index].op_ret; - final_count += main_local->replies[index].count; - /* TODO: Do I need to send anything more in stbuf? */ - if (tmp_stbuf.st_size < - main_local->replies[index].stbuf.st_size) { - tmp_stbuf.st_size = - main_local->replies[index].stbuf.st_size; + /* TODO: handle the 'holes' within the read range + properly */ + if (mlocal->replies[index].op_ret < + mlocal->replies[index].requested_size) { + need_to_check_proper_size = 1; } + + op_ret += mlocal->replies[index].op_ret; + mlocal->count += mlocal->replies[index].count; } - if (op_ret != -1) { - final_vec = CALLOC (final_count, - sizeof (struct iovec)); - if (!final_vec) { - op_ret = -1; - final_count = 0; - goto done; - } + if (op_ret == -1) + goto done; + if (need_to_check_proper_size) + goto check_size; - final_count = 0; + final_vec = GF_CALLOC (mlocal->count, sizeof (struct iovec), + gf_stripe_mt_iovec); - for (index=0; - index < main_local->wind_count; index++) { - memcpy (final_vec + final_count, - main_local->replies[index].vector, - (main_local->replies[index].count * - sizeof (struct iovec))); - final_count += - main_local->replies[index].count; + if (!final_vec) { + op_ret = -1; + goto done; + } - free (main_local->replies[index].vector); - } - } else { - final_vec = NULL; - final_count = 0; + for (index = 0; index < mlocal->wind_count; index++) { + memcpy ((final_vec + final_count), + mlocal->replies[index].vector, + (mlocal->replies[index].count * + sizeof (struct iovec))); + final_count += mlocal->replies[index].count; + GF_FREE (mlocal->replies[index].vector); } + /* FIXME: notice that st_ino, and st_dev (gen) will be + * different than what inode will have. Make sure this doesn't + * cause any bugs at higher levels */ + memcpy (&tmp_stbuf, &mlocal->replies[0].stbuf, + sizeof (struct iatt)); + tmp_stbuf.ia_size = local->stbuf_size; + tmp_stbuf.ia_blocks = local->stbuf_blocks; + done: /* */ - FREE (main_local->replies); - iobref = main_local->iobref; - STACK_UNWIND (main_frame, op_ret, op_errno, - final_vec, final_count, &tmp_stbuf, iobref); + GF_FREE (mlocal->replies); + tmp_iobref = mlocal->iobref; + /* work around for nfs truncated read. Bug 3774 */ + tmp_stbuf_p = &tmp_stbuf; + WIPE (tmp_stbuf_p); + STRIPE_STACK_UNWIND (readv, mframe, op_ret, op_errno, final_vec, + final_count, &tmp_stbuf, tmp_iobref, NULL); + + iobref_unref (tmp_iobref); + GF_FREE (final_vec); + } + + goto out; + +check_size: + mlocal->call_count = fctx->stripe_count; - iobref_unref (iobref); - if (final_vec) - FREE (final_vec); + for (index = 0; index < fctx->stripe_count; index++) { + STACK_WIND (mframe, stripe_readv_fstat_cbk, + (fctx->xl_array[index]), + (fctx->xl_array[index])->fops->fstat, + mlocal->fd, NULL); } - STACK_DESTROY (frame->root); +out: + STRIPE_STACK_DESTROY (frame); +end: return 0; } -/** - * stripe_readv - - */ + int32_t stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, - size_t size, off_t offset) + size_t size, off_t offset, uint32_t flags, dict_t *xdata) { - int32_t op_errno = 1; + int32_t op_errno = EINVAL; int32_t idx = 0; int32_t index = 0; int32_t num_stripe = 0; @@ -2830,17 +3439,18 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, uint64_t stripe_size = 0; off_t rounded_start = 0; off_t frame_offset = offset; + off_t dest_offset = 0; stripe_local_t *local = NULL; call_frame_t *rframe = NULL; stripe_local_t *rlocal = NULL; - xlator_list_t *trav = NULL; - stripe_private_t *priv = NULL; stripe_fd_ctx_t *fctx = NULL; - trav = this->children; - priv = this->private; + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); - fd_ctx_get (fd, this, &tmp_fctx); + inode_ctx_get (fd->inode, this, &tmp_fctx); if (!tmp_fctx) { op_errno = EBADFD; goto err; @@ -2848,122 +3458,183 @@ stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; stripe_size = fctx->stripe_size; - /* The file is stripe across the child nodes. Send the read request - * to the child nodes appropriately after checking which region of + STRIPE_VALIDATE_FCTX (fctx, err); + + if (!stripe_size) { + gf_log (this->name, GF_LOG_DEBUG, + "Wrong stripe size for the file"); + goto err; + } + /* The file is stripe across the child nodes. Send the read request + * to the child nodes appropriately after checking which region of * the file is in which child node. Always '0-<stripe_size>' part of * the file resides in the first child. */ rounded_start = floor (offset, stripe_size); rounded_end = roof (offset+size, stripe_size); - num_stripe = (rounded_end - rounded_start) / stripe_size; - - local = CALLOC (1, sizeof (stripe_local_t)); + num_stripe = (rounded_end- rounded_start)/stripe_size; + + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } - local->wind_count = num_stripe; frame->local = local; - + /* This is where all the vectors should be copied. */ - local->replies = CALLOC (num_stripe, sizeof (struct readv_replies)); + local->replies = GF_CALLOC (num_stripe, sizeof (struct stripe_replies), + gf_stripe_mt_stripe_replies); if (!local->replies) { op_errno = ENOMEM; goto err; } - + off_index = (offset / stripe_size) % fctx->stripe_count; - + local->wind_count = num_stripe; + local->readv_size = size; + local->offset = offset; + local->fd = fd_ref (fd); + local->fctx = fctx; + for (index = off_index; index < (num_stripe + off_index); index++) { rframe = copy_frame (frame); - rlocal = CALLOC (1, sizeof (stripe_local_t)); + rlocal = mem_get0 (this->local_pool); if (!rlocal) { op_errno = ENOMEM; goto err; } - + frame_size = min (roof (frame_offset+1, stripe_size), (offset + size)) - frame_offset; - + rlocal->node_index = index - off_index; rlocal->orig_frame = frame; + rlocal->readv_size = frame_size; rframe->local = rlocal; idx = (index % fctx->stripe_count); + + if (fctx->stripe_coalesce) + dest_offset = coalesced_offset(frame_offset, + stripe_size, fctx->stripe_count); + else + dest_offset = frame_offset; + STACK_WIND (rframe, stripe_readv_cbk, fctx->xl_array[idx], fctx->xl_array[idx]->fops->readv, - fd, frame_size, frame_offset); - + fd, frame_size, dest_offset, flags, xdata); + frame_offset += frame_size; } return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); +err: + if (rframe) + STRIPE_STACK_DESTROY (rframe); + + STRIPE_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL); return 0; } -/** - * stripe_writev_cbk - - */ int32_t stripe_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct stat *stbuf) + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; + stripe_local_t *mlocal = NULL; + call_frame_t *prev = NULL; + call_frame_t *mframe = NULL; + struct stripe_replies *reply = NULL; + int32_t i = 0; + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; local = frame->local; + mframe = local->orig_frame; + mlocal = mframe->local; LOCK(&frame->lock); { - callcnt = ++local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - ((call_frame_t *)cookie)->this->name, - strerror (op_errno)); - local->op_errno = op_errno; - local->op_ret = -1; - } + callcnt = ++mlocal->call_count; + + mlocal->replies[local->node_index].op_ret = op_ret; + mlocal->replies[local->node_index].op_errno = op_errno; + if (op_ret >= 0) { - local->op_ret += op_ret; - local->stbuf = *stbuf; + mlocal->post_buf = *postbuf; + mlocal->pre_buf = *prebuf; + + mlocal->prebuf_blocks += prebuf->ia_blocks; + mlocal->postbuf_blocks += postbuf->ia_blocks; + + correct_file_size(prebuf, mlocal->fctx, prev); + correct_file_size(postbuf, mlocal->fctx, prev); + + if (mlocal->prebuf_size < prebuf->ia_size) + mlocal->prebuf_size = prebuf->ia_size; + if (mlocal->postbuf_size < postbuf->ia_size) + mlocal->postbuf_size = postbuf->ia_size; } } UNLOCK (&frame->lock); - if ((callcnt == local->wind_count) && local->unwind) { - STACK_UNWIND (frame, local->op_ret, - local->op_errno, &local->stbuf); + if ((callcnt == mlocal->wind_count) && mlocal->unwind) { + mlocal->pre_buf.ia_size = mlocal->prebuf_size; + mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; + mlocal->post_buf.ia_size = mlocal->postbuf_size; + mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; + + /* + * Only return the number of consecutively written bytes up until + * the first error. Only return an error if it occurs first. + * + * When a short write occurs, the application should retry at the + * appropriate offset, at which point we'll potentially pass back + * the error. + */ + for (i = 0, reply = mlocal->replies; i < mlocal->wind_count; + i++, reply++) { + if (reply->op_ret == -1) { + gf_log(this->name, GF_LOG_DEBUG, "reply %d " + "returned error %s", i, + strerror(reply->op_errno)); + if (!mlocal->op_ret) { + mlocal->op_ret = -1; + mlocal->op_errno = reply->op_errno; + } + break; + } + + mlocal->op_ret += reply->op_ret; + + if (reply->op_ret < reply->requested_size) + break; + } + + GF_FREE(mlocal->replies); + + STRIPE_STACK_UNWIND (writev, mframe, mlocal->op_ret, + mlocal->op_errno, &mlocal->pre_buf, + &mlocal->post_buf, NULL); } +out: + STRIPE_STACK_DESTROY(frame); return 0; } - -/** - * stripe_single_writev_cbk - - */ -int32_t -stripe_single_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct stat *stbuf) -{ - STACK_UNWIND (frame, op_ret, op_errno, stbuf); - return 0; -} -/** - * stripe_writev - - */ int32_t stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, int32_t count, off_t offset, - struct iobref *iobref) + uint32_t flags, struct iobref *iobref, dict_t *xdata) { - struct iovec *tmp_vec = vector; - stripe_private_t *priv = NULL; + struct iovec *tmp_vec = NULL; stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; stripe_fd_ctx_t *fctx = NULL; int32_t op_errno = 1; int32_t idx = 0; @@ -2974,10 +3645,19 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t fill_size = 0; uint64_t stripe_size = 0; uint64_t tmp_fctx = 0; + off_t dest_offset = 0; + off_t rounded_start = 0; + off_t rounded_end = 0; + int32_t total_chunks = 0; + call_frame_t *wframe = NULL; + stripe_local_t *wlocal = NULL; - priv = this->private; + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); - fd_ctx_get (fd, this, &tmp_fctx); + inode_ctx_get (fd->inode, this, &tmp_fctx); if (!tmp_fctx) { op_errno = EINVAL; goto err; @@ -2985,29 +3665,57 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; stripe_size = fctx->stripe_size; + STRIPE_VALIDATE_FCTX (fctx, err); + /* File has to be stripped across the child nodes */ for (idx = 0; idx< count; idx ++) { - total_size += tmp_vec[idx].iov_len; + total_size += vector[idx].iov_len; } remaining_size = total_size; - local = CALLOC (1, sizeof (stripe_local_t)); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } frame->local = local; local->stripe_size = stripe_size; + local->fctx = fctx; + + if (!stripe_size) { + gf_log (this->name, GF_LOG_DEBUG, + "Wrong stripe size for the file"); + op_errno = EINVAL; + goto err; + } + rounded_start = floor(offset, stripe_size); + rounded_end = roof(offset + total_size, stripe_size); + total_chunks = (rounded_end - rounded_start) / stripe_size; + local->replies = GF_CALLOC(total_chunks, sizeof(struct stripe_replies), + gf_stripe_mt_stripe_replies); + if (!local->replies) { + op_errno = ENOMEM; + goto err; + } + + total_chunks = 0; while (1) { - /* Send striped chunk of the vector to child + wframe = copy_frame(frame); + wlocal = mem_get0(this->local_pool); + if (!wlocal) { + op_errno = ENOMEM; + goto err; + } + wlocal->orig_frame = frame; + wframe->local = wlocal; + + /* Send striped chunk of the vector to child nodes appropriately. */ - trav = this->children; - - idx = (((offset + offset_offset) / + idx = (((offset + offset_offset) / local->stripe_size) % fctx->stripe_count); - fill_size = (local->stripe_size - + fill_size = (local->stripe_size - ((offset + offset_offset) % local->stripe_size)); if (fill_size > remaining_size) fill_size = remaining_size; @@ -3016,158 +3724,619 @@ stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, tmp_count = iov_subset (vector, count, offset_offset, offset_offset + fill_size, NULL); - tmp_vec = CALLOC (tmp_count, sizeof (struct iovec)); + tmp_vec = GF_CALLOC (tmp_count, sizeof (struct iovec), + gf_stripe_mt_iovec); if (!tmp_vec) { op_errno = ENOMEM; goto err; } tmp_count = iov_subset (vector, count, offset_offset, offset_offset + fill_size, tmp_vec); - + local->wind_count++; if (remaining_size == 0) local->unwind = 1; - STACK_WIND(frame, stripe_writev_cbk, fctx->xl_array[idx], - fctx->xl_array[idx]->fops->writev, fd, tmp_vec, - tmp_count, offset + offset_offset, iobref); - FREE (tmp_vec); + /* + * Store off the request index (with respect to the chunk of the + * initial offset) and the size of the request. This is required + * in the callback to calculate an appropriate return value in + * the event of a write failure in one or more requests. + */ + wlocal->node_index = total_chunks; + local->replies[total_chunks].requested_size = fill_size; + + dest_offset = offset + offset_offset; + if (fctx->stripe_coalesce) + dest_offset = coalesced_offset(dest_offset, + local->stripe_size, fctx->stripe_count); + + STACK_WIND (wframe, stripe_writev_cbk, fctx->xl_array[idx], + fctx->xl_array[idx]->fops->writev, fd, tmp_vec, + tmp_count, dest_offset, flags, iobref, + xdata); + + GF_FREE (tmp_vec); offset_offset += fill_size; + total_chunks++; if (remaining_size == 0) break; } return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); +err: + if (wframe) + STRIPE_STACK_DESTROY(wframe); + + STRIPE_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL); return 0; } +int32_t +stripe_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + int32_t callcnt = 0; + stripe_local_t *local = NULL; + stripe_local_t *mlocal = NULL; + call_frame_t *prev = NULL; + call_frame_t *mframe = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + local = frame->local; + mframe = local->orig_frame; + mlocal = mframe->local; + + LOCK(&frame->lock); + { + callcnt = ++mlocal->call_count; + + if (op_ret == 0) { + mlocal->post_buf = *postbuf; + mlocal->pre_buf = *prebuf; + + mlocal->prebuf_blocks += prebuf->ia_blocks; + mlocal->postbuf_blocks += postbuf->ia_blocks; + + correct_file_size(prebuf, mlocal->fctx, prev); + correct_file_size(postbuf, mlocal->fctx, prev); + + if (mlocal->prebuf_size < prebuf->ia_size) + mlocal->prebuf_size = prebuf->ia_size; + if (mlocal->postbuf_size < postbuf->ia_size) + mlocal->postbuf_size = postbuf->ia_size; + } + + /* return the first failure */ + if (mlocal->op_ret == 0) { + mlocal->op_ret = op_ret; + mlocal->op_errno = op_errno; + } + } + UNLOCK (&frame->lock); + + if ((callcnt == mlocal->wind_count) && mlocal->unwind) { + mlocal->pre_buf.ia_size = mlocal->prebuf_size; + mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; + mlocal->post_buf.ia_size = mlocal->postbuf_size; + mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; + + STRIPE_STACK_UNWIND (fallocate, mframe, mlocal->op_ret, + mlocal->op_errno, &mlocal->pre_buf, + &mlocal->post_buf, NULL); + } +out: + STRIPE_STACK_DESTROY(frame); + return 0; +} + +int32_t +stripe_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, + off_t offset, size_t len, dict_t *xdata) +{ + stripe_local_t *local = NULL; + stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = 1; + int32_t idx = 0; + int32_t offset_offset = 0; + int32_t remaining_size = 0; + off_t fill_size = 0; + uint64_t stripe_size = 0; + uint64_t tmp_fctx = 0; + off_t dest_offset = 0; + call_frame_t *fframe = NULL; + stripe_local_t *flocal = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + inode_ctx_get (fd->inode, this, &tmp_fctx); + if (!tmp_fctx) { + op_errno = EINVAL; + goto err; + } + fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; + stripe_size = fctx->stripe_size; + + STRIPE_VALIDATE_FCTX (fctx, err); + + remaining_size = len; + + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + frame->local = local; + local->stripe_size = stripe_size; + local->fctx = fctx; + + if (!stripe_size) { + gf_log (this->name, GF_LOG_DEBUG, + "Wrong stripe size for the file"); + op_errno = EINVAL; + goto err; + } + + while (1) { + fframe = copy_frame(frame); + flocal = mem_get0(this->local_pool); + if (!flocal) { + op_errno = ENOMEM; + goto err; + } + flocal->orig_frame = frame; + fframe->local = flocal; + + /* send fallocate request to the associated child node */ + idx = (((offset + offset_offset) / + local->stripe_size) % fctx->stripe_count); + + fill_size = (local->stripe_size - + ((offset + offset_offset) % local->stripe_size)); + if (fill_size > remaining_size) + fill_size = remaining_size; + + remaining_size -= fill_size; + + local->wind_count++; + if (remaining_size == 0) + local->unwind = 1; + + dest_offset = offset + offset_offset; + if (fctx->stripe_coalesce) + dest_offset = coalesced_offset(dest_offset, + local->stripe_size, fctx->stripe_count); + + /* + * TODO: Create a separate handler for coalesce mode that sends a + * single fallocate per-child (since the ranges are linear). + */ + STACK_WIND(fframe, stripe_fallocate_cbk, fctx->xl_array[idx], + fctx->xl_array[idx]->fops->fallocate, fd, mode, + dest_offset, fill_size, xdata); + + offset_offset += fill_size; + if (remaining_size == 0) + break; + } + + return 0; +err: + if (fframe) + STRIPE_STACK_DESTROY(fframe); + + STRIPE_STACK_UNWIND (fallocate, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} -/* Management operations */ -/** - * stripe_stats_cbk - Add all the fields received from different clients. - * Once all the clients return, send stats to above layer. - * - */ int32_t -stripe_stats_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct xlator_stats *stats) +stripe_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { int32_t callcnt = 0; stripe_local_t *local = NULL; + stripe_local_t *mlocal = NULL; + call_frame_t *prev = NULL; + call_frame_t *mframe = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + prev = cookie; local = frame->local; + mframe = local->orig_frame; + mlocal = mframe->local; LOCK(&frame->lock); { - callcnt = --local->call_count; - - if (op_ret == -1) { - gf_log (this->name, GF_LOG_DEBUG, - "%s returned error %s", - ((call_frame_t *)cookie)->this->name, - strerror (op_errno)); - local->op_ret = -1; - local->op_errno = op_errno; - } + callcnt = ++mlocal->call_count; + if (op_ret == 0) { - if (local->op_ret == -2) { - /* This is to make sure this is the - first time */ - local->stats = *stats; - local->op_ret = 0; - } else { - local->stats.nr_files += stats->nr_files; - local->stats.free_disk += stats->free_disk; - local->stats.disk_usage += stats->disk_usage; - local->stats.nr_clients += stats->nr_clients; - } + mlocal->post_buf = *postbuf; + mlocal->pre_buf = *prebuf; + + mlocal->prebuf_blocks += prebuf->ia_blocks; + mlocal->postbuf_blocks += postbuf->ia_blocks; + + correct_file_size(prebuf, mlocal->fctx, prev); + correct_file_size(postbuf, mlocal->fctx, prev); + + if (mlocal->prebuf_size < prebuf->ia_size) + mlocal->prebuf_size = prebuf->ia_size; + if (mlocal->postbuf_size < postbuf->ia_size) + mlocal->postbuf_size = postbuf->ia_size; } + + /* return the first failure */ + if (mlocal->op_ret == 0) { + mlocal->op_ret = op_ret; + mlocal->op_errno = op_errno; + } } UNLOCK (&frame->lock); - if (!callcnt) { - STACK_UNWIND (frame, local->op_ret, local->op_errno, - &local->stats); - } + if ((callcnt == mlocal->wind_count) && mlocal->unwind) { + mlocal->pre_buf.ia_size = mlocal->prebuf_size; + mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; + mlocal->post_buf.ia_size = mlocal->postbuf_size; + mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; + STRIPE_STACK_UNWIND (discard, mframe, mlocal->op_ret, + mlocal->op_errno, &mlocal->pre_buf, + &mlocal->post_buf, NULL); + } +out: + STRIPE_STACK_DESTROY(frame); return 0; } -/** - * stripe_stats - - */ int32_t -stripe_stats (call_frame_t *frame, xlator_t *this, int32_t flags) +stripe_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) { stripe_local_t *local = NULL; - xlator_list_t *trav = NULL; - stripe_private_t *priv = NULL; + stripe_fd_ctx_t *fctx = NULL; int32_t op_errno = 1; + int32_t idx = 0; + int32_t offset_offset = 0; + int32_t remaining_size = 0; + off_t fill_size = 0; + uint64_t stripe_size = 0; + uint64_t tmp_fctx = 0; + off_t dest_offset = 0; + call_frame_t *fframe = NULL; + stripe_local_t *flocal = NULL; - priv = this->private; - trav = this->children; + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + inode_ctx_get (fd->inode, this, &tmp_fctx); + if (!tmp_fctx) { + op_errno = EINVAL; + goto err; + } + fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; + stripe_size = fctx->stripe_size; + + STRIPE_VALIDATE_FCTX (fctx, err); + + remaining_size = len; - local = CALLOC (1, sizeof (stripe_local_t)); + local = mem_get0 (this->local_pool); if (!local) { op_errno = ENOMEM; goto err; } frame->local = local; - local->op_ret = -2; /* to be used as a flag in _cbk */ - local->call_count = priv->child_count; + local->stripe_size = stripe_size; + local->fctx = fctx; - while (trav) { - STACK_WIND (frame, stripe_stats_cbk, trav->xlator, - trav->xlator->mops->stats, flags); - trav = trav->next; + if (!stripe_size) { + gf_log (this->name, GF_LOG_DEBUG, + "Wrong stripe size for the file"); + op_errno = EINVAL; + goto err; + } + + while (1) { + fframe = copy_frame(frame); + flocal = mem_get0(this->local_pool); + if (!flocal) { + op_errno = ENOMEM; + goto err; + } + flocal->orig_frame = frame; + fframe->local = flocal; + + /* send discard request to the associated child node */ + idx = (((offset + offset_offset) / + local->stripe_size) % fctx->stripe_count); + + fill_size = (local->stripe_size - + ((offset + offset_offset) % local->stripe_size)); + if (fill_size > remaining_size) + fill_size = remaining_size; + + remaining_size -= fill_size; + + local->wind_count++; + if (remaining_size == 0) + local->unwind = 1; + + dest_offset = offset + offset_offset; + if (fctx->stripe_coalesce) + dest_offset = coalesced_offset(dest_offset, + local->stripe_size, fctx->stripe_count); + + /* + * TODO: Create a separate handler for coalesce mode that sends a + * single discard per-child (since the ranges are linear). + */ + STACK_WIND(fframe, stripe_discard_cbk, fctx->xl_array[idx], + fctx->xl_array[idx]->fops->discard, fd, dest_offset, + fill_size, xdata); + + offset_offset += fill_size; + if (remaining_size == 0) + break; } + return 0; - err: - STACK_UNWIND (frame, -1, op_errno, NULL); +err: + if (fframe) + STRIPE_STACK_DESTROY(fframe); + + STRIPE_STACK_UNWIND (discard, frame, -1, op_errno, NULL, NULL, NULL); + return 0; +} + +int32_t +stripe_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + int32_t callcnt = 0; + stripe_local_t *local = NULL; + stripe_local_t *mlocal = NULL; + call_frame_t *prev = NULL; + call_frame_t *mframe = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + + prev = cookie; + local = frame->local; + mframe = local->orig_frame; + mlocal = mframe->local; + + LOCK(&frame->lock); + { + callcnt = ++mlocal->call_count; + + if (op_ret == 0) { + mlocal->post_buf = *postbuf; + mlocal->pre_buf = *prebuf; + + mlocal->prebuf_blocks += prebuf->ia_blocks; + mlocal->postbuf_blocks += postbuf->ia_blocks; + + correct_file_size(prebuf, mlocal->fctx, prev); + correct_file_size(postbuf, mlocal->fctx, prev); + + if (mlocal->prebuf_size < prebuf->ia_size) + mlocal->prebuf_size = prebuf->ia_size; + if (mlocal->postbuf_size < postbuf->ia_size) + mlocal->postbuf_size = postbuf->ia_size; + } + + /* return the first failure */ + if (mlocal->op_ret == 0) { + mlocal->op_ret = op_ret; + mlocal->op_errno = op_errno; + } + } + UNLOCK (&frame->lock); + + if ((callcnt == mlocal->wind_count) && mlocal->unwind) { + mlocal->pre_buf.ia_size = mlocal->prebuf_size; + mlocal->pre_buf.ia_blocks = mlocal->prebuf_blocks; + mlocal->post_buf.ia_size = mlocal->postbuf_size; + mlocal->post_buf.ia_blocks = mlocal->postbuf_blocks; + + STRIPE_STACK_UNWIND (zerofill, mframe, mlocal->op_ret, + mlocal->op_errno, &mlocal->pre_buf, + &mlocal->post_buf, NULL); + } +out: + STRIPE_STACK_DESTROY(frame); + return 0; +} + +int32_t +stripe_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + size_t len, dict_t *xdata) +{ + stripe_local_t *local = NULL; + stripe_fd_ctx_t *fctx = NULL; + int32_t op_errno = 1; + int32_t idx = 0; + int32_t offset_offset = 0; + int32_t remaining_size = 0; + off_t fill_size = 0; + uint64_t stripe_size = 0; + uint64_t tmp_fctx = 0; + off_t dest_offset = 0; + call_frame_t *fframe = NULL; + stripe_local_t *flocal = NULL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + VALIDATE_OR_GOTO (fd->inode, err); + + inode_ctx_get (fd->inode, this, &tmp_fctx); + if (!tmp_fctx) { + op_errno = EINVAL; + goto err; + } + fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; + stripe_size = fctx->stripe_size; + + STRIPE_VALIDATE_FCTX (fctx, err); + + remaining_size = len; + + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; + stripe_size = fctx->stripe_size; + + STRIPE_VALIDATE_FCTX (fctx, err); + + remaining_size = len; + + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; + stripe_size = fctx->stripe_size; + + STRIPE_VALIDATE_FCTX (fctx, err); + + remaining_size = len; + + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + frame->local = local; + local->stripe_size = stripe_size; + local->fctx = fctx; + + if (!stripe_size) { + gf_log (this->name, GF_LOG_DEBUG, + "Wrong stripe size for the file"); + op_errno = EINVAL; + goto err; + } + + while (1) { + fframe = copy_frame(frame); + flocal = mem_get0(this->local_pool); + if (!flocal) { + op_errno = ENOMEM; + goto err; + } + flocal->orig_frame = frame; + fframe->local = flocal; + + idx = (((offset + offset_offset) / + local->stripe_size) % fctx->stripe_count); + + fill_size = (local->stripe_size - + ((offset + offset_offset) % local->stripe_size)); + if (fill_size > remaining_size) + fill_size = remaining_size; + + remaining_size -= fill_size; + + local->wind_count++; + if (remaining_size == 0) + local->unwind = 1; + + dest_offset = offset + offset_offset; + if (fctx->stripe_coalesce) + dest_offset = coalesced_offset(dest_offset, + local->stripe_size, + fctx->stripe_count); + + STACK_WIND(fframe, stripe_zerofill_cbk, fctx->xl_array[idx], + fctx->xl_array[idx]->fops->zerofill, fd, + dest_offset, fill_size, xdata); + offset_offset += fill_size; + if (remaining_size == 0) + break; + } + + return 0; +err: + if (fframe) + STRIPE_STACK_DESTROY(fframe); + + STRIPE_STACK_UNWIND (zerofill, frame, -1, op_errno, NULL, NULL, NULL); return 0; } int32_t stripe_release (xlator_t *this, fd_t *fd) { + return 0; +} + +int +stripe_forget (xlator_t *this, inode_t *inode) +{ uint64_t tmp_fctx = 0; stripe_fd_ctx_t *fctx = NULL; - fd_ctx_del (fd, this, &tmp_fctx); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (inode, err); + + (void) inode_ctx_del (inode, this, &tmp_fctx); if (!tmp_fctx) { - goto out; + goto err; } - + fctx = (stripe_fd_ctx_t *)(long)tmp_fctx; if (!fctx->static_array) - FREE (fctx->xl_array); - - FREE (fctx); - - out: - return 0; + GF_FREE (fctx->xl_array); + + GF_FREE (fctx); +err: + return 0; } -/** - * notify - */ int32_t notify (xlator_t *this, int32_t event, void *data, ...) { stripe_private_t *priv = NULL; int down_client = 0; int i = 0; + gf_boolean_t heard_from_all_children = _gf_false; + + if (!this) + return 0; priv = this->private; if (!priv) return 0; - switch (event) + switch (event) { case GF_EVENT_CHILD_UP: { @@ -3176,24 +4345,28 @@ notify (xlator_t *this, int32_t event, void *data, ...) if (data == priv->xl_array[i]) break; } - priv->state[i] = 1; - for (i = 0; i < priv->child_count; i++) { - if (!priv->state[i]) - down_client++; + + if (priv->child_count == i) { + gf_log (this->name, GF_LOG_ERROR, + "got GF_EVENT_CHILD_UP bad subvolume %s", + data? ((xlator_t *)data)->name: NULL); + break; } LOCK (&priv->lock); { - priv->nodes_down = down_client; - - if (data == FIRST_CHILD (this)) { + if (data == FIRST_CHILD (this)) priv->first_child_down = 0; - default_notify (this, event, data); - } + priv->last_event[i] = event; } UNLOCK (&priv->lock); } break; + case GF_EVENT_CHILD_CONNECTING: + { + // 'CONNECTING' doesn't ensure its CHILD_UP, so do nothing + goto out; + } case GF_EVENT_CHILD_DOWN: { /* get an index number to set */ @@ -3201,20 +4374,19 @@ notify (xlator_t *this, int32_t event, void *data, ...) if (data == priv->xl_array[i]) break; } - priv->state[i] = 0; - for (i = 0; i < priv->child_count; i++) { - if (!priv->state[i]) - down_client++; + + if (priv->child_count == i) { + gf_log (this->name, GF_LOG_ERROR, + "got GF_EVENT_CHILD_DOWN bad subvolume %s", + data? ((xlator_t *)data)->name: NULL); + break; } LOCK (&priv->lock); { - priv->nodes_down = down_client; - - if (data == FIRST_CHILD (this)) { + if (data == FIRST_CHILD (this)) priv->first_child_down = 1; - default_notify (this, event, data); - } + priv->last_event[i] = event; } UNLOCK (&priv->lock); } @@ -3224,82 +4396,751 @@ notify (xlator_t *this, int32_t event, void *data, ...) { /* */ default_notify (this, event, data); + goto out; } break; } + // Consider child as down if it's last_event is not CHILD_UP + for (i = 0, down_client = 0; i < priv->child_count; i++) + if (priv->last_event[i] != GF_EVENT_CHILD_UP) + down_client++; + + LOCK (&priv->lock); + { + priv->nodes_down = down_client; + } + UNLOCK (&priv->lock); + + heard_from_all_children = _gf_true; + for (i = 0; i < priv->child_count; i++) + if (!priv->last_event[i]) + heard_from_all_children = _gf_false; + + if (heard_from_all_children) + default_notify (this, event, data); +out: return 0; } int -set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data) +stripe_setxattr_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, dict_t *xdata) { - int ret = -1; - char *tmp_str = NULL; - char *tmp_str1 = NULL; - char *dup_str = NULL; - char *stripe_str = NULL; - char *pattern = NULL; - char *num = NULL; - struct stripe_options *temp_stripeopt = NULL; - struct stripe_options *stripe_opt = NULL; - - /* Get the pattern for striping. - "option block-size *avi:10MB" etc */ - stripe_str = strtok_r (data, ",", &tmp_str); - while (stripe_str) { - dup_str = strdup (stripe_str); - stripe_opt = CALLOC (1, sizeof (struct stripe_options)); - if (!stripe_opt) - goto out; + int ret = -1; + int call_cnt = 0; + stripe_local_t *local = NULL; + + if (!frame || !frame->local || !this) { + gf_log ("", GF_LOG_ERROR, "Possible NULL deref"); + return ret; + } + + local = frame->local; + + LOCK (&frame->lock); + { + call_cnt = --local->wind_count; + + /** + * We overwrite ->op_* values here for subsequent faliure + * conditions, hence we propogate the last errno down the + * stack. + */ + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; + goto unlock; + } + } + + unlock: + UNLOCK (&frame->lock); + + if (!call_cnt) { + STRIPE_STACK_UNWIND (setxattr, frame, local->op_ret, + local->op_errno, xdata); + } + + return 0; +} + +#ifdef HAVE_BD_XLATOR +int +stripe_is_bd (dict_t *this, char *key, data_t *value, void *data) +{ + gf_boolean_t *is_bd = data; + + if (data == NULL) + return 0; + + if (XATTR_IS_BD (key)) + *is_bd = _gf_true; + + return 0; +} + +inline gf_boolean_t +stripe_setxattr_is_bd (dict_t *dict) +{ + gf_boolean_t is_bd = _gf_false; + + if (dict == NULL) + goto out; + + dict_foreach (dict, stripe_is_bd, &is_bd); +out: + return is_bd; +} +#else +#define stripe_setxattr_is_bd(dict) _gf_false +#endif + +int +stripe_setxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, dict_t *dict, int flags, dict_t *xdata) +{ + int32_t op_errno = EINVAL; + xlator_list_t *trav = NULL; + stripe_private_t *priv = NULL; + stripe_local_t *local = NULL; + int i = 0; + gf_boolean_t is_bd = _gf_false; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->inode, err); + + GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict, + op_errno, err); + + priv = this->private; + trav = this->children; + + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + frame->local = local; + local->wind_count = priv->child_count; + local->op_ret = local->op_errno = 0; + + is_bd = stripe_setxattr_is_bd (dict); + + /** + * Set xattrs for directories on all subvolumes. Additionally + * this power is only given to a special client. Bd xlator + * also needs xattrs for regular files (ie LVs) + */ + if (((frame->root->pid == GF_CLIENT_PID_GSYNCD) && + IA_ISDIR (loc->inode->ia_type)) || is_bd) { + for (i = 0; i < priv->child_count; i++, trav = trav->next) { + STACK_WIND (frame, stripe_setxattr_cbk, + trav->xlator, trav->xlator->fops->setxattr, + loc, dict, flags, xdata); + } + } else { + local->wind_count = 1; + STACK_WIND (frame, stripe_setxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setxattr, + loc, dict, flags, xdata); + } + + return 0; +err: + STRIPE_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL); + return 0; +} + + +int +stripe_fsetxattr_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, dict_t *xdata) +{ + STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata); + return 0; +} + + +int +stripe_is_special_key (dict_t *this, + char *key, + data_t *value, + void *data) +{ + gf_boolean_t *is_special = NULL; + + if (data == NULL) { + goto out; + } + + is_special = data; + + if (XATTR_IS_LOCKINFO (key) || XATTR_IS_BD (key)) + *is_special = _gf_true; + +out: + return 0; +} + +int32_t +stripe_fsetxattr_everyone_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + dict_t *xdata) +{ + int call_count = 0; + stripe_local_t *local = NULL; + + local = frame->local; + + LOCK (&frame->lock); + { + call_count = --local->wind_count; - pattern = strtok_r (dup_str, ":", &tmp_str1); - num = strtok_r (NULL, ":", &tmp_str1); - if (!num) { - num = pattern; - pattern = "*"; + if (op_ret < 0) { + local->op_ret = op_ret; + local->op_errno = op_errno; } - if (gf_string2bytesize (num, &stripe_opt->block_size) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "invalid number format \"%s\"", num); + } + UNLOCK (&frame->lock); + + if (call_count == 0) { + STRIPE_STACK_UNWIND (fsetxattr, frame, local->op_ret, + local->op_errno, NULL); + } + return 0; +} + +int +stripe_fsetxattr_to_everyone (call_frame_t *frame, xlator_t *this, fd_t *fd, + dict_t *dict, int flags, dict_t *xdata) +{ + xlator_list_t *trav = NULL; + stripe_private_t *priv = NULL; + int ret = -1; + stripe_local_t *local = NULL; + + priv = this->private; + + local = mem_get0 (this->local_pool); + if (local == NULL) { + goto out; + } + + frame->local = local; + + local->wind_count = priv->child_count; + + trav = this->children; + + while (trav) { + STACK_WIND (frame, stripe_fsetxattr_everyone_cbk, + trav->xlator, trav->xlator->fops->fsetxattr, + fd, dict, flags, xdata); + trav = trav->next; + } + + ret = 0; +out: + return ret; +} + +inline gf_boolean_t +stripe_fsetxattr_is_special (dict_t *dict) +{ + gf_boolean_t is_spl = _gf_false; + + if (dict == NULL) { + goto out; + } + + dict_foreach (dict, stripe_is_special_key, &is_spl); + +out: + return is_spl; +} + +int +stripe_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + dict_t *dict, int flags, dict_t *xdata) +{ + int32_t op_ret = -1, ret = -1, op_errno = EINVAL; + gf_boolean_t is_spl = _gf_false; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict, + op_errno, err); + + is_spl = stripe_fsetxattr_is_special (dict); + if (is_spl) { + ret = stripe_fsetxattr_to_everyone (frame, this, fd, dict, + flags, xdata); + if (ret < 0) { + op_errno = ENOMEM; + goto err; + } + + goto out; + } + + STACK_WIND (frame, stripe_fsetxattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetxattr, + fd, dict, flags, xdata); +out: + return 0; +err: + STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL); + return 0; +} + +int +stripe_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + STRIPE_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata); + return 0; +} + +int +stripe_removexattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name, dict_t *xdata) +{ + int32_t op_errno = EINVAL; + + VALIDATE_OR_GOTO (this, err); + + GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*", + name, op_errno, err); + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (loc, err); + + STACK_WIND (frame, stripe_removexattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->removexattr, + loc, name, xdata); + return 0; +err: + STRIPE_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL); + return 0; +} + + +int +stripe_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + STRIPE_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata); + return 0; +} + +int +stripe_fremovexattr (call_frame_t *frame, xlator_t *this, + fd_t *fd, const char *name, dict_t *xdata) +{ + int32_t op_ret = -1; + int32_t op_errno = EINVAL; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*", + name, op_errno, err); + + STACK_WIND (frame, stripe_fremovexattr_cbk, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fremovexattr, + fd, name, xdata); + return 0; + err: + STRIPE_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata); + return 0; +} + +int32_t +stripe_readdirp_lookup_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + inode_t *inode, struct iatt *stbuf, + dict_t *xattr, struct iatt *parent) +{ + stripe_local_t *local = NULL; + call_frame_t *main_frame = NULL; + stripe_local_t *main_local = NULL; + gf_dirent_t *entry = NULL; + call_frame_t *prev = NULL; + int done = 0; + + local = frame->local; + prev = cookie; + + entry = local->dirent; + + main_frame = local->orig_frame; + main_local = main_frame->local; + LOCK (&frame->lock); + { + + local->call_count--; + if (!local->call_count) + done = 1; + if (op_ret == -1) { + local->op_errno = op_errno; + local->op_ret = op_ret; + goto unlock; + } + + if (stripe_ctx_handle(this, prev, local, xattr)) + gf_log(this->name, GF_LOG_ERROR, + "Error getting fctx info from dict."); + + correct_file_size(stbuf, local->fctx, prev); + + stripe_iatt_merge (stbuf, &entry->d_stat); + local->stbuf_blocks += stbuf->ia_blocks; + } +unlock: + UNLOCK(&frame->lock); + + if (done) { + inode_ctx_put (entry->inode, this, + (uint64_t) (long)local->fctx); + + done = 0; + LOCK (&main_frame->lock); + { + main_local->wind_count--; + if (!main_local->wind_count) + done = 1; + if (local->op_ret == -1) { + main_local->op_errno = local->op_errno; + main_local->op_ret = local->op_ret; + } + entry->d_stat.ia_blocks = local->stbuf_blocks; + } + UNLOCK (&main_frame->lock); + if (done) { + main_frame->local = NULL; + STRIPE_STACK_UNWIND (readdir, main_frame, + main_local->op_ret, + main_local->op_errno, + &main_local->entries, NULL); + gf_dirent_free (&main_local->entries); + stripe_local_wipe (main_local); + mem_put (main_local); + } + frame->local = NULL; + stripe_local_wipe (local); + mem_put (local); + STRIPE_STACK_DESTROY (frame); + } + + return 0; +} + +int32_t +stripe_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + gf_dirent_t *orig_entries, dict_t *xdata) +{ + stripe_local_t *local = NULL; + call_frame_t *prev = NULL; + gf_dirent_t *local_entry = NULL; + gf_dirent_t *tmp_entry = NULL; + xlator_list_t *trav = NULL; + loc_t loc = {0, }; + int32_t count = 0; + stripe_private_t *priv = NULL; + int32_t subvols = 0; + dict_t *xattrs = NULL; + call_frame_t *local_frame = NULL; + stripe_local_t *local_ent = NULL; + + if (!this || !frame || !frame->local || !cookie) { + gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref"); + goto out; + } + prev = cookie; + local = frame->local; + trav = this->children; + priv = this->private; + + subvols = priv->child_count; + + LOCK (&frame->lock); + { + if (op_ret == -1) { + gf_log (this->name, GF_LOG_WARNING, + "%s returned error %s", + prev->this->name, strerror (op_errno)); + local->op_errno = op_errno; + local->op_ret = op_ret; + goto unlock; + } else { + local->op_ret = op_ret; + list_splice_init (&orig_entries->list, + &local->entries.list); + local->wind_count = op_ret; + } + + } +unlock: + UNLOCK (&frame->lock); + + if (op_ret == -1) + goto out; + + xattrs = dict_new (); + if (xattrs) + (void) stripe_xattr_request_build (this, xattrs, 0, 0, 0, 0); + count = op_ret; + list_for_each_entry_safe (local_entry, tmp_entry, + (&local->entries.list), list) { + + if (!local_entry) + break; + if (!IA_ISREG (local_entry->d_stat.ia_type)) { + LOCK (&frame->lock); + { + local->wind_count--; + count = local->wind_count; + } + UNLOCK (&frame->lock); + continue; + } + + local_frame = copy_frame (frame); + + if (!local_frame) { + op_errno = ENOMEM; + op_ret = -1; + goto out; + } + + local_ent = mem_get0 (this->local_pool); + if (!local_ent) { + op_errno = ENOMEM; + op_ret = -1; goto out; - } - memcpy (stripe_opt->path_pattern, pattern, strlen (pattern)); - - gf_log (this->name, GF_LOG_DEBUG, - "block-size : pattern %s : size %"PRId64, - stripe_opt->path_pattern, stripe_opt->block_size); - - if (!priv->pattern) { - priv->pattern = stripe_opt; + } + + loc.inode = inode_ref (local_entry->inode); + + uuid_copy (loc.gfid, local_entry->d_stat.ia_gfid); + + local_ent->orig_frame = frame; + + local_ent->call_count = subvols; + + local_ent->dirent = local_entry; + + local_frame->local = local_ent; + + trav = this->children; + while (trav) { + STACK_WIND (local_frame, stripe_readdirp_lookup_cbk, + trav->xlator, trav->xlator->fops->lookup, + &loc, xattrs); + trav = trav->next; + } + loc_wipe (&loc); + } +out: + if (!count) { + /* all entries are directories */ + frame->local = NULL; + STRIPE_STACK_UNWIND (readdir, frame, local->op_ret, + local->op_errno, &local->entries, NULL); + gf_dirent_free (&local->entries); + stripe_local_wipe (local); + mem_put (local); + } + if (xattrs) + dict_unref (xattrs); + return 0; + +} +int32_t +stripe_readdirp (call_frame_t *frame, xlator_t *this, + fd_t *fd, size_t size, off_t off, dict_t *xdata) +{ + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + xlator_list_t *trav = NULL; + int op_errno = -1; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (fd, err); + + priv = this->private; + trav = this->children; + + if (priv->first_child_down) { + op_errno = ENOTCONN; + goto err; + } + + /* Initialization */ + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + frame->local = local; + + local->fd = fd_ref (fd); + + local->wind_count = 0; + + local->count = 0; + local->op_ret = -1; + INIT_LIST_HEAD(&local->entries); + + if (!trav) + goto err; + + STACK_WIND (frame, stripe_readdirp_cbk, trav->xlator, + trav->xlator->fops->readdirp, fd, size, off, xdata); + return 0; +err: + op_errno = (op_errno == -1) ? errno : op_errno; + STRIPE_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL); + + return 0; + +} + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + if (!this) + goto out; + + ret = xlator_mem_acct_init (this, gf_stripe_mt_end + 1); + + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" + "failed"); + goto out; + } + +out: + return ret; +} + +static int +clear_pattern_list (stripe_private_t *priv) +{ + struct stripe_options *prev = NULL; + struct stripe_options *trav = NULL; + int ret = -1; + + GF_VALIDATE_OR_GOTO ("stripe", priv, out); + + trav = priv->pattern; + priv->pattern = NULL; + while (trav) { + prev = trav; + trav = trav->next; + GF_FREE (prev); + } + + ret = 0; + out: + return ret; + + +} + + +int +reconfigure (xlator_t *this, dict_t *options) +{ + + stripe_private_t *priv = NULL; + data_t *data = NULL; + int ret = -1; + volume_option_t *opt = NULL; + + GF_ASSERT (this); + GF_ASSERT (this->private); + + priv = this->private; + + + ret = 0; + LOCK (&priv->lock); + { + ret = clear_pattern_list (priv); + if (ret) + goto unlock; + + data = dict_get (options, "block-size"); + if (data) { + ret = set_stripe_block_size (this, priv, data->data); + if (ret) + goto unlock; } else { - temp_stripeopt = priv->pattern; - while (temp_stripeopt->next) - temp_stripeopt = temp_stripeopt->next; - temp_stripeopt->next = stripe_opt; + opt = xlator_volume_option_get (this, "block-size"); + if (!opt) { + gf_log (this->name, GF_LOG_WARNING, + "option 'block-size' not found"); + ret = -1; + goto unlock; + } + + if (gf_string2bytesize (opt->default_value, &priv->block_size)){ + gf_log (this->name, GF_LOG_ERROR, + "Unable to set default block-size "); + ret = -1; + goto unlock; + } } - stripe_str = strtok_r (NULL, ",", &tmp_str); + + GF_OPTION_RECONF("coalesce", priv->coalesce, options, bool, + unlock); } + unlock: + UNLOCK (&priv->lock); + if (ret) + goto out; ret = 0; out: return ret; + } /** - * init - This function is called when xlator-graph gets initialized. + * init - This function is called when xlator-graph gets initialized. * The option given in volfiles are parsed here. - * @this - + * @this - */ int32_t init (xlator_t *this) { stripe_private_t *priv = NULL; + volume_option_t *opt = NULL; xlator_list_t *trav = NULL; data_t *data = NULL; int32_t count = 0; int ret = -1; + if (!this) + goto out; + trav = this->children; while (trav) { count++; @@ -3317,16 +5158,27 @@ init (xlator_t *this) gf_log (this->name, GF_LOG_WARNING, "dangling volume. check volfile "); } - - priv = CALLOC (1, sizeof (stripe_private_t)); + + if (count == 1) { + gf_log (this->name, GF_LOG_ERROR, + "stripe configured with only one \"subvolumes\" option." + " please check the volume. exiting"); + goto out; + } + + priv = GF_CALLOC (1, sizeof (stripe_private_t), + gf_stripe_mt_stripe_private_t); + if (!priv) goto out; - priv->xl_array = CALLOC (count, sizeof (xlator_t *)); + priv->xl_array = GF_CALLOC (count, sizeof (xlator_t *), + gf_stripe_mt_xlator_t); if (!priv->xl_array) goto out; - priv->state = CALLOC (count, sizeof (int8_t)); - if (!priv->xl_array) + priv->last_event = GF_CALLOC (count, sizeof (int), + gf_stripe_mt_int32_t); + if (!priv->last_event) goto out; priv->child_count = count; @@ -3346,128 +5198,617 @@ init (xlator_t *this) goto out; } - priv->block_size = (128 * GF_UNIT_KB); - /* option stripe-pattern *avi:1GB,*pdf:4096 */ - data = dict_get (this->options, "block-size"); - if (!data) { - gf_log (this->name, GF_LOG_DEBUG, - "No \"option block-size <x>\" given, defaulting " - "to 128KB"); - } else { - ret = set_stripe_block_size (this, priv, data->data); - if (ret) - goto out; - } - - priv->xattr_supported = 1; - data = dict_get (this->options, "use-xattr"); - if (data) { - if (gf_string2boolean (data->data, - &priv->xattr_supported) == -1) { + ret = 0; + LOCK (&priv->lock); + { + opt = xlator_volume_option_get (this, "block-size"); + if (!opt) { + gf_log (this->name, GF_LOG_WARNING, + "option 'block-size' not found"); + ret = -1; + goto unlock; + } + if (gf_string2bytesize (opt->default_value, &priv->block_size)){ gf_log (this->name, GF_LOG_ERROR, - "error setting hard check for extended " - "attribute"); - //return -1; + "Unable to set default block-size "); + ret = -1; + goto unlock; + } + /* option stripe-pattern *avi:1GB,*pdf:16K */ + data = dict_get (this->options, "block-size"); + if (data) { + ret = set_stripe_block_size (this, priv, data->data); + if (ret) + goto unlock; } } + unlock: + UNLOCK (&priv->lock); + if (ret) + goto out; + GF_OPTION_INIT ("use-xattr", priv->xattr_supported, bool, out); /* notify related */ priv->nodes_down = priv->child_count; + + GF_OPTION_INIT("coalesce", priv->coalesce, bool, out); + + this->local_pool = mem_pool_new (stripe_local_t, 128); + if (!this->local_pool) { + ret = -1; + gf_log (this->name, GF_LOG_ERROR, + "failed to create local_t's memory pool"); + goto out; + } + this->private = priv; ret = 0; - out: +out: if (ret) { if (priv) { - if (priv->xl_array) - FREE (priv->xl_array); - FREE (priv); + GF_FREE (priv->xl_array); + GF_FREE (priv); } } return ret; -} +} -/** +/** * fini - Free all the private variables - * @this - + * @this - */ -void +void fini (xlator_t *this) { stripe_private_t *priv = NULL; struct stripe_options *prev = NULL; struct stripe_options *trav = NULL; + if (!this) + goto out; + priv = this->private; if (priv) { - if (priv->xl_array) - FREE (priv->xl_array); + this->private = NULL; + GF_FREE (priv->xl_array); trav = priv->pattern; while (trav) { prev = trav; trav = trav->next; - FREE (prev); + GF_FREE (prev); } + GF_FREE (priv->last_event); LOCK_DESTROY (&priv->lock); - FREE (priv); + GF_FREE (priv); } +out: return; } +int32_t +stripe_getxattr_unwind (call_frame_t *frame, + int op_ret, int op_errno, dict_t *dict, dict_t *xdata) -struct xlator_fops fops = { - .stat = stripe_stat, - .unlink = stripe_unlink, - .symlink = stripe_symlink, - .rename = stripe_rename, - .link = stripe_link, - .chmod = stripe_chmod, - .chown = stripe_chown, - .truncate = stripe_truncate, - .utimens = stripe_utimens, - .create = stripe_create, - .open = stripe_open, - .readv = stripe_readv, - .writev = stripe_writev, - .statfs = stripe_statfs, - .flush = stripe_flush, - .fsync = stripe_fsync, - .setxattr = stripe_setxattr, - .getxattr = stripe_getxattr, - .removexattr = stripe_removexattr, - .access = stripe_access, - .ftruncate = stripe_ftruncate, - .fstat = stripe_fstat, - .readlink = stripe_readlink, - .mkdir = stripe_mkdir, - .rmdir = stripe_rmdir, - .lk = stripe_lk, - .opendir = stripe_opendir, - .fsyncdir = stripe_fsyncdir, - .fchmod = stripe_fchmod, - .fchown = stripe_fchown, - .lookup = stripe_lookup, - .setdents = stripe_setdents, - .mknod = stripe_mknod, -}; +{ + STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata); + return 0; +} + +int +stripe_internal_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, + dict_t *xdata) +{ + + char size_key[256] = {0,}; + char index_key[256] = {0,}; + char count_key[256] = {0,}; + char coalesce_key[256] = {0,}; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (frame->local, out); + + if (!xattr || (op_ret == -1)) + goto out; + + sprintf (size_key, "trusted.%s.stripe-size", this->name); + sprintf (count_key, "trusted.%s.stripe-count", this->name); + sprintf (index_key, "trusted.%s.stripe-index", this->name); + sprintf (coalesce_key, "trusted.%s.stripe-coalesce", this->name); + + dict_del (xattr, size_key); + dict_del (xattr, count_key); + dict_del (xattr, index_key); + dict_del (xattr, coalesce_key); + +out: + STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata); + + return 0; + +} + +int +stripe_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, dict_t *xattr, dict_t *xdata) +{ + int call_cnt = 0; + stripe_local_t *local = NULL; + + VALIDATE_OR_GOTO (frame, out); + VALIDATE_OR_GOTO (frame->local, out); + + local = frame->local; + + LOCK (&frame->lock); + { + call_cnt = --local->wind_count; + } + UNLOCK (&frame->lock); + + if (!xattr || (op_ret < 0)) + goto out; + + local->op_ret = 0; + + if (!local->xattr) { + local->xattr = dict_ref (xattr); + } else { + stripe_aggregate_xattr (local->xattr, xattr); + } + +out: + if (!call_cnt) { + STRIPE_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno, + local->xattr, xdata); + } + + return 0; +} + +int32_t +stripe_vgetxattr_cbk (call_frame_t *frame, void *cookie, + xlator_t *this, int32_t op_ret, int32_t op_errno, + dict_t *dict, dict_t *xdata) +{ + stripe_local_t *local = NULL; + int32_t callcnt = 0; + int32_t ret = -1; + long cky = 0; + void *xattr_val = NULL; + void *xattr_serz = NULL; + stripe_xattr_sort_t *xattr = NULL; + dict_t *stripe_xattr = NULL; + + if (!frame || !frame->local || !this) { + gf_log ("", GF_LOG_ERROR, "Possible NULL deref"); + return ret; + } + + local = frame->local; + cky = (long) cookie; -struct xlator_mops mops = { - .stats = stripe_stats, + if (local->xsel[0] == '\0') { + gf_log (this->name, GF_LOG_ERROR, "Empty xattr in cbk"); + return ret; + } + + LOCK (&frame->lock); + { + callcnt = --local->wind_count; + + if (!dict || (op_ret < 0)) + goto out; + + if (!local->xattr_list) + local->xattr_list = (stripe_xattr_sort_t *) + GF_CALLOC (local->nallocs, + sizeof (stripe_xattr_sort_t), + gf_stripe_mt_xattr_sort_t); + + if (local->xattr_list) { + xattr = local->xattr_list + (int32_t) cky; + + ret = dict_get_ptr_and_len (dict, local->xsel, + &xattr_val, + &xattr->xattr_len); + if (xattr->xattr_len == 0) + goto out; + + xattr->pos = cky; + xattr->xattr_value = gf_memdup (xattr_val, + xattr->xattr_len); + + if (xattr->xattr_value != NULL) + local->xattr_total_len += xattr->xattr_len + 1; + } + } + out: + UNLOCK (&frame->lock); + + if (!callcnt) { + if (!local->xattr_total_len) + goto unwind; + + stripe_xattr = dict_new (); + if (!stripe_xattr) + goto unwind; + + /* select filler based on ->xsel */ + if (XATTR_IS_PATHINFO (local->xsel)) + ret = stripe_fill_pathinfo_xattr (this, local, + (char **)&xattr_serz); + else if (XATTR_IS_LOCKINFO (local->xsel)) { + ret = stripe_fill_lockinfo_xattr (this, local, + &xattr_serz); + } else { + gf_log (this->name, GF_LOG_WARNING, + "Unknown xattr in xattr request"); + goto unwind; + } + + if (!ret) { + ret = dict_set_dynptr (stripe_xattr, local->xsel, + xattr_serz, + local->xattr_total_len); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "Can't set %s key in dict", + local->xsel); + } + + unwind: + STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, + stripe_xattr, NULL); + + ret = stripe_free_xattr_str (local); + + GF_FREE (local->xattr_list); + + if (stripe_xattr) + dict_unref (stripe_xattr); + } + + return ret; +} + +int32_t +stripe_getxattr (call_frame_t *frame, xlator_t *this, + loc_t *loc, const char *name, dict_t *xdata) +{ + stripe_local_t *local = NULL; + xlator_list_t *trav = NULL; + stripe_private_t *priv = NULL; + int32_t op_errno = EINVAL; + int i = 0; + xlator_t **sub_volumes; + int ret = 0; + + VALIDATE_OR_GOTO (frame, err); + VALIDATE_OR_GOTO (this, err); + VALIDATE_OR_GOTO (loc, err); + VALIDATE_OR_GOTO (loc->path, err); + VALIDATE_OR_GOTO (loc->inode, err); + + priv = this->private; + trav = this->children; + + /* Initialization */ + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + local->op_ret = -1; + frame->local = local; + loc_copy (&local->loc, loc); + + + if (name && (strcmp (GF_XATTR_MARKER_KEY, name) == 0) + && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) { + local->marker.call_count = priv->child_count; + + sub_volumes = alloca ( priv->child_count * + sizeof (xlator_t *)); + for (i = 0, trav = this->children; trav ; + trav = trav->next, i++) { + + *(sub_volumes + i) = trav->xlator; + + } + + if (cluster_getmarkerattr (frame, this, loc, name, + local, stripe_getxattr_unwind, + sub_volumes, priv->child_count, + MARKER_UUID_TYPE, marker_uuid_default_gauge, + priv->vol_uuid)) { + op_errno = EINVAL; + goto err; + } + + return 0; + } + + if (name && strncmp (name, GF_XATTR_QUOTA_SIZE_KEY, + strlen (GF_XATTR_QUOTA_SIZE_KEY)) == 0) { + local->wind_count = priv->child_count; + + for (i = 0, trav=this->children; i < priv->child_count; i++, + trav = trav->next) { + STACK_WIND (frame, stripe_getxattr_cbk, + trav->xlator, trav->xlator->fops->getxattr, + loc, name, xdata); + } + + return 0; + } + + if (name && + ((strncmp (name, GF_XATTR_PATHINFO_KEY, + strlen (GF_XATTR_PATHINFO_KEY)) == 0))) { + if (IA_ISREG (loc->inode->ia_type)) { + ret = inode_ctx_get (loc->inode, this, + (uint64_t *) &local->fctx); + if (ret) + gf_log (this->name, GF_LOG_ERROR, + "stripe size unavailable from fctx" + " relying on pathinfo could lead to" + " wrong results"); + } + + local->nallocs = local->wind_count = priv->child_count; + (void) strncpy (local->xsel, name, strlen (name)); + + /** + * for xattrs that need info from all childs, fill ->xsel + * as above and call the filler function in cbk based on + * it + */ + for (i = 0, trav = this->children; i < priv->child_count; i++, + trav = trav->next) { + STACK_WIND_COOKIE (frame, stripe_vgetxattr_cbk, + (void *) (long) i, trav->xlator, + trav->xlator->fops->getxattr, + loc, name, xdata); + } + + return 0; + } + + if (name &&(*priv->vol_uuid)) { + if ((match_uuid_local (name, priv->vol_uuid) == 0) + && (GF_CLIENT_PID_GSYNCD == frame->root->pid)) { + + if (!IA_FILE_OR_DIR (loc->inode->ia_type)) + local->marker.call_count = 1; + else + local->marker.call_count = priv->child_count; + + sub_volumes = alloca (local->marker.call_count * + sizeof (xlator_t *)); + + for (i = 0, trav = this->children; + i < local->marker.call_count; + i++, trav = trav->next) { + *(sub_volumes + i) = trav->xlator; + + } + + if (cluster_getmarkerattr (frame, this, loc, name, + local, + stripe_getxattr_unwind, + sub_volumes, + local->marker.call_count, + MARKER_XTIME_TYPE, + marker_xtime_default_gauge, + priv->vol_uuid)) { + op_errno = EINVAL; + goto err; + } + + return 0; + } + } + + + STACK_WIND (frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + + return 0; + +err: + STRIPE_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL); + return 0; +} + +inline gf_boolean_t +stripe_is_special_xattr (const char *name) +{ + gf_boolean_t is_spl = _gf_false; + + if (!name) { + goto out; + } + + if (!strncmp (name, GF_XATTR_LOCKINFO_KEY, + strlen (GF_XATTR_LOCKINFO_KEY)) + || !strncmp (name, GF_XATTR_PATHINFO_KEY, + strlen (GF_XATTR_PATHINFO_KEY))) + is_spl = _gf_true; +out: + return is_spl; +} + +int32_t +stripe_fgetxattr_from_everyone (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + stripe_local_t *local = NULL; + stripe_private_t *priv = NULL; + int32_t ret = -1, op_errno = 0; + int i = 0; + xlator_list_t *trav = NULL; + + priv = this->private; + + local = mem_get0 (this->local_pool); + if (!local) { + op_errno = ENOMEM; + goto err; + } + + local->op_ret = -1; + frame->local = local; + + strncpy (local->xsel, name, strlen (name)); + local->nallocs = local->wind_count = priv->child_count; + + for (i = 0, trav = this->children; i < priv->child_count; i++, + trav = trav->next) { + STACK_WIND_COOKIE (frame, stripe_vgetxattr_cbk, + (void *) (long) i, trav->xlator, + trav->xlator->fops->fgetxattr, + fd, name, xdata); + } + + return 0; + +err: + STACK_UNWIND_STRICT (fgetxattr, frame, -1, op_errno, NULL, NULL); + return ret; +} + +int32_t +stripe_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, + const char *name, dict_t *xdata) +{ + if (stripe_is_special_xattr (name)) { + stripe_fgetxattr_from_everyone (frame, this, fd, name, xdata); + goto out; + } + + STACK_WIND (frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata); + +out: + return 0; +} + + + +int32_t +stripe_priv_dump (xlator_t *this) +{ + char key[GF_DUMP_MAX_BUF_LEN]; + int i = 0; + stripe_private_t *priv = NULL; + int ret = -1; + struct stripe_options *options = NULL; + + GF_VALIDATE_OR_GOTO ("stripe", this, out); + + priv = this->private; + if (!priv) + goto out; + + ret = TRY_LOCK (&priv->lock); + if (ret != 0) + goto out; + + gf_proc_dump_add_section("xlator.cluster.stripe.%s.priv", this->name); + gf_proc_dump_write("child_count","%d", priv->child_count); + + for (i = 0; i < priv->child_count; i++) { + sprintf (key, "subvolumes[%d]", i); + gf_proc_dump_write (key, "%s.%s", priv->xl_array[i]->type, + priv->xl_array[i]->name); + } + + options = priv->pattern; + while (options != NULL) { + gf_proc_dump_write ("path_pattern", "%s", priv->pattern->path_pattern); + gf_proc_dump_write ("options_block_size", "%ul", options->block_size); + + options = options->next; + } + + gf_proc_dump_write ("block_size", "%ul", priv->block_size); + gf_proc_dump_write ("nodes-down", "%d", priv->nodes_down); + gf_proc_dump_write ("first-child_down", "%d", priv->first_child_down); + gf_proc_dump_write ("xattr_supported", "%d", priv->xattr_supported); + + UNLOCK (&priv->lock); + +out: + return ret; +} + +struct xlator_fops fops = { + .stat = stripe_stat, + .unlink = stripe_unlink, + .rename = stripe_rename, + .link = stripe_link, + .truncate = stripe_truncate, + .create = stripe_create, + .open = stripe_open, + .readv = stripe_readv, + .writev = stripe_writev, + .statfs = stripe_statfs, + .flush = stripe_flush, + .fsync = stripe_fsync, + .ftruncate = stripe_ftruncate, + .fstat = stripe_fstat, + .mkdir = stripe_mkdir, + .rmdir = stripe_rmdir, + .lk = stripe_lk, + .opendir = stripe_opendir, + .fsyncdir = stripe_fsyncdir, + .setattr = stripe_setattr, + .fsetattr = stripe_fsetattr, + .lookup = stripe_lookup, + .mknod = stripe_mknod, + .setxattr = stripe_setxattr, + .fsetxattr = stripe_fsetxattr, + .getxattr = stripe_getxattr, + .fgetxattr = stripe_fgetxattr, + .removexattr = stripe_removexattr, + .fremovexattr = stripe_fremovexattr, + .readdirp = stripe_readdirp, + .fallocate = stripe_fallocate, + .discard = stripe_discard, + .zerofill = stripe_zerofill, }; struct xlator_cbks cbks = { .release = stripe_release, + .forget = stripe_forget, }; +struct xlator_dumpops dumpops = { + .priv = stripe_priv_dump, +}; struct volume_options options[] = { - { .key = {"block-size"}, - .type = GF_OPTION_TYPE_ANY + { .key = {"block-size"}, + .type = GF_OPTION_TYPE_SIZE_LIST, + .default_value = "128KB", + .min = STRIPE_MIN_BLOCK_SIZE, + .description = "Size of the stripe unit that would be read " + "from or written to the striped servers." }, - { .key = {"use-xattr"}, - .type = GF_OPTION_TYPE_BOOL + { .key = {"use-xattr"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "true" }, + { .key = {"coalesce"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "true", + .description = "Enable/Disable coalesce mode to flatten striped " + "files as stored on the server (i.e., eliminate holes " + "caused by the traditional format)." + }, { .key = {NULL} }, }; |
