diff options
| author | Anuradha Talur <atalur@redhat.com> | 2016-08-25 11:46:25 +0530 | 
|---|---|---|
| committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2016-09-06 10:50:58 -0700 | 
| commit | 90a79b04d52987a52aef8bbb9944bdbd44c9ed3c (patch) | |
| tree | 88b32f2948b2c99a6d422f3b1f617d5b38332ec6 | |
| parent | 2a2a31c0c0bb3cbbd53d96300515aa9e2fe7c067 (diff) | |
afr: Consume compound fops in afr transaction
        Backport of: http://review.gluster.org/15014
        cherry-picked from befae0b48729894282b5b8b174907e24779c3442
Change-Id: I62ff41aa80fe700df2dda8b10606e484ee1f382e
BUG: 1372686
Signed-off-by: Anuradha Talur <atalur@redhat.com>
Reviewed-on: http://review.gluster.org/15388
Tested-by: Krutika Dhananjay <kdhananj@redhat.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
| -rw-r--r-- | libglusterfs/src/globals.h | 4 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 55 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 73 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 26 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 391 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.h | 4 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.c | 12 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.h | 38 | ||||
| -rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 7 | 
9 files changed, 546 insertions, 64 deletions
diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h index ab25c7f0ec1..e186d58cea6 100644 --- a/libglusterfs/src/globals.h +++ b/libglusterfs/src/globals.h @@ -68,6 +68,10 @@  #define GD_OP_VERSION_3_8_0    30800 /* Op-version for GlusterFS 3.8.0 */ +#define GD_OP_VERSION_3_8_3    30803 /* Op-version for GlusterFS 3.8.3 */ + +#define GD_OP_VERSION_3_8_4    30804 /* Op-version for GlusterFS 3.8.4 */ +  #define GD_OP_VERSION_3_9_0    30900 /* Op-version for GlusterFS 3.9.0 */  #define GD_OP_VERSION_4_0_0    40000 /* Op-version for GlusterFS 4.0.0 */ diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index dec667fd460..cf838846cbd 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -42,6 +42,7 @@  #include "afr-self-heal.h"  #include "afr-self-heald.h"  #include "afr-messages.h" +#include "compound-fop-utils.h"  gf_boolean_t  afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv, @@ -4592,6 +4593,7 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)          local->need_full_crawl = _gf_false; +        local->compound = _gf_false;          INIT_LIST_HEAD (&local->healer);  	return 0;  out: @@ -4743,6 +4745,7 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this)          if (!local->pending)                  goto out; +        local->compound = _gf_false;  	INIT_LIST_HEAD (&local->transaction.eager_locked);          ret = 0; @@ -5536,3 +5539,55 @@ afr_get_msg_id (char *op_type)                  return AFR_MSG_ADD_BRICK_STATUS;          return -1;  } + +gf_boolean_t +afr_can_compound_pre_op_and_op (afr_private_t *priv, glusterfs_fop_t fop) +{ +        if (priv->arbiter_count != 0) +                return _gf_false; + +        if (!priv->use_compound_fops) +                return _gf_false; + +        switch (fop) { +        case GF_FOP_WRITE: +                return _gf_true; +        default: +                return _gf_false; +        } +} + +afr_compound_cbk_t +afr_pack_fop_args (call_frame_t *frame, compound_args_t *args, +                   glusterfs_fop_t fop, int index) +{ +        afr_local_t     *local  = frame->local; + +        switch (fop) { +        case GF_FOP_WRITE: +                COMPOUND_PACK_ARGS (writev, GF_FOP_WRITE, +                                    args, index, +                                    local->fd, local->cont.writev.vector, +                                    local->cont.writev.count, +                                    local->cont.writev.offset, +                                    local->cont.writev.flags, +                                    local->cont.writev.iobref, +                                    local->xdata_req); +                return afr_pre_op_writev_cbk; +        default: +                break; +        } +        return NULL; +} + +void +afr_compound_cleanup (compound_args_t *args, dict_t *xdata, +                      dict_t *newloc_xdata) +{ +        if (args) +                compound_args_cleanup (args); +	if (xdata) +		dict_unref (xdata); +        if (newloc_xdata) +                dict_unref (newloc_xdata); +} diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 24ab52f0266..200b420f5f0 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -292,21 +292,16 @@ afr_writev_handle_short_writes (call_frame_t *frame, xlator_t *this)          }  } -int -afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +void +afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index,                       int32_t op_ret, int32_t op_errno, struct iatt *prebuf,                       struct iatt *postbuf, dict_t *xdata)  { -        afr_local_t *   local = NULL; -        call_frame_t    *fop_frame = NULL; -        int child_index = (long) cookie; -        int call_count  = -1;          int ret = 0; +        afr_local_t *local = frame->local;          uint32_t open_fd_count = 0;          uint32_t write_is_append = 0; -        local = frame->local; -          LOCK (&frame->lock);          {                  __afr_inode_write_fill (frame, this, child_index, op_ret, @@ -324,32 +319,60 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,  				       &open_fd_count);  		if (ret == -1)  			goto unlock; -		if ((open_fd_count > local->open_fd_count)) { -			local->open_fd_count = open_fd_count; -			local->update_open_fd_count = _gf_true; +		if (open_fd_count > local->open_fd_count) { +                        local->open_fd_count = open_fd_count; +                        local->update_open_fd_count = _gf_true;  		}          }  unlock:          UNLOCK (&frame->lock); +} -        call_count = afr_frame_return (frame); +void +afr_process_post_writev (call_frame_t *frame, xlator_t *this) +{ +        afr_local_t     *local = NULL; -        if (call_count == 0) { -		if (!local->stable_write && !local->append_write) -			/* An appended write removes the necessity to -			   fsync() the file. This is because self-heal -			   has the logic to check for larger file when -			   the xattrs are not reliably pointing at -			   a stale file. -			*/ -			afr_fd_report_unstable_write (this, local->fd); +        local = frame->local; -		__afr_inode_write_finalize (frame, this); +        if (!local->stable_write && !local->append_write) +                /* An appended write removes the necessity to +                   fsync() the file. This is because self-heal +                   has the logic to check for larger file when +                   the xattrs are not reliably pointing at +                   a stale file. +                */ +                afr_fd_report_unstable_write (this, local->fd); + +        __afr_inode_write_finalize (frame, this); -                afr_writev_handle_short_writes (frame, this); +        afr_writev_handle_short_writes (frame, this); -                if (local->update_open_fd_count) -                        afr_handle_open_fd_count (frame, this); +        if (local->update_open_fd_count) +                afr_handle_open_fd_count (frame, this); + +} + +int +afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                     int32_t op_ret, int32_t op_errno, struct iatt *prebuf, +                     struct iatt *postbuf, dict_t *xdata) +{ +        afr_local_t     *local = NULL; +        call_frame_t    *fop_frame = NULL; +        int child_index = (long) cookie; +        int call_count  = -1; +        int ret = 0; + +        local = frame->local; + +        afr_inode_write_fill (frame, this, child_index, op_ret, op_errno, +                              prebuf, postbuf, xdata); + +        call_count = afr_frame_return (frame); + +        if (call_count == 0) { +                afr_process_post_writev (frame, this);                  if (!afr_txn_nothing_failed (frame, this)) {                          //Don't unwind until post-op is complete diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index 718ba318cfe..233672898f1 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -463,8 +463,8 @@ transaction_lk_op (afr_local_t *local)  } -static int -is_afr_lock_transaction (afr_local_t *local) +int +afr_is_inodelk_transaction(afr_local_t *local)  {          int ret = 0; @@ -636,13 +636,25 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,          return 0;  } +void +afr_update_uninodelk (afr_local_t *local, afr_internal_lock_t *int_lock, +                    int32_t child_index) +{ +        afr_inodelk_t       *inodelk = NULL; + +        inodelk = afr_get_inodelk (int_lock, int_lock->domain); +        inodelk->locked_nodes[child_index] &= LOCKED_NO; +        if (local->transaction.eager_lock) +                local->transaction.eager_lock[child_index] = 0; + +} +  static int32_t  afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          int32_t op_ret, int32_t op_errno, dict_t *xdata)  {          afr_local_t         *local = NULL;          afr_internal_lock_t *int_lock = NULL; -        afr_inodelk_t       *inodelk = NULL;          int32_t             child_index = (long)cookie;          afr_private_t       *priv = NULL; @@ -665,11 +677,7 @@ afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,                          lkowner_utoa (&frame->root->lk_owner));          } - -        inodelk = afr_get_inodelk (int_lock, int_lock->domain); -        inodelk->locked_nodes[child_index] &= LOCKED_NO; -        if (local->transaction.eager_lock) -                local->transaction.eager_lock[child_index] = 0; +        afr_update_uninodelk (local, int_lock, child_index);          afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, xdata); @@ -1712,7 +1720,7 @@ afr_unlock (call_frame_t *frame, xlator_t *this)          local = frame->local;          if (transaction_lk_op (local)) { -                if (is_afr_lock_transaction (local)) +                if (afr_is_inodelk_transaction(local))                          afr_unlock_inodelk (frame, this);                  else                          afr_unlock_entrylk (frame, this); diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 64a42d9fc7e..db7c6dbd2b0 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -17,6 +17,7 @@  #include "afr-transaction.h"  #include "afr-self-heal.h"  #include "afr-messages.h" +#include "compound-fop-utils.h"  #include <signal.h> @@ -32,6 +33,14 @@ gf_boolean_t  afr_changelog_pre_op_update (call_frame_t *frame, xlator_t *this);  int +afr_changelog_call_count (afr_transaction_type type, +                          unsigned char *pre_op_subvols, +                          unsigned int child_count); +int +afr_post_op_unlock_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, +                       afr_changelog_resume_t changelog_resume, +                       afr_xattrop_type_t op); +int  afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,  		  afr_changelog_resume_t changelog_resume,                    afr_xattrop_type_t op); @@ -821,14 +830,16 @@ afr_handle_quorum (call_frame_t *frame)  int  afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)  { -        afr_private_t * priv = this->private; -        int i          = 0; -	int ret = 0; -	int idx = 0; -        afr_local_t *  local = NULL; -        dict_t        *xattr = NULL; -        int            nothing_failed = 1; -	gf_boolean_t   need_undirty = _gf_false; +        afr_private_t           *priv           = this->private; +        afr_local_t             *local          = NULL; +        dict_t                  *xattr          = NULL; +        afr_fd_ctx_t            *fd_ctx         = NULL; +        int                     i               = 0; +        int                     ret             = 0; +        int                     idx             = 0; +        int                     nothing_failed  = 1; +        int                     piggyback       = 0; +        gf_boolean_t            need_undirty    = _gf_false;          afr_handle_quorum (frame);          local = frame->local; @@ -893,8 +904,34 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this)  		goto out;  	} -	afr_changelog_do (frame, this, xattr, afr_changelog_post_op_done, -                          AFR_TRANSACTION_POST_OP); +        if (local->compound && local->fd) { +                LOCK (&local->fd->lock); +                { +                        fd_ctx = __afr_fd_ctx_get (local->fd, this); +                        for (i = 0; i < priv->child_count; i++) { +                                if (local->transaction.pre_op[i] && +                                    local->transaction.eager_lock[i]) { +                                        if (fd_ctx->lock_piggyback[i]) +                                                piggyback = 1; +                                } +                                if (piggyback == 1) +                                        break; +                        } +                } +                UNLOCK (&local->fd->lock); +        } + +        /* Do not compound if any brick got piggybacked lock as +         * unlock should not be done for that. */ +        if (local->compound && !piggyback) { +                afr_post_op_unlock_do (frame, this, xattr, +                                       afr_changelog_post_op_done, +                                       AFR_TRANSACTION_POST_OP); +        } else { +                afr_changelog_do (frame, this, xattr, +                                  afr_changelog_post_op_done, +                                  AFR_TRANSACTION_POST_OP); +        }  out:  	if (xattr)                  dict_unref (xattr); @@ -1189,6 +1226,299 @@ out:  }  int +afr_pre_op_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                       int op_ret, int op_errno, +                       void *data, dict_t *xdata) +{ +        afr_local_t *local = NULL; +        afr_private_t *priv = NULL; +        call_frame_t    *fop_frame = NULL; +        default_args_cbk_t *write_args_cbk = NULL; +        compound_args_cbk_t *args_cbk = data; +        int call_count = -1; +        int child_index = -1; +        int i = 0; + +        local = frame->local; +        priv = this->private; +        child_index = (long) cookie; + +	if (local->pre_op_compat) +		afr_changelog_pre_op_update (frame, this); + +        if (op_ret == -1) { +                local->op_errno = op_errno; +		afr_transaction_fop_failed (frame, this, child_index); +        } +        write_args_cbk = &args_cbk->rsp_list[1]; +        afr_inode_write_fill  (frame, this, (long) i, write_args_cbk->op_ret, +                               write_args_cbk->op_errno, +                               &write_args_cbk->prestat, +                               &write_args_cbk->poststat, +                               write_args_cbk->xdata); + +	call_count = afr_frame_return (frame); + +        if (call_count == 0) { +                afr_process_post_writev (frame, this); +                if (!afr_txn_nothing_failed (frame, this)) { +                        /* Don't unwind until post-op is complete */ +                        local->transaction.resume (frame, this); +                } else { +                /* frame change, place frame in post-op delay and unwind */ +                        fop_frame = afr_transaction_detach_fop_frame (frame); +                        afr_writev_copy_outvars (frame, fop_frame); +                        local->transaction.resume (frame, this); +                        afr_writev_unwind (fop_frame, this); +                } +        } +        return 0; +} + +int +afr_changelog_prepare (xlator_t *this, call_frame_t *frame, int *call_count, +                       afr_changelog_resume_t changelog_resume, +                       afr_xattrop_type_t op, dict_t **xdata, +                       dict_t **newloc_xdata) +{ +        afr_private_t *priv  = NULL; +        afr_local_t   *local = NULL; + +        local = frame->local; +        priv = this->private; + +        *call_count = afr_changelog_call_count (local->transaction.type, +                                               local->transaction.pre_op, +                                               priv->child_count); + +        if (*call_count == 0) { +                changelog_resume (frame, this); +                return -1; +        } + +        afr_changelog_populate_xdata (frame, op, xdata, newloc_xdata); +        local->call_count = *call_count; + +        local->transaction.changelog_resume = changelog_resume; +        return 0; +} + +int +afr_pre_op_fop_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, +                   afr_changelog_resume_t changelog_resume, +                   afr_xattrop_type_t op) +{ +        afr_local_t *local = NULL; +        afr_private_t *priv = NULL; +        dict_t *xdata = NULL; +        dict_t *newloc_xdata = NULL; +        compound_args_t *args = NULL; +        int i = 0, call_count = 0; +        afr_compound_cbk_t compound_cbk; +        int ret = 0; +        int op_errno = ENOMEM; + +        local = frame->local; +        priv = this->private; + +        /* If lock failed on all, just unlock and unwind */ +        ret = afr_changelog_prepare (this, frame, &call_count, changelog_resume, +                                     op, &xdata, &newloc_xdata); + +        if (ret) +                return 0; + +        local->call_count = call_count; + +        afr_save_lk_owner (frame); +        frame->root->lk_owner = +                local->transaction.main_frame->root->lk_owner; + +        args = compound_fop_alloc (2, GF_CFOP_XATTROP_WRITEV, NULL); + +        if (!args) +                goto err; + +        /* pack pre-op part */ +        i = 0; +        COMPOUND_PACK_ARGS (fxattrop, GF_FOP_FXATTROP, +                            args, i, +                            local->fd, GF_XATTROP_ADD_ARRAY, +                            xattr, xdata); +        i++; +        /* pack whatever fop needs to be packed +         * @compound_cbk holds the cbk that would need to be called +         */ +        compound_cbk = afr_pack_fop_args (frame, args, local->op, i); + +        for (i = 0; i < priv->child_count; i++) { +                /* Means lock did not succeed on this brick */ +                if (!local->transaction.pre_op[i]) +                        continue; + +                STACK_WIND_COOKIE (frame, compound_cbk, +                                   (void *) (long) i, +                                   priv->children[i], +                                   priv->children[i]->fops->compound, +                                   args, +                                   NULL); +                if (!--call_count) +                        break; +        } + +        afr_compound_cleanup (args, xdata, newloc_xdata); +        return 0; +err: +	local->internal_lock.lock_cbk = local->transaction.done; +	local->op_ret = -1; +	local->op_errno = op_errno; + +        afr_restore_lk_owner (frame); +	afr_unlock (frame, this); + +        afr_compound_cleanup (args, xdata, newloc_xdata); +	return 0; +} + +int +afr_post_op_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                       int op_ret, int op_errno, +                       void *data, dict_t *xdata) +{ +        afr_local_t *local = NULL; +        afr_private_t *priv = NULL; +        compound_args_cbk_t *args_cbk = data; +        int call_count = -1; +        afr_internal_lock_t *int_lock = NULL; +        afr_inodelk_t       *inodelk = NULL; +        int32_t             child_index = (long)cookie; +        int i = 0; + +        local = frame->local; +        priv = this->private; +        child_index = (long) cookie; + +        local = frame->local; +        int_lock = &local->internal_lock; + +        afr_update_uninodelk (local, int_lock, child_index); + +        LOCK (&frame->lock); +        { +                call_count = --int_lock->lk_call_count; +        } +        UNLOCK (&frame->lock); + +        if (call_count == 0) { +                if (local->transaction.resume_stub) { +                        call_resume (local->transaction.resume_stub); +                        local->transaction.resume_stub = NULL; +                } +                gf_msg_trace (this->name, 0, +                              "All internal locks unlocked"); +                int_lock->lock_cbk (frame, this); +        } + +        return 0; +} + +int +afr_post_op_unlock_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, +		       afr_changelog_resume_t changelog_resume, +                       afr_xattrop_type_t op) +{ +	afr_local_t             *local          = NULL; +	afr_private_t           *priv           = NULL; +        dict_t                  *xdata          = NULL; +        dict_t                  *newloc_xdata   = NULL; +        compound_args_t         *args           = NULL; +        afr_internal_lock_t     *int_lock       = NULL; +        afr_inodelk_t           *inodelk        = NULL; +        struct gf_flock         *flock_use      = NULL; +	int                     i               = 0; +	int                     call_count      = 0; +        struct gf_flock         flock           = {0,}; +        struct gf_flock         full_flock      = {0,}; +        int                     ret             = 0; + +	local = frame->local; +	priv = this->private; +        int_lock = &local->internal_lock; + +        if (afr_is_inodelk_transaction(local)) { +                inodelk = afr_get_inodelk (int_lock, int_lock->domain); + +                flock.l_start = inodelk->flock.l_start; +                flock.l_len   = inodelk->flock.l_len; +                flock.l_type  = F_UNLCK; +                full_flock.l_type = F_UNLCK; + +        } + +        ret = afr_changelog_prepare (this, frame, &call_count, changelog_resume, +                                     op, &xdata, &newloc_xdata); + +        if (ret) +                return 0; + +        int_lock->lk_call_count = call_count; + +        int_lock->lock_cbk = local->transaction.done; + +        args = compound_fop_alloc (2, GF_CFOP_XATTROP_UNLOCK, NULL); + +        if (!args) { +		local->op_ret = -1; +		local->op_errno = ENOMEM; +		afr_changelog_post_op_done (frame, this); +		goto out; +	} + +        i = 0; +        COMPOUND_PACK_ARGS (fxattrop, GF_FOP_FXATTROP, +                            args, i, +                            local->fd, GF_XATTROP_ADD_ARRAY, +                            xattr, xdata); +        i++; +        if (!local->transaction.eager_lock_on) +                flock_use = &flock; +        else +                flock_use = &full_flock; + +        if (afr_is_inodelk_transaction(local)) { +                if (local->fd) { +                        COMPOUND_PACK_ARGS (finodelk, GF_FOP_FINODELK, +                                            args, i, +                                            int_lock->domain, local->fd, +                                            F_SETLK, flock_use, NULL); +                } else { +                        COMPOUND_PACK_ARGS (inodelk, GF_FOP_INODELK, +                                            args, i, +                                            int_lock->domain, &local->loc, +                                            F_SETLK, flock_use, NULL); +                } +        } + +        for (i = 0; i < priv->child_count; i++) { +                /* pre_op[i] has to be true for all nodes that were +                 * successfully locked. */ +                if (!local->transaction.pre_op[i]) +                        continue; +                STACK_WIND_COOKIE (frame, afr_post_op_unlock_cbk, +                                   (void *) (long) i, +                                   priv->children[i], +                                   priv->children[i]->fops->compound, +                                   args, +                                   NULL); +                if (!--call_count) +                        break; +        } +out: +        afr_compound_cleanup (args, xdata, newloc_xdata); +        return 0; +} + +int  afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,  		  afr_changelog_resume_t changelog_resume,                    afr_xattrop_type_t op) @@ -1199,23 +1529,16 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,          dict_t *newloc_xdata = NULL;  	int i = 0;  	int call_count = 0; +        int ret = 0;  	local = frame->local;  	priv = this->private; -        call_count = afr_changelog_call_count (local->transaction.type, -					       local->transaction.pre_op, -					       priv->child_count); - -	if (call_count == 0) { -		changelog_resume (frame, this); -		return 0; -	} - -        afr_changelog_populate_xdata (frame, op, &xdata, &newloc_xdata); -	local->call_count = call_count; +        ret = afr_changelog_prepare (this, frame, &call_count, changelog_resume, +                                     op, &xdata, &newloc_xdata); -	local->transaction.changelog_resume = changelog_resume; +        if (ret) +                return 0;          for (i = 0; i < priv->child_count; i++) {                  if (!local->transaction.pre_op[i]) @@ -1380,8 +1703,21 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)  		goto next;  	} -	afr_changelog_do (frame, this, xdata_req, afr_transaction_perform_fop, -                          AFR_TRANSACTION_PRE_OP); +	/* Till here we have already decided if pre-op needs to be done, +         * based on various criteria. The only thing that needs to be checked +         * now on is whether compound-fops is enabled or not. +         * If it is, then perform pre-op and fop together for writev op. +         */ +        if (afr_can_compound_pre_op_and_op (priv, local->op)) { +                local->compound = _gf_true; +                afr_pre_op_fop_do (frame, this, xdata_req, +                                   afr_transaction_perform_fop, +                                   AFR_TRANSACTION_PRE_OP); +        } else { +                afr_changelog_do (frame, this, xdata_req, +                                  afr_transaction_perform_fop, +                                  AFR_TRANSACTION_PRE_OP); +        }  	if (xdata_req)  		dict_unref (xdata_req); @@ -1738,10 +2074,6 @@ out:  void -afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd, -                               call_stub_t *stub); - -void  afr_delayed_changelog_wake_up_cbk (void *data)  {          fd_t           *fd = NULL; @@ -2031,7 +2363,6 @@ afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd)          afr_delayed_changelog_post_op (this, NULL, fd, NULL);  } -  int  afr_transaction_resume (call_frame_t *frame, xlator_t *this)  { @@ -2082,7 +2413,7 @@ afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, -        static gf_boolean_t +static gf_boolean_t  afr_locals_overlap (afr_local_t *local1, afr_local_t *local2)  {          uint64_t start1 = local1->transaction.start; diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h index ca8fcfefa89..db8245691ba 100644 --- a/xlators/cluster/afr/src/afr-transaction.h +++ b/xlators/cluster/afr/src/afr-transaction.h @@ -59,4 +59,8 @@ void  afr_pick_error_xdata (afr_local_t *local, afr_private_t *priv,                        inode_t *inode1, unsigned char *readable1,                        inode_t *inode2, unsigned char *readable2); +int +afr_pre_op_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                       int op_ret, int op_errno, +                       void *data, dict_t *xdata);  #endif /* __TRANSACTION_H__ */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index bf5a0d16fc7..24526313e89 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -210,6 +210,9 @@ reconfigure (xlator_t *this, dict_t *options)                            out);          GF_OPTION_RECONF ("locking-scheme", priv->locking_scheme, options, str,                            out); +        GF_OPTION_RECONF ("use-compound-fops", priv->use_compound_fops, +                          options, bool, +                          out);          GF_OPTION_RECONF ("granular-entry-heal", priv->esh_granular, options,                            bool, out); @@ -479,6 +482,8 @@ init (xlator_t *this)          GF_OPTION_INIT ("pre-op-compat", priv->pre_op_compat, bool, out);          GF_OPTION_INIT ("locking-scheme", priv->locking_scheme, str, out); +        GF_OPTION_INIT ("use-compound-fops", priv->use_compound_fops, +                        bool, out);          GF_OPTION_INIT ("granular-entry-heal", priv->esh_granular, bool, out);          GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out); @@ -1004,5 +1009,12 @@ struct volume_options options[] = {            .description = "If this option is enabled, i/o will fail even if "                           "one of the bricks is down in the replicas",          }, +        { .key   = {"use-compound-fops"}, +          .type  = GF_OPTION_TYPE_BOOL, +          .default_value = "no", +          .description = "Use compound fops framework to modify afr " +                         "transaction such that network roundtrips are " +                         "reduced, thus improving the performance.", +        },          { .key  = {NULL} },  }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 983f07fcce9..ff136c0b093 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -45,6 +45,10 @@ typedef int (*afr_inode_refresh_cbk_t) (call_frame_t *frame, xlator_t *this, int  typedef int (*afr_changelog_resume_t) (call_frame_t *frame, xlator_t *this); +typedef int (*afr_compound_cbk_t) (call_frame_t *frame, void *cookie, +                                   xlator_t *this, int op_ret, int op_errno, +                                   void *data, dict_t *xdata); +  #define AFR_COUNT(array,max) ({int __i; int __res = 0; for (__i = 0; __i < max; __i++) if (array[__i]) __res++; __res;})  #define AFR_INTERSECT(dst,src1,src2,max) ({int __i; for (__i = 0; __i < max; __i++) dst[__i] = src1[__i] && src2[__i];})  #define AFR_CMP(a1,a2,len) ({int __cmp = 0; int __i; for (__i = 0; __i < len; __i++) if (a1[__i] != a2[__i]) { __cmp = 1; break;} __cmp;}) @@ -153,6 +157,7 @@ typedef struct _afr_private {  	char                   *locking_scheme;          gf_boolean_t            esh_granular;          gf_boolean_t           consistent_io; +        gf_boolean_t            use_compound_fops;  } afr_private_t; @@ -786,6 +791,7 @@ typedef struct _afr_local {          call_frame_t *heal_frame;          gf_boolean_t need_full_crawl; +        gf_boolean_t compound;  } afr_local_t; @@ -1181,4 +1187,36 @@ afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv,  void  afr_handle_inconsistent_fop (call_frame_t *frame, int32_t *op_ret,                               int32_t *op_errno); + +void +afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index, +                     int32_t op_ret, int32_t op_errno, struct iatt *prebuf, +                     struct iatt *postbuf, dict_t *xdata); +void +afr_process_post_writev (call_frame_t *frame, xlator_t *this); + +void +afr_writev_unwind (call_frame_t *frame, xlator_t *this); + +void +afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame); + +void +afr_update_uninodelk (afr_local_t *local, afr_internal_lock_t *int_lock, +                    int32_t child_index); +gf_boolean_t +afr_can_compound_pre_op_and_op (afr_private_t *priv, glusterfs_fop_t fop); + +afr_compound_cbk_t +afr_pack_fop_args (call_frame_t *frame, compound_args_t *args, +                   glusterfs_fop_t fop, int index); +int +afr_is_inodelk_transaction(afr_local_t *local); + +afr_fd_ctx_t * +__afr_fd_ctx_get (fd_t *fd, xlator_t *this); + +void +afr_compound_cleanup (compound_args_t *args, dict_t *xdata, +                      dict_t *newloc_xdata);  #endif /* __AFR_H__ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 5a00811ec6f..ce34ffd2b05 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -3029,6 +3029,13 @@ struct volopt_map_entry glusterd_volopt_map[] = {            .op_version = GD_OP_VERSION_3_9_0,            .flags      = OPT_FLAG_CLIENT_OPT          }, +        { .key        = "cluster.use-compound-fops", +          .voltype    = "cluster/replicate", +          .value      = "off", +          .type       = DOC, +          .op_version = GD_OP_VERSION_3_8_4, +          .flags      = OPT_FLAG_CLIENT_OPT +        },          { .key         = NULL          }  };  | 
