diff options
-rw-r--r-- | libglusterfs/src/globals.h | 4 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 55 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 73 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-lk-common.c | 26 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 391 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-transaction.h | 4 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.c | 12 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 38 | ||||
-rw-r--r-- | xlators/mgmt/glusterd/src/glusterd-volume-set.c | 7 |
9 files changed, 546 insertions, 64 deletions
diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h index ab25c7f0ec1..e186d58cea6 100644 --- a/libglusterfs/src/globals.h +++ b/libglusterfs/src/globals.h @@ -68,6 +68,10 @@ #define GD_OP_VERSION_3_8_0 30800 /* Op-version for GlusterFS 3.8.0 */ +#define GD_OP_VERSION_3_8_3 30803 /* Op-version for GlusterFS 3.8.3 */ + +#define GD_OP_VERSION_3_8_4 30804 /* Op-version for GlusterFS 3.8.4 */ + #define GD_OP_VERSION_3_9_0 30900 /* Op-version for GlusterFS 3.9.0 */ #define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */ diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index dec667fd460..cf838846cbd 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -42,6 +42,7 @@ #include "afr-self-heal.h" #include "afr-self-heald.h" #include "afr-messages.h" +#include "compound-fop-utils.h" gf_boolean_t afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv, @@ -4592,6 +4593,7 @@ afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno) local->need_full_crawl = _gf_false; + local->compound = _gf_false; INIT_LIST_HEAD (&local->healer); return 0; out: @@ -4743,6 +4745,7 @@ afr_transaction_local_init (afr_local_t *local, xlator_t *this) if (!local->pending) goto out; + local->compound = _gf_false; INIT_LIST_HEAD (&local->transaction.eager_locked); ret = 0; @@ -5536,3 +5539,55 @@ afr_get_msg_id (char *op_type) return AFR_MSG_ADD_BRICK_STATUS; return -1; } + +gf_boolean_t +afr_can_compound_pre_op_and_op (afr_private_t *priv, glusterfs_fop_t fop) +{ + if (priv->arbiter_count != 0) + return _gf_false; + + if (!priv->use_compound_fops) + return _gf_false; + + switch (fop) { + case GF_FOP_WRITE: + return _gf_true; + default: + return _gf_false; + } +} + +afr_compound_cbk_t +afr_pack_fop_args (call_frame_t *frame, compound_args_t *args, + glusterfs_fop_t fop, int index) +{ + afr_local_t *local = frame->local; + + switch (fop) { + case GF_FOP_WRITE: + COMPOUND_PACK_ARGS (writev, GF_FOP_WRITE, + args, index, + local->fd, local->cont.writev.vector, + local->cont.writev.count, + local->cont.writev.offset, + local->cont.writev.flags, + local->cont.writev.iobref, + local->xdata_req); + return afr_pre_op_writev_cbk; + default: + break; + } + return NULL; +} + +void +afr_compound_cleanup (compound_args_t *args, dict_t *xdata, + dict_t *newloc_xdata) +{ + if (args) + compound_args_cleanup (args); + if (xdata) + dict_unref (xdata); + if (newloc_xdata) + dict_unref (newloc_xdata); +} diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index 24ab52f0266..200b420f5f0 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -292,21 +292,16 @@ afr_writev_handle_short_writes (call_frame_t *frame, xlator_t *this) } } -int -afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +void +afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index, int32_t op_ret, int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - afr_local_t * local = NULL; - call_frame_t *fop_frame = NULL; - int child_index = (long) cookie; - int call_count = -1; int ret = 0; + afr_local_t *local = frame->local; uint32_t open_fd_count = 0; uint32_t write_is_append = 0; - local = frame->local; - LOCK (&frame->lock); { __afr_inode_write_fill (frame, this, child_index, op_ret, @@ -324,32 +319,60 @@ afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, &open_fd_count); if (ret == -1) goto unlock; - if ((open_fd_count > local->open_fd_count)) { - local->open_fd_count = open_fd_count; - local->update_open_fd_count = _gf_true; + if (open_fd_count > local->open_fd_count) { + local->open_fd_count = open_fd_count; + local->update_open_fd_count = _gf_true; } } unlock: UNLOCK (&frame->lock); +} - call_count = afr_frame_return (frame); +void +afr_process_post_writev (call_frame_t *frame, xlator_t *this) +{ + afr_local_t *local = NULL; - if (call_count == 0) { - if (!local->stable_write && !local->append_write) - /* An appended write removes the necessity to - fsync() the file. This is because self-heal - has the logic to check for larger file when - the xattrs are not reliably pointing at - a stale file. - */ - afr_fd_report_unstable_write (this, local->fd); + local = frame->local; - __afr_inode_write_finalize (frame, this); + if (!local->stable_write && !local->append_write) + /* An appended write removes the necessity to + fsync() the file. This is because self-heal + has the logic to check for larger file when + the xattrs are not reliably pointing at + a stale file. + */ + afr_fd_report_unstable_write (this, local->fd); + + __afr_inode_write_finalize (frame, this); - afr_writev_handle_short_writes (frame, this); + afr_writev_handle_short_writes (frame, this); - if (local->update_open_fd_count) - afr_handle_open_fd_count (frame, this); + if (local->update_open_fd_count) + afr_handle_open_fd_count (frame, this); + +} + +int +afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) +{ + afr_local_t *local = NULL; + call_frame_t *fop_frame = NULL; + int child_index = (long) cookie; + int call_count = -1; + int ret = 0; + + local = frame->local; + + afr_inode_write_fill (frame, this, child_index, op_ret, op_errno, + prebuf, postbuf, xdata); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + afr_process_post_writev (frame, this); if (!afr_txn_nothing_failed (frame, this)) { //Don't unwind until post-op is complete diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c index 718ba318cfe..233672898f1 100644 --- a/xlators/cluster/afr/src/afr-lk-common.c +++ b/xlators/cluster/afr/src/afr-lk-common.c @@ -463,8 +463,8 @@ transaction_lk_op (afr_local_t *local) } -static int -is_afr_lock_transaction (afr_local_t *local) +int +afr_is_inodelk_transaction(afr_local_t *local) { int ret = 0; @@ -636,13 +636,25 @@ afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this, return 0; } +void +afr_update_uninodelk (afr_local_t *local, afr_internal_lock_t *int_lock, + int32_t child_index) +{ + afr_inodelk_t *inodelk = NULL; + + inodelk = afr_get_inodelk (int_lock, int_lock->domain); + inodelk->locked_nodes[child_index] &= LOCKED_NO; + if (local->transaction.eager_lock) + local->transaction.eager_lock[child_index] = 0; + +} + static int32_t afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata) { afr_local_t *local = NULL; afr_internal_lock_t *int_lock = NULL; - afr_inodelk_t *inodelk = NULL; int32_t child_index = (long)cookie; afr_private_t *priv = NULL; @@ -665,11 +677,7 @@ afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this, lkowner_utoa (&frame->root->lk_owner)); } - - inodelk = afr_get_inodelk (int_lock, int_lock->domain); - inodelk->locked_nodes[child_index] &= LOCKED_NO; - if (local->transaction.eager_lock) - local->transaction.eager_lock[child_index] = 0; + afr_update_uninodelk (local, int_lock, child_index); afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, xdata); @@ -1712,7 +1720,7 @@ afr_unlock (call_frame_t *frame, xlator_t *this) local = frame->local; if (transaction_lk_op (local)) { - if (is_afr_lock_transaction (local)) + if (afr_is_inodelk_transaction(local)) afr_unlock_inodelk (frame, this); else afr_unlock_entrylk (frame, this); diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 64a42d9fc7e..db7c6dbd2b0 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -17,6 +17,7 @@ #include "afr-transaction.h" #include "afr-self-heal.h" #include "afr-messages.h" +#include "compound-fop-utils.h" #include <signal.h> @@ -32,6 +33,14 @@ gf_boolean_t afr_changelog_pre_op_update (call_frame_t *frame, xlator_t *this); int +afr_changelog_call_count (afr_transaction_type type, + unsigned char *pre_op_subvols, + unsigned int child_count); +int +afr_post_op_unlock_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, + afr_changelog_resume_t changelog_resume, + afr_xattrop_type_t op); +int afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, afr_changelog_resume_t changelog_resume, afr_xattrop_type_t op); @@ -821,14 +830,16 @@ afr_handle_quorum (call_frame_t *frame) int afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this) { - afr_private_t * priv = this->private; - int i = 0; - int ret = 0; - int idx = 0; - afr_local_t * local = NULL; - dict_t *xattr = NULL; - int nothing_failed = 1; - gf_boolean_t need_undirty = _gf_false; + afr_private_t *priv = this->private; + afr_local_t *local = NULL; + dict_t *xattr = NULL; + afr_fd_ctx_t *fd_ctx = NULL; + int i = 0; + int ret = 0; + int idx = 0; + int nothing_failed = 1; + int piggyback = 0; + gf_boolean_t need_undirty = _gf_false; afr_handle_quorum (frame); local = frame->local; @@ -893,8 +904,34 @@ afr_changelog_post_op_now (call_frame_t *frame, xlator_t *this) goto out; } - afr_changelog_do (frame, this, xattr, afr_changelog_post_op_done, - AFR_TRANSACTION_POST_OP); + if (local->compound && local->fd) { + LOCK (&local->fd->lock); + { + fd_ctx = __afr_fd_ctx_get (local->fd, this); + for (i = 0; i < priv->child_count; i++) { + if (local->transaction.pre_op[i] && + local->transaction.eager_lock[i]) { + if (fd_ctx->lock_piggyback[i]) + piggyback = 1; + } + if (piggyback == 1) + break; + } + } + UNLOCK (&local->fd->lock); + } + + /* Do not compound if any brick got piggybacked lock as + * unlock should not be done for that. */ + if (local->compound && !piggyback) { + afr_post_op_unlock_do (frame, this, xattr, + afr_changelog_post_op_done, + AFR_TRANSACTION_POST_OP); + } else { + afr_changelog_do (frame, this, xattr, + afr_changelog_post_op_done, + AFR_TRANSACTION_POST_OP); + } out: if (xattr) dict_unref (xattr); @@ -1189,6 +1226,299 @@ out: } int +afr_pre_op_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + void *data, dict_t *xdata) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + call_frame_t *fop_frame = NULL; + default_args_cbk_t *write_args_cbk = NULL; + compound_args_cbk_t *args_cbk = data; + int call_count = -1; + int child_index = -1; + int i = 0; + + local = frame->local; + priv = this->private; + child_index = (long) cookie; + + if (local->pre_op_compat) + afr_changelog_pre_op_update (frame, this); + + if (op_ret == -1) { + local->op_errno = op_errno; + afr_transaction_fop_failed (frame, this, child_index); + } + write_args_cbk = &args_cbk->rsp_list[1]; + afr_inode_write_fill (frame, this, (long) i, write_args_cbk->op_ret, + write_args_cbk->op_errno, + &write_args_cbk->prestat, + &write_args_cbk->poststat, + write_args_cbk->xdata); + + call_count = afr_frame_return (frame); + + if (call_count == 0) { + afr_process_post_writev (frame, this); + if (!afr_txn_nothing_failed (frame, this)) { + /* Don't unwind until post-op is complete */ + local->transaction.resume (frame, this); + } else { + /* frame change, place frame in post-op delay and unwind */ + fop_frame = afr_transaction_detach_fop_frame (frame); + afr_writev_copy_outvars (frame, fop_frame); + local->transaction.resume (frame, this); + afr_writev_unwind (fop_frame, this); + } + } + return 0; +} + +int +afr_changelog_prepare (xlator_t *this, call_frame_t *frame, int *call_count, + afr_changelog_resume_t changelog_resume, + afr_xattrop_type_t op, dict_t **xdata, + dict_t **newloc_xdata) +{ + afr_private_t *priv = NULL; + afr_local_t *local = NULL; + + local = frame->local; + priv = this->private; + + *call_count = afr_changelog_call_count (local->transaction.type, + local->transaction.pre_op, + priv->child_count); + + if (*call_count == 0) { + changelog_resume (frame, this); + return -1; + } + + afr_changelog_populate_xdata (frame, op, xdata, newloc_xdata); + local->call_count = *call_count; + + local->transaction.changelog_resume = changelog_resume; + return 0; +} + +int +afr_pre_op_fop_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, + afr_changelog_resume_t changelog_resume, + afr_xattrop_type_t op) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + dict_t *xdata = NULL; + dict_t *newloc_xdata = NULL; + compound_args_t *args = NULL; + int i = 0, call_count = 0; + afr_compound_cbk_t compound_cbk; + int ret = 0; + int op_errno = ENOMEM; + + local = frame->local; + priv = this->private; + + /* If lock failed on all, just unlock and unwind */ + ret = afr_changelog_prepare (this, frame, &call_count, changelog_resume, + op, &xdata, &newloc_xdata); + + if (ret) + return 0; + + local->call_count = call_count; + + afr_save_lk_owner (frame); + frame->root->lk_owner = + local->transaction.main_frame->root->lk_owner; + + args = compound_fop_alloc (2, GF_CFOP_XATTROP_WRITEV, NULL); + + if (!args) + goto err; + + /* pack pre-op part */ + i = 0; + COMPOUND_PACK_ARGS (fxattrop, GF_FOP_FXATTROP, + args, i, + local->fd, GF_XATTROP_ADD_ARRAY, + xattr, xdata); + i++; + /* pack whatever fop needs to be packed + * @compound_cbk holds the cbk that would need to be called + */ + compound_cbk = afr_pack_fop_args (frame, args, local->op, i); + + for (i = 0; i < priv->child_count; i++) { + /* Means lock did not succeed on this brick */ + if (!local->transaction.pre_op[i]) + continue; + + STACK_WIND_COOKIE (frame, compound_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->compound, + args, + NULL); + if (!--call_count) + break; + } + + afr_compound_cleanup (args, xdata, newloc_xdata); + return 0; +err: + local->internal_lock.lock_cbk = local->transaction.done; + local->op_ret = -1; + local->op_errno = op_errno; + + afr_restore_lk_owner (frame); + afr_unlock (frame, this); + + afr_compound_cleanup (args, xdata, newloc_xdata); + return 0; +} + +int +afr_post_op_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + void *data, dict_t *xdata) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + compound_args_cbk_t *args_cbk = data; + int call_count = -1; + afr_internal_lock_t *int_lock = NULL; + afr_inodelk_t *inodelk = NULL; + int32_t child_index = (long)cookie; + int i = 0; + + local = frame->local; + priv = this->private; + child_index = (long) cookie; + + local = frame->local; + int_lock = &local->internal_lock; + + afr_update_uninodelk (local, int_lock, child_index); + + LOCK (&frame->lock); + { + call_count = --int_lock->lk_call_count; + } + UNLOCK (&frame->lock); + + if (call_count == 0) { + if (local->transaction.resume_stub) { + call_resume (local->transaction.resume_stub); + local->transaction.resume_stub = NULL; + } + gf_msg_trace (this->name, 0, + "All internal locks unlocked"); + int_lock->lock_cbk (frame, this); + } + + return 0; +} + +int +afr_post_op_unlock_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, + afr_changelog_resume_t changelog_resume, + afr_xattrop_type_t op) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + dict_t *xdata = NULL; + dict_t *newloc_xdata = NULL; + compound_args_t *args = NULL; + afr_internal_lock_t *int_lock = NULL; + afr_inodelk_t *inodelk = NULL; + struct gf_flock *flock_use = NULL; + int i = 0; + int call_count = 0; + struct gf_flock flock = {0,}; + struct gf_flock full_flock = {0,}; + int ret = 0; + + local = frame->local; + priv = this->private; + int_lock = &local->internal_lock; + + if (afr_is_inodelk_transaction(local)) { + inodelk = afr_get_inodelk (int_lock, int_lock->domain); + + flock.l_start = inodelk->flock.l_start; + flock.l_len = inodelk->flock.l_len; + flock.l_type = F_UNLCK; + full_flock.l_type = F_UNLCK; + + } + + ret = afr_changelog_prepare (this, frame, &call_count, changelog_resume, + op, &xdata, &newloc_xdata); + + if (ret) + return 0; + + int_lock->lk_call_count = call_count; + + int_lock->lock_cbk = local->transaction.done; + + args = compound_fop_alloc (2, GF_CFOP_XATTROP_UNLOCK, NULL); + + if (!args) { + local->op_ret = -1; + local->op_errno = ENOMEM; + afr_changelog_post_op_done (frame, this); + goto out; + } + + i = 0; + COMPOUND_PACK_ARGS (fxattrop, GF_FOP_FXATTROP, + args, i, + local->fd, GF_XATTROP_ADD_ARRAY, + xattr, xdata); + i++; + if (!local->transaction.eager_lock_on) + flock_use = &flock; + else + flock_use = &full_flock; + + if (afr_is_inodelk_transaction(local)) { + if (local->fd) { + COMPOUND_PACK_ARGS (finodelk, GF_FOP_FINODELK, + args, i, + int_lock->domain, local->fd, + F_SETLK, flock_use, NULL); + } else { + COMPOUND_PACK_ARGS (inodelk, GF_FOP_INODELK, + args, i, + int_lock->domain, &local->loc, + F_SETLK, flock_use, NULL); + } + } + + for (i = 0; i < priv->child_count; i++) { + /* pre_op[i] has to be true for all nodes that were + * successfully locked. */ + if (!local->transaction.pre_op[i]) + continue; + STACK_WIND_COOKIE (frame, afr_post_op_unlock_cbk, + (void *) (long) i, + priv->children[i], + priv->children[i]->fops->compound, + args, + NULL); + if (!--call_count) + break; + } +out: + afr_compound_cleanup (args, xdata, newloc_xdata); + return 0; +} + +int afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, afr_changelog_resume_t changelog_resume, afr_xattrop_type_t op) @@ -1199,23 +1529,16 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, dict_t *newloc_xdata = NULL; int i = 0; int call_count = 0; + int ret = 0; local = frame->local; priv = this->private; - call_count = afr_changelog_call_count (local->transaction.type, - local->transaction.pre_op, - priv->child_count); - - if (call_count == 0) { - changelog_resume (frame, this); - return 0; - } - - afr_changelog_populate_xdata (frame, op, &xdata, &newloc_xdata); - local->call_count = call_count; + ret = afr_changelog_prepare (this, frame, &call_count, changelog_resume, + op, &xdata, &newloc_xdata); - local->transaction.changelog_resume = changelog_resume; + if (ret) + return 0; for (i = 0; i < priv->child_count; i++) { if (!local->transaction.pre_op[i]) @@ -1380,8 +1703,21 @@ afr_changelog_pre_op (call_frame_t *frame, xlator_t *this) goto next; } - afr_changelog_do (frame, this, xdata_req, afr_transaction_perform_fop, - AFR_TRANSACTION_PRE_OP); + /* Till here we have already decided if pre-op needs to be done, + * based on various criteria. The only thing that needs to be checked + * now on is whether compound-fops is enabled or not. + * If it is, then perform pre-op and fop together for writev op. + */ + if (afr_can_compound_pre_op_and_op (priv, local->op)) { + local->compound = _gf_true; + afr_pre_op_fop_do (frame, this, xdata_req, + afr_transaction_perform_fop, + AFR_TRANSACTION_PRE_OP); + } else { + afr_changelog_do (frame, this, xdata_req, + afr_transaction_perform_fop, + AFR_TRANSACTION_PRE_OP); + } if (xdata_req) dict_unref (xdata_req); @@ -1738,10 +2074,6 @@ out: void -afr_delayed_changelog_post_op (xlator_t *this, call_frame_t *frame, fd_t *fd, - call_stub_t *stub); - -void afr_delayed_changelog_wake_up_cbk (void *data) { fd_t *fd = NULL; @@ -2031,7 +2363,6 @@ afr_delayed_changelog_wake_up (xlator_t *this, fd_t *fd) afr_delayed_changelog_post_op (this, NULL, fd, NULL); } - int afr_transaction_resume (call_frame_t *frame, xlator_t *this) { @@ -2082,7 +2413,7 @@ afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, - static gf_boolean_t +static gf_boolean_t afr_locals_overlap (afr_local_t *local1, afr_local_t *local2) { uint64_t start1 = local1->transaction.start; diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h index ca8fcfefa89..db8245691ba 100644 --- a/xlators/cluster/afr/src/afr-transaction.h +++ b/xlators/cluster/afr/src/afr-transaction.h @@ -59,4 +59,8 @@ void afr_pick_error_xdata (afr_local_t *local, afr_private_t *priv, inode_t *inode1, unsigned char *readable1, inode_t *inode2, unsigned char *readable2); +int +afr_pre_op_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, + void *data, dict_t *xdata); #endif /* __TRANSACTION_H__ */ diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index bf5a0d16fc7..24526313e89 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -210,6 +210,9 @@ reconfigure (xlator_t *this, dict_t *options) out); GF_OPTION_RECONF ("locking-scheme", priv->locking_scheme, options, str, out); + GF_OPTION_RECONF ("use-compound-fops", priv->use_compound_fops, + options, bool, + out); GF_OPTION_RECONF ("granular-entry-heal", priv->esh_granular, options, bool, out); @@ -479,6 +482,8 @@ init (xlator_t *this) GF_OPTION_INIT ("pre-op-compat", priv->pre_op_compat, bool, out); GF_OPTION_INIT ("locking-scheme", priv->locking_scheme, str, out); + GF_OPTION_INIT ("use-compound-fops", priv->use_compound_fops, + bool, out); GF_OPTION_INIT ("granular-entry-heal", priv->esh_granular, bool, out); GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out); @@ -1004,5 +1009,12 @@ struct volume_options options[] = { .description = "If this option is enabled, i/o will fail even if " "one of the bricks is down in the replicas", }, + { .key = {"use-compound-fops"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "no", + .description = "Use compound fops framework to modify afr " + "transaction such that network roundtrips are " + "reduced, thus improving the performance.", + }, { .key = {NULL} }, }; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index 983f07fcce9..ff136c0b093 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -45,6 +45,10 @@ typedef int (*afr_inode_refresh_cbk_t) (call_frame_t *frame, xlator_t *this, int typedef int (*afr_changelog_resume_t) (call_frame_t *frame, xlator_t *this); +typedef int (*afr_compound_cbk_t) (call_frame_t *frame, void *cookie, + xlator_t *this, int op_ret, int op_errno, + void *data, dict_t *xdata); + #define AFR_COUNT(array,max) ({int __i; int __res = 0; for (__i = 0; __i < max; __i++) if (array[__i]) __res++; __res;}) #define AFR_INTERSECT(dst,src1,src2,max) ({int __i; for (__i = 0; __i < max; __i++) dst[__i] = src1[__i] && src2[__i];}) #define AFR_CMP(a1,a2,len) ({int __cmp = 0; int __i; for (__i = 0; __i < len; __i++) if (a1[__i] != a2[__i]) { __cmp = 1; break;} __cmp;}) @@ -153,6 +157,7 @@ typedef struct _afr_private { char *locking_scheme; gf_boolean_t esh_granular; gf_boolean_t consistent_io; + gf_boolean_t use_compound_fops; } afr_private_t; @@ -786,6 +791,7 @@ typedef struct _afr_local { call_frame_t *heal_frame; gf_boolean_t need_full_crawl; + gf_boolean_t compound; } afr_local_t; @@ -1181,4 +1187,36 @@ afr_is_consistent_io_possible (afr_local_t *local, afr_private_t *priv, void afr_handle_inconsistent_fop (call_frame_t *frame, int32_t *op_ret, int32_t *op_errno); + +void +afr_inode_write_fill (call_frame_t *frame, xlator_t *this, int child_index, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata); +void +afr_process_post_writev (call_frame_t *frame, xlator_t *this); + +void +afr_writev_unwind (call_frame_t *frame, xlator_t *this); + +void +afr_writev_copy_outvars (call_frame_t *src_frame, call_frame_t *dst_frame); + +void +afr_update_uninodelk (afr_local_t *local, afr_internal_lock_t *int_lock, + int32_t child_index); +gf_boolean_t +afr_can_compound_pre_op_and_op (afr_private_t *priv, glusterfs_fop_t fop); + +afr_compound_cbk_t +afr_pack_fop_args (call_frame_t *frame, compound_args_t *args, + glusterfs_fop_t fop, int index); +int +afr_is_inodelk_transaction(afr_local_t *local); + +afr_fd_ctx_t * +__afr_fd_ctx_get (fd_t *fd, xlator_t *this); + +void +afr_compound_cleanup (compound_args_t *args, dict_t *xdata, + dict_t *newloc_xdata); #endif /* __AFR_H__ */ diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 5a00811ec6f..ce34ffd2b05 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -3029,6 +3029,13 @@ struct volopt_map_entry glusterd_volopt_map[] = { .op_version = GD_OP_VERSION_3_9_0, .flags = OPT_FLAG_CLIENT_OPT }, + { .key = "cluster.use-compound-fops", + .voltype = "cluster/replicate", + .value = "off", + .type = DOC, + .op_version = GD_OP_VERSION_3_8_4, + .flags = OPT_FLAG_CLIENT_OPT + }, { .key = NULL } }; |