diff options
| author | Jeff Darcy <jdarcy@fb.com> | 2017-08-31 12:33:59 -0700 |
|---|---|---|
| committer | Jeff Darcy <jdarcy@fb.com> | 2017-08-31 12:33:59 -0700 |
| commit | ed23e379ee397b3fed479c15b7551d2dbba9a05f (patch) | |
| tree | fe9bc23b851e0ee5502a48f1362b3ef9b10052f3 /xlators/cluster | |
| parent | f2d57485d57e14a064c9ca6e83fe6c92131a8e37 (diff) | |
| parent | d174f021a4e0667e60ce6abc038106ad9b74dc74 (diff) | |
Merge remote-tracking branch 'origin/release-3.8' into release-3.8-fb
Change-Id: Ie35cd1c8c7808949ddf79b3189f1f8bf0ff70ed8
Diffstat (limited to 'xlators/cluster')
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 91 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-write.c | 1 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 1 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-read-txn.c | 5 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 6 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 35 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.h | 2 | ||||
| -rw-r--r-- | xlators/cluster/afr/src/afr.c | 9 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 81 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-inode-read.c | 37 | ||||
| -rw-r--r-- | xlators/cluster/dht/src/dht-inode-write.c | 32 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-heal.c | 12 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec-inode-read.c | 23 | ||||
| -rw-r--r-- | xlators/cluster/ec/src/ec.c | 14 |
16 files changed, 200 insertions, 153 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 6a9b20d4443..4c2343f8e9b 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1083,7 +1083,7 @@ refresh_done: } int -afr_inode_refresh_done (call_frame_t *frame, xlator_t *this) +afr_inode_refresh_done (call_frame_t *frame, xlator_t *this, int error) { afr_private_t *priv = NULL; call_frame_t *heal_frame = NULL; @@ -1094,6 +1094,11 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this) int ret = 0; int err = 0; + if (error != 0) { + err = error; + goto refresh_done; + } + local = frame->local; priv = this->private; @@ -1159,7 +1164,7 @@ afr_inode_refresh_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this, call_count = afr_frame_return (frame); if (call_count == 0) { afr_set_need_heal (this, local); - afr_inode_refresh_done (frame, this); + afr_inode_refresh_done (frame, this, 0); } } @@ -1250,20 +1255,21 @@ afr_inode_refresh_do (call_frame_t *frame, xlator_t *this) if (local->fd) { fd_ctx = afr_fd_ctx_get (local->fd, this); if (!fd_ctx) { - afr_inode_refresh_done (frame, this); + afr_inode_refresh_done (frame, this, EINVAL); return 0; } } xdata = dict_new (); if (!xdata) { - afr_inode_refresh_done (frame, this); + afr_inode_refresh_done (frame, this, ENOMEM); return 0; } - if (afr_xattr_req_prepare (this, xdata) != 0) { + ret = afr_xattr_req_prepare (this, xdata); + if (ret != 0) { dict_unref (xdata); - afr_inode_refresh_done (frame, this); + afr_inode_refresh_done (frame, this, -ret); return 0; } @@ -1296,7 +1302,10 @@ afr_inode_refresh_do (call_frame_t *frame, xlator_t *this) call_count = local->call_count; if (!call_count) { dict_unref (xdata); - afr_inode_refresh_done (frame, this); + if (local->fd && AFR_COUNT(local->child_up, priv->child_count)) + afr_inode_refresh_done (frame, this, EBADFD); + else + afr_inode_refresh_done (frame, this, ENOTCONN); return 0; } for (i = 0; i < priv->child_count; i++) { @@ -3230,47 +3239,65 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *postbuf, dict_t *xdata) { afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i = 0; int call_count = -1; int child_index = (long) cookie; int read_subvol = 0; call_stub_t *stub = NULL; local = frame->local; - - read_subvol = afr_data_subvol_get (local->inode, this, NULL, NULL, - NULL, NULL); + priv = this->private; LOCK (&frame->lock); { + local->replies[child_index].valid = 1; + local->replies[child_index].op_ret = op_ret; + local->replies[child_index].op_errno = op_errno; if (op_ret == 0) { - if (local->op_ret == -1) { - local->op_ret = 0; - - local->cont.inode_wfop.prebuf = *prebuf; - local->cont.inode_wfop.postbuf = *postbuf; - - if (xdata) - local->xdata_rsp = dict_ref (xdata); - } - - if (child_index == read_subvol) { - local->cont.inode_wfop.prebuf = *prebuf; - local->cont.inode_wfop.postbuf = *postbuf; - if (xdata) { - if (local->xdata_rsp) - dict_unref (local->xdata_rsp); - local->xdata_rsp = dict_ref (xdata); - } - } - } else { - local->op_errno = op_errno; - } + if (prebuf) + local->replies[child_index].prestat = *prebuf; + if (postbuf) + local->replies[child_index].poststat = *postbuf; + if (xdata) + local->replies[child_index].xdata = + dict_ref (xdata); + } } UNLOCK (&frame->lock); call_count = afr_frame_return (frame); if (call_count == 0) { + local->op_ret = -1; + local->op_errno = afr_final_errno (local, priv); + read_subvol = afr_data_subvol_get (local->inode, this, NULL, + local->readable, NULL, NULL); + /* Pick a reply that is valid and readable, with a preference + * given to read_subvol. */ + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; + if (local->replies[i].op_ret != 0) + continue; + if (!local->readable[i]) + continue; + local->op_ret = local->replies[i].op_ret; + local->op_errno = local->replies[i].op_errno; + local->cont.inode_wfop.prebuf = + local->replies[i].prestat; + local->cont.inode_wfop.postbuf = + local->replies[i].poststat; + if (local->replies[i].xdata) { + if (local->xdata_rsp) + dict_unref (local->xdata_rsp); + local->xdata_rsp = + dict_ref (local->replies[i].xdata); + } + if (i == read_subvol) + break; + } + /* Make a stub out of the frame, and register it with the waking up post-op. When the call-stub resumes, we are guaranteed that there was no post-op pending diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 8e483c382c4..9099b8c1eee 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -183,7 +183,6 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this) } } - afr_txn_arbitrate_fop_cbk (frame, this); } diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index ddc257dbde4..8c312a89e53 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -131,7 +131,6 @@ __afr_inode_write_finalize (call_frame_t *frame, xlator_t *this) } } - afr_txn_arbitrate_fop_cbk (frame, this); afr_set_in_flight_sb_status (this, local, local->inode); } diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index 2390764bccd..a7a2d2999bf 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -222,9 +222,8 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode, local->readable, NULL); if (read_subvol < 0 || read_subvol > priv->child_count) { - gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN, - "Unreadable subvolume %d found with event generation " - "%d for gfid %s. (Possible split-brain)", + gf_msg_debug (this->name, 0, "Unreadable subvolume %d found " + "with event generation %d for gfid %s.", read_subvol, event_generation, uuid_utoa(inode->gfid)); goto refresh; } diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 1df3ddde1cb..629f1c6a7da 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -247,7 +247,7 @@ afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode, output_matrix[i][j] = 1; if (type == AFR_ENTRY_TRANSACTION) full_heal_mtx_out[i][j] = 1; - } else { + } else if (locked_on[j]) { output_matrix[i][j] = -input_matrix[i][j]; if (type == AFR_ENTRY_TRANSACTION) full_heal_mtx_out[i][j] = -full_heal_mtx_in[i][j]; diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 0b4d58dbabc..c1e945bfd82 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -562,7 +562,7 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this, AFR_DATA_TRANSACTION, locked_on, replies); - return source; + goto out; } /* No split brain at this point. If we were called from diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index f3fa5d39506..4570ace7ef7 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -241,7 +241,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, undid_pending, AFR_METADATA_TRANSACTION, locked_on, replies); - return source; + goto out; } /* If this is a directory mtime/ctime only split brain @@ -255,7 +255,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, uuid_utoa (replies[source].poststat.ia_gfid)); sources[source] = 1; healed_sinks[source] = 0; - return source; + goto out; } if (!priv->metadata_splitbrain_forced_heal) { @@ -314,6 +314,8 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, } } +out: + afr_mark_active_sinks (this, sources, locked_on, healed_sinks); return source; } diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 8178fc0d18b..9b5063d8aa8 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -301,22 +301,21 @@ afr_compute_pre_op_sources (call_frame_t *frame, xlator_t *this) } } -void -afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this) +gf_boolean_t +afr_has_arbiter_fop_cbk_quorum (call_frame_t *frame) { afr_local_t *local = NULL; afr_private_t *priv = NULL; + xlator_t *this = NULL; gf_boolean_t fop_failed = _gf_false; unsigned char *pre_op_sources = NULL; int i = 0; local = frame->local; + this = frame->this; priv = this->private; pre_op_sources = local->transaction.pre_op_sources; - if (priv->arbiter_count != 1 || local->op_ret < 0) - return; - /* If the fop failed on the brick, it is not a source. */ for (i = 0; i < priv->child_count; i++) if (local->transaction.failed_subvols[i]) @@ -332,12 +331,10 @@ afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this) break; } - if (fop_failed) { - local->op_ret = -1; - local->op_errno = ENOTCONN; - } + if (fop_failed) + return _gf_false; - return; + return _gf_true; } void @@ -588,11 +585,17 @@ afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock) int afr_changelog_call_count (afr_transaction_type type, unsigned char *pre_op_subvols, + unsigned char *failed_subvols, unsigned int child_count) { + int i = 0; int call_count = 0; - call_count = AFR_COUNT(pre_op_subvols, child_count); + for (i = 0; i < child_count; i++) { + if (pre_op_subvols[i] && !failed_subvols[i]) { + call_count++; + } + } if (type == AFR_ENTRY_RENAME_TRANSACTION) call_count *= 2; @@ -779,8 +782,12 @@ afr_handle_quorum (call_frame_t *frame) * no split-brain with the fix. The problem is eliminated completely. */ - if (afr_has_fop_cbk_quorum (frame)) + if (priv->arbiter_count) { + if (afr_has_arbiter_fop_cbk_quorum (frame)) + return; + } else if (afr_has_fop_cbk_quorum (frame)) { return; + } for (i = 0; i < priv->child_count; i++) { if (local->transaction.pre_op[i]) @@ -1244,6 +1251,7 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, call_count = afr_changelog_call_count (local->transaction.type, local->transaction.pre_op, + local->transaction.failed_subvols, priv->child_count); if (call_count == 0) { @@ -1257,7 +1265,8 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, local->transaction.changelog_resume = changelog_resume; for (i = 0; i < priv->child_count; i++) { - if (!local->transaction.pre_op[i]) + if (!local->transaction.pre_op[i] || + local->transaction.failed_subvols[i]) continue; switch (local->transaction.type) { diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h index ca8fcfefa89..dcdadbc84f4 100644 --- a/xlators/cluster/afr/src/afr-transaction.h +++ b/xlators/cluster/afr/src/afr-transaction.h @@ -16,8 +16,6 @@ void afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index); -void -afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this); int afr_lock_server_count (afr_private_t *priv, afr_transaction_type type); diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index fc5fda6844f..86f667116af 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -930,16 +930,17 @@ struct volume_options options[] = { { .key = {"eager-lock"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "on", - .description = "Lock phase of a transaction has two sub-phases. " + .description = "Enable/Disable eager lock for replica volume. " + "Lock phase of a transaction has two sub-phases. " "First is an attempt to acquire locks in parallel by " "broadcasting non-blocking lock requests. If lock " "acquisition fails on any server, then the held locks " - "are unlocked and revert to a blocking locked mode " + "are unlocked and we revert to a blocking locks mode " "sequentially on one server after another. If this " "option is enabled the initial broadcasting lock " - "request attempt to acquire lock on the entire file. " + "request attempts to acquire a full lock on the entire file. " "If this fails, we revert back to the sequential " - "\"regional\" blocking lock as before. In the case " + "\"regional\" blocking locks as before. In the case " "where such an \"eager\" lock is granted in the " "non-blocking phase, it gives rise to an opportunity " "for optimization. i.e, if the next write transaction " diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index c120dffbf23..cd35080e243 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -809,8 +809,6 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (!op_ret && gf_uuid_is_null (local->gfid)) memcpy (local->gfid, stbuf->ia_gfid, 16); - memcpy (local->loc.gfid, local->gfid, 16); - /* Check if the gfid is different for file from other node */ if (!op_ret && gf_uuid_compare (local->gfid, stbuf->ia_gfid)) { @@ -879,6 +877,8 @@ unlock: this_call_cnt = dht_frame_return (frame); if (is_last_call (this_call_cnt)) { + gf_uuid_copy (local->loc.gfid, local->gfid); + if (local->need_selfheal) { local->need_selfheal = 0; dht_lookup_everywhere (frame, this, &local->loc); @@ -919,7 +919,6 @@ unlock: selfheal: FRAME_SU_DO (frame, dht_local_t); - gf_uuid_copy (local->loc.gfid, local->gfid); ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk, &local->loc, layout); out: @@ -3826,6 +3825,7 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this, goto err; } + local->xattr_req = xdata ? dict_ref (xdata) : dict_new (); local->call_cnt = call_cnt = layout->cnt; if (IA_ISDIR (fd->inode->ia_type)) { @@ -3833,7 +3833,7 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this, STACK_WIND (frame, dht_err_cbk, layout->list[i].xlator, layout->list[i].xlator->fops->fsetxattr, - fd, xattr, flags, NULL); + fd, xattr, flags, xdata); } } else { @@ -3842,10 +3842,8 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this, local->rebalance.xattr = dict_ref (xattr); local->rebalance.flags = flags; - xdata = xdata ? dict_ref (xdata) : dict_new (); - if (xdata) - ret = dict_set_dynstr_with_alloc (xdata, - DHT_IATT_IN_XDATA_KEY, "yes"); + ret = dict_set_dynstr_with_alloc (local->xattr_req, + DHT_IATT_IN_XDATA_KEY, "yes"); if (ret) { gf_msg_debug (this->name, 0, "Failed to set dictionary key %s for fd=%p", @@ -3853,11 +3851,8 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this, } STACK_WIND (frame, dht_file_setxattr_cbk, subvol, - subvol->fops->fsetxattr, fd, xattr, flags, xdata); - - if (xdata) - dict_unref (xdata); - + subvol->fops->fsetxattr, fd, xattr, flags, + local->xattr_req); } return 0; @@ -3953,12 +3948,12 @@ dht_setxattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) STACK_WIND (frame, dht_file_setxattr_cbk, subvol, subvol->fops->setxattr, &local->loc, local->rebalance.xattr, local->rebalance.flags, - NULL); + local->xattr_req); } else { STACK_WIND (frame, dht_file_setxattr_cbk, subvol, subvol->fops->fsetxattr, local->fd, local->rebalance.xattr, local->rebalance.flags, - NULL); + local->xattr_req); } return 0; @@ -4242,6 +4237,7 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, if (tmp) { return dht_nuke_dir (frame, this, loc, tmp); } + local->xattr_req = xdata ? dict_ref (xdata) : dict_new (); if (IA_ISDIR (loc->inode->ia_type)) { @@ -4258,17 +4254,12 @@ dht_setxattr (call_frame_t *frame, xlator_t *this, local->rebalance.flags = flags; local->call_cnt = 1; - xdata = xdata ? dict_ref (xdata) : dict_new (); - if (xdata) - ret = dict_set_dynstr_with_alloc (xdata, - DHT_IATT_IN_XDATA_KEY, "yes"); + ret = dict_set_dynstr_with_alloc (local->xattr_req, + DHT_IATT_IN_XDATA_KEY, "yes"); STACK_WIND (frame, dht_file_setxattr_cbk, subvol, subvol->fops->setxattr, - loc, xattr, flags, xdata); - - if (xdata) - dict_unref (xdata); + loc, xattr, flags, local->xattr_req); } return 0; @@ -4387,11 +4378,11 @@ dht_removexattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, if (local->fop == GF_FOP_REMOVEXATTR) { STACK_WIND (frame, dht_file_removexattr_cbk, subvol, subvol->fops->removexattr, &local->loc, - local->key, NULL); + local->key, local->xattr_req); } else { STACK_WIND (frame, dht_file_removexattr_cbk, subvol, subvol->fops->fremovexattr, local->fd, - local->key, NULL); + local->key, local->xattr_req); } return 0; @@ -4428,8 +4419,6 @@ dht_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, unlock: UNLOCK (&frame->lock); - - this_call_cnt = dht_frame_return (frame); if (is_last_call (this_call_cnt)) { DHT_STACK_UNWIND (removexattr, frame, local->op_ret, @@ -4485,6 +4474,7 @@ dht_removexattr (call_frame_t *frame, xlator_t *this, op_errno = EINVAL; goto err; } + local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new (); local->call_cnt = call_cnt = layout->cnt; local->key = gf_strdup (key); @@ -4494,16 +4484,15 @@ dht_removexattr (call_frame_t *frame, xlator_t *this, STACK_WIND (frame, dht_removexattr_cbk, layout->list[i].xlator, layout->list[i].xlator->fops->removexattr, - loc, key, NULL); + loc, key, local->xattr_req); } } else { local->call_cnt = 1; - xdata = xdata ? dict_ref (xdata) : dict_new (); - if (xdata) - ret = dict_set_dynstr_with_alloc (xdata, - DHT_IATT_IN_XDATA_KEY, "yes"); + + ret = dict_set_dynstr_with_alloc (local->xattr_req, + DHT_IATT_IN_XDATA_KEY, "yes"); if (ret) { gf_msg (this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_DICT_SET_FAILED, "Failed to " @@ -4513,10 +4502,7 @@ dht_removexattr (call_frame_t *frame, xlator_t *this, STACK_WIND (frame, dht_file_removexattr_cbk, subvol, subvol->fops->removexattr, - loc, key, xdata); - - if (xdata) - dict_unref (xdata); + loc, key, local->xattr_req); } return 0; @@ -4574,6 +4560,7 @@ dht_fremovexattr (call_frame_t *frame, xlator_t *this, op_errno = EINVAL; goto err; } + local->xattr_req = xdata ? dict_ref (xdata) : dict_new (); local->call_cnt = call_cnt = layout->cnt; local->key = gf_strdup (key); @@ -4583,29 +4570,23 @@ dht_fremovexattr (call_frame_t *frame, xlator_t *this, STACK_WIND (frame, dht_removexattr_cbk, layout->list[i].xlator, layout->list[i].xlator->fops->fremovexattr, - fd, key, NULL); + fd, key, local->xattr_req); } } else { local->call_cnt = 1; - xdata = xdata ? dict_ref (xdata) : dict_new (); - if (xdata) - ret = dict_set_dynstr_with_alloc (xdata, - DHT_IATT_IN_XDATA_KEY, "yes"); + ret = dict_set_dynstr_with_alloc (local->xattr_req, + DHT_IATT_IN_XDATA_KEY, "yes"); if (ret) { gf_msg (this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_DICT_SET_FAILED, "Failed to " "set dictionary key %s for fd=%p", DHT_IATT_IN_XDATA_KEY, fd); } - STACK_WIND (frame, dht_file_removexattr_cbk, subvol, subvol->fops->fremovexattr, - fd, key, xdata); - - if (xdata) - dict_unref (xdata); + fd, key, local->xattr_req); } return 0; @@ -6527,7 +6508,7 @@ dht_link2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) local->call_cnt = 2; STACK_WIND (frame, dht_link_cbk, subvol, subvol->fops->link, - &local->loc, &local->loc2, NULL); + &local->loc, &local->loc2, local->xattr_req); return 0; err: @@ -6554,7 +6535,7 @@ dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this, srcvol = local->linkfile.srcvol; STACK_WIND (frame, dht_link_cbk, srcvol, srcvol->fops->link, - &local->loc, &local->loc2, xdata); + &local->loc, &local->loc2, local->xattr_req); return 0; @@ -6563,7 +6544,7 @@ err: dht_set_fixed_dir_stat (preparent); dht_set_fixed_dir_stat (postparent); DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent, - postparent, NULL); + postparent, xdata); return 0; } @@ -6614,6 +6595,8 @@ dht_link (call_frame_t *frame, xlator_t *this, op_errno = ENOMEM; goto err; } + if (xdata) + local->xattr_req = dict_ref (xdata); if (hashed_subvol != cached_subvol) { gf_uuid_copy (local->gfid, oldloc->inode->gfid); diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c index ac0f0e186fa..05f71fbcc86 100644 --- a/xlators/cluster/dht/src/dht-inode-read.c +++ b/xlators/cluster/dht/src/dht-inode-read.c @@ -76,7 +76,7 @@ dht_open2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) if (we_are_not_migrating (ret)) { /* This DHT layer is not migrating the file */ DHT_STACK_UNWIND (open, frame, -1, local->op_errno, - NULL, NULL); + NULL, local->rebalance.xdata); return 0; } @@ -88,7 +88,7 @@ dht_open2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open, &local->loc, local->rebalance.flags, local->fd, - NULL); + local->xattr_req); return 0; out: @@ -132,6 +132,8 @@ dht_open (call_frame_t *frame, xlator_t *this, op_errno = ENOSPC; goto err; } + if (xdata) + local->xattr_req = dict_ref (xdata); local->rebalance.flags = flags; local->call_cnt = 1; @@ -249,10 +251,10 @@ dht_attr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) if (local->fop == GF_FOP_FSTAT) { STACK_WIND (frame, dht_file_attr_cbk, subvol, - subvol->fops->fstat, local->fd, NULL); + subvol->fops->fstat, local->fd, local->xattr_req); } else { STACK_WIND (frame, dht_file_attr_cbk, subvol, - subvol->fops->stat, &local->loc, NULL); + subvol->fops->stat, &local->loc, local->xattr_req); } return 0; @@ -335,6 +337,8 @@ dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) op_errno = EINVAL; goto err; } + if (xdata) + local->xattr_req = dict_ref (xdata); if (IA_ISREG (loc->inode->ia_type)) { local->call_cnt = 1; @@ -395,6 +399,8 @@ dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) op_errno = EINVAL; goto err; } + if (xdata) + local->xattr_req = dict_ref (xdata); if (IA_ISREG (fd->inode->ia_type)) { local->call_cnt = 1; @@ -516,7 +522,7 @@ dht_readv2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) STACK_WIND (frame, dht_readv_cbk, subvol, subvol->fops->readv, local->fd, local->rebalance.size, local->rebalance.offset, - local->rebalance.flags, NULL); + local->rebalance.flags, local->xattr_req); return 0; @@ -551,6 +557,8 @@ dht_readv (call_frame_t *frame, xlator_t *this, op_errno = EINVAL; goto err; } + if (xdata) + local->xattr_req = dict_ref (xdata); local->rebalance.offset = off; local->rebalance.size = size; @@ -645,7 +653,7 @@ dht_access2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) local->call_cnt = 2; STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access, - &local->loc, local->rebalance.flags, NULL); + &local->loc, local->rebalance.flags, local->xattr_req); return 0; @@ -684,6 +692,8 @@ dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, op_errno = EINVAL; goto err; } + if (xdata) + local->xattr_req = dict_ref (xdata); STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access, loc, mask, xdata); @@ -718,9 +728,6 @@ dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->op_ret = op_ret; local->op_errno = op_errno; - if (xdata) - local->rebalance.xdata = dict_ref (xdata); - /* If context is set, then send flush() it to the destination */ dht_inode_ctx_get_mig_info (this, local->fd->inode, NULL, &subvol); if (subvol && dht_fd_open_on_dst (this, local->fd, subvol)) { @@ -761,7 +768,7 @@ dht_flush2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) STACK_WIND (frame, dht_flush_cbk, subvol, subvol->fops->flush, local->fd, - local->rebalance.xdata); + local->xattr_req); return 0; @@ -795,6 +802,8 @@ dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) op_errno = EINVAL; goto err; } + if (xdata) + local->xattr_req = dict_ref (xdata); local->call_cnt = 1; @@ -917,7 +926,7 @@ dht_fsync2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) local->call_cnt = 2; /* This is the second attempt */ STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync, - local->fd, local->rebalance.flags, NULL); + local->fd, local->rebalance.flags, local->xattr_req); return 0; @@ -944,6 +953,8 @@ dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync, goto err; } + if (xdata) + local->xattr_req = dict_ref (xdata); local->call_cnt = 1; local->rebalance.flags = datasync; @@ -1034,7 +1045,7 @@ dht_lk2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) STACK_WIND (frame, dht_lk_cbk, subvol, subvol->fops->lk, local->fd, local->rebalance.lock_cmd, &local->rebalance.flock, - local->rebalance.xdata); + local->xattr_req); return 0; @@ -1069,6 +1080,8 @@ dht_lk (call_frame_t *frame, xlator_t *this, op_errno = EINVAL; goto err; } + if (xdata) + local->xattr_req = dict_ref (xdata); local->rebalance.flock = *flock; local->rebalance.lock_cmd = cmd; diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c index 7420461da76..48d49dd3475 100644 --- a/xlators/cluster/dht/src/dht-inode-write.c +++ b/xlators/cluster/dht/src/dht-inode-write.c @@ -143,7 +143,7 @@ dht_writev2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) subvol, subvol->fops->writev, local->fd, local->rebalance.vector, local->rebalance.count, local->rebalance.offset, local->rebalance.flags, - local->rebalance.iobref, NULL); + local->rebalance.iobref, local->xattr_req); return 0; @@ -192,6 +192,8 @@ dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, op_errno = ENOSPC; goto err; } + if (xdata) + local->xattr_req = dict_ref (xdata); local->rebalance.vector = iov_dup (vector, count); local->rebalance.offset = off; @@ -334,11 +336,11 @@ dht_truncate2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) if (local->fop == GF_FOP_TRUNCATE) { STACK_WIND (frame, dht_truncate_cbk, subvol, subvol->fops->truncate, &local->loc, - local->rebalance.offset, NULL); + local->rebalance.offset, local->xattr_req); } else { STACK_WIND (frame, dht_truncate_cbk, subvol, subvol->fops->ftruncate, local->fd, - local->rebalance.offset, NULL); + local->rebalance.offset, local->xattr_req); } return 0; @@ -377,6 +379,8 @@ dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, op_errno = EINVAL; goto err; } + if (xdata) + local->xattr_req = dict_ref (xdata); STACK_WIND (frame, dht_truncate_cbk, subvol, subvol->fops->truncate, @@ -418,6 +422,8 @@ dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, op_errno = EINVAL; goto err; } + if (xdata) + local->xattr_req = dict_ref (xdata); STACK_WIND (frame, dht_truncate_cbk, subvol, subvol->fops->ftruncate, @@ -544,7 +550,7 @@ dht_fallocate2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) STACK_WIND(frame, dht_fallocate_cbk, subvol, subvol->fops->fallocate, local->fd, local->rebalance.flags, local->rebalance.offset, - local->rebalance.size, NULL); + local->rebalance.size, local->xattr_req); return 0; @@ -583,6 +589,8 @@ dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode, op_errno = EINVAL; goto err; } + if (xdata) + local->xattr_req = dict_ref (xdata); STACK_WIND (frame, dht_fallocate_cbk, subvol, subvol->fops->fallocate, @@ -709,7 +717,7 @@ dht_discard2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) STACK_WIND(frame, dht_discard_cbk, subvol, subvol->fops->discard, local->fd, local->rebalance.offset, local->rebalance.size, - NULL); + local->xattr_req); return 0; @@ -747,6 +755,8 @@ dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, op_errno = EINVAL; goto err; } + if (xdata) + local->xattr_req = dict_ref (xdata); STACK_WIND (frame, dht_discard_cbk, subvol, subvol->fops->discard, fd, offset, len, xdata); @@ -872,7 +882,7 @@ dht_zerofill2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) STACK_WIND(frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill, local->fd, local->rebalance.offset, local->rebalance.size, - NULL); + local->xattr_req); return 0; @@ -911,6 +921,8 @@ dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, op_errno = EINVAL; goto err; } + if (xdata) + local->xattr_req = dict_ref (xdata); STACK_WIND (frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill, fd, offset, len, xdata); @@ -1015,12 +1027,12 @@ dht_setattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret) STACK_WIND (frame, dht_file_setattr_cbk, subvol, subvol->fops->setattr, &local->loc, &local->rebalance.stbuf, local->rebalance.flags, - NULL); + local->xattr_req); } else { STACK_WIND (frame, dht_file_setattr_cbk, subvol, subvol->fops->fsetattr, local->fd, &local->rebalance.stbuf, local->rebalance.flags, - NULL); + local->xattr_req); } return 0; @@ -1113,6 +1125,8 @@ dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, op_errno = EINVAL; goto err; } + if (xdata) + local->xattr_req = dict_ref (xdata); if (IA_ISREG (loc->inode->ia_type)) { /* in the regular file _cbk(), we need to check for @@ -1184,6 +1198,8 @@ dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf, op_errno = EINVAL; goto err; } + if (xdata) + local->xattr_req = dict_ref (xdata); if (IA_ISREG (fd->inode->ia_type)) { /* in the regular file _cbk(), we need to check for diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c index da08f6c9a75..2373e6f93d5 100644 --- a/xlators/cluster/ec/src/ec-heal.c +++ b/xlators/cluster/ec/src/ec-heal.c @@ -410,6 +410,8 @@ ec_adjust_versions (call_frame_t *frame, ec_t *ec, ec_txn_t type, if (EC_COUNT (sources, ec->nodes) + EC_COUNT (healed_sinks, ec->nodes) == ec->nodes) erase_dirty = _gf_true; + else + op_ret = -ENOTCONN; for (i = 0; i < ec->nodes; i++) { if (!sources[i] && !healed_sinks[i]) @@ -509,7 +511,7 @@ ec_heal_metadata_find_direction (ec_t *ec, default_args_cbk_t *replies, if (!are_dicts_equal(replies[i].xdata, replies[j].xdata, ec_sh_key_match, NULL)) continue; - groups[j] = i; /*If iatts match put them into a group*/ + groups[j] = i; same_count++; } @@ -541,7 +543,6 @@ out: return ret; } - int __ec_heal_metadata_prepare (call_frame_t *frame, ec_t *ec, inode_t *inode, unsigned char *locked_on, default_args_cbk_t *replies, @@ -679,15 +680,12 @@ __ec_heal_metadata (call_frame_t *frame, ec_t *ec, inode_t *inode, goto out; } - if (EC_COUNT (sources, ec->nodes) == ec->nodes) { + if ((EC_COUNT (sources, ec->nodes) == ec->nodes) || + (EC_COUNT (healed_sinks, ec->nodes) == 0)) { ret = 0; goto erase_dirty; } - if (EC_COUNT (healed_sinks, ec->nodes) == 0) { - ret = -ENOTCONN; - goto out; - } source_buf = replies[source].stat; ret = cluster_setattr (ec->xl_list, healed_sinks, ec->nodes, sreplies, output, frame, ec->xl, &loc, diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c index c3d9c879eb7..fc121bcceb2 100644 --- a/xlators/cluster/ec/src/ec-inode-read.c +++ b/xlators/cluster/ec/src/ec-inode-read.c @@ -1577,23 +1577,16 @@ int32_t ec_manager_seek(ec_fop_data_t *fop, int32_t state) return EC_STATE_PREPARE_ANSWER; case EC_STATE_PREPARE_ANSWER: - cbk = fop->answer; - if (cbk != NULL) { - if (ec_dispatch_one_retry(fop, &cbk)) { - return EC_STATE_DISPATCH; - } - if (cbk->op_ret >= 0) { - ec_t *ec = fop->xl->private; + if (ec_dispatch_one_retry(fop, &cbk)) { + return EC_STATE_DISPATCH; + } + if ((cbk != NULL) && (cbk->op_ret >= 0)) { + ec_t *ec = fop->xl->private; - cbk->offset *= ec->fragments; - if (cbk->offset < fop->user_size) { - cbk->offset = fop->user_size; - } - } else { - ec_fop_set_error(fop, cbk->op_errno); + cbk->offset *= ec->fragments; + if (cbk->offset < fop->user_size) { + cbk->offset = fop->user_size; } - } else { - ec_fop_set_error(fop, EIO); } return EC_STATE_REPORT; diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c index b5e6bc08216..0a3a3cce391 100644 --- a/xlators/cluster/ec/src/ec.c +++ b/xlators/cluster/ec/src/ec.c @@ -1328,8 +1328,18 @@ struct volume_options options[] = { .key = {"eager-lock"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "on", - .description = "This option will enable/diable eager lock for" - "disperse volume " + .description = "Enable/Disable eager lock for disperse volume. " + "If a fop takes a lock and completes its operation, " + "it waits for next 1 second before releasing the lock, " + "to see if the lock can be reused for next fop from " + "the same client. If ec finds any lock contention within " + "1 second it releases the lock immediately before time " + "expires. This improves the performance of file operations." + "However, as it takes lock on first brick, for few operations " + "like read, discovery of lock contention might take long time " + "and can actually degrade the performance. " + "If eager lock is disabled, lock will be released as soon as fop " + "completes. " }, { .key = {"background-heals"}, .type = GF_OPTION_TYPE_INT, |
