diff options
| author | Jeff Darcy <jdarcy@fb.com> | 2017-08-31 12:33:59 -0700 |
|---|---|---|
| committer | Jeff Darcy <jdarcy@fb.com> | 2017-08-31 12:33:59 -0700 |
| commit | ed23e379ee397b3fed479c15b7551d2dbba9a05f (patch) | |
| tree | fe9bc23b851e0ee5502a48f1362b3ef9b10052f3 /xlators/cluster/afr/src | |
| parent | f2d57485d57e14a064c9ca6e83fe6c92131a8e37 (diff) | |
| parent | d174f021a4e0667e60ce6abc038106ad9b74dc74 (diff) | |
Merge remote-tracking branch 'origin/release-3.8' into release-3.8-fb
Change-Id: Ie35cd1c8c7808949ddf79b3189f1f8bf0ff70ed8
Diffstat (limited to 'xlators/cluster/afr/src')
| -rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 91 |
| -rw-r--r-- | xlators/cluster/afr/src/afr-dir-write.c | 1 |
| -rw-r--r-- | xlators/cluster/afr/src/afr-inode-write.c | 1 |
| -rw-r--r-- | xlators/cluster/afr/src/afr-read-txn.c | 5 |
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-common.c | 2 |
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-data.c | 2 |
| -rw-r--r-- | xlators/cluster/afr/src/afr-self-heal-metadata.c | 6 |
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 35 |
| -rw-r--r-- | xlators/cluster/afr/src/afr-transaction.h | 2 |
| -rw-r--r-- | xlators/cluster/afr/src/afr.c | 9 |
10 files changed, 94 insertions, 60 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 6a9b20d4443..4c2343f8e9b 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -1083,7 +1083,7 @@ refresh_done: } int -afr_inode_refresh_done (call_frame_t *frame, xlator_t *this) +afr_inode_refresh_done (call_frame_t *frame, xlator_t *this, int error) { afr_private_t *priv = NULL; call_frame_t *heal_frame = NULL; @@ -1094,6 +1094,11 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this) int ret = 0; int err = 0; + if (error != 0) { + err = error; + goto refresh_done; + } + local = frame->local; priv = this->private; @@ -1159,7 +1164,7 @@ afr_inode_refresh_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this, call_count = afr_frame_return (frame); if (call_count == 0) { afr_set_need_heal (this, local); - afr_inode_refresh_done (frame, this); + afr_inode_refresh_done (frame, this, 0); } } @@ -1250,20 +1255,21 @@ afr_inode_refresh_do (call_frame_t *frame, xlator_t *this) if (local->fd) { fd_ctx = afr_fd_ctx_get (local->fd, this); if (!fd_ctx) { - afr_inode_refresh_done (frame, this); + afr_inode_refresh_done (frame, this, EINVAL); return 0; } } xdata = dict_new (); if (!xdata) { - afr_inode_refresh_done (frame, this); + afr_inode_refresh_done (frame, this, ENOMEM); return 0; } - if (afr_xattr_req_prepare (this, xdata) != 0) { + ret = afr_xattr_req_prepare (this, xdata); + if (ret != 0) { dict_unref (xdata); - afr_inode_refresh_done (frame, this); + afr_inode_refresh_done (frame, this, -ret); return 0; } @@ -1296,7 +1302,10 @@ afr_inode_refresh_do (call_frame_t *frame, xlator_t *this) call_count = local->call_count; if (!call_count) { dict_unref (xdata); - afr_inode_refresh_done (frame, this); + if (local->fd && AFR_COUNT(local->child_up, priv->child_count)) + afr_inode_refresh_done (frame, this, EBADFD); + else + afr_inode_refresh_done (frame, this, ENOTCONN); return 0; } for (i = 0; i < priv->child_count; i++) { 
@@ -3230,47 +3239,65 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *postbuf, dict_t *xdata) { afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int i = 0; int call_count = -1; int child_index = (long) cookie; int read_subvol = 0; call_stub_t *stub = NULL; local = frame->local; - - read_subvol = afr_data_subvol_get (local->inode, this, NULL, NULL, - NULL, NULL); + priv = this->private; LOCK (&frame->lock); { + local->replies[child_index].valid = 1; + local->replies[child_index].op_ret = op_ret; + local->replies[child_index].op_errno = op_errno; if (op_ret == 0) { - if (local->op_ret == -1) { - local->op_ret = 0; - - local->cont.inode_wfop.prebuf = *prebuf; - local->cont.inode_wfop.postbuf = *postbuf; - - if (xdata) - local->xdata_rsp = dict_ref (xdata); - } - - if (child_index == read_subvol) { - local->cont.inode_wfop.prebuf = *prebuf; - local->cont.inode_wfop.postbuf = *postbuf; - if (xdata) { - if (local->xdata_rsp) - dict_unref (local->xdata_rsp); - local->xdata_rsp = dict_ref (xdata); - } - } - } else { - local->op_errno = op_errno; - } + if (prebuf) + local->replies[child_index].prestat = *prebuf; + if (postbuf) + local->replies[child_index].poststat = *postbuf; + if (xdata) + local->replies[child_index].xdata = + dict_ref (xdata); + } } UNLOCK (&frame->lock); call_count = afr_frame_return (frame); if (call_count == 0) { + local->op_ret = -1; + local->op_errno = afr_final_errno (local, priv); + read_subvol = afr_data_subvol_get (local->inode, this, NULL, + local->readable, NULL, NULL); + /* Pick a reply that is valid and readable, with a preference + * given to read_subvol. 
*/ + for (i = 0; i < priv->child_count; i++) { + if (!local->replies[i].valid) + continue; + if (local->replies[i].op_ret != 0) + continue; + if (!local->readable[i]) + continue; + local->op_ret = local->replies[i].op_ret; + local->op_errno = local->replies[i].op_errno; + local->cont.inode_wfop.prebuf = + local->replies[i].prestat; + local->cont.inode_wfop.postbuf = + local->replies[i].poststat; + if (local->replies[i].xdata) { + if (local->xdata_rsp) + dict_unref (local->xdata_rsp); + local->xdata_rsp = + dict_ref (local->replies[i].xdata); + } + if (i == read_subvol) + break; + } + /* Make a stub out of the frame, and register it with the waking up post-op. When the call-stub resumes, we are guaranteed that there was no post-op pending diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c index 8e483c382c4..9099b8c1eee 100644 --- a/xlators/cluster/afr/src/afr-dir-write.c +++ b/xlators/cluster/afr/src/afr-dir-write.c @@ -183,7 +183,6 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this) } } - afr_txn_arbitrate_fop_cbk (frame, this); } diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c index ddc257dbde4..8c312a89e53 100644 --- a/xlators/cluster/afr/src/afr-inode-write.c +++ b/xlators/cluster/afr/src/afr-inode-write.c @@ -131,7 +131,6 @@ __afr_inode_write_finalize (call_frame_t *frame, xlator_t *this) } } - afr_txn_arbitrate_fop_cbk (frame, this); afr_set_in_flight_sb_status (this, local, local->inode); } diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index 2390764bccd..a7a2d2999bf 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -222,9 +222,8 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode, local->readable, NULL); if (read_subvol < 0 || read_subvol > priv->child_count) { - gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN, - "Unreadable 
subvolume %d found with event generation " - "%d for gfid %s. (Possible split-brain)", + gf_msg_debug (this->name, 0, "Unreadable subvolume %d found " + "with event generation %d for gfid %s.", read_subvol, event_generation, uuid_utoa(inode->gfid)); goto refresh; } diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c index 1df3ddde1cb..629f1c6a7da 100644 --- a/xlators/cluster/afr/src/afr-self-heal-common.c +++ b/xlators/cluster/afr/src/afr-self-heal-common.c @@ -247,7 +247,7 @@ afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode, output_matrix[i][j] = 1; if (type == AFR_ENTRY_TRANSACTION) full_heal_mtx_out[i][j] = 1; - } else { + } else if (locked_on[j]) { output_matrix[i][j] = -input_matrix[i][j]; if (type == AFR_ENTRY_TRANSACTION) full_heal_mtx_out[i][j] = -full_heal_mtx_in[i][j]; diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c index 0b4d58dbabc..c1e945bfd82 100644 --- a/xlators/cluster/afr/src/afr-self-heal-data.c +++ b/xlators/cluster/afr/src/afr-self-heal-data.c @@ -562,7 +562,7 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this, AFR_DATA_TRANSACTION, locked_on, replies); - return source; + goto out; } /* No split brain at this point. 
If we were called from diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c index f3fa5d39506..4570ace7ef7 100644 --- a/xlators/cluster/afr/src/afr-self-heal-metadata.c +++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c @@ -241,7 +241,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, undid_pending, AFR_METADATA_TRANSACTION, locked_on, replies); - return source; + goto out; } /* If this is a directory mtime/ctime only split brain @@ -255,7 +255,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, uuid_utoa (replies[source].poststat.ia_gfid)); sources[source] = 1; healed_sinks[source] = 0; - return source; + goto out; } if (!priv->metadata_splitbrain_forced_heal) { @@ -314,6 +314,8 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this, } } +out: + afr_mark_active_sinks (this, sources, locked_on, healed_sinks); return source; } diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index 8178fc0d18b..9b5063d8aa8 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -301,22 +301,21 @@ afr_compute_pre_op_sources (call_frame_t *frame, xlator_t *this) } } -void -afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this) +gf_boolean_t +afr_has_arbiter_fop_cbk_quorum (call_frame_t *frame) { afr_local_t *local = NULL; afr_private_t *priv = NULL; + xlator_t *this = NULL; gf_boolean_t fop_failed = _gf_false; unsigned char *pre_op_sources = NULL; int i = 0; local = frame->local; + this = frame->this; priv = this->private; pre_op_sources = local->transaction.pre_op_sources; - if (priv->arbiter_count != 1 || local->op_ret < 0) - return; - /* If the fop failed on the brick, it is not a source. 
*/ for (i = 0; i < priv->child_count; i++) if (local->transaction.failed_subvols[i]) @@ -332,12 +331,10 @@ afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this) break; } - if (fop_failed) { - local->op_ret = -1; - local->op_errno = ENOTCONN; - } + if (fop_failed) + return _gf_false; - return; + return _gf_true; } void @@ -588,11 +585,17 @@ afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock) int afr_changelog_call_count (afr_transaction_type type, unsigned char *pre_op_subvols, + unsigned char *failed_subvols, unsigned int child_count) { + int i = 0; int call_count = 0; - call_count = AFR_COUNT(pre_op_subvols, child_count); + for (i = 0; i < child_count; i++) { + if (pre_op_subvols[i] && !failed_subvols[i]) { + call_count++; + } + } if (type == AFR_ENTRY_RENAME_TRANSACTION) call_count *= 2; @@ -779,8 +782,12 @@ afr_handle_quorum (call_frame_t *frame) * no split-brain with the fix. The problem is eliminated completely. */ - if (afr_has_fop_cbk_quorum (frame)) + if (priv->arbiter_count) { + if (afr_has_arbiter_fop_cbk_quorum (frame)) + return; + } else if (afr_has_fop_cbk_quorum (frame)) { return; + } for (i = 0; i < priv->child_count; i++) { if (local->transaction.pre_op[i]) @@ -1244,6 +1251,7 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, call_count = afr_changelog_call_count (local->transaction.type, local->transaction.pre_op, + local->transaction.failed_subvols, priv->child_count); if (call_count == 0) { @@ -1257,7 +1265,8 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr, local->transaction.changelog_resume = changelog_resume; for (i = 0; i < priv->child_count; i++) { - if (!local->transaction.pre_op[i]) + if (!local->transaction.pre_op[i] || + local->transaction.failed_subvols[i]) continue; switch (local->transaction.type) { diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h index ca8fcfefa89..dcdadbc84f4 100644 --- 
a/xlators/cluster/afr/src/afr-transaction.h +++ b/xlators/cluster/afr/src/afr-transaction.h @@ -16,8 +16,6 @@ void afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index); -void -afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this); int afr_lock_server_count (afr_private_t *priv, afr_transaction_type type); diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c index fc5fda6844f..86f667116af 100644 --- a/xlators/cluster/afr/src/afr.c +++ b/xlators/cluster/afr/src/afr.c @@ -930,16 +930,17 @@ struct volume_options options[] = { { .key = {"eager-lock"}, .type = GF_OPTION_TYPE_BOOL, .default_value = "on", - .description = "Lock phase of a transaction has two sub-phases. " + .description = "Enable/Disable eager lock for replica volume. " + "Lock phase of a transaction has two sub-phases. " "First is an attempt to acquire locks in parallel by " "broadcasting non-blocking lock requests. If lock " "acquisition fails on any server, then the held locks " - "are unlocked and revert to a blocking locked mode " + "are unlocked and we revert to a blocking locks mode " "sequentially on one server after another. If this " "option is enabled the initial broadcasting lock " - "request attempt to acquire lock on the entire file. " + "request attempts to acquire a full lock on the entire file. " "If this fails, we revert back to the sequential " - "\"regional\" blocking lock as before. In the case " + "\"regional\" blocking locks as before. In the case " "where such an \"eager\" lock is granted in the " "non-blocking phase, it gives rise to an opportunity " "for optimization. i.e, if the next write transaction " |
