summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr/src
diff options
context:
space:
mode:
authorJeff Darcy <jdarcy@fb.com>2017-08-31 12:33:59 -0700
committerJeff Darcy <jdarcy@fb.com>2017-08-31 12:33:59 -0700
commited23e379ee397b3fed479c15b7551d2dbba9a05f (patch)
treefe9bc23b851e0ee5502a48f1362b3ef9b10052f3 /xlators/cluster/afr/src
parentf2d57485d57e14a064c9ca6e83fe6c92131a8e37 (diff)
parentd174f021a4e0667e60ce6abc038106ad9b74dc74 (diff)
Merge remote-tracking branch 'origin/release-3.8' into release-3.8-fb
Change-Id: Ie35cd1c8c7808949ddf79b3189f1f8bf0ff70ed8
Diffstat (limited to 'xlators/cluster/afr/src')
-rw-r--r--xlators/cluster/afr/src/afr-common.c91
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c1
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c1
-rw-r--r--xlators/cluster/afr/src/afr-read-txn.c5
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c2
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c2
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c6
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c35
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h2
-rw-r--r--xlators/cluster/afr/src/afr.c9
10 files changed, 94 insertions, 60 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 6a9b20d4443..4c2343f8e9b 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1083,7 +1083,7 @@ refresh_done:
}
int
-afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
+afr_inode_refresh_done (call_frame_t *frame, xlator_t *this, int error)
{
afr_private_t *priv = NULL;
call_frame_t *heal_frame = NULL;
@@ -1094,6 +1094,11 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
int ret = 0;
int err = 0;
+ if (error != 0) {
+ err = error;
+ goto refresh_done;
+ }
+
local = frame->local;
priv = this->private;
@@ -1159,7 +1164,7 @@ afr_inode_refresh_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
afr_set_need_heal (this, local);
- afr_inode_refresh_done (frame, this);
+ afr_inode_refresh_done (frame, this, 0);
}
}
@@ -1250,20 +1255,21 @@ afr_inode_refresh_do (call_frame_t *frame, xlator_t *this)
if (local->fd) {
fd_ctx = afr_fd_ctx_get (local->fd, this);
if (!fd_ctx) {
- afr_inode_refresh_done (frame, this);
+ afr_inode_refresh_done (frame, this, EINVAL);
return 0;
}
}
xdata = dict_new ();
if (!xdata) {
- afr_inode_refresh_done (frame, this);
+ afr_inode_refresh_done (frame, this, ENOMEM);
return 0;
}
- if (afr_xattr_req_prepare (this, xdata) != 0) {
+ ret = afr_xattr_req_prepare (this, xdata);
+ if (ret != 0) {
dict_unref (xdata);
- afr_inode_refresh_done (frame, this);
+ afr_inode_refresh_done (frame, this, -ret);
return 0;
}
@@ -1296,7 +1302,10 @@ afr_inode_refresh_do (call_frame_t *frame, xlator_t *this)
call_count = local->call_count;
if (!call_count) {
dict_unref (xdata);
- afr_inode_refresh_done (frame, this);
+ if (local->fd && AFR_COUNT(local->child_up, priv->child_count))
+ afr_inode_refresh_done (frame, this, EBADFD);
+ else
+ afr_inode_refresh_done (frame, this, ENOTCONN);
return 0;
}
for (i = 0; i < priv->child_count; i++) {
@@ -3230,47 +3239,65 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *postbuf, dict_t *xdata)
{
afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
int call_count = -1;
int child_index = (long) cookie;
int read_subvol = 0;
call_stub_t *stub = NULL;
local = frame->local;
-
- read_subvol = afr_data_subvol_get (local->inode, this, NULL, NULL,
- NULL, NULL);
+ priv = this->private;
LOCK (&frame->lock);
{
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
if (op_ret == 0) {
- if (local->op_ret == -1) {
- local->op_ret = 0;
-
- local->cont.inode_wfop.prebuf = *prebuf;
- local->cont.inode_wfop.postbuf = *postbuf;
-
- if (xdata)
- local->xdata_rsp = dict_ref (xdata);
- }
-
- if (child_index == read_subvol) {
- local->cont.inode_wfop.prebuf = *prebuf;
- local->cont.inode_wfop.postbuf = *postbuf;
- if (xdata) {
- if (local->xdata_rsp)
- dict_unref (local->xdata_rsp);
- local->xdata_rsp = dict_ref (xdata);
- }
- }
- } else {
- local->op_errno = op_errno;
- }
+ if (prebuf)
+ local->replies[child_index].prestat = *prebuf;
+ if (postbuf)
+ local->replies[child_index].poststat = *postbuf;
+ if (xdata)
+ local->replies[child_index].xdata =
+ dict_ref (xdata);
+ }
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno (local, priv);
+ read_subvol = afr_data_subvol_get (local->inode, this, NULL,
+ local->readable, NULL, NULL);
+ /* Pick a reply that is valid and readable, with a preference
+ * given to read_subvol. */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret != 0)
+ continue;
+ if (!local->readable[i])
+ continue;
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+ local->cont.inode_wfop.prebuf =
+ local->replies[i].prestat;
+ local->cont.inode_wfop.postbuf =
+ local->replies[i].poststat;
+ if (local->replies[i].xdata) {
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp =
+ dict_ref (local->replies[i].xdata);
+ }
+ if (i == read_subvol)
+ break;
+ }
+
/* Make a stub out of the frame, and register it
with the waking up post-op. When the call-stub resumes,
we are guaranteed that there was no post-op pending
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 8e483c382c4..9099b8c1eee 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -183,7 +183,6 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this)
}
}
- afr_txn_arbitrate_fop_cbk (frame, this);
}
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index ddc257dbde4..8c312a89e53 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -131,7 +131,6 @@ __afr_inode_write_finalize (call_frame_t *frame, xlator_t *this)
}
}
- afr_txn_arbitrate_fop_cbk (frame, this);
afr_set_in_flight_sb_status (this, local, local->inode);
}
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
index 2390764bccd..a7a2d2999bf 100644
--- a/xlators/cluster/afr/src/afr-read-txn.c
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -222,9 +222,8 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
local->readable, NULL);
if (read_subvol < 0 || read_subvol > priv->child_count) {
- gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN,
- "Unreadable subvolume %d found with event generation "
- "%d for gfid %s. (Possible split-brain)",
+ gf_msg_debug (this->name, 0, "Unreadable subvolume %d found "
+ "with event generation %d for gfid %s.",
read_subvol, event_generation, uuid_utoa(inode->gfid));
goto refresh;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 1df3ddde1cb..629f1c6a7da 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -247,7 +247,7 @@ afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,
output_matrix[i][j] = 1;
if (type == AFR_ENTRY_TRANSACTION)
full_heal_mtx_out[i][j] = 1;
- } else {
+ } else if (locked_on[j]) {
output_matrix[i][j] = -input_matrix[i][j];
if (type == AFR_ENTRY_TRANSACTION)
full_heal_mtx_out[i][j] = -full_heal_mtx_in[i][j];
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 0b4d58dbabc..c1e945bfd82 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -562,7 +562,7 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this,
AFR_DATA_TRANSACTION,
locked_on, replies);
- return source;
+ goto out;
}
/* No split brain at this point. If we were called from
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index f3fa5d39506..4570ace7ef7 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -241,7 +241,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
undid_pending,
AFR_METADATA_TRANSACTION,
locked_on, replies);
- return source;
+ goto out;
}
/* If this is a directory mtime/ctime only split brain
@@ -255,7 +255,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
uuid_utoa (replies[source].poststat.ia_gfid));
sources[source] = 1;
healed_sinks[source] = 0;
- return source;
+ goto out;
}
if (!priv->metadata_splitbrain_forced_heal) {
@@ -314,6 +314,8 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
}
}
+out:
+ afr_mark_active_sinks (this, sources, locked_on, healed_sinks);
return source;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 8178fc0d18b..9b5063d8aa8 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -301,22 +301,21 @@ afr_compute_pre_op_sources (call_frame_t *frame, xlator_t *this)
}
}
-void
-afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_has_arbiter_fop_cbk_quorum (call_frame_t *frame)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ xlator_t *this = NULL;
gf_boolean_t fop_failed = _gf_false;
unsigned char *pre_op_sources = NULL;
int i = 0;
local = frame->local;
+ this = frame->this;
priv = this->private;
pre_op_sources = local->transaction.pre_op_sources;
- if (priv->arbiter_count != 1 || local->op_ret < 0)
- return;
-
/* If the fop failed on the brick, it is not a source. */
for (i = 0; i < priv->child_count; i++)
if (local->transaction.failed_subvols[i])
@@ -332,12 +331,10 @@ afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this)
break;
}
- if (fop_failed) {
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- }
+ if (fop_failed)
+ return _gf_false;
- return;
+ return _gf_true;
}
void
@@ -588,11 +585,17 @@ afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock)
int
afr_changelog_call_count (afr_transaction_type type,
unsigned char *pre_op_subvols,
+ unsigned char *failed_subvols,
unsigned int child_count)
{
+ int i = 0;
int call_count = 0;
- call_count = AFR_COUNT(pre_op_subvols, child_count);
+ for (i = 0; i < child_count; i++) {
+ if (pre_op_subvols[i] && !failed_subvols[i]) {
+ call_count++;
+ }
+ }
if (type == AFR_ENTRY_RENAME_TRANSACTION)
call_count *= 2;
@@ -779,8 +782,12 @@ afr_handle_quorum (call_frame_t *frame)
* no split-brain with the fix. The problem is eliminated completely.
*/
- if (afr_has_fop_cbk_quorum (frame))
+ if (priv->arbiter_count) {
+ if (afr_has_arbiter_fop_cbk_quorum (frame))
+ return;
+ } else if (afr_has_fop_cbk_quorum (frame)) {
return;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.pre_op[i])
@@ -1244,6 +1251,7 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
call_count = afr_changelog_call_count (local->transaction.type,
local->transaction.pre_op,
+ local->transaction.failed_subvols,
priv->child_count);
if (call_count == 0) {
@@ -1257,7 +1265,8 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
local->transaction.changelog_resume = changelog_resume;
for (i = 0; i < priv->child_count; i++) {
- if (!local->transaction.pre_op[i])
+ if (!local->transaction.pre_op[i] ||
+ local->transaction.failed_subvols[i])
continue;
switch (local->transaction.type) {
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index ca8fcfefa89..dcdadbc84f4 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -16,8 +16,6 @@
void
afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
int child_index);
-void
-afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this);
int
afr_lock_server_count (afr_private_t *priv, afr_transaction_type type);
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index fc5fda6844f..86f667116af 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -930,16 +930,17 @@ struct volume_options options[] = {
{ .key = {"eager-lock"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
- .description = "Lock phase of a transaction has two sub-phases. "
+ .description = "Enable/Disable eager lock for replica volume. "
+ "Lock phase of a transaction has two sub-phases. "
"First is an attempt to acquire locks in parallel by "
"broadcasting non-blocking lock requests. If lock "
"acquisition fails on any server, then the held locks "
- "are unlocked and revert to a blocking locked mode "
+ "are unlocked and we revert to a blocking locks mode "
"sequentially on one server after another. If this "
"option is enabled the initial broadcasting lock "
- "request attempt to acquire lock on the entire file. "
+ "request attempts to acquire a full lock on the entire file. "
"If this fails, we revert back to the sequential "
- "\"regional\" blocking lock as before. In the case "
+ "\"regional\" blocking locks as before. In the case "
"where such an \"eager\" lock is granted in the "
"non-blocking phase, it gives rise to an opportunity "
"for optimization. i.e, if the next write transaction "