summaryrefslogtreecommitdiffstats
path: root/xlators/cluster
diff options
context:
space:
mode:
authorJeff Darcy <jdarcy@fb.com>2017-08-31 12:33:59 -0700
committerJeff Darcy <jdarcy@fb.com>2017-08-31 12:33:59 -0700
commited23e379ee397b3fed479c15b7551d2dbba9a05f (patch)
treefe9bc23b851e0ee5502a48f1362b3ef9b10052f3 /xlators/cluster
parentf2d57485d57e14a064c9ca6e83fe6c92131a8e37 (diff)
parentd174f021a4e0667e60ce6abc038106ad9b74dc74 (diff)
Merge remote-tracking branch 'origin/release-3.8' into release-3.8-fb
Change-Id: Ie35cd1c8c7808949ddf79b3189f1f8bf0ff70ed8
Diffstat (limited to 'xlators/cluster')
-rw-r--r--xlators/cluster/afr/src/afr-common.c91
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c1
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c1
-rw-r--r--xlators/cluster/afr/src/afr-read-txn.c5
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c2
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c2
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c6
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c35
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h2
-rw-r--r--xlators/cluster/afr/src/afr.c9
-rw-r--r--xlators/cluster/dht/src/dht-common.c81
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c37
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c32
-rw-r--r--xlators/cluster/ec/src/ec-heal.c12
-rw-r--r--xlators/cluster/ec/src/ec-inode-read.c23
-rw-r--r--xlators/cluster/ec/src/ec.c14
16 files changed, 200 insertions, 153 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 6a9b20d4443..4c2343f8e9b 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1083,7 +1083,7 @@ refresh_done:
}
int
-afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
+afr_inode_refresh_done (call_frame_t *frame, xlator_t *this, int error)
{
afr_private_t *priv = NULL;
call_frame_t *heal_frame = NULL;
@@ -1094,6 +1094,11 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this)
int ret = 0;
int err = 0;
+ if (error != 0) {
+ err = error;
+ goto refresh_done;
+ }
+
local = frame->local;
priv = this->private;
@@ -1159,7 +1164,7 @@ afr_inode_refresh_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
afr_set_need_heal (this, local);
- afr_inode_refresh_done (frame, this);
+ afr_inode_refresh_done (frame, this, 0);
}
}
@@ -1250,20 +1255,21 @@ afr_inode_refresh_do (call_frame_t *frame, xlator_t *this)
if (local->fd) {
fd_ctx = afr_fd_ctx_get (local->fd, this);
if (!fd_ctx) {
- afr_inode_refresh_done (frame, this);
+ afr_inode_refresh_done (frame, this, EINVAL);
return 0;
}
}
xdata = dict_new ();
if (!xdata) {
- afr_inode_refresh_done (frame, this);
+ afr_inode_refresh_done (frame, this, ENOMEM);
return 0;
}
- if (afr_xattr_req_prepare (this, xdata) != 0) {
+ ret = afr_xattr_req_prepare (this, xdata);
+ if (ret != 0) {
dict_unref (xdata);
- afr_inode_refresh_done (frame, this);
+ afr_inode_refresh_done (frame, this, -ret);
return 0;
}
@@ -1296,7 +1302,10 @@ afr_inode_refresh_do (call_frame_t *frame, xlator_t *this)
call_count = local->call_count;
if (!call_count) {
dict_unref (xdata);
- afr_inode_refresh_done (frame, this);
+ if (local->fd && AFR_COUNT(local->child_up, priv->child_count))
+ afr_inode_refresh_done (frame, this, EBADFD);
+ else
+ afr_inode_refresh_done (frame, this, ENOTCONN);
return 0;
}
for (i = 0; i < priv->child_count; i++) {
@@ -3230,47 +3239,65 @@ afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
struct iatt *postbuf, dict_t *xdata)
{
afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
int call_count = -1;
int child_index = (long) cookie;
int read_subvol = 0;
call_stub_t *stub = NULL;
local = frame->local;
-
- read_subvol = afr_data_subvol_get (local->inode, this, NULL, NULL,
- NULL, NULL);
+ priv = this->private;
LOCK (&frame->lock);
{
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
if (op_ret == 0) {
- if (local->op_ret == -1) {
- local->op_ret = 0;
-
- local->cont.inode_wfop.prebuf = *prebuf;
- local->cont.inode_wfop.postbuf = *postbuf;
-
- if (xdata)
- local->xdata_rsp = dict_ref (xdata);
- }
-
- if (child_index == read_subvol) {
- local->cont.inode_wfop.prebuf = *prebuf;
- local->cont.inode_wfop.postbuf = *postbuf;
- if (xdata) {
- if (local->xdata_rsp)
- dict_unref (local->xdata_rsp);
- local->xdata_rsp = dict_ref (xdata);
- }
- }
- } else {
- local->op_errno = op_errno;
- }
+ if (prebuf)
+ local->replies[child_index].prestat = *prebuf;
+ if (postbuf)
+ local->replies[child_index].poststat = *postbuf;
+ if (xdata)
+ local->replies[child_index].xdata =
+ dict_ref (xdata);
+ }
}
UNLOCK (&frame->lock);
call_count = afr_frame_return (frame);
if (call_count == 0) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno (local, priv);
+ read_subvol = afr_data_subvol_get (local->inode, this, NULL,
+ local->readable, NULL, NULL);
+ /* Pick a reply that is valid and readable, with a preference
+ * given to read_subvol. */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret != 0)
+ continue;
+ if (!local->readable[i])
+ continue;
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+ local->cont.inode_wfop.prebuf =
+ local->replies[i].prestat;
+ local->cont.inode_wfop.postbuf =
+ local->replies[i].poststat;
+ if (local->replies[i].xdata) {
+ if (local->xdata_rsp)
+ dict_unref (local->xdata_rsp);
+ local->xdata_rsp =
+ dict_ref (local->replies[i].xdata);
+ }
+ if (i == read_subvol)
+ break;
+ }
+
/* Make a stub out of the frame, and register it
with the waking up post-op. When the call-stub resumes,
we are guaranteed that there was no post-op pending
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 8e483c382c4..9099b8c1eee 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -183,7 +183,6 @@ __afr_dir_write_finalize (call_frame_t *frame, xlator_t *this)
}
}
- afr_txn_arbitrate_fop_cbk (frame, this);
}
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index ddc257dbde4..8c312a89e53 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -131,7 +131,6 @@ __afr_inode_write_finalize (call_frame_t *frame, xlator_t *this)
}
}
- afr_txn_arbitrate_fop_cbk (frame, this);
afr_set_in_flight_sb_status (this, local, local->inode);
}
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
index 2390764bccd..a7a2d2999bf 100644
--- a/xlators/cluster/afr/src/afr-read-txn.c
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -222,9 +222,8 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
local->readable, NULL);
if (read_subvol < 0 || read_subvol > priv->child_count) {
- gf_msg (this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN,
- "Unreadable subvolume %d found with event generation "
- "%d for gfid %s. (Possible split-brain)",
+ gf_msg_debug (this->name, 0, "Unreadable subvolume %d found "
+ "with event generation %d for gfid %s.",
read_subvol, event_generation, uuid_utoa(inode->gfid));
goto refresh;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index 1df3ddde1cb..629f1c6a7da 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -247,7 +247,7 @@ afr_selfheal_undo_pending (call_frame_t *frame, xlator_t *this, inode_t *inode,
output_matrix[i][j] = 1;
if (type == AFR_ENTRY_TRANSACTION)
full_heal_mtx_out[i][j] = 1;
- } else {
+ } else if (locked_on[j]) {
output_matrix[i][j] = -input_matrix[i][j];
if (type == AFR_ENTRY_TRANSACTION)
full_heal_mtx_out[i][j] = -full_heal_mtx_in[i][j];
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 0b4d58dbabc..c1e945bfd82 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -562,7 +562,7 @@ __afr_selfheal_data_finalize_source (call_frame_t *frame, xlator_t *this,
AFR_DATA_TRANSACTION,
locked_on, replies);
- return source;
+ goto out;
}
/* No split brain at this point. If we were called from
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index f3fa5d39506..4570ace7ef7 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -241,7 +241,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
undid_pending,
AFR_METADATA_TRANSACTION,
locked_on, replies);
- return source;
+ goto out;
}
/* If this is a directory mtime/ctime only split brain
@@ -255,7 +255,7 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
uuid_utoa (replies[source].poststat.ia_gfid));
sources[source] = 1;
healed_sinks[source] = 0;
- return source;
+ goto out;
}
if (!priv->metadata_splitbrain_forced_heal) {
@@ -314,6 +314,8 @@ __afr_selfheal_metadata_finalize_source (call_frame_t *frame, xlator_t *this,
}
}
+out:
+ afr_mark_active_sinks (this, sources, locked_on, healed_sinks);
return source;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 8178fc0d18b..9b5063d8aa8 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -301,22 +301,21 @@ afr_compute_pre_op_sources (call_frame_t *frame, xlator_t *this)
}
}
-void
-afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_has_arbiter_fop_cbk_quorum (call_frame_t *frame)
{
afr_local_t *local = NULL;
afr_private_t *priv = NULL;
+ xlator_t *this = NULL;
gf_boolean_t fop_failed = _gf_false;
unsigned char *pre_op_sources = NULL;
int i = 0;
local = frame->local;
+ this = frame->this;
priv = this->private;
pre_op_sources = local->transaction.pre_op_sources;
- if (priv->arbiter_count != 1 || local->op_ret < 0)
- return;
-
/* If the fop failed on the brick, it is not a source. */
for (i = 0; i < priv->child_count; i++)
if (local->transaction.failed_subvols[i])
@@ -332,12 +331,10 @@ afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this)
break;
}
- if (fop_failed) {
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- }
+ if (fop_failed)
+ return _gf_false;
- return;
+ return _gf_true;
}
void
@@ -588,11 +585,17 @@ afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock)
int
afr_changelog_call_count (afr_transaction_type type,
unsigned char *pre_op_subvols,
+ unsigned char *failed_subvols,
unsigned int child_count)
{
+ int i = 0;
int call_count = 0;
- call_count = AFR_COUNT(pre_op_subvols, child_count);
+ for (i = 0; i < child_count; i++) {
+ if (pre_op_subvols[i] && !failed_subvols[i]) {
+ call_count++;
+ }
+ }
if (type == AFR_ENTRY_RENAME_TRANSACTION)
call_count *= 2;
@@ -779,8 +782,12 @@ afr_handle_quorum (call_frame_t *frame)
* no split-brain with the fix. The problem is eliminated completely.
*/
- if (afr_has_fop_cbk_quorum (frame))
+ if (priv->arbiter_count) {
+ if (afr_has_arbiter_fop_cbk_quorum (frame))
+ return;
+ } else if (afr_has_fop_cbk_quorum (frame)) {
return;
+ }
for (i = 0; i < priv->child_count; i++) {
if (local->transaction.pre_op[i])
@@ -1244,6 +1251,7 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
call_count = afr_changelog_call_count (local->transaction.type,
local->transaction.pre_op,
+ local->transaction.failed_subvols,
priv->child_count);
if (call_count == 0) {
@@ -1257,7 +1265,8 @@ afr_changelog_do (call_frame_t *frame, xlator_t *this, dict_t *xattr,
local->transaction.changelog_resume = changelog_resume;
for (i = 0; i < priv->child_count; i++) {
- if (!local->transaction.pre_op[i])
+ if (!local->transaction.pre_op[i] ||
+ local->transaction.failed_subvols[i])
continue;
switch (local->transaction.type) {
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index ca8fcfefa89..dcdadbc84f4 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -16,8 +16,6 @@
void
afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
int child_index);
-void
-afr_txn_arbitrate_fop_cbk (call_frame_t *frame, xlator_t *this);
int
afr_lock_server_count (afr_private_t *priv, afr_transaction_type type);
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index fc5fda6844f..86f667116af 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -930,16 +930,17 @@ struct volume_options options[] = {
{ .key = {"eager-lock"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
- .description = "Lock phase of a transaction has two sub-phases. "
+ .description = "Enable/Disable eager lock for replica volume. "
+ "Lock phase of a transaction has two sub-phases. "
"First is an attempt to acquire locks in parallel by "
"broadcasting non-blocking lock requests. If lock "
"acquisition fails on any server, then the held locks "
- "are unlocked and revert to a blocking locked mode "
+ "are unlocked and we revert to a blocking locks mode "
"sequentially on one server after another. If this "
"option is enabled the initial broadcasting lock "
- "request attempt to acquire lock on the entire file. "
+ "request attempts to acquire a full lock on the entire file. "
"If this fails, we revert back to the sequential "
- "\"regional\" blocking lock as before. In the case "
+ "\"regional\" blocking locks as before. In the case "
"where such an \"eager\" lock is granted in the "
"non-blocking phase, it gives rise to an opportunity "
"for optimization. i.e, if the next write transaction "
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index c120dffbf23..cd35080e243 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -809,8 +809,6 @@ dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
if (!op_ret && gf_uuid_is_null (local->gfid))
memcpy (local->gfid, stbuf->ia_gfid, 16);
- memcpy (local->loc.gfid, local->gfid, 16);
-
/* Check if the gfid is different for file from other node */
if (!op_ret && gf_uuid_compare (local->gfid, stbuf->ia_gfid)) {
@@ -879,6 +877,8 @@ unlock:
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt)) {
+ gf_uuid_copy (local->loc.gfid, local->gfid);
+
if (local->need_selfheal) {
local->need_selfheal = 0;
dht_lookup_everywhere (frame, this, &local->loc);
@@ -919,7 +919,6 @@ unlock:
selfheal:
FRAME_SU_DO (frame, dht_local_t);
- gf_uuid_copy (local->loc.gfid, local->gfid);
ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk,
&local->loc, layout);
out:
@@ -3826,6 +3825,7 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this,
goto err;
}
+ local->xattr_req = xdata ? dict_ref (xdata) : dict_new ();
local->call_cnt = call_cnt = layout->cnt;
if (IA_ISDIR (fd->inode->ia_type)) {
@@ -3833,7 +3833,7 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_err_cbk,
layout->list[i].xlator,
layout->list[i].xlator->fops->fsetxattr,
- fd, xattr, flags, NULL);
+ fd, xattr, flags, xdata);
}
} else {
@@ -3842,10 +3842,8 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this,
local->rebalance.xattr = dict_ref (xattr);
local->rebalance.flags = flags;
- xdata = xdata ? dict_ref (xdata) : dict_new ();
- if (xdata)
- ret = dict_set_dynstr_with_alloc (xdata,
- DHT_IATT_IN_XDATA_KEY, "yes");
+ ret = dict_set_dynstr_with_alloc (local->xattr_req,
+ DHT_IATT_IN_XDATA_KEY, "yes");
if (ret) {
gf_msg_debug (this->name, 0,
"Failed to set dictionary key %s for fd=%p",
@@ -3853,11 +3851,8 @@ dht_fsetxattr (call_frame_t *frame, xlator_t *this,
}
STACK_WIND (frame, dht_file_setxattr_cbk, subvol,
- subvol->fops->fsetxattr, fd, xattr, flags, xdata);
-
- if (xdata)
- dict_unref (xdata);
-
+ subvol->fops->fsetxattr, fd, xattr, flags,
+ local->xattr_req);
}
return 0;
@@ -3953,12 +3948,12 @@ dht_setxattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
STACK_WIND (frame, dht_file_setxattr_cbk, subvol,
subvol->fops->setxattr, &local->loc,
local->rebalance.xattr, local->rebalance.flags,
- NULL);
+ local->xattr_req);
} else {
STACK_WIND (frame, dht_file_setxattr_cbk, subvol,
subvol->fops->fsetxattr, local->fd,
local->rebalance.xattr, local->rebalance.flags,
- NULL);
+ local->xattr_req);
}
return 0;
@@ -4242,6 +4237,7 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
if (tmp) {
return dht_nuke_dir (frame, this, loc, tmp);
}
+ local->xattr_req = xdata ? dict_ref (xdata) : dict_new ();
if (IA_ISDIR (loc->inode->ia_type)) {
@@ -4258,17 +4254,12 @@ dht_setxattr (call_frame_t *frame, xlator_t *this,
local->rebalance.flags = flags;
local->call_cnt = 1;
- xdata = xdata ? dict_ref (xdata) : dict_new ();
- if (xdata)
- ret = dict_set_dynstr_with_alloc (xdata,
- DHT_IATT_IN_XDATA_KEY, "yes");
+ ret = dict_set_dynstr_with_alloc (local->xattr_req,
+ DHT_IATT_IN_XDATA_KEY, "yes");
STACK_WIND (frame, dht_file_setxattr_cbk,
subvol, subvol->fops->setxattr,
- loc, xattr, flags, xdata);
-
- if (xdata)
- dict_unref (xdata);
+ loc, xattr, flags, local->xattr_req);
}
return 0;
@@ -4387,11 +4378,11 @@ dht_removexattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame,
if (local->fop == GF_FOP_REMOVEXATTR) {
STACK_WIND (frame, dht_file_removexattr_cbk, subvol,
subvol->fops->removexattr, &local->loc,
- local->key, NULL);
+ local->key, local->xattr_req);
} else {
STACK_WIND (frame, dht_file_removexattr_cbk, subvol,
subvol->fops->fremovexattr, local->fd,
- local->key, NULL);
+ local->key, local->xattr_req);
}
return 0;
@@ -4428,8 +4419,6 @@ dht_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
unlock:
UNLOCK (&frame->lock);
-
-
this_call_cnt = dht_frame_return (frame);
if (is_last_call (this_call_cnt)) {
DHT_STACK_UNWIND (removexattr, frame, local->op_ret,
@@ -4485,6 +4474,7 @@ dht_removexattr (call_frame_t *frame, xlator_t *this,
op_errno = EINVAL;
goto err;
}
+ local->xattr_req = (xdata) ? dict_ref (xdata) : dict_new ();
local->call_cnt = call_cnt = layout->cnt;
local->key = gf_strdup (key);
@@ -4494,16 +4484,15 @@ dht_removexattr (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_removexattr_cbk,
layout->list[i].xlator,
layout->list[i].xlator->fops->removexattr,
- loc, key, NULL);
+ loc, key, local->xattr_req);
}
} else {
local->call_cnt = 1;
- xdata = xdata ? dict_ref (xdata) : dict_new ();
- if (xdata)
- ret = dict_set_dynstr_with_alloc (xdata,
- DHT_IATT_IN_XDATA_KEY, "yes");
+
+ ret = dict_set_dynstr_with_alloc (local->xattr_req,
+ DHT_IATT_IN_XDATA_KEY, "yes");
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
DHT_MSG_DICT_SET_FAILED, "Failed to "
@@ -4513,10 +4502,7 @@ dht_removexattr (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_file_removexattr_cbk,
subvol, subvol->fops->removexattr,
- loc, key, xdata);
-
- if (xdata)
- dict_unref (xdata);
+ loc, key, local->xattr_req);
}
return 0;
@@ -4574,6 +4560,7 @@ dht_fremovexattr (call_frame_t *frame, xlator_t *this,
op_errno = EINVAL;
goto err;
}
+ local->xattr_req = xdata ? dict_ref (xdata) : dict_new ();
local->call_cnt = call_cnt = layout->cnt;
local->key = gf_strdup (key);
@@ -4583,29 +4570,23 @@ dht_fremovexattr (call_frame_t *frame, xlator_t *this,
STACK_WIND (frame, dht_removexattr_cbk,
layout->list[i].xlator,
layout->list[i].xlator->fops->fremovexattr,
- fd, key, NULL);
+ fd, key, local->xattr_req);
}
} else {
local->call_cnt = 1;
- xdata = xdata ? dict_ref (xdata) : dict_new ();
- if (xdata)
- ret = dict_set_dynstr_with_alloc (xdata,
- DHT_IATT_IN_XDATA_KEY, "yes");
+ ret = dict_set_dynstr_with_alloc (local->xattr_req,
+ DHT_IATT_IN_XDATA_KEY, "yes");
if (ret) {
gf_msg (this->name, GF_LOG_ERROR, ENOMEM,
DHT_MSG_DICT_SET_FAILED, "Failed to "
"set dictionary key %s for fd=%p",
DHT_IATT_IN_XDATA_KEY, fd);
}
-
STACK_WIND (frame, dht_file_removexattr_cbk,
subvol, subvol->fops->fremovexattr,
- fd, key, xdata);
-
- if (xdata)
- dict_unref (xdata);
+ fd, key, local->xattr_req);
}
return 0;
@@ -6527,7 +6508,7 @@ dht_link2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
local->call_cnt = 2;
STACK_WIND (frame, dht_link_cbk, subvol, subvol->fops->link,
- &local->loc, &local->loc2, NULL);
+ &local->loc, &local->loc2, local->xattr_req);
return 0;
err:
@@ -6554,7 +6535,7 @@ dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
srcvol = local->linkfile.srcvol;
STACK_WIND (frame, dht_link_cbk, srcvol, srcvol->fops->link,
- &local->loc, &local->loc2, xdata);
+ &local->loc, &local->loc2, local->xattr_req);
return 0;
@@ -6563,7 +6544,7 @@ err:
dht_set_fixed_dir_stat (preparent);
dht_set_fixed_dir_stat (postparent);
DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent, NULL);
+ postparent, xdata);
return 0;
}
@@ -6614,6 +6595,8 @@ dht_link (call_frame_t *frame, xlator_t *this,
op_errno = ENOMEM;
goto err;
}
+ if (xdata)
+ local->xattr_req = dict_ref (xdata);
if (hashed_subvol != cached_subvol) {
gf_uuid_copy (local->gfid, oldloc->inode->gfid);
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
index ac0f0e186fa..05f71fbcc86 100644
--- a/xlators/cluster/dht/src/dht-inode-read.c
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -76,7 +76,7 @@ dht_open2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
if (we_are_not_migrating (ret)) {
/* This DHT layer is not migrating the file */
DHT_STACK_UNWIND (open, frame, -1, local->op_errno,
- NULL, NULL);
+ NULL, local->rebalance.xdata);
return 0;
}
@@ -88,7 +88,7 @@ dht_open2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
&local->loc, local->rebalance.flags, local->fd,
- NULL);
+ local->xattr_req);
return 0;
out:
@@ -132,6 +132,8 @@ dht_open (call_frame_t *frame, xlator_t *this,
op_errno = ENOSPC;
goto err;
}
+ if (xdata)
+ local->xattr_req = dict_ref (xdata);
local->rebalance.flags = flags;
local->call_cnt = 1;
@@ -249,10 +251,10 @@ dht_attr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
if (local->fop == GF_FOP_FSTAT) {
STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->fstat, local->fd, NULL);
+ subvol->fops->fstat, local->fd, local->xattr_req);
} else {
STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->stat, &local->loc, NULL);
+ subvol->fops->stat, &local->loc, local->xattr_req);
}
return 0;
@@ -335,6 +337,8 @@ dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
op_errno = EINVAL;
goto err;
}
+ if (xdata)
+ local->xattr_req = dict_ref (xdata);
if (IA_ISREG (loc->inode->ia_type)) {
local->call_cnt = 1;
@@ -395,6 +399,8 @@ dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
op_errno = EINVAL;
goto err;
}
+ if (xdata)
+ local->xattr_req = dict_ref (xdata);
if (IA_ISREG (fd->inode->ia_type)) {
local->call_cnt = 1;
@@ -516,7 +522,7 @@ dht_readv2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
STACK_WIND (frame, dht_readv_cbk, subvol, subvol->fops->readv,
local->fd, local->rebalance.size, local->rebalance.offset,
- local->rebalance.flags, NULL);
+ local->rebalance.flags, local->xattr_req);
return 0;
@@ -551,6 +557,8 @@ dht_readv (call_frame_t *frame, xlator_t *this,
op_errno = EINVAL;
goto err;
}
+ if (xdata)
+ local->xattr_req = dict_ref (xdata);
local->rebalance.offset = off;
local->rebalance.size = size;
@@ -645,7 +653,7 @@ dht_access2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
local->call_cnt = 2;
STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
- &local->loc, local->rebalance.flags, NULL);
+ &local->loc, local->rebalance.flags, local->xattr_req);
return 0;
@@ -684,6 +692,8 @@ dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
op_errno = EINVAL;
goto err;
}
+ if (xdata)
+ local->xattr_req = dict_ref (xdata);
STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
loc, mask, xdata);
@@ -718,9 +728,6 @@ dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
local->op_ret = op_ret;
local->op_errno = op_errno;
- if (xdata)
- local->rebalance.xdata = dict_ref (xdata);
-
/* If context is set, then send flush() it to the destination */
dht_inode_ctx_get_mig_info (this, local->fd->inode, NULL, &subvol);
if (subvol && dht_fd_open_on_dst (this, local->fd, subvol)) {
@@ -761,7 +768,7 @@ dht_flush2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
STACK_WIND (frame, dht_flush_cbk,
subvol, subvol->fops->flush, local->fd,
- local->rebalance.xdata);
+ local->xattr_req);
return 0;
@@ -795,6 +802,8 @@ dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
op_errno = EINVAL;
goto err;
}
+ if (xdata)
+ local->xattr_req = dict_ref (xdata);
local->call_cnt = 1;
@@ -917,7 +926,7 @@ dht_fsync2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
local->call_cnt = 2; /* This is the second attempt */
STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
- local->fd, local->rebalance.flags, NULL);
+ local->fd, local->rebalance.flags, local->xattr_req);
return 0;
@@ -944,6 +953,8 @@ dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
goto err;
}
+ if (xdata)
+ local->xattr_req = dict_ref (xdata);
local->call_cnt = 1;
local->rebalance.flags = datasync;
@@ -1034,7 +1045,7 @@ dht_lk2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
STACK_WIND (frame, dht_lk_cbk, subvol, subvol->fops->lk, local->fd,
local->rebalance.lock_cmd, &local->rebalance.flock,
- local->rebalance.xdata);
+ local->xattr_req);
return 0;
@@ -1069,6 +1080,8 @@ dht_lk (call_frame_t *frame, xlator_t *this,
op_errno = EINVAL;
goto err;
}
+ if (xdata)
+ local->xattr_req = dict_ref (xdata);
local->rebalance.flock = *flock;
local->rebalance.lock_cmd = cmd;
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
index 7420461da76..48d49dd3475 100644
--- a/xlators/cluster/dht/src/dht-inode-write.c
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -143,7 +143,7 @@ dht_writev2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
subvol, subvol->fops->writev,
local->fd, local->rebalance.vector, local->rebalance.count,
local->rebalance.offset, local->rebalance.flags,
- local->rebalance.iobref, NULL);
+ local->rebalance.iobref, local->xattr_req);
return 0;
@@ -192,6 +192,8 @@ dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
op_errno = ENOSPC;
goto err;
}
+ if (xdata)
+ local->xattr_req = dict_ref (xdata);
local->rebalance.vector = iov_dup (vector, count);
local->rebalance.offset = off;
@@ -334,11 +336,11 @@ dht_truncate2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
if (local->fop == GF_FOP_TRUNCATE) {
STACK_WIND (frame, dht_truncate_cbk, subvol,
subvol->fops->truncate, &local->loc,
- local->rebalance.offset, NULL);
+ local->rebalance.offset, local->xattr_req);
} else {
STACK_WIND (frame, dht_truncate_cbk, subvol,
subvol->fops->ftruncate, local->fd,
- local->rebalance.offset, NULL);
+ local->rebalance.offset, local->xattr_req);
}
return 0;
@@ -377,6 +379,8 @@ dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
op_errno = EINVAL;
goto err;
}
+ if (xdata)
+ local->xattr_req = dict_ref (xdata);
STACK_WIND (frame, dht_truncate_cbk,
subvol, subvol->fops->truncate,
@@ -418,6 +422,8 @@ dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
op_errno = EINVAL;
goto err;
}
+ if (xdata)
+ local->xattr_req = dict_ref (xdata);
STACK_WIND (frame, dht_truncate_cbk,
subvol, subvol->fops->ftruncate,
@@ -544,7 +550,7 @@ dht_fallocate2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
STACK_WIND(frame, dht_fallocate_cbk, subvol, subvol->fops->fallocate,
local->fd, local->rebalance.flags, local->rebalance.offset,
- local->rebalance.size, NULL);
+ local->rebalance.size, local->xattr_req);
return 0;
@@ -583,6 +589,8 @@ dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
op_errno = EINVAL;
goto err;
}
+ if (xdata)
+ local->xattr_req = dict_ref (xdata);
STACK_WIND (frame, dht_fallocate_cbk,
subvol, subvol->fops->fallocate,
@@ -709,7 +717,7 @@ dht_discard2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
STACK_WIND(frame, dht_discard_cbk, subvol, subvol->fops->discard,
local->fd, local->rebalance.offset, local->rebalance.size,
- NULL);
+ local->xattr_req);
return 0;
@@ -747,6 +755,8 @@ dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
op_errno = EINVAL;
goto err;
}
+ if (xdata)
+ local->xattr_req = dict_ref (xdata);
STACK_WIND (frame, dht_discard_cbk, subvol, subvol->fops->discard,
fd, offset, len, xdata);
@@ -872,7 +882,7 @@ dht_zerofill2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
STACK_WIND(frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill,
local->fd, local->rebalance.offset, local->rebalance.size,
- NULL);
+ local->xattr_req);
return 0;
@@ -911,6 +921,8 @@ dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
op_errno = EINVAL;
goto err;
}
+ if (xdata)
+ local->xattr_req = dict_ref (xdata);
STACK_WIND (frame, dht_zerofill_cbk, subvol, subvol->fops->zerofill,
fd, offset, len, xdata);
@@ -1015,12 +1027,12 @@ dht_setattr2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
STACK_WIND (frame, dht_file_setattr_cbk, subvol,
subvol->fops->setattr, &local->loc,
&local->rebalance.stbuf, local->rebalance.flags,
- NULL);
+ local->xattr_req);
} else {
STACK_WIND (frame, dht_file_setattr_cbk, subvol,
subvol->fops->fsetattr, local->fd,
&local->rebalance.stbuf, local->rebalance.flags,
- NULL);
+ local->xattr_req);
}
return 0;
@@ -1113,6 +1125,8 @@ dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
op_errno = EINVAL;
goto err;
}
+ if (xdata)
+ local->xattr_req = dict_ref (xdata);
if (IA_ISREG (loc->inode->ia_type)) {
/* in the regular file _cbk(), we need to check for
@@ -1184,6 +1198,8 @@ dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
op_errno = EINVAL;
goto err;
}
+ if (xdata)
+ local->xattr_req = dict_ref (xdata);
if (IA_ISREG (fd->inode->ia_type)) {
/* in the regular file _cbk(), we need to check for
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
index da08f6c9a75..2373e6f93d5 100644
--- a/xlators/cluster/ec/src/ec-heal.c
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -410,6 +410,8 @@ ec_adjust_versions (call_frame_t *frame, ec_t *ec, ec_txn_t type,
if (EC_COUNT (sources, ec->nodes) +
EC_COUNT (healed_sinks, ec->nodes) == ec->nodes)
erase_dirty = _gf_true;
+ else
+ op_ret = -ENOTCONN;
for (i = 0; i < ec->nodes; i++) {
if (!sources[i] && !healed_sinks[i])
@@ -509,7 +511,7 @@ ec_heal_metadata_find_direction (ec_t *ec, default_args_cbk_t *replies,
if (!are_dicts_equal(replies[i].xdata, replies[j].xdata,
ec_sh_key_match, NULL))
continue;
- groups[j] = i; /*If iatts match put them into a group*/
+ groups[j] = i;
same_count++;
}
@@ -541,7 +543,6 @@ out:
return ret;
}
-
int
__ec_heal_metadata_prepare (call_frame_t *frame, ec_t *ec, inode_t *inode,
unsigned char *locked_on, default_args_cbk_t *replies,
@@ -679,15 +680,12 @@ __ec_heal_metadata (call_frame_t *frame, ec_t *ec, inode_t *inode,
goto out;
}
- if (EC_COUNT (sources, ec->nodes) == ec->nodes) {
+ if ((EC_COUNT (sources, ec->nodes) == ec->nodes) ||
+ (EC_COUNT (healed_sinks, ec->nodes) == 0)) {
ret = 0;
goto erase_dirty;
}
- if (EC_COUNT (healed_sinks, ec->nodes) == 0) {
- ret = -ENOTCONN;
- goto out;
- }
source_buf = replies[source].stat;
ret = cluster_setattr (ec->xl_list, healed_sinks, ec->nodes, sreplies,
output, frame, ec->xl, &loc,
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
index c3d9c879eb7..fc121bcceb2 100644
--- a/xlators/cluster/ec/src/ec-inode-read.c
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -1577,23 +1577,16 @@ int32_t ec_manager_seek(ec_fop_data_t *fop, int32_t state)
return EC_STATE_PREPARE_ANSWER;
case EC_STATE_PREPARE_ANSWER:
- cbk = fop->answer;
- if (cbk != NULL) {
- if (ec_dispatch_one_retry(fop, &cbk)) {
- return EC_STATE_DISPATCH;
- }
- if (cbk->op_ret >= 0) {
- ec_t *ec = fop->xl->private;
+ if (ec_dispatch_one_retry(fop, &cbk)) {
+ return EC_STATE_DISPATCH;
+ }
+ if ((cbk != NULL) && (cbk->op_ret >= 0)) {
+ ec_t *ec = fop->xl->private;
- cbk->offset *= ec->fragments;
- if (cbk->offset < fop->user_size) {
- cbk->offset = fop->user_size;
- }
- } else {
- ec_fop_set_error(fop, cbk->op_errno);
+ cbk->offset *= ec->fragments;
+ if (cbk->offset < fop->user_size) {
+ cbk->offset = fop->user_size;
}
- } else {
- ec_fop_set_error(fop, EIO);
}
return EC_STATE_REPORT;
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index b5e6bc08216..0a3a3cce391 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -1328,8 +1328,18 @@ struct volume_options options[] =
{ .key = {"eager-lock"},
.type = GF_OPTION_TYPE_BOOL,
.default_value = "on",
- .description = "This option will enable/diable eager lock for"
- "disperse volume "
+ .description = "Enable/Disable eager lock for disperse volume. "
+ "If a fop takes a lock and completes its operation, "
+ "it waits for next 1 second before releasing the lock, "
+ "to see if the lock can be reused for next fop from "
+ "the same client. If ec finds any lock contention within "
+ "1 second it releases the lock immediately before time "
+ "expires. This improves the performance of file operations."
+ "However, as it takes lock on first brick, for few operations "
+ "like read, discovery of lock contention might take long time "
+ "and can actually degrade the performance. "
+ "If eager lock is disabled, lock will be released as soon as fop "
+ "completes. "
},
{ .key = {"background-heals"},
.type = GF_OPTION_TYPE_INT,