summaryrefslogtreecommitdiffstats
path: root/xlators/cluster/afr
diff options
context:
space:
mode:
authorRavishankar N <ravishankar@redhat.com>2018-05-17 08:23:33 +0530
committerShyamsundar Ranganathan <srangana@redhat.com>2018-07-02 17:24:47 +0000
commitdf511f7738e179d0f9b5761e635ace872832e9b2 (patch)
treefbf9200a4cf45e57ed8e73f1a853fc7a9b07271b /xlators/cluster/afr
parent695470765d4d047102fc9d2db7574a6fdb2b3eb3 (diff)
afr: don't update readables if inode refresh failed on all children
Problem: If inode refresh failed on all children of afr due to ENOENT (say file migrated by dht), it resets the readables to zero. Any inflight txn which then later comes on the inode fails with EIO because no readable children present for the inode. Fix: Don't update readables when inode refresh fails on *all* children of afr. In that way any inflight txns will either proceed with its own inode refresh if needed and fail it with the right errno or use the old value of readables and continue with the txn. Also, add quorum checks to the beginning of afr_transaction(). Otherwise, we seem to be winding the lock and checking for quorum only in pre-op pahse. Note: This should ideally fix BZ 1329505 since the stop gap fix for it is has been reverted at https://review.gluster.org/#/c/20028. Change-Id: Ia638c092d8d12dc27afb3cdad133394845061319 updates: bz#1597116 Signed-off-by: Ravishankar N <ravishankar@redhat.com> (cherry picked from commit 0f13eed0c1fa74cefed486538b02e0c8a8708456)
Diffstat (limited to 'xlators/cluster/afr')
-rw-r--r--xlators/cluster/afr/src/afr-common.c67
-rw-r--r--xlators/cluster/afr/src/afr-open.c2
-rw-r--r--xlators/cluster/afr/src/afr-read-txn.c6
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c7
4 files changed, 50 insertions, 32 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index ad037cd7060..1e0c1a7b265 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -1176,7 +1176,7 @@ afr_inode_refresh_err (call_frame_t *frame, xlator_t *this)
err = afr_final_errno (local, priv);
ret:
- return -err;
+ return err;
}
gf_boolean_t
@@ -1226,31 +1226,31 @@ afr_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
if (ret == -EIO || (local->is_read_txn && !event_generation)) {
/* No readable subvolume even after refresh ==> splitbrain.*/
if (!priv->fav_child_policy) {
- err = -EIO;
+ err = EIO;
goto refresh_done;
}
read_subvol = afr_sh_get_fav_by_policy (this, local->replies,
inode, NULL);
if (read_subvol == -1) {
- err = -EIO;
+ err = EIO;
goto refresh_done;
}
heal_frame = copy_frame (frame);
if (!heal_frame) {
- err = -EIO;
+ err = EIO;
goto refresh_done;
}
heal_frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
heal_local = AFR_FRAME_INIT (heal_frame, op_errno);
if (!heal_local) {
- err = -EIO;
+ err = EIO;
AFR_STACK_DESTROY (heal_frame);
goto refresh_done;
}
heal_local->xdata_req = dict_new();
if (!heal_local->xdata_req) {
- err = -EIO;
+ err = EIO;
AFR_STACK_DESTROY (heal_frame);
goto refresh_done;
}
@@ -1270,29 +1270,49 @@ refresh_done:
return 0;
}
+static void
+afr_fill_success_replies (afr_local_t *local, afr_private_t *priv,
+ unsigned char *replies)
+{
+ int i = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && local->replies[i].op_ret == 0)
+ replies[i] = 1;
+ }
+}
+
int
afr_inode_refresh_done (call_frame_t *frame, xlator_t *this, int error)
{
call_frame_t *heal_frame = NULL;
afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
gf_boolean_t start_heal = _gf_false;
afr_local_t *heal_local = NULL;
+ unsigned char *success_replies = NULL;
int op_errno = ENOMEM;
int ret = 0;
- int err = 0;
if (error != 0) {
- err = error;
goto refresh_done;
}
local = frame->local;
+ priv = this->private;
+ success_replies = alloca0 (priv->child_count);
+ afr_fill_success_replies (local, priv, success_replies);
+
+ if (!afr_has_quorum (success_replies, this)) {
+ error = afr_final_errno (frame->local, this->private);
+ if (!error)
+ error = afr_quorum_errno(priv);
+ goto refresh_done;
+ }
ret = afr_replies_interpret (frame, this, local->refreshinode,
&start_heal);
- err = afr_inode_refresh_err (frame, this);
-
if (ret && afr_selfheal_enabled (this) && start_heal) {
heal_frame = copy_frame (frame);
if (!heal_frame)
@@ -1312,7 +1332,7 @@ afr_inode_refresh_done (call_frame_t *frame, xlator_t *this, int error)
}
refresh_done:
- afr_txn_refresh_done (frame, this, err);
+ afr_txn_refresh_done (frame, this, error);
return 0;
}
@@ -1326,7 +1346,7 @@ afr_inode_refresh_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int call_child = (long) cookie;
int8_t need_heal = 1;
int call_count = 0;
- GF_UNUSED int ret = 0;
+ int ret = 0;
local = frame->local;
local->replies[call_child].valid = 1;
@@ -1349,7 +1369,8 @@ afr_inode_refresh_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
call_count = afr_frame_return (frame);
if (call_count == 0) {
afr_set_need_heal (this, local);
- afr_inode_refresh_done (frame, this, 0);
+ ret = afr_inode_refresh_err (frame, this);
+ afr_inode_refresh_done (frame, this, ret);
}
}
@@ -2359,9 +2380,7 @@ afr_lookup_done (call_frame_t *frame, xlator_t *this)
local->op_errno = op_errno;
read_subvol = -1;
- for (i = 0; i < priv->child_count; i++)
- if (replies[i].valid && replies[i].op_ret == 0)
- success_replies[i] = 1;
+ afr_fill_success_replies (local, priv, success_replies);
for (i = 0; i < priv->child_count; i++) {
if (!replies[i].valid)
@@ -2906,7 +2925,6 @@ afr_discover_unwind (call_frame_t *frame, xlator_t *this)
{
afr_private_t *priv = NULL;
afr_local_t *local = NULL;
- int i = -1;
int op_errno = 0;
int read_subvol = -1;
unsigned char *data_readable = NULL;
@@ -2917,14 +2935,9 @@ afr_discover_unwind (call_frame_t *frame, xlator_t *this)
data_readable = alloca0 (priv->child_count);
success_replies = alloca0 (priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- if (!local->replies[i].valid)
- continue;
- if (local->replies[i].op_ret == 0) {
- success_replies[i] = 1;
- local->op_ret = 0;
- }
- }
+ afr_fill_success_replies (local, priv, success_replies);
+ if (AFR_COUNT (success_replies, priv->child_count) > 0)
+ local->op_ret = 0;
op_errno = afr_final_errno (frame->local, this->private);
@@ -3101,7 +3114,7 @@ afr_discover_do (call_frame_t *frame, xlator_t *this, int err)
priv = this->private;
if (err) {
- local->op_errno = -err;
+ local->op_errno = err;
goto out;
}
@@ -3214,7 +3227,7 @@ afr_lookup_do (call_frame_t *frame, xlator_t *this, int err)
priv = this->private;
if (err < 0) {
- local->op_errno = -err;
+ local->op_errno = err;
ret = -1;
goto out;
}
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index de125296bb3..3057deed604 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -120,7 +120,7 @@ afr_open_continue (call_frame_t *frame, xlator_t *this, int err)
priv = this->private;
if (err) {
- AFR_STACK_UNWIND (open, frame, -1, -err, NULL, NULL);
+ AFR_STACK_UNWIND (open, frame, -1, err, NULL, NULL);
} else {
local->call_count = AFR_COUNT (local->child_up,
priv->child_count);
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
index a8a4090efd1..a66a6660b0d 100644
--- a/xlators/cluster/afr/src/afr-read-txn.c
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -101,7 +101,7 @@ afr_read_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
read_subvol = afr_read_subvol_select_by_policy (inode, this,
local->readable, NULL);
if (read_subvol == -1) {
- err = -EIO;
+ err = EIO;
goto readfn;
}
@@ -120,7 +120,7 @@ readfn:
}
if (read_subvol == -1) {
- AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN (-1, -err);
+ AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN (-1, err);
}
afr_read_txn_wind (frame, this, read_subvol);
@@ -228,7 +228,7 @@ afr_read_txn (call_frame_t *frame, xlator_t *this, inode_t *inode,
if (priv->quorum_count && !afr_has_quorum (local->child_up, this)) {
local->op_ret = -1;
- local->op_errno = ENOTCONN;
+ local->op_errno = afr_quorum_errno(priv);
read_subvol = -1;
goto read;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 9c0963781e3..bfea096cf3b 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -2452,7 +2452,7 @@ afr_write_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
afr_local_t *local = frame->local;
if (err) {
- AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(-1, -err);
+ AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(-1, err);
goto fail;
}
@@ -2478,6 +2478,11 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
local->transaction.type = type;
+ if (priv->quorum_count && !afr_has_quorum (local->child_up, this)) {
+ ret = -afr_quorum_errno(priv);
+ goto out;
+ }
+
if (!afr_is_consistent_io_possible (local, priv, &ret)) {
ret = -ret; /*op_errno to ret conversion*/
goto out;