summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPranith Kumar K <pkarampu@redhat.com>2016-05-16 15:05:36 +0530
committerPranith Kumar Karampuri <pkarampu@redhat.com>2016-05-19 21:27:59 -0700
commit8a71e498fdcedacd1a32e121b3e081c61ee57a2e (patch)
tree7b0fa16ca59fb8008604dd3a6b7fcb9003529ce9
parent61c1b2cee973b11897a37d508910012e616033bc (diff)
cluster/afr: Refresh inode for inode-write fops in need
Problem: If a named fresh lookup is done on a loc and the fop either fails on one of the bricks or is not sent to one of the bricks, but that brick is up by the time the response reaches afr, 'can_interpret' is set to false in afr_lookup_done(). As a result the inode-ctx for that inode is not set, which can lead to EIO in a transaction, because a transaction depends on the 'readable' array being available by that point.

Fix: Refresh the inode for inode-write fops so that the ctx gets set if that was not already done at the time of the named fresh lookup. Also refresh when the file is in split-brain, where one more refresh is needed before failing the fop, to check whether the file is still in split-brain.

BUG: 1336612
Change-Id: I5c50b62c8de06129b8516039f7c252e5008c47a5
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/14368
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Ravishankar N <ravishankar@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
-rw-r--r--xlators/cluster/afr/src/afr-common.c31
-rw-r--r--xlators/cluster/afr/src/afr-read-txn.c4
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c95
-rw-r--r--xlators/cluster/afr/src/afr.h5
4 files changed, 98 insertions, 37 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 08186b0bbcb..d7bc3e29ee2 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -380,10 +380,6 @@ afr_inode_get_readable (call_frame_t *frame, inode_t *inode, xlator_t *this,
int event_generation = 0;
int ret = 0;
- /* We don't care about split-brains for entry transactions. */
- if (type == AFR_ENTRY_TRANSACTION || type == AFR_ENTRY_RENAME_TRANSACTION)
- return 0;
-
ret = afr_inode_read_subvol_get (inode, this, data, metadata,
&event_generation);
if (ret == -1)
@@ -926,7 +922,8 @@ afr_inode_refresh_subvol_with_lookup_cbk (call_frame_t *frame, void *cookie,
int
afr_inode_refresh_subvol_with_lookup (call_frame_t *frame, xlator_t *this,
- int i, inode_t *inode, dict_t *xdata)
+ int i, inode_t *inode, uuid_t gfid,
+ dict_t *xdata)
{
loc_t loc = {0, };
afr_private_t *priv = NULL;
@@ -934,7 +931,13 @@ afr_inode_refresh_subvol_with_lookup (call_frame_t *frame, xlator_t *this,
priv = this->private;
loc.inode = inode;
- gf_uuid_copy (loc.gfid, inode->gfid);
+ if (gf_uuid_is_null (inode->gfid) && gfid) {
+ /* To handle setattr/setxattr on yet to be linked inode from
+ * dht */
+ gf_uuid_copy (loc.gfid, gfid);
+ } else {
+ gf_uuid_copy (loc.gfid, inode->gfid);
+ }
STACK_WIND_COOKIE (frame, afr_inode_refresh_subvol_with_lookup_cbk,
(void *) (long) i, priv->children[i],
@@ -1048,7 +1051,8 @@ afr_inode_refresh_do (call_frame_t *frame, xlator_t *this)
xdata);
else
afr_inode_refresh_subvol_with_lookup (frame, this, i,
- local->refreshinode, xdata);
+ local->refreshinode,
+ local->refreshgfid, xdata);
if (!--call_count)
break;
@@ -1062,7 +1066,7 @@ afr_inode_refresh_do (call_frame_t *frame, xlator_t *this)
int
afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode,
- afr_inode_refresh_cbk_t refreshfn)
+ uuid_t gfid, afr_inode_refresh_cbk_t refreshfn)
{
afr_local_t *local = NULL;
@@ -1077,6 +1081,11 @@ afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode,
local->refreshinode = inode_ref (inode);
+ if (gfid)
+ gf_uuid_copy (local->refreshgfid, gfid);
+ else
+ gf_uuid_clear (local->refreshgfid);
+
afr_inode_refresh_do (frame, this);
return 0;
@@ -2408,7 +2417,8 @@ afr_discover (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req
AFR_DATA_TRANSACTION, NULL);
if (event != local->event_generation)
- afr_inode_refresh (frame, this, loc->inode, afr_discover_do);
+ afr_inode_refresh (frame, this, loc->inode, NULL,
+ afr_discover_do);
else
afr_discover_do (frame, this, 0);
@@ -2558,7 +2568,8 @@ afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
AFR_DATA_TRANSACTION, NULL);
if (event != local->event_generation)
- afr_inode_refresh (frame, this, loc->parent, afr_lookup_do);
+ afr_inode_refresh (frame, this, loc->parent, NULL,
+ afr_lookup_do);
else
afr_lookup_do (frame, this, 0);
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
index a70565c37a1..32ad6a46d17 100644
--- a/xlators/cluster/afr/src/afr-read-txn.c
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -121,7 +121,7 @@ afr_read_txn_continue (call_frame_t *frame, xlator_t *this, int subvol)
if (!local->refreshed) {
local->refreshed = _gf_true;
- afr_inode_refresh (frame, this, local->inode,
+ afr_inode_refresh (frame, this, local->inode, NULL,
afr_read_txn_refresh_done);
} else {
afr_read_txn_next_subvol (frame, this);
@@ -268,7 +268,7 @@ read:
return 0;
refresh:
- afr_inode_refresh (frame, this, inode, afr_read_txn_refresh_done);
+ afr_inode_refresh (frame, this, inode, NULL, afr_read_txn_refresh_done);
return 0;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index ff2b58c032d..22b6997f2f7 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -2086,32 +2086,13 @@ unlock:
UNLOCK (&local->fd->lock);
}
-
-int
-afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
+void
+afr_transaction_start (call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- fd_t *fd = NULL;
- int ret = -1;
-
- local = frame->local;
- priv = this->private;
-
- local->transaction.resume = afr_transaction_resume;
- local->transaction.type = type;
-
- ret = afr_transaction_local_init (local, this);
- if (ret < 0)
- goto out;
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ fd_t *fd = NULL;
- ret = afr_inode_get_readable (frame, local->inode, this, 0, 0, type);
- if (ret) {
- gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN,
- "Failing %s on gfid %s: split-brain observed.",
- gf_fop_list[local->op], uuid_utoa (local->inode->gfid));
- goto out;
- }
afr_transaction_eager_lock_init (local, this);
if (local->fd && local->transaction.eager_lock_on)
@@ -2135,6 +2116,72 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
} else {
afr_lock (frame, this);
}
+}
+
+int
+afr_write_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int ret = 0;
+
+ if (err) {
+ local->op_errno = -err;
+ local->op_ret = -1;
+ goto fail;
+ }
+ ret = afr_inode_get_readable (frame, local->inode, this,
+ local->readable, NULL,
+ local->transaction.type);
+ if (ret < 0) {
+ gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_SPLIT_BRAIN,
+ "Failing %s on gfid %s: split-brain observed.",
+ gf_fop_list[local->op], uuid_utoa (local->inode->gfid));
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto fail;
+ }
+ afr_transaction_start (frame, this);
+ return 0;
+fail:
+ local->transaction.unwind (frame, this);
+ AFR_STACK_DESTROY (frame);
+ return 0;
+}
+
+int
+afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ int event_generation = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ local->transaction.resume = afr_transaction_resume;
+ local->transaction.type = type;
+
+ ret = afr_transaction_local_init (local, this);
+ if (ret < 0)
+ goto out;
+
+ if (type == AFR_ENTRY_TRANSACTION ||
+ type == AFR_ENTRY_RENAME_TRANSACTION) {
+ afr_transaction_start (frame, this);
+ ret = 0;
+ goto out;
+ }
+
+ ret = afr_inode_get_readable (frame, local->inode, this,
+ local->readable, &event_generation, type);
+ if (ret < 0 || event_generation != priv->event_generation) {
+ afr_inode_refresh (frame, this, local->inode, local->loc.gfid,
+ afr_write_txn_refresh_done);
+ } else {
+ afr_transaction_start (frame, this);
+ }
ret = 0;
out:
return ret;
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index f16f9b4b4ac..7d270ea94e7 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -422,6 +422,9 @@ typedef struct _afr_local {
*/
inode_t *refreshinode;
+ /*To handle setattr/setxattr on yet to be linked inode from dht*/
+ uuid_t refreshgfid;
+
/*
@pre_op_compat:
@@ -855,7 +858,7 @@ afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p,
int
afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode,
- afr_inode_refresh_cbk_t cbk);
+ uuid_t gfid, afr_inode_refresh_cbk_t cbk);
int32_t
afr_notify (xlator_t *this, int32_t event, void *data, void *data2);