diff options
author | Pranith Kumar K <pkarampu@redhat.com> | 2016-05-16 15:05:36 +0530 |
---|---|---|
committer | Pranith Kumar Karampuri <pkarampu@redhat.com> | 2016-05-19 21:27:59 -0700 |
commit | 8a71e498fdcedacd1a32e121b3e081c61ee57a2e (patch) | |
tree | 7b0fa16ca59fb8008604dd3a6b7fcb9003529ce9 | |
parent | 61c1b2cee973b11897a37d508910012e616033bc (diff) |
cluster/afr: Refresh inode for inode-write fops in need
Problem:
If a named fresh-lookup is done on a loc and the fop either fails on one of
the bricks or is not sent to one of the bricks, but that brick is up by the
time the response reaches afr, 'can_interpret' will be set to false in
afr_lookup_done(). As a result, the inode-ctx for that inode is not set. This
can lead to EIO in a transaction, because the transaction depends on the
'readable' array being available by that point.
Fix:
Refresh the inode for inode-write fops in two cases: when the ctx was not
already set at the time of the named fresh-lookup, so that it gets set now;
and when the file is in split-brain, where we need to perform one more refresh
before failing the fop, to check whether the file is still in split-brain.
BUG: 1336612
Change-Id: I5c50b62c8de06129b8516039f7c252e5008c47a5
Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
Reviewed-on: http://review.gluster.org/14368
Smoke: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Reviewed-by: Ravishankar N <ravishankar@redhat.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
-rw-r--r-- | xlators/cluster/afr/src/afr-common.c | 31 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-read-txn.c | 4 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr-transaction.c | 95 | ||||
-rw-r--r-- | xlators/cluster/afr/src/afr.h | 5 |
4 files changed, 98 insertions, 37 deletions
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c index 08186b0bbcb..d7bc3e29ee2 100644 --- a/xlators/cluster/afr/src/afr-common.c +++ b/xlators/cluster/afr/src/afr-common.c @@ -380,10 +380,6 @@ afr_inode_get_readable (call_frame_t *frame, inode_t *inode, xlator_t *this, int event_generation = 0; int ret = 0; - /* We don't care about split-brains for entry transactions. */ - if (type == AFR_ENTRY_TRANSACTION || type == AFR_ENTRY_RENAME_TRANSACTION) - return 0; - ret = afr_inode_read_subvol_get (inode, this, data, metadata, &event_generation); if (ret == -1) @@ -926,7 +922,8 @@ afr_inode_refresh_subvol_with_lookup_cbk (call_frame_t *frame, void *cookie, int afr_inode_refresh_subvol_with_lookup (call_frame_t *frame, xlator_t *this, - int i, inode_t *inode, dict_t *xdata) + int i, inode_t *inode, uuid_t gfid, + dict_t *xdata) { loc_t loc = {0, }; afr_private_t *priv = NULL; @@ -934,7 +931,13 @@ afr_inode_refresh_subvol_with_lookup (call_frame_t *frame, xlator_t *this, priv = this->private; loc.inode = inode; - gf_uuid_copy (loc.gfid, inode->gfid); + if (gf_uuid_is_null (inode->gfid) && gfid) { + /* To handle setattr/setxattr on yet to be linked inode from + * dht */ + gf_uuid_copy (loc.gfid, gfid); + } else { + gf_uuid_copy (loc.gfid, inode->gfid); + } STACK_WIND_COOKIE (frame, afr_inode_refresh_subvol_with_lookup_cbk, (void *) (long) i, priv->children[i], @@ -1048,7 +1051,8 @@ afr_inode_refresh_do (call_frame_t *frame, xlator_t *this) xdata); else afr_inode_refresh_subvol_with_lookup (frame, this, i, - local->refreshinode, xdata); + local->refreshinode, + local->refreshgfid, xdata); if (!--call_count) break; @@ -1062,7 +1066,7 @@ afr_inode_refresh_do (call_frame_t *frame, xlator_t *this) int afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode, - afr_inode_refresh_cbk_t refreshfn) + uuid_t gfid, afr_inode_refresh_cbk_t refreshfn) { afr_local_t *local = NULL; @@ -1077,6 +1081,11 @@ afr_inode_refresh 
(call_frame_t *frame, xlator_t *this, inode_t *inode, local->refreshinode = inode_ref (inode); + if (gfid) + gf_uuid_copy (local->refreshgfid, gfid); + else + gf_uuid_clear (local->refreshgfid); + afr_inode_refresh_do (frame, this); return 0; @@ -2408,7 +2417,8 @@ afr_discover (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req AFR_DATA_TRANSACTION, NULL); if (event != local->event_generation) - afr_inode_refresh (frame, this, loc->inode, afr_discover_do); + afr_inode_refresh (frame, this, loc->inode, NULL, + afr_discover_do); else afr_discover_do (frame, this, 0); @@ -2558,7 +2568,8 @@ afr_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req) AFR_DATA_TRANSACTION, NULL); if (event != local->event_generation) - afr_inode_refresh (frame, this, loc->parent, afr_lookup_do); + afr_inode_refresh (frame, this, loc->parent, NULL, + afr_lookup_do); else afr_lookup_do (frame, this, 0); diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c index a70565c37a1..32ad6a46d17 100644 --- a/xlators/cluster/afr/src/afr-read-txn.c +++ b/xlators/cluster/afr/src/afr-read-txn.c @@ -121,7 +121,7 @@ afr_read_txn_continue (call_frame_t *frame, xlator_t *this, int subvol) if (!local->refreshed) { local->refreshed = _gf_true; - afr_inode_refresh (frame, this, local->inode, + afr_inode_refresh (frame, this, local->inode, NULL, afr_read_txn_refresh_done); } else { afr_read_txn_next_subvol (frame, this); @@ -268,7 +268,7 @@ read: return 0; refresh: - afr_inode_refresh (frame, this, inode, afr_read_txn_refresh_done); + afr_inode_refresh (frame, this, inode, NULL, afr_read_txn_refresh_done); return 0; } diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c index ff2b58c032d..22b6997f2f7 100644 --- a/xlators/cluster/afr/src/afr-transaction.c +++ b/xlators/cluster/afr/src/afr-transaction.c @@ -2086,32 +2086,13 @@ unlock: UNLOCK (&local->fd->lock); } - -int -afr_transaction 
(call_frame_t *frame, xlator_t *this, afr_transaction_type type) +void +afr_transaction_start (call_frame_t *frame, xlator_t *this) { - afr_local_t * local = NULL; - afr_private_t * priv = NULL; - fd_t *fd = NULL; - int ret = -1; - - local = frame->local; - priv = this->private; - - local->transaction.resume = afr_transaction_resume; - local->transaction.type = type; - - ret = afr_transaction_local_init (local, this); - if (ret < 0) - goto out; + afr_local_t *local = frame->local; + afr_private_t *priv = this->private; + fd_t *fd = NULL; - ret = afr_inode_get_readable (frame, local->inode, this, 0, 0, type); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, EIO, AFR_MSG_SPLIT_BRAIN, - "Failing %s on gfid %s: split-brain observed.", - gf_fop_list[local->op], uuid_utoa (local->inode->gfid)); - goto out; - } afr_transaction_eager_lock_init (local, this); if (local->fd && local->transaction.eager_lock_on) @@ -2135,6 +2116,72 @@ afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type) } else { afr_lock (frame, this); } +} + +int +afr_write_txn_refresh_done (call_frame_t *frame, xlator_t *this, int err) +{ + afr_local_t *local = frame->local; + afr_private_t *priv = this->private; + int ret = 0; + + if (err) { + local->op_errno = -err; + local->op_ret = -1; + goto fail; + } + ret = afr_inode_get_readable (frame, local->inode, this, + local->readable, NULL, + local->transaction.type); + if (ret < 0) { + gf_msg (this->name, GF_LOG_ERROR, -ret, AFR_MSG_SPLIT_BRAIN, + "Failing %s on gfid %s: split-brain observed.", + gf_fop_list[local->op], uuid_utoa (local->inode->gfid)); + local->op_ret = -1; + local->op_errno = -ret; + goto fail; + } + afr_transaction_start (frame, this); + return 0; +fail: + local->transaction.unwind (frame, this); + AFR_STACK_DESTROY (frame); + return 0; +} + +int +afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type) +{ + afr_local_t *local = NULL; + afr_private_t *priv = NULL; + int ret = -1; + int 
event_generation = 0; + + local = frame->local; + priv = this->private; + + local->transaction.resume = afr_transaction_resume; + local->transaction.type = type; + + ret = afr_transaction_local_init (local, this); + if (ret < 0) + goto out; + + if (type == AFR_ENTRY_TRANSACTION || + type == AFR_ENTRY_RENAME_TRANSACTION) { + afr_transaction_start (frame, this); + ret = 0; + goto out; + } + + ret = afr_inode_get_readable (frame, local->inode, this, + local->readable, &event_generation, type); + if (ret < 0 || event_generation != priv->event_generation) { + afr_inode_refresh (frame, this, local->inode, local->loc.gfid, + afr_write_txn_refresh_done); + } else { + afr_transaction_start (frame, this); + } ret = 0; out: return ret; diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h index f16f9b4b4ac..7d270ea94e7 100644 --- a/xlators/cluster/afr/src/afr.h +++ b/xlators/cluster/afr/src/afr.h @@ -422,6 +422,9 @@ typedef struct _afr_local { */ inode_t *refreshinode; + /*To handle setattr/setxattr on yet to be linked inode from dht*/ + uuid_t refreshgfid; + /* @pre_op_compat: @@ -855,7 +858,7 @@ afr_read_subvol_get (inode_t *inode, xlator_t *this, int *subvol_p, int afr_inode_refresh (call_frame_t *frame, xlator_t *this, inode_t *inode, - afr_inode_refresh_cbk_t cbk); + uuid_t gfid, afr_inode_refresh_cbk_t cbk); int32_t afr_notify (xlator_t *this, int32_t event, void *data, void *data2); |