diff options
author | Nithya Balachandran <nbalacha@redhat.com> | 2015-05-19 23:27:35 +0530 |
---|---|---|
committer | Raghavendra G <rgowdapp@redhat.com> | 2015-06-02 22:36:17 -0700 |
commit | 9684b90526d03a15d451e341521d7df44adae73e (patch) | |
tree | 4a4f342391a111d3c3fe1782bdc8ecd8dea1a903 | |
parent | b6eda067d2e2a0b56718ea71522f6c7b06a09f13 (diff) |
cluster/dht: fix incorrect dst subvol info in inode_ctx
Stash additional information in the inode_ctx to help decide
whether the migration information is stale. This can happen if a
file was migrated several times but FOPs only detected the P1
migration phase: if no FOP detects the P2 phase, the inode
ctx1 is never reset.
We now save the src subvol as well as the dst subvol in the
inode ctx. The src subvol is the subvol on which the FOP was sent
when the mig info was set in the inode ctx. This information is
considered stale if either of the following is true:
1. The subvol on which the current FOP is sent is the same as
the dst subvol in the ctx.
2. The subvol on which the current FOP is sent is not the same
as the src subvol in the ctx.
This does not handle the case where the same file might have been
renamed such that the src subvol is the same but the dst subvol
is different. However, that is unlikely to happen very often.
Change-Id: I05a2e9b107ee64750c7ca629aee03b03a02ef75f
BUG: 1142423
Signed-off-by: Nithya Balachandran <nbalacha@redhat.com>
Reviewed-on: http://review.gluster.org/10834
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Raghavendra G <rgowdapp@redhat.com>
Tested-by: Raghavendra G <rgowdapp@redhat.com>
-rw-r--r-- | xlators/cluster/dht/src/dht-common.c | 52 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 11 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-helper.c | 129 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-inode-read.c | 13 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-inode-write.c | 64 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-mem-types.h | 1 |
6 files changed, 182 insertions, 88 deletions
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c index a6f916aa5c4..6dc64eb13ab 100644 --- a/xlators/cluster/dht/src/dht-common.c +++ b/xlators/cluster/dht/src/dht-common.c @@ -3310,13 +3310,12 @@ int dht_file_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata) { - int ret = -1; - dht_local_t *local = NULL; - call_frame_t *prev = NULL; - struct iatt *stbuf = NULL; - inode_t *inode = NULL; - xlator_t *subvol = NULL; - + int ret = -1; + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + struct iatt *stbuf = NULL; + inode_t *inode = NULL; + xlator_t *subvol1 = NULL, *subvol2 = NULL; local = frame->local; prev = cookie; @@ -3353,11 +3352,15 @@ dht_file_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, /* Phase 1 of migration */ if (IS_DHT_MIGRATION_PHASE1 (stbuf)) { inode = (local->fd) ? local->fd->inode : local->loc.inode; - dht_inode_ctx_get1 (this, inode, &subvol); - if (subvol) { - dht_setxattr2 (this, subvol, frame); + + ret = dht_inode_ctx_get_mig_info (this, inode, + &subvol1, &subvol2); + if (!dht_mig_info_is_invalid (local->cached_subvol, + subvol1, subvol2)) { + dht_setxattr2 (this, subvol2, frame); return 0; } + ret = dht_rebalance_in_progress_check (this, frame); if (!ret) return 0; @@ -3821,13 +3824,12 @@ int dht_file_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata) { - int ret = -1; - dht_local_t *local = NULL; - call_frame_t *prev = NULL; - struct iatt *stbuf = NULL; - inode_t *inode = NULL; - xlator_t *subvol = NULL; - + int ret = -1; + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + struct iatt *stbuf = NULL; + inode_t *inode = NULL; + xlator_t *subvol1 = NULL, *subvol2 = NULL; local = frame->local; prev = cookie; @@ -3864,11 +3866,15 @@ dht_file_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, /* Phase 1 of migration */ if (IS_DHT_MIGRATION_PHASE1 
(stbuf)) { inode = (local->fd) ? local->fd->inode : local->loc.inode; - dht_inode_ctx_get1 (this, inode, &subvol); - if (subvol) { - dht_removexattr2 (this, subvol, frame); + + ret = dht_inode_ctx_get_mig_info (this, inode, + &subvol1, &subvol2); + if (!dht_mig_info_is_invalid (local->cached_subvol, + subvol1, subvol2)) { + dht_removexattr2 (this, subvol2, frame); return 0; } + ret = dht_rebalance_in_progress_check (this, frame); if (!ret) return 0; @@ -5236,7 +5242,8 @@ dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->rebalance.target_op_fn = dht_link2; /* Check if the rebalance phase2 is true */ if (IS_DHT_MIGRATION_PHASE2 (stbuf)) { - ret = dht_inode_ctx_get1 (this, local->loc.inode, &subvol); + ret = dht_inode_ctx_get_mig_info (this, local->loc.inode, NULL, + &subvol); if (!subvol) { /* Phase 2 of migration */ ret = dht_rebalance_complete_check (this, frame); @@ -5250,7 +5257,8 @@ dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this, /* Check if the rebalance phase1 is true */ if (IS_DHT_MIGRATION_PHASE1 (stbuf)) { - ret = dht_inode_ctx_get1 (this, local->loc.inode, &subvol); + ret = dht_inode_ctx_get_mig_info (this, local->loc.inode, NULL, + &subvol); if (subvol) { dht_link2 (this, subvol, frame); return 0; diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index d37ca6ce425..45372f7d6c6 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -498,6 +498,11 @@ struct dir_dfmeta { int *fetch_entries; }; +typedef struct dht_migrate_info { + xlator_t *src_subvol; + xlator_t *dst_subvol; +} dht_migrate_info_t; + #define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT) #define is_revalidate(loc) (dht_inode_ctx_layout_get (loc->inode, this, NULL) == 0) @@ -973,7 +978,11 @@ int32_t dht_inodectx_dump (xlator_t *this, inode_t *inode); int -dht_inode_ctx_get1 (xlator_t *this, inode_t *inode, xlator_t **subvol); +dht_inode_ctx_get_mig_info 
(xlator_t *this, inode_t *inode, + xlator_t **src_subvol, xlator_t **dst_subvol); +gf_boolean_t +dht_mig_info_is_invalid (xlator_t *current, xlator_t *src_subvol, + xlator_t *dst_subvol); int dht_subvol_status (dht_conf_t *conf, xlator_t *subvol); diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index 7aa307da6b8..20f524c9887 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -15,27 +15,81 @@ #include "dht-helper.h" static inline int -dht_inode_ctx_set1 (xlator_t *this, inode_t *inode, xlator_t *subvol) +dht_inode_ctx_set_mig_info (xlator_t *this, inode_t *inode, + xlator_t *src_subvol, xlator_t *dst_subvol) { - uint64_t tmp_subvol = 0; + dht_migrate_info_t *miginfo = NULL; + uint64_t value = 0; + int ret = -1; - tmp_subvol = (long)subvol; - return inode_ctx_set1 (inode, this, &tmp_subvol); + miginfo = GF_CALLOC (1, sizeof (*miginfo), gf_dht_mt_miginfo_t); + if (miginfo == NULL) + goto out; + + miginfo->src_subvol = src_subvol; + miginfo->dst_subvol = dst_subvol; + + value = (uint64_t) miginfo; + + ret = inode_ctx_set1 (inode, this, &value); + if (ret < 0) { + GF_FREE (miginfo); + } + +out: + return ret; } + int -dht_inode_ctx_get1 (xlator_t *this, inode_t *inode, xlator_t **subvol) +dht_inode_ctx_get_mig_info (xlator_t *this, inode_t *inode, + xlator_t **src_subvol, xlator_t **dst_subvol) { - int ret = -1; - uint64_t tmp_subvol = 0; + int ret = -1; + uint64_t tmp_miginfo = 0; + dht_migrate_info_t *miginfo = NULL; - ret = inode_ctx_get1 (inode, this, &tmp_subvol); - if (tmp_subvol && subvol) - *subvol = (xlator_t *)tmp_subvol; + ret = inode_ctx_get1 (inode, this, &tmp_miginfo); + if ((ret < 0) || (tmp_miginfo == 0)) + goto out; + + miginfo = (dht_migrate_info_t *)tmp_miginfo; + + if (src_subvol) + *src_subvol = miginfo->src_subvol; + if (dst_subvol) + *dst_subvol = miginfo->dst_subvol; + +out: return ret; } +gf_boolean_t +dht_mig_info_is_invalid (xlator_t *current, xlator_t 
*src_subvol, + xlator_t *dst_subvol) +{ + +/* Not set + */ + if (!src_subvol || !dst_subvol) + return _gf_true; + +/* Invalid scenarios: + * The src_subvol does not match the subvol on which the current op was sent + * so the cached subvol has changed between the last mig_info_set and now. + * src_subvol == dst_subvol. The file was migrated without any FOP detecting + * a P2 so the old dst is now the current subvol. + * + * There is still one scenario where the info could be outdated - if + * file has undergone multiple migrations and ends up on the same src_subvol + * on which the mig_info was first set. + */ + if ((current == dst_subvol) || (current != src_subvol)) + return _gf_true; + + return _gf_false; +} int dht_frame_return (call_frame_t *frame) @@ -840,20 +894,20 @@ out: int dht_migration_complete_check_task (void *data) { - int ret = -1; - xlator_t *src_node = NULL; - xlator_t *dst_node = NULL, *linkto_target = NULL; - dht_local_t *local = NULL; - dict_t *dict = NULL; - struct iatt stbuf = {0,}; - xlator_t *this = NULL; - call_frame_t *frame = NULL; - loc_t tmp_loc = {0,}; - char *path = NULL; - dht_conf_t *conf = NULL; - inode_t *inode = NULL; - fd_t *iter_fd = NULL; - uint64_t tmp_subvol = 0; + int ret = -1; + xlator_t *src_node = NULL; + xlator_t *dst_node = NULL, *linkto_target = NULL; + dht_local_t *local = NULL; + dict_t *dict = NULL; + struct iatt stbuf = {0,}; + xlator_t *this = NULL; + call_frame_t *frame = NULL; + loc_t tmp_loc = {0,}; + char *path = NULL; + dht_conf_t *conf = NULL; + inode_t *inode = NULL; + fd_t *iter_fd = NULL; + uint64_t tmp_miginfo = 0; int open_failed = 0; this = THIS; @@ -945,9 +999,11 @@ dht_migration_complete_check_task (void *data) /* once we detect the migration complete, the inode-ctx2 is no more required.. 
delete the ctx and also, it means, open() already done on all the fd of inode */ - ret = inode_ctx_reset1 (inode, this, &tmp_subvol); - if (tmp_subvol) + ret = inode_ctx_reset1 (inode, this, &tmp_miginfo); + if (tmp_miginfo) { + GF_FREE ((void *)tmp_miginfo); goto out; + } if (list_empty (&inode->fd_list)) goto out; @@ -1006,15 +1062,15 @@ dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame) dht_migration_complete_check_done, frame, frame); return ret; -} +} /* During 'in-progress' state, both nodes should have the file */ static int dht_inprogress_check_done (int op_ret, call_frame_t *frame, void *data) { - dht_local_t *local = NULL; - xlator_t *subvol = NULL; - inode_t *inode = NULL; + dht_local_t *local = NULL; + xlator_t *dst_subvol = NULL, *src_subvol = NULL; + inode_t *inode = NULL; local = frame->local; @@ -1023,17 +1079,18 @@ dht_inprogress_check_done (int op_ret, call_frame_t *frame, void *data) inode = local->loc.inode ? local->loc.inode : local->fd->inode; - dht_inode_ctx_get1 (THIS, inode, &subvol); - if (!subvol) { - subvol = dht_subvol_get_cached (THIS, inode); - if (!subvol) { + dht_inode_ctx_get_mig_info (THIS, inode, &src_subvol, &dst_subvol); + if (dht_mig_info_is_invalid (local->cached_subvol, + src_subvol, dst_subvol)) { + dst_subvol = dht_subvol_get_cached (THIS, inode); + if (!dst_subvol) { local->op_errno = EINVAL; goto out; } } out: - local->rebalance.target_op_fn (THIS, subvol, frame); + local->rebalance.target_op_fn (THIS, dst_subvol, frame); return 0; } @@ -1165,7 +1222,7 @@ dht_rebalance_inprogress_task (void *data) } done: - ret = dht_inode_ctx_set1 (this, inode, dst_node); + ret = dht_inode_ctx_set_mig_info (this, inode, src_node, dst_node); if (ret) { gf_log (this->name, GF_LOG_ERROR, "%s: failed to set inode-ctx target file at %s", diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c index 41cffa8ad1d..17d76acda58 100644 --- a/xlators/cluster/dht/src/dht-inode-read.c +++ 
b/xlators/cluster/dht/src/dht-inode-read.c @@ -153,7 +153,7 @@ dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, /* Check if the rebalance phase2 is true */ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) { inode = (local->fd) ? local->fd->inode : local->loc.inode; - ret = dht_inode_ctx_get1 (this, inode, &subvol); + ret = dht_inode_ctx_get_mig_info (this, inode, NULL, &subvol); if (!subvol) { /* Phase 2 of migration */ local->rebalance.target_op_fn = dht_attr2; @@ -377,7 +377,6 @@ dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, { dht_local_t *local = NULL; int ret = 0; - inode_t *inode = NULL; xlator_t *subvol = 0; local = frame->local; @@ -397,7 +396,8 @@ dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this, local->op_errno = op_errno; if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) { /* File would be migrated to other node */ - ret = dht_inode_ctx_get1 (this, inode, &subvol); + ret = dht_inode_ctx_get_mig_info (this, local->fd->inode, NULL, + &subvol); if (!subvol) { local->rebalance.target_op_fn = dht_readv2; ret = dht_rebalance_complete_check (this, frame); @@ -612,7 +612,6 @@ dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, dict_t *xdata) { dht_local_t *local = NULL; - inode_t *inode = NULL; xlator_t *subvol = 0; local = frame->local; @@ -623,7 +622,7 @@ dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto out; /* If context is set, then send flush() it to the destination */ - dht_inode_ctx_get1 (this, inode, &subvol); + dht_inode_ctx_get_mig_info (this, local->fd->inode, NULL, &subvol); if (subvol) { dht_flush2 (this, subvol, frame); return 0; @@ -648,6 +647,7 @@ dht_flush2 (xlator_t *this, xlator_t *subvol, call_frame_t *frame) op_errno = local->op_errno; + dht_inode_ctx_get_mig_info (this, local->fd->inode, NULL, &subvol); if (subvol == NULL) goto out; @@ -735,7 +735,8 @@ dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t 
*this, int op_ret, } local->op_errno = op_errno; - dht_inode_ctx_get1 (this, inode, &subvol); + inode = local->fd->inode; + dht_inode_ctx_get_mig_info (this, inode, NULL, &subvol); if (!subvol) { local->rebalance.target_op_fn = dht_fsync2; diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c index 259baa9e263..ec4029e5bd5 100644 --- a/xlators/cluster/dht/src/dht-inode-write.c +++ b/xlators/cluster/dht/src/dht-inode-write.c @@ -25,7 +25,8 @@ dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, { dht_local_t *local = NULL; int ret = -1; - xlator_t *subvol = NULL; + xlator_t *subvol1 = NULL; + xlator_t *subvol2 = NULL; if (op_ret == -1 && !dht_inode_missing(op_errno)) { goto out; @@ -62,9 +63,11 @@ dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this, dht_iatt_merge (this, &local->stbuf, postbuf, NULL); dht_iatt_merge (this, &local->prebuf, prebuf, NULL); - ret = dht_inode_ctx_get1 (this, local->fd->inode, &subvol); - if (subvol) { - dht_writev2 (this, subvol, frame); + ret = dht_inode_ctx_get_mig_info (this, local->fd->inode, + &subvol1, &subvol2); + if (!dht_mig_info_is_invalid (local->cached_subvol, + subvol1, subvol2)) { + dht_writev2 (this, subvol2, frame); return 0; } ret = dht_rebalance_in_progress_check (this, frame); @@ -172,7 +175,8 @@ dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, dht_local_t *local = NULL; call_frame_t *prev = NULL; int ret = -1; - xlator_t *subvol = NULL; + xlator_t *src_subvol = NULL; + xlator_t *dst_subvol = NULL; inode_t *inode = NULL; GF_VALIDATE_OR_GOTO ("dht", frame, err); @@ -216,9 +220,12 @@ dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this, dht_iatt_merge (this, &local->stbuf, postbuf, NULL); dht_iatt_merge (this, &local->prebuf, prebuf, NULL); inode = (local->fd) ? 
local->fd->inode : local->loc.inode; - dht_inode_ctx_get1 (this, inode, &subvol); - if (subvol) { - dht_truncate2 (this, subvol, frame); + + dht_inode_ctx_get_mig_info (this, inode, &src_subvol, + &dst_subvol); + if (!dht_mig_info_is_invalid (local->cached_subvol, + src_subvol, dst_subvol)) { + dht_truncate2 (this, dst_subvol, frame); return 0; } ret = dht_rebalance_in_progress_check (this, frame); @@ -363,7 +370,8 @@ dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, dht_local_t *local = NULL; call_frame_t *prev = NULL; int ret = -1; - xlator_t *subvol = NULL; + xlator_t *src_subvol = NULL; + xlator_t *dst_subvol = NULL; GF_VALIDATE_OR_GOTO ("dht", frame, err); GF_VALIDATE_OR_GOTO ("dht", this, out); @@ -403,9 +411,12 @@ dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (IS_DHT_MIGRATION_PHASE1 (postbuf)) { dht_iatt_merge (this, &local->stbuf, postbuf, NULL); dht_iatt_merge (this, &local->prebuf, prebuf, NULL); - dht_inode_ctx_get1 (this, local->fd->inode, &subvol); - if (subvol) { - dht_fallocate2 (this, subvol, frame); + + dht_inode_ctx_get_mig_info (this, local->fd->inode, &src_subvol, + &dst_subvol); + if (!dht_mig_info_is_invalid (local->cached_subvol, + src_subvol, dst_subvol)) { + dht_fallocate2 (this, dst_subvol, frame); return 0; } ret = dht_rebalance_in_progress_check (this, frame); @@ -503,7 +514,8 @@ dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, dht_local_t *local = NULL; call_frame_t *prev = NULL; int ret = -1; - xlator_t *subvol = NULL; + xlator_t *src_subvol = NULL; + xlator_t *dst_subvol = NULL; GF_VALIDATE_OR_GOTO ("dht", frame, err); GF_VALIDATE_OR_GOTO ("dht", this, out); @@ -543,9 +555,12 @@ dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (IS_DHT_MIGRATION_PHASE1 (postbuf)) { dht_iatt_merge (this, &local->stbuf, postbuf, NULL); dht_iatt_merge (this, &local->prebuf, prebuf, NULL); - dht_inode_ctx_get1 (this, local->fd->inode, &subvol); - if (subvol) { - 
dht_discard2 (this, subvol, frame); + + dht_inode_ctx_get_mig_info (this, local->fd->inode, &src_subvol, + &dst_subvol); + if (!dht_mig_info_is_invalid(local->cached_subvol, + src_subvol, dst_subvol)) { + dht_discard2 (this, dst_subvol, frame); return 0; } ret = dht_rebalance_in_progress_check (this, frame); @@ -637,10 +652,10 @@ dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret, int op_errno, struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata) { - dht_local_t *local = NULL; - call_frame_t *prev = NULL; - int ret = -1; - xlator_t *subvol = NULL; + dht_local_t *local = NULL; + call_frame_t *prev = NULL; + int ret = -1; + xlator_t *subvol1 = NULL, *subvol2 = NULL; GF_VALIDATE_OR_GOTO ("dht", frame, err); GF_VALIDATE_OR_GOTO ("dht", this, out); @@ -678,9 +693,12 @@ dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, if (IS_DHT_MIGRATION_PHASE1 (postbuf)) { dht_iatt_merge (this, &local->stbuf, postbuf, NULL); dht_iatt_merge (this, &local->prebuf, prebuf, NULL); - dht_inode_ctx_get1 (this, local->fd->inode, &subvol); - if (subvol) { - dht_zerofill2 (this, subvol, frame); + + ret = dht_inode_ctx_get_mig_info (this, local->fd->inode, + &subvol1, &subvol2); + if (!dht_mig_info_is_invalid (local->cached_subvol, + subvol1, subvol2)) { + dht_zerofill2 (this, subvol2, frame); return 0; } diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h index 46028e6d9e0..85e5baed62c 100644 --- a/xlators/cluster/dht/src/dht-mem-types.h +++ b/xlators/cluster/dht/src/dht-mem-types.h @@ -33,6 +33,7 @@ enum gf_dht_mem_types_ { gf_dht_mt_dirent_t, gf_dht_mt_container_t, gf_dht_mt_octx_t, + gf_dht_mt_miginfo_t, gf_dht_mt_end }; #endif |