diff options
author | Csaba Henk <csaba@redhat.com> | 2016-10-27 07:30:48 +0200 |
---|---|---|
committer | Raghavendra G <rgowdapp@redhat.com> | 2017-01-11 07:40:25 -0800 |
commit | bb438d849a4a3941c1a9b525213f695f0a2c961b (patch) | |
tree | e17e44f95d734ac1dcdabb69b1d867afcd758604 /xlators/cluster | |
parent | d6bc8da62f1b0d454fa5187687fdbf894403c7ce (diff) |
feature/dht: undo partially successful dir rename
As with dht, dirs are present on all subvolumes,
renaming them is a compound operation and thus a
partial success + partial failure scenario is
possible, resulting in an inconsistent state.
For purposes of reproduction, such a scenario can
easily be produced by stopping the volume, edit the
volfile of a certain subvolume to get at an
"option read-only on" setting, and then restart
the volume. Thus those operations that are to make change
on the affected subvolume will fail with EROFS.
To handle such scenarios, we introduce an in-memory cache
where we record the return values obtained from the
subvolumes. At the final stage of the dir rename operation
we check if it's a partial success/fail situation. If yes,
then we perform a reverse rename op on those subvolumes
where the operation succeeded.
Change-Id: I3d05f74f53932cb984a918d252a7309c1009a51d
BUG: 1412069
Signed-off-by: Raghavendra G <rgowdapp@redhat.com>
Reviewed-on: http://review.gluster.org/15739
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: N Balachandran <nbalacha@redhat.com>
Diffstat (limited to 'xlators/cluster')
-rw-r--r-- | xlators/cluster/dht/src/dht-common.h | 3 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-helper.c | 3 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-mem-types.h | 1 | ||||
-rw-r--r-- | xlators/cluster/dht/src/dht-rename.c | 61 |
4 files changed, 61 insertions, 7 deletions
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h index be9d2ce5e4e..6030af7c41d 100644 --- a/xlators/cluster/dht/src/dht-common.h +++ b/xlators/cluster/dht/src/dht-common.h @@ -303,6 +303,9 @@ struct dht_local { call_stub_t *stub; int32_t parent_disk_layout[4]; + + /* rename rollback */ + int *ret_cache ; }; typedef struct dht_local dht_local_t; diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c index ad031b6216c..81d1dffa0af 100644 --- a/xlators/cluster/dht/src/dht-helper.c +++ b/xlators/cluster/dht/src/dht-helper.c @@ -638,6 +638,9 @@ dht_local_wipe (xlator_t *this, dht_local_t *local) local->stub = NULL; } + if (local->ret_cache) + GF_FREE (local->ret_cache); + mem_put (local); } diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h index 5de5d1838ad..3554f3f9c2d 100644 --- a/xlators/cluster/dht/src/dht-mem-types.h +++ b/xlators/cluster/dht/src/dht-mem-types.h @@ -38,6 +38,7 @@ enum gf_dht_mem_types_ { gf_tier_mt_ipc_ctr_params_t, gf_dht_mt_fd_ctx_t, gf_tier_mt_qfile_array_t, + gf_dht_ret_cache_t, gf_dht_mt_end }; #endif diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c index 7e7e7151af7..53c61f8a714 100644 --- a/xlators/cluster/dht/src/dht-rename.c +++ b/xlators/cluster/dht/src/dht-rename.c @@ -25,17 +25,22 @@ dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, struct iatt *prenewparent, struct iatt *postnewparent, dict_t *xdata) { - dht_local_t *local = NULL; - int this_call_cnt = 0; - xlator_t *prev = NULL; - char gfid[GF_UUID_BUF_SIZE] = {0}; + dht_conf_t *conf = NULL; + dht_local_t *local = NULL; + int this_call_cnt = 0; + xlator_t *prev = NULL; + int i = 0; + char gfid[GF_UUID_BUF_SIZE] = {0}; + int subvol_cnt = -1; + conf = this->private; local = frame->local; prev = cookie; + subvol_cnt = dht_subvol_cnt (this, prev); + local->ret_cache[subvol_cnt] = op_ret; if (op_ret == -1) { - /* TODO: undo the damage */ gf_uuid_unparse(local->loc.inode->gfid, gfid); gf_msg (this->name, GF_LOG_INFO, op_errno, @@ -63,6 +68,42 @@ dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, unwind: this_call_cnt = dht_frame_return (frame); if (is_last_call (this_call_cnt)) { + /* We get here with local->call_cnt == 0. Which means + * we are the only one executing this code, there is + * no contention. Therefore it's safe to manipulate or + * deref local->call_cnt directly (without locking). + */ + if (local->ret_cache[conf->subvolume_cnt] == 0) { + /* count errant subvols in last field of ret_cache */ + for (i = 0; i < conf->subvolume_cnt; i++) { + if (local->ret_cache[i] != 0) + ++local->ret_cache[conf->subvolume_cnt]; + } + if (local->ret_cache[conf->subvolume_cnt]) { + /* undoing the damage: + * for all subvolumes, where rename + * succeeded, we perform the reverse operation + */ + for (i = 0; i < conf->subvolume_cnt; i++) { + if (local->ret_cache[i] == 0) + ++local->call_cnt; + } + for (i = 0; i < conf->subvolume_cnt; i++) { + if (local->ret_cache[i]) + continue; + + STACK_WIND (frame, + dht_rename_dir_cbk, + conf->subvolumes[i], + conf->subvolumes[i]->fops->rename, + &local->loc2, &local->loc, + NULL); + } + + return 0; + } + } + WIPE (&local->preoldparent); WIPE (&local->postoldparent); WIPE (&local->preparent); @@ -96,8 +137,6 @@ dht_rename_hashed_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this, if (op_ret == -1) { - /* TODO: undo the damage */ - gf_uuid_unparse(local->loc.inode->gfid, gfid); gf_msg (this->name, GF_LOG_INFO, op_errno, @@ -324,6 +363,14 @@ dht_rename_dir (call_frame_t *frame, xlator_t *this) conf = frame->this->private; local = frame->local; + local->ret_cache = GF_CALLOC (conf->subvolume_cnt + 1, sizeof (int), + gf_dht_ret_cache_t); + + if (local->ret_cache == NULL) { + op_errno = ENOMEM; + goto err; + } + /* We must take a lock on all the subvols with src gfid. * Along with this if dst exists we must take lock on * any one subvol with dst gfid. |