summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKrutika Dhananjay <kdhananj@redhat.com>2017-04-06 18:10:41 +0530
committerjiffin tony Thottan <jthottan@redhat.com>2017-04-10 11:45:51 -0400
commitd71ec72b981d110199c3376f39f91b704241975c (patch)
tree37392e186acfc99a7a4ef9b8153574fd60c16433
parentd5d599abaa598062885abc7ad8226faf26d11e64 (diff)
features/shard: Fix vm corruption upon fix-layout
Backport of: https://review.gluster.org/17010 shard's writev implementation, as part of identifying presence of participant shards that aren't in memory, first sends an MKNOD on these shards, and upon EEXIST error, looks up the shards before proceeding with the writes. The VM corruption was caused when the following happened: 1. DHT had n subvolumes initially. 2. Upon add-brick + fix-layout, the layout of .shard changed although the existing shards under it were yet to be migrated to their new hashed subvolumes. 3. During this time, there were writes on the VM falling in regions of the file whose corresponding shards were already existing under .shard. 4. Sharding xl sent MKNOD on these shards, now creating them in their new hashed subvolumes although there already exist shard blocks for this region with valid data. 5. All subsequent writes were wound on these newly created copies. The net outcome is that both copies of the shard didn't have the correct data. This caused the affected VMs to be unbootable. FIX: For want of better alternatives in DHT, the fix changes shard fops to do a LOOKUP before the MKNOD and upon EEXIST error, perform another lookup. Change-Id: I1a5d3515b42e2e5583c407d1b4aff44d7ce472eb BUG: 1440635 RCA'd-by: Raghavendra Gowdappa <rgowdapp@redhat.com> Reported-by: Mahdi Adnan <mahdi.adnan@outlook.com> Signed-off-by: Krutika Dhananjay <kdhananj@redhat.com> Reviewed-on: https://review.gluster.org/17019 CentOS-regression: Gluster Build System <jenkins@build.gluster.org> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org> Smoke: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: jiffin tony Thottan <jthottan@redhat.com>
-rw-r--r--xlators/features/shard/src/shard.c154
-rw-r--r--xlators/features/shard/src/shard.h1
2 files changed, 96 insertions, 59 deletions
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index 33c501f8498..4dd581dd19e 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -1693,11 +1693,30 @@ shard_common_lookup_shards_cbk (call_frame_t *frame, void *cookie,
if (op_ret < 0) {
/* Ignore absence of shards in the backend in truncate fop. */
- if (((local->fop == GF_FOP_TRUNCATE) ||
- (local->fop == GF_FOP_FTRUNCATE) ||
- (local->fop == GF_FOP_RENAME) ||
- (local->fop == GF_FOP_UNLINK)) && (op_errno == ENOENT))
- goto done;
+ switch (local->fop) {
+ case GF_FOP_TRUNCATE:
+ case GF_FOP_FTRUNCATE:
+ case GF_FOP_RENAME:
+ case GF_FOP_UNLINK:
+ if (op_errno == ENOENT)
+ goto done;
+ break;
+ case GF_FOP_WRITE:
+ case GF_FOP_READ:
+ case GF_FOP_ZEROFILL:
+ case GF_FOP_DISCARD:
+ case GF_FOP_FALLOCATE:
+ if ((!local->first_lookup_done) &&
+ (op_errno == ENOENT)) {
+ local->create_count++;
+ goto done;
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* else */
gf_msg (this->name, GF_LOG_ERROR, op_errno,
SHARD_MSG_LOOKUP_SHARD_FAILED, "Lookup on shard %d "
"failed. Base file gfid = %s", shard_block_num,
@@ -1714,6 +1733,8 @@ shard_common_lookup_shards_cbk (call_frame_t *frame, void *cookie,
done:
call_count = shard_call_count_return (frame);
if (call_count == 0) {
+ if (!local->first_lookup_done)
+ local->first_lookup_done = _gf_true;
if (local->op_ret < 0)
goto unwind;
else
@@ -3197,47 +3218,6 @@ next:
}
int
-shard_post_lookup_shards_readv_handler (call_frame_t *frame, xlator_t *this)
-{
- shard_local_t *local = NULL;
-
- local = frame->local;
-
- if (local->op_ret < 0) {
- SHARD_STACK_UNWIND (readv, frame, local->op_ret,
- local->op_errno, NULL, 0, NULL, NULL, NULL);
- return 0;
- }
-
- shard_readv_do (frame, this);
-
- return 0;
-}
-
-int
-shard_post_mknod_readv_handler (call_frame_t *frame, xlator_t *this)
-{
- shard_local_t *local = NULL;
-
- local = frame->local;
-
- if (local->op_ret < 0) {
- SHARD_STACK_UNWIND (readv, frame, local->op_ret,
- local->op_errno, NULL, 0, NULL, NULL, NULL);
- return 0;
- }
-
- if (!local->eexist_count) {
- shard_readv_do (frame, this);
- } else {
- local->call_count = local->eexist_count;
- shard_common_lookup_shards (frame, this, local->loc.inode,
- shard_post_lookup_shards_readv_handler);
- }
- return 0;
-}
-
-int
shard_common_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
@@ -3267,6 +3247,7 @@ done:
call_count = shard_call_count_return (frame);
if (call_count == 0) {
SHARD_UNSET_ROOT_FS_ID (frame, local);
+ local->create_count = 0;
local->post_mknod_handler (frame, this);
}
@@ -3397,6 +3378,55 @@ err:
}
int
+shard_post_mknod_readv_handler (call_frame_t *frame, xlator_t *this);
+
+int
+shard_post_lookup_shards_readv_handler (call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ SHARD_STACK_UNWIND (readv, frame, local->op_ret,
+ local->op_errno, NULL, 0, NULL, NULL, NULL);
+ return 0;
+ }
+
+ if (local->create_count) {
+ shard_common_resume_mknod (frame, this,
+ shard_post_mknod_readv_handler);
+ } else {
+ shard_readv_do (frame, this);
+ }
+
+ return 0;
+}
+
+int
+shard_post_mknod_readv_handler (call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ SHARD_STACK_UNWIND (readv, frame, local->op_ret,
+ local->op_errno, NULL, 0, NULL, NULL, NULL);
+ return 0;
+ }
+
+ if (!local->eexist_count) {
+ shard_readv_do (frame, this);
+ } else {
+ local->call_count = local->eexist_count;
+ shard_common_lookup_shards (frame, this, local->loc.inode,
+ shard_post_lookup_shards_readv_handler);
+ }
+ return 0;
+}
+
+int
shard_post_resolve_readv_handler (call_frame_t *frame, xlator_t *this)
{
shard_local_t *local = NULL;
@@ -3422,9 +3452,9 @@ shard_post_resolve_readv_handler (call_frame_t *frame, xlator_t *this)
}
if (local->call_count) {
- local->create_count = local->call_count;
- shard_common_resume_mknod (frame, this,
- shard_post_mknod_readv_handler);
+ shard_common_lookup_shards (frame, this,
+ local->resolver_base_inode,
+ shard_post_lookup_shards_readv_handler);
} else {
shard_readv_do (frame, this);
}
@@ -3576,14 +3606,11 @@ shard_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
shard_lookup_base_file (frame, this, &local->loc,
shard_post_lookup_readv_handler);
-
return 0;
-
err:
SHARD_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL,
NULL);
return 0;
-
}
int
@@ -3876,6 +3903,10 @@ next:
}
int
+shard_common_inode_write_post_mknod_handler (call_frame_t *frame,
+ xlator_t *this);
+
+int
shard_common_inode_write_post_lookup_shards_handler (call_frame_t *frame,
xlator_t *this)
{
@@ -3890,7 +3921,12 @@ shard_common_inode_write_post_lookup_shards_handler (call_frame_t *frame,
return 0;
}
- shard_common_inode_write_do (frame, this);
+ if (local->create_count) {
+ shard_common_resume_mknod (frame, this,
+ shard_common_inode_write_post_mknod_handler);
+ } else {
+ shard_common_inode_write_do (frame, this);
+ }
return 0;
}
@@ -3938,11 +3974,13 @@ shard_common_inode_write_post_lookup_handler (call_frame_t *frame,
local->postbuf = local->prebuf;
- if (local->create_count)
- shard_common_resume_mknod (frame, this,
- shard_common_inode_write_post_mknod_handler);
- else
+ if (local->call_count) {
+ shard_common_lookup_shards (frame, this,
+ local->resolver_base_inode,
+ shard_common_inode_write_post_lookup_shards_handler);
+ } else {
shard_common_inode_write_do (frame, this);
+ }
return 0;
}
@@ -3962,8 +4000,6 @@ shard_common_inode_write_post_resolve_handler (call_frame_t *frame,
return 0;
}
- local->create_count = local->call_count;
-
shard_lookup_base_file (frame, this, &local->loc,
shard_common_inode_write_post_lookup_handler);
return 0;
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
index 09232a45f24..73195983aa4 100644
--- a/xlators/features/shard/src/shard.h
+++ b/xlators/features/shard/src/shard.h
@@ -255,6 +255,7 @@ typedef struct shard_local {
shard_lock_t *shard_lock;
} lock;
inode_t *resolver_base_inode;
+ gf_boolean_t first_lookup_done;
} shard_local_t;
typedef struct shard_inode_ctx {