diff options
-rw-r--r-- | tests/basic/inode-leak.t | 8 | ||||
-rw-r--r-- | tests/bugs/shard/bug-1605056-2.t | 34 | ||||
-rw-r--r-- | tests/bugs/shard/bug-1605056.t | 63 | ||||
-rw-r--r-- | tests/bugs/shard/shard-inode-refcount-test.t | 2 | ||||
-rw-r--r-- | tests/volume.rc | 6 | ||||
-rw-r--r-- | xlators/features/shard/src/shard.c | 48 |
6 files changed, 140 insertions, 21 deletions
diff --git a/tests/basic/inode-leak.t b/tests/basic/inode-leak.t index 6bfbf572f03..e112fdddf8a 100644 --- a/tests/basic/inode-leak.t +++ b/tests/basic/inode-leak.t @@ -11,15 +11,15 @@ TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3} TEST $CLI volume start $V0 TEST glusterfs -s $H0 --volfile-id $V0 $M0 -EXPECT "1" get_mount_active_size_value $V0 -EXPECT "0" get_mount_lru_size_value $V0 +EXPECT "1" get_mount_active_size_value $V0 $M0 +EXPECT "0" get_mount_lru_size_value $V0 $M0 TEST cp -rf /etc $M0 TEST find $M0 TEST rm -rf $M0/* -EXPECT "1" get_mount_active_size_value $V0 -EXPECT "0" get_mount_lru_size_value $V0 +EXPECT "1" get_mount_active_size_value $V0 $M0 +EXPECT "0" get_mount_lru_size_value $V0 $M0 cleanup diff --git a/tests/bugs/shard/bug-1605056-2.t b/tests/bugs/shard/bug-1605056-2.t new file mode 100644 index 00000000000..a9c10fec3ea --- /dev/null +++ b/tests/bugs/shard/bug-1605056-2.t @@ -0,0 +1,34 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +cleanup + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +TEST $CLI volume set $V0 features.shard on +TEST $CLI volume set $V0 features.shard-block-size 4MB +TEST $CLI volume set $V0 features.shard-lru-limit 25 +TEST $CLI volume set $V0 performance.write-behind off + +TEST $CLI volume start $V0 + +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 + +# Perform a write that would cause 25 shards to be created under .shard +TEST dd if=/dev/zero of=$M0/foo bs=1M count=104 + +# Write into another file bar to ensure all of foo's shards are evicted from lru list of $M0 +TEST dd if=/dev/zero of=$M0/bar bs=1M count=104 + +# Delete foo from $M0. If there's a bug, the mount will crash. +TEST unlink $M0/foo + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 + +TEST $CLI volume stop $V0 +TEST $CLI volume delete $V0 + +cleanup diff --git a/tests/bugs/shard/bug-1605056.t b/tests/bugs/shard/bug-1605056.t new file mode 100644 index 00000000000..c2329ea79f8 --- /dev/null +++ b/tests/bugs/shard/bug-1605056.t @@ -0,0 +1,63 @@ +#!/bin/bash + +. $(dirname $0)/../../include.rc +. $(dirname $0)/../../volume.rc + +SHARD_COUNT_TIME=5 + +cleanup + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2} +TEST $CLI volume set $V0 features.shard on +TEST $CLI volume set $V0 features.shard-block-size 4MB +TEST $CLI volume set $V0 features.shard-lru-limit 25 +TEST $CLI volume set $V0 performance.write-behind off + +TEST $CLI volume start $V0 + +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0 +TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M1 + +# Perform a write that would cause 25 shards to be created under .shard +TEST dd if=/dev/zero of=$M0/foo bs=1M count=104 + +# Read the file from $M1, indirectly filling up the lru list. +TEST `cat $M1/foo > /dev/null` +statedump=$(generate_mount_statedump $V0 $M1) +sleep 1 +EXPECT "25" echo $(grep "inode-count" $statedump | cut -f2 -d'=' | tail -1) +rm -f $statedump + +# Delete foo from $M0. +TEST unlink $M0/foo + +# Send stat on foo from $M1 to force $M1 to "forget" inode associated with foo. +# Now the ghost shards associated with "foo" are still in lru list of $M1. +TEST ! stat $M1/foo + +# Let's force the ghost shards of "foo" out of lru list by looking up more shards +# through I/O on a file named "bar" from $M1. This should crash if the base inode +# had been destroyed by now. + +TEST dd if=/dev/zero of=$M1/bar bs=1M count=104 + +############################################### +#### Now for some inode ref-leak tests ... #### +############################################### + +# Expect there to be 29 active inodes - 26 belonging to "bar", 1 for .shard, +# 1 for .shard/remove_me and 1 for '/' +EXPECT_WITHIN $SHARD_COUNT_TIME `expr 26 + 3` get_mount_active_size_value $V0 $M1 + +TEST rm -f $M1/bar +EXPECT_WITHIN $SHARD_COUNT_TIME 3 get_mount_active_size_value $V0 $M1 + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1 + +TEST $CLI volume stop $V0 +TEST $CLI volume delete $V0 + +cleanup diff --git a/tests/bugs/shard/shard-inode-refcount-test.t b/tests/bugs/shard/shard-inode-refcount-test.t index 087c8ba7815..3fd181be690 100644 --- a/tests/bugs/shard/shard-inode-refcount-test.t +++ b/tests/bugs/shard/shard-inode-refcount-test.t @@ -21,7 +21,7 @@ TEST dd if=/dev/zero conv=fsync of=$M0/one-plus-five-shards bs=1M count=23 ACTIVE_INODES_BEFORE=$(get_mount_active_size_value $V0) TEST rm -f $M0/one-plus-five-shards # Expect 5 inodes less. But one inode more than before because .remove_me would be created. -EXPECT_WITHIN $SHARD_COUNT_TIME `expr $ACTIVE_INODES_BEFORE - 5 + 1` get_mount_active_size_value $V0 +EXPECT_WITHIN $SHARD_COUNT_TIME `expr $ACTIVE_INODES_BEFORE - 5 + 1` get_mount_active_size_value $V0 $M0 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0 TEST $CLI volume stop $V0 diff --git a/tests/volume.rc b/tests/volume.rc index cc0b5c670e4..261c6554d46 100644 --- a/tests/volume.rc +++ b/tests/volume.rc @@ -857,7 +857,8 @@ function get_active_fd_count { function get_mount_active_size_value { local vol=$1 - local statedump=$(generate_mount_statedump $vol) + local mount=$2 + local statedump=$(generate_mount_statedump $vol $mount) sleep 1 local val=$(grep "active_size" $statedump | cut -f2 -d'=' | tail -1) rm -f $statedump @@ -866,7 +867,8 @@ function get_mount_active_size_value { function get_mount_lru_size_value { local vol=$1 - local statedump=$(generate_mount_statedump $vol) + local mount=$2 + local statedump=$(generate_mount_statedump $vol $mount) sleep 1 local val=$(grep "lru_size" $statedump | cut -f2 -d'=' | tail -1) rm -f $statedump diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c index 182678c9a64..d5f55efa0bc 100644 --- a/xlators/features/shard/src/shard.c +++ b/xlators/features/shard/src/shard.c @@ -650,7 +650,8 @@ out: inode_t * __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, - inode_t *base_inode, int block_num) + inode_t *base_inode, int block_num, + uuid_t gfid) { char block_bname[256] = { 0, @@ -680,10 +681,13 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, inode_ref(linked_inode); if (base_inode) gf_uuid_copy(ctx->base_gfid, base_inode->gfid); + else + gf_uuid_copy(ctx->base_gfid, gfid); ctx->block_num = block_num; list_add_tail(&ctx->ilist, &priv->ilist_head); priv->inode_count++; - ctx->base_inode = base_inode; + if (base_inode) + ctx->base_inode = inode_ref(base_inode); } else { /*If on the other hand there is no available slot for this inode * in the list, delete the lru inode from the head of the list, @@ -701,6 +705,8 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, * deleted from fsync list and fsync'd in a new frame, * and then unlinked in memory and forgotten. */ + if (!lru_base_inode) + goto after_fsync_check; LOCK(&lru_base_inode->lock); LOCK(&lru_inode->lock); { @@ -716,6 +722,7 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, UNLOCK(&lru_inode->lock); UNLOCK(&lru_base_inode->lock); + after_fsync_check: if (!do_fsync) { shard_make_block_bname(lru_inode_ctx->block_num, lru_inode_ctx->base_gfid, block_bname, @@ -728,20 +735,31 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this, inode_forget(lru_inode, 0); } else { fsync_inode = lru_inode; - inode_unref(lru_base_inode); + if (lru_base_inode) + inode_unref(lru_base_inode); } /* The following unref corresponds to the ref * held by inode_find() above. */ inode_unref(lru_inode); + + /* The following unref corresponds to the ref held on the base shard + * at the time of adding shard inode to lru list + */ + if (lru_base_inode) + inode_unref(lru_base_inode); + /* For as long as an inode is in lru list, we try to * keep it alive by holding a ref on it. */ inode_ref(linked_inode); if (base_inode) gf_uuid_copy(ctx->base_gfid, base_inode->gfid); + else + gf_uuid_copy(ctx->base_gfid, gfid); ctx->block_num = block_num; - ctx->base_inode = base_inode; + if (base_inode) + ctx->base_inode = inode_ref(base_inode); list_add_tail(&ctx->ilist, &priv->ilist_head); } } else { @@ -1025,7 +1043,7 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this, LOCK(&priv->lock); { fsync_inode = __shard_update_shards_inode_list( - inode, this, res_inode, shard_idx_iter); + inode, this, res_inode, shard_idx_iter, gfid); } UNLOCK(&priv->lock); shard_idx_iter++; @@ -2167,7 +2185,7 @@ shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode, LOCK(&priv->lock); { fsync_inode = __shard_update_shards_inode_list( - linked_inode, this, local->loc.inode, block_num); + linked_inode, this, local->loc.inode, block_num, gfid); } UNLOCK(&priv->lock); if (fsync_inode) @@ -2875,17 +2893,18 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num) shard_inode_ctx_t *ctx = NULL; shard_inode_ctx_t *base_ictx = NULL; gf_boolean_t unlink_unref_forget = _gf_false; + int unref_base_inode = 0; this = THIS; priv = this->private; inode = local->inode_list[shard_block_num - local->first_block]; - base_inode = local->resolver_base_inode; + shard_inode_ctx_get(inode, this, &ctx); + base_inode = ctx->base_inode; if (base_inode) gf_uuid_copy(gfid, base_inode->gfid); else - gf_uuid_copy(gfid, local->base_gfid); - + gf_uuid_copy(gfid, ctx->base_gfid); shard_make_block_bname(shard_block_num, gfid, block_bname, sizeof(block_bname)); @@ -2898,17 +2917,16 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num) if (!list_empty(&ctx->ilist)) { list_del_init(&ctx->ilist); priv->inode_count--; + unref_base_inode++; GF_ASSERT(priv->inode_count >= 0); unlink_unref_forget = _gf_true; } if (ctx->fsync_needed) { - if (base_inode) - inode_unref(base_inode); + unref_base_inode++; list_del_init(&ctx->to_fsync_list); - if (base_inode) { + if (base_inode) __shard_inode_ctx_get(base_inode, this, &base_ictx); - base_ictx->fsync_count--; - } + base_ictx->fsync_count--; } } UNLOCK(&inode->lock); @@ -2919,6 +2937,8 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num) inode_unref(inode); inode_forget(inode, 0); } + if (base_inode && unref_base_inode) + inode_ref_reduce_by_n(base_inode, unref_base_inode); UNLOCK(&priv->lock); } |