| -rw-r--r-- | tests/basic/inode-leak.t                     |  8 |
| -rw-r--r-- | tests/bugs/shard/bug-1605056-2.t             | 34 |
| -rw-r--r-- | tests/bugs/shard/bug-1605056.t               | 63 |
| -rw-r--r-- | tests/bugs/shard/shard-inode-refcount-test.t |  2 |
| -rw-r--r-- | tests/volume.rc                              |  6 |
| -rw-r--r-- | xlators/features/shard/src/shard.c           | 48 |

6 files changed, 140 insertions, 21 deletions
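In short, the helper functions get_mount_active_size_value and get_mount_lru_size_value in tests/volume.rc now take the mount point as a second argument, so a test that mounts two clients can inspect each mount's inode table independently. A minimal usage sketch, assuming the standard $V0, $M0 and $M1 variables provided by include.rc and volume.rc:

    # Generate and parse a statedump for the client mounted at $M0 ...
    EXPECT "1" get_mount_active_size_value $V0 $M0
    EXPECT "0" get_mount_lru_size_value $V0 $M0

    # ... and, independently, for a second client mounted at $M1.
    EXPECT "1" get_mount_active_size_value $V0 $M1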
diff --git a/tests/basic/inode-leak.t b/tests/basic/inode-leak.t
index 6bfbf572f03..e112fdddf8a 100644
--- a/tests/basic/inode-leak.t
+++ b/tests/basic/inode-leak.t
@@ -11,15 +11,15 @@ TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
 TEST $CLI volume start $V0
 
 TEST glusterfs -s $H0 --volfile-id $V0 $M0
 
-EXPECT "1" get_mount_active_size_value $V0
-EXPECT "0" get_mount_lru_size_value $V0
+EXPECT "1" get_mount_active_size_value $V0 $M0
+EXPECT "0" get_mount_lru_size_value $V0 $M0
 
 TEST cp -rf /etc $M0
 TEST find $M0
 TEST rm -rf $M0/*
 
-EXPECT "1" get_mount_active_size_value $V0
-EXPECT "0" get_mount_lru_size_value $V0
+EXPECT "1" get_mount_active_size_value $V0 $M0
+EXPECT "0" get_mount_lru_size_value $V0 $M0
 
 cleanup
diff --git a/tests/bugs/shard/bug-1605056-2.t b/tests/bugs/shard/bug-1605056-2.t
new file mode 100644
index 00000000000..a9c10fec3ea
--- /dev/null
+++ b/tests/bugs/shard/bug-1605056-2.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 25
+TEST $CLI volume set $V0 performance.write-behind off
+
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Perform a write that would cause 25 shards to be created under .shard
+TEST dd if=/dev/zero of=$M0/foo bs=1M count=104
+
+# Write into another file bar to ensure all of foo's shards are evicted from lru list of $M0
+TEST dd if=/dev/zero of=$M0/bar bs=1M count=104
+
+# Delete foo from $M0. If there's a bug, the mount will crash.
+TEST unlink $M0/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/bug-1605056.t b/tests/bugs/shard/bug-1605056.t
new file mode 100644
index 00000000000..c2329ea79f8
--- /dev/null
+++ b/tests/bugs/shard/bug-1605056.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+SHARD_COUNT_TIME=5
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 25
+TEST $CLI volume set $V0 performance.write-behind off
+
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M1
+
+# Perform a write that would cause 25 shards to be created under .shard
+TEST dd if=/dev/zero of=$M0/foo bs=1M count=104
+
+# Read the file from $M1, indirectly filling up the lru list.
+TEST `cat $M1/foo > /dev/null`
+statedump=$(generate_mount_statedump $V0 $M1)
+sleep 1
+EXPECT "25" echo $(grep "inode-count" $statedump | cut -f2 -d'=' | tail -1)
+rm -f $statedump
+
+# Delete foo from $M0.
+TEST unlink $M0/foo
+
+# Send stat on foo from $M1 to force $M1 to "forget" inode associated with foo.
+# Now the ghost shards associated with "foo" are still in lru list of $M1.
+TEST ! stat $M1/foo
+
+# Let's force the ghost shards of "foo" out of lru list by looking up more shards
+# through I/O on a file named "bar" from $M1. This should crash if the base inode
+# had been destroyed by now.
+
+TEST dd if=/dev/zero of=$M1/bar bs=1M count=104
+
+###############################################
+#### Now for some inode ref-leak tests ... ####
+###############################################
+
+# Expect there to be 29 active inodes - 26 belonging to "bar", 1 for .shard,
+# 1 for .shard/remove_me and 1 for '/'
+EXPECT_WITHIN $SHARD_COUNT_TIME `expr 26 + 3` get_mount_active_size_value $V0 $M1
+
+TEST rm -f $M1/bar
+EXPECT_WITHIN $SHARD_COUNT_TIME 3 get_mount_active_size_value $V0 $M1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/shard-inode-refcount-test.t b/tests/bugs/shard/shard-inode-refcount-test.t
index 087c8ba7815..3fd181be690 100644
--- a/tests/bugs/shard/shard-inode-refcount-test.t
+++ b/tests/bugs/shard/shard-inode-refcount-test.t
@@ -21,7 +21,7 @@ TEST dd if=/dev/zero conv=fsync of=$M0/one-plus-five-shards bs=1M count=23
 ACTIVE_INODES_BEFORE=$(get_mount_active_size_value $V0)
 TEST rm -f $M0/one-plus-five-shards
 # Expect 5 inodes less. But one inode more than before because .remove_me would be created.
-EXPECT_WITHIN $SHARD_COUNT_TIME `expr $ACTIVE_INODES_BEFORE - 5 + 1` get_mount_active_size_value $V0
+EXPECT_WITHIN $SHARD_COUNT_TIME `expr $ACTIVE_INODES_BEFORE - 5 + 1` get_mount_active_size_value $V0 $M0
 
 EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
 TEST $CLI volume stop $V0
diff --git a/tests/volume.rc b/tests/volume.rc
index cc0b5c670e4..261c6554d46 100644
--- a/tests/volume.rc
+++ b/tests/volume.rc
@@ -857,7 +857,8 @@ function get_active_fd_count {
 
 function get_mount_active_size_value {
         local vol=$1
-        local statedump=$(generate_mount_statedump $vol)
+        local mount=$2
+        local statedump=$(generate_mount_statedump $vol $mount)
         sleep 1
         local val=$(grep "active_size" $statedump | cut -f2 -d'=' | tail -1)
         rm -f $statedump
@@ -866,7 +867,8 @@ function get_mount_active_size_value {
 
 function get_mount_lru_size_value {
         local vol=$1
-        local statedump=$(generate_mount_statedump $vol)
+        local mount=$2
+        local statedump=$(generate_mount_statedump $vol $mount)
         sleep 1
         local val=$(grep "lru_size" $statedump | cut -f2 -d'=' | tail -1)
         rm -f $statedump
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
index c2fde028f08..e721526d4bc 100644
--- a/xlators/features/shard/src/shard.c
+++ b/xlators/features/shard/src/shard.c
@@ -650,7 +650,8 @@ out:
 
 inode_t *
 __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
-                                 inode_t *base_inode, int block_num)
+                                 inode_t *base_inode, int block_num,
+                                 uuid_t gfid)
 {
     char block_bname[256] = {
         0,
@@ -680,10 +681,13 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
             inode_ref(linked_inode);
             if (base_inode)
                 gf_uuid_copy(ctx->base_gfid, base_inode->gfid);
+            else
+                gf_uuid_copy(ctx->base_gfid, gfid);
             ctx->block_num = block_num;
             list_add_tail(&ctx->ilist, &priv->ilist_head);
             priv->inode_count++;
-            ctx->base_inode = base_inode;
+            if (base_inode)
+                ctx->base_inode = inode_ref(base_inode);
         } else {
             /*If on the other hand there is no available slot for this inode
              * in the list, delete the lru inode from the head of the list,
@@ -701,6 +705,8 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
              * deleted from fsync list and fsync'd in a new frame,
              * and then unlinked in memory and forgotten.
              */
+            if (!lru_base_inode)
+                goto after_fsync_check;
             LOCK(&lru_base_inode->lock);
             LOCK(&lru_inode->lock);
             {
@@ -716,6 +722,7 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
             UNLOCK(&lru_inode->lock);
             UNLOCK(&lru_base_inode->lock);
 
+        after_fsync_check:
             if (!do_fsync) {
                 shard_make_block_bname(lru_inode_ctx->block_num,
                                        lru_inode_ctx->base_gfid, block_bname,
@@ -728,20 +735,31 @@ __shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
                 inode_forget(lru_inode, 0);
             } else {
                 fsync_inode = lru_inode;
-                inode_unref(lru_base_inode);
+                if (lru_base_inode)
+                    inode_unref(lru_base_inode);
             }
             /* The following unref corresponds to the ref
              * held by inode_find() above.
              */
             inode_unref(lru_inode);
+
+            /* The following unref corresponds to the ref held on the base shard
+             * at the time of adding shard inode to lru list
+             */
+            if (lru_base_inode)
+                inode_unref(lru_base_inode);
+
             /* For as long as an inode is in lru list, we try to
              * keep it alive by holding a ref on it.
              */
             inode_ref(linked_inode);
             if (base_inode)
                 gf_uuid_copy(ctx->base_gfid, base_inode->gfid);
+            else
+                gf_uuid_copy(ctx->base_gfid, gfid);
             ctx->block_num = block_num;
-            ctx->base_inode = base_inode;
+            if (base_inode)
+                ctx->base_inode = inode_ref(base_inode);
             list_add_tail(&ctx->ilist, &priv->ilist_head);
         }
     } else {
@@ -1025,7 +1043,7 @@ shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
             LOCK(&priv->lock);
             {
                 fsync_inode = __shard_update_shards_inode_list(
-                    inode, this, res_inode, shard_idx_iter);
+                    inode, this, res_inode, shard_idx_iter, gfid);
             }
             UNLOCK(&priv->lock);
             shard_idx_iter++;
@@ -2167,7 +2185,7 @@ shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode,
     LOCK(&priv->lock);
     {
         fsync_inode = __shard_update_shards_inode_list(
-            linked_inode, this, local->loc.inode, block_num);
+            linked_inode, this, local->loc.inode, block_num, gfid);
     }
     UNLOCK(&priv->lock);
     if (fsync_inode)
@@ -2875,17 +2893,18 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num)
     shard_inode_ctx_t *ctx = NULL;
     shard_inode_ctx_t *base_ictx = NULL;
     gf_boolean_t unlink_unref_forget = _gf_false;
+    int unref_base_inode = 0;
 
     this = THIS;
     priv = this->private;
 
     inode = local->inode_list[shard_block_num - local->first_block];
-    base_inode = local->resolver_base_inode;
+    shard_inode_ctx_get(inode, this, &ctx);
+    base_inode = ctx->base_inode;
     if (base_inode)
         gf_uuid_copy(gfid, base_inode->gfid);
     else
-        gf_uuid_copy(gfid, local->base_gfid);
-
+        gf_uuid_copy(gfid, ctx->base_gfid);
     shard_make_block_bname(shard_block_num, gfid, block_bname,
                            sizeof(block_bname));
 
@@ -2898,17 +2917,16 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num)
         if (!list_empty(&ctx->ilist)) {
             list_del_init(&ctx->ilist);
             priv->inode_count--;
+            unref_base_inode++;
             GF_ASSERT(priv->inode_count >= 0);
             unlink_unref_forget = _gf_true;
         }
         if (ctx->fsync_needed) {
-            if (base_inode)
-                inode_unref(base_inode);
+            unref_base_inode++;
             list_del_init(&ctx->to_fsync_list);
-            if (base_inode) {
+            if (base_inode)
                 __shard_inode_ctx_get(base_inode, this, &base_ictx);
-                base_ictx->fsync_count--;
-            }
+            base_ictx->fsync_count--;
         }
     }
     UNLOCK(&inode->lock);
@@ -2919,6 +2937,8 @@ shard_unlink_block_inode(shard_local_t *local, int shard_block_num)
         inode_unref(inode);
         inode_forget(inode, 0);
     }
+    if (base_inode && unref_base_inode)
+        inode_ref_reduce_by_n(base_inode, unref_base_inode);
     UNLOCK(&priv->lock);
 }
 
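The two new .t files above are ordinary TAP regression tests. A sketch of exercising them locally, assuming a built and installed glusterfs from this source tree and the project's usual test prerequisites (run as root on a scratch machine); the prove invocation mirrors what the run-tests.sh harness does:

    # Run the new shard lru/refcount regression tests individually.
    prove -vf tests/bugs/shard/bug-1605056.t
    prove -vf tests/bugs/shard/bug-1605056-2.t

    # Or hand them to the regression harness directly.
    ./run-tests.sh tests/bugs/shard/bug-1605056.t tests/bugs/shard/bug-1605056-2.t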
