From 6630fff4812f4e8617336b98d8e3ac35976e5990 Mon Sep 17 00:00:00 2001 From: Shyam Date: Tue, 12 Aug 2014 10:48:27 -0400 Subject: cluster/dht: Fix dht_access treating directory like files When the cluster topology changes due to add-brick, all sub volumes of DHT will not contain the directories till a rebalance is completed. Till the rebalance is run, if a caller bypasses lookup and calls access due to saved/cached inode information (like NFS server does) then, dht_access misreads the error (ESTALE/ENOENT) from the new subvolumes and incorrectly tries to handle the inode as a file. This results in the directories in memory state in DHT to be corrupted and not heal even post a rebalance. This commit fixes the problem in dht_access thereby preventing DHT from misrepresenting a directory as a file in the case presented above. Change-Id: Idcdaa3837db71c8fe0a40ec0084a6c3dbe27e772 BUG: 1125824 Signed-off-by: Shyam Reviewed-on: http://review.gluster.org/8462 Tested-by: Gluster Build System Reviewed-by: Vijay Bellur --- tests/bugs/bug-1125824.t | 100 +++++++++++++++++++++++++++++++ xlators/cluster/dht/src/dht-inode-read.c | 9 +-- 2 files changed, 105 insertions(+), 4 deletions(-) create mode 100755 tests/bugs/bug-1125824.t diff --git a/tests/bugs/bug-1125824.t b/tests/bugs/bug-1125824.t new file mode 100755 index 00000000000..fb4fb00cf88 --- /dev/null +++ b/tests/bugs/bug-1125824.t @@ -0,0 +1,100 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc +. $(dirname $0)/../nfs.rc + +create_files () { + for i in {1..10}; do + orig=$(printf %s/file%04d $1 $i) + echo "This is file $i" > $orig + done + for i in {1..10}; do + mkdir $(printf %s/dir%04d $1 $i) + done + sync +} + +create_dirs () { + for i in {1..10}; do + mkdir $(printf %s/dir%04d $1 $i) + create_files $(printf %s/dir%04d $1 $i) + done + sync +} + +stat_files () { + for i in {1..10}; do + orig=$(printf %s/file%04d $1 $i) + stat $orig + done + for i in {1..10}; do + stat $(printf %s/dir%04d $1 $i) + done + sync +} + +stat_dirs () { + for i in {1..10}; do + stat $(printf %s/dir%04d $1 $i) + stat_files $(printf %s/dir%04d $1 $i) + done + sync +} + +cleanup; + +TEST glusterd +TEST pidof glusterd +TEST $CLI volume info; + +TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}; + +EXPECT "$V0" volinfo_field $V0 'Volume Name'; +EXPECT 'Created' volinfo_field $V0 'Status'; +EXPECT '4' brick_count $V0 + +TEST $CLI volume start $V0; +EXPECT 'Started' volinfo_field $V0 'Status'; + +EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available; +TEST mount_nfs $H0:/$V0 $N0 + +# Create and poulate the NFS inode tables +TEST create_dirs $N0 +TEST stat_dirs $N0 + +# add-bricks changing the state of the volume where some bricks +# would have some directories and others would not +TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}{5,6,7,8} + +# Post this dht_access was creating a mess for directories which is fixed +# with this commit. The issues could range from getting ENOENT or +# ESTALE or entries missing to directories not having complete +# layouts. +TEST cd $N0 +TEST ls -lR + +TEST $CLI volume rebalance $V0 start force +EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0 + +# tests to check post rebalance if layouts and entires are fine and +# accessible by NFS to clear the volume +TEST ls -lR +rm -rf ./* +# There are additional bugs where NFS+DHT does not delete all entries +# on an rm -rf, so we do an additional rm -rf to ensure all is done +# and we are facing this transient issue, rather than a bad directory +# layout that is cached in memory +TEST rm -rf ./* + +# Get out of the mount, so that umount can work +TEST cd / + +EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0 +TEST $CLI volume stop $V0; +EXPECT 'Stopped' volinfo_field $V0 'Status'; +TEST $CLI volume delete $V0; +TEST ! $CLI volume info $V0; + +cleanup; diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c index 9a7b3e22354..d78dd2ea0ef 100644 --- a/xlators/cluster/dht/src/dht-inode-read.c +++ b/xlators/cluster/dht/src/dht-inode-read.c @@ -515,9 +515,9 @@ dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this, goto out; if (local->call_cnt != 1) goto out; - if ((op_ret == -1) && (op_errno == ENOTCONN) && - IA_ISDIR(local->loc.inode->ia_type)) { - + if ((op_ret == -1) && ((op_errno == ENOTCONN) || + dht_inode_missing(op_errno)) && + IA_ISDIR(local->loc.inode->ia_type)) { subvol = dht_subvol_next_available (this, prev->this); if (!subvol) goto out; @@ -531,7 +531,8 @@ dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this, &local->loc, local->rebalance.flags, NULL); return 0; } - if ((op_ret == -1) && dht_inode_missing(op_errno)) { + if ((op_ret == -1) && dht_inode_missing(op_errno) && + !(IA_ISDIR(local->loc.inode->ia_type))) { /* File would be migrated to other node */ local->op_errno = op_errno; local->rebalance.target_op_fn = dht_access2; -- cgit