diff options
| author | Xavier Hernandez <xhernandez@datalab.es> | 2014-10-08 09:20:11 +0200 | 
|---|---|---|
| committer | Vijay Bellur <vbellur@redhat.com> | 2014-10-22 01:05:19 -0700 | 
| commit | 4522acc20bdd1ca17c053969ef7edce1bb6ede76 (patch) | |
| tree | 5eaecfac3d913a0662de21f13344a2e037846621 /tests | |
| parent | d01b00ae2b124dfdd6905e463533a715f1cedc5b (diff) | |
ec: Fix self-heal issues
Problem: Doing an 'ls' of a directory that has been modified while one
         of the bricks was down, sometimes returns the old directory
         contents.
Cause: Directories are not marked when they are modified as files are.
       The ec xlator balances requests amongst available and healthy
       bricks. Since there is no way to detect that a directory is
       out of date in one of the bricks, it is used from time to time
       to return the directory contents.
Solution: Basically, the solution consists of using versioning information
          for directories as well; however, some additional changes have
          been necessary.
Changes:
 * Use directory versioning:
     This requires locking the full directory instead of a single entry for
     all requests that add or remove entries from it. This is needed to
     allow an atomic version update. This affects the following fops:
         create, mkdir, mknod, link, symlink, rename, unlink, rmdir
     Another side effect is that opendir needs to do a prior lookup to
     get versioning information and discard out-of-date bricks for
     subsequent readdir(p) calls.
 * Restrict directory self-heal:
     Until now, when a discrepancy was found in lookup, a self-heal
     was automatically started. This caused the versioning information
     of a bad directory to be healed instantly, causing the original
     problem to reappear.
     To solve this, when a missing directory is detected in one or more
     bricks on lookup or opendir fops, only a partial self-heal is
     performed on it. A partial self-heal basically creates the
     directory but does not restore any additional information.
     This prevents an 'ls' from repairing the directory and causing the
     problem to happen again. With this change, output of 'ls' is
     always consistent. However, since the directory has been created
     in the brick, this allows any other operation on it (create new
     files, for example) to succeed on all bricks and not add additional
     work to the self-heal process.
     To force a self-heal of a directory, any other operation must be
     done on it. For example a getxattr.
     With these changes, the correct healing procedure that would avoid
     inconsistent directory browsing consists of a post-order traversal
     of the directories being healed. This way, the directory contents will
     be healed before healing the directory itself.
 * Additional changes to fix self-heal errors
     - Don't use fop->fd to decide between fd/loc.
         open, opendir and create have an fd, but the correct data is in
         loc.
     - Fix incorrect management of bad bricks per inode/fd.
     - Fix incorrect selection of fop's target bricks when there are bad
       bricks involved.
     - Improved ec_loc_parent() to always return a parent loc as
       complete as possible.
This is a backport of http://review.gluster.org/8916/
Change-Id: Iaf3df174d7857da57d4a87b4a8740a7048b366ad
BUG: 1149727
Signed-off-by: Xavier Hernandez <xhernandez@datalab.es>
Reviewed-on: http://review.gluster.org/8946
Reviewed-by: Dan Lambright <dlambrig@redhat.com>
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Diffstat (limited to 'tests')
| -rw-r--r-- | tests/basic/ec/ec.t | 59 | ||||
| -rw-r--r-- | tests/basic/ec/self-heal.t | 157 | 
2 files changed, 169 insertions, 47 deletions
diff --git a/tests/basic/ec/ec.t b/tests/basic/ec/ec.t index 4c61f950fa5..91bad52499a 100644 --- a/tests/basic/ec/ec.t +++ b/tests/basic/ec/ec.t @@ -45,66 +45,97 @@ function check_truncate {  }  function check_hard_link { +    stat $M0/hard-link-1 +    stat $M0/hard-link-2      for b in $*; do          inum1=$(ls -i $b/hard-link-1 | cut -d' ' -f1)          inum2=$(ls -i $b/hard-link-2 | cut -d' ' -f1) -        [ "$inum1" = "$inum2" ] || return 1 +        if [ "$inum1" != "$inum2" ]; then +            echo "N" +            return 0 +        fi      done      echo "Y"      return 0  }  function check_soft_link { +    stat $M0/soft-link      for b in $*; do -        [ "$(readlink $b/soft-link)" = "soft-link-tgt" ] || return 1 +        if [ "$(readlink $b/soft-link)" != "soft-link-tgt" ]; then +            echo "N" +            return 0 +        fi      done      echo "Y"      return 0  }  function check_unlink { +    stat $M0/unlink      for b in $*; do -        [ ! -e $b/unlink ] || return 1 +        if [ -e $b/unlink ]; then +            echo "N" +            return 0 +        fi      done      echo "Y"      return 0  }  function check_mkdir { +    getfattr -m. -d $M0/mkdir      for b in $*; do -        [ -d $b/mkdir ] || return 1 +        if [ ! -d $b/mkdir ]; then +            echo "N" +            return 0 +        fi      done      echo "Y"      return 0  }  function check_rmdir { +    getfattr -m. -d $M0/rmdir      for b in $*; do -        [ ! 
-e $b/rmdir ] || return 1 +        if [ -e $b/rmdir ]; then +            echo "N" +            return 0 +        fi      done      echo "Y"      return 0  }  function check_setxattr { +    stat $M0/setxattr      for b in $*; do          v=$(my_getfattr -n user.foo $b/setxattr) -        [ "$v" = "ash_nazg_durbatuluk" ] || return 1 +        if [ "$v" != "ash_nazg_durbatuluk" ]; then +            echo "N" +            return 0 +        fi      done      echo "Y"      return 0  }  function check_removexattr { +    stat $M0/removexattr      for b in $*; do          my_getfattr -n user.bar $b/removexattr 2> /dev/null -        [ $? = 0 ] && return 1 +        if [ $? -eq 0 ]; then +            echo "N" +            return 0 +        fi      done      echo "Y"      return 0  }  function check_perm_file { +    stat $M0/perm_dir/perm_file +    getfattr -m. -d $M0/perm_dir      b1=$1      shift 1      ftext=$(stat -c "%u %g %a" $b1/perm_dir/perm_file) @@ -113,7 +144,8 @@ function check_perm_file {          btext=$(stat -c "%u %g %a" $b/perm_dir/perm_file)          #echo "  next u/a/a = $btext" > /dev/tty          if [ x"$btext" != x"$ftext" ]; then -            return 1 +            echo "N" +            return 0          fi      done      echo "Y" @@ -210,17 +242,6 @@ EXPECT_WITHIN $CHILD_UP_TIMEOUT "10" ec_child_up_count $V0 0  TEST check_create_write $M0  TEST check_truncate $M0 -TEST stat $M0/hard-link-1 -TEST stat $M0/hard-link-2 -TEST stat $M0/soft-link -TEST ! stat $M0/unlink -TEST ! 
stat $M0/rmdir -TEST stat $M0/mkdir -TEST stat $M0/setxattr -TEST stat $M0/removexattr -TEST stat $M0/perm_dir -TEST stat $M0/perm_dir/perm_file -  EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_hard_link $B0/${V0}{0..9}  EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_soft_link $B0/${V0}{0..9}  EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_unlink $B0/${V0}{0..9} diff --git a/tests/basic/ec/self-heal.t b/tests/basic/ec/self-heal.t index 9d5e5844062..3df19184169 100644 --- a/tests/basic/ec/self-heal.t +++ b/tests/basic/ec/self-heal.t @@ -7,12 +7,119 @@  cleanup -tmp=`mktemp -d` +function check_mount_dir +{ +    for i in {1..20}; do +        ls | grep "dir1" +        if [ $? -ne 0 ]; then +            return 1 +        fi +    done + +    return 0 +} + +function check_size +{ +    stat $1 +    for i in "${brick[@]}"; do +        res=`stat -c "%s" $i/$1` +        if [ "$res" != "$2" ]; then +            echo "N" +            return 0 +        fi +    done +    echo "Y" +    return 0 +} + +function check_mode +{ +    stat $1 +    for i in "${brick[@]}"; do +        res=`stat -c "%A" $i/$1` +        if [ "$res" != "$2" ]; then +            echo "N" +            return 0 +        fi +    done +    echo "Y" +    return 0 +} + +function check_date +{ +    stat $1 +    for i in "${brick[@]}"; do +        res=`stat -c "%Y" $i/$1` +        if [ "$res" != "$2" ]; then +            echo "N" +            return 0 +        fi +    done +    echo "Y" +    return 0 +} + +function check_xattr +{ +    stat $1 +    for i in "${brick[@]}"; do +        getfattr -n $2 $i/$1 2>/dev/null +        if [ $? -eq 0 ]; then +            echo "N" +            return 0 +        fi +    done +    echo "Y" +    return 0 +} + +function check_dir +{ +    getfattr -m. -d dir1 +    for i in "${brick[@]}"; do +        if [ ! 
-d $i/dir1 ]; then +            echo "N" +            return 0 +        fi +    done +    echo "Y" +    return 0 +} + +function check_soft_link +{ +    stat test3 +    for i in "${brick[@]}"; do +        if [ ! -h $i/test3 ]; then +            echo "N" +            return 0 +        fi +    done +    echo "Y" +    return 0 +} + +function check_hard_link +{ +    stat test4 +    for i in "${brick[@]}"; do +        if [ `stat -c "%h" $i/test4` -ne 3 ]; then +            echo "N" +            return 0 +        fi +    done +    echo "Y" +    return 0 +} + +tmp=`mktemp -d -t ${0##*/}.XXXXXX`  if [ ! -d $tmp ]; then      exit 1  fi -TESTS_EXPECTED_IN_LOOP=250 +TESTS_EXPECTED_IN_LOOP=194  TEST glusterd  TEST pidof glusterd @@ -20,7 +127,8 @@ TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5}  EXPECT "Created" volinfo_field $V0 'Status'  TEST $CLI volume start $V0  EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status' -TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-id=/$V0 --volfile-server=$H0 $M0; +TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 +# Wait until all 6 childs have been recognized by the ec xlator  EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0  TEST dd if=/dev/urandom of=$tmp/test bs=1024 count=1024 @@ -45,12 +153,11 @@ for idx1 in {0..5}; do      TEST chmod 666 ${brick[$idx1]}/test      TEST truncate -s 0 ${brick[$idx1]}/test      TEST setfattr -n user.test -v "test1" ${brick[$idx1]}/test -    sleep 1      EXPECT "-rw-r--r--" stat -c "%A" test -    EXPECT_WITHIN $HEAL_TIMEOUT "262144" stat -c "%s" ${brick[$idx1]}/test -    EXPECT_WITHIN $HEAL_TIMEOUT "-rw-r--r--" stat -c "%A" ${brick[$idx1]}/test -    EXPECT_WITHIN $HEAL_TIMEOUT "946681200" stat -c "%Y" ${brick[$idx1]}/test -    TEST ! 
getfattr -n user.test ${brick[$idx1]}/test +    EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_size test "262144" +    EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_mode test "-rw-r--r--" +    EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_date test "946681200" +    EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_xattr test "user.test"  done  for idx1 in {0..4}; do @@ -62,16 +169,11 @@ for idx1 in {0..4}; do              TEST truncate -s 2097152 ${brick[$idx2]}/test              TEST setfattr -n user.test -v "test1" ${brick[$idx1]}/test              TEST setfattr -n user.test -v "test2" ${brick[$idx2]}/test -            sleep 1              EXPECT "-rw-r--r--" stat -c "%A" test -            EXPECT_WITHIN $HEAL_TIMEOUT "262144" stat -c "%s" ${brick[$idx1]}/test -            EXPECT_WITHIN $HEAL_TIMEOUT "262144" stat -c "%s" ${brick[$idx2]}/test -            EXPECT_WITHIN $HEAL_TIMEOUT "-rw-r--r--" stat -c "%A" ${brick[$idx1]}/test -            EXPECT_WITHIN $HEAL_TIMEOUT "-rw-r--r--" stat -c "%A" ${brick[$idx2]}/test -            EXPECT_WITHIN $HEAL_TIMEOUT "946681200" stat -c "%Y" ${brick[$idx1]}/test -            EXPECT_WITHIN $HEAL_TIMEOUT "946681200" stat -c "%Y" ${brick[$idx2]}/test -            TEST ! getfattr -n user.test ${brick[$idx1]}/test -            TEST ! 
getfattr -n user.test ${brick[$idx2]}/test +            EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_size test "262144" +            EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_mode test "-rw-r--r--" +            EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_date test "946681200" +            EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_xattr test "user.test"          fi      done  done @@ -95,27 +197,26 @@ EXPECT "2" stat -c "%h" test2  EXPECT "2" stat -c "%h" test4  TEST $CLI volume start $V0 force +# Wait until the killed bricks have been started and recognized by the ec +# xlator  EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 +TEST check_mount_dir +  EXPECT "1048576" stat -c "%s" test2  EXPECT "-rwxrwxrwx" stat -c "%A" test2 -EXPECT_WITHIN $HEAL_TIMEOUT "262144" stat -c "%s" ${brick[0]}/test2 -EXPECT_WITHIN $HEAL_TIMEOUT "262144" stat -c "%s" ${brick[1]}/test2 -EXPECT_WITHIN $HEAL_TIMEOUT "-rwxrwxrwx" stat -c "%A" ${brick[0]}/test2 -EXPECT_WITHIN $HEAL_TIMEOUT "-rwxrwxrwx" stat -c "%A" ${brick[1]}/test2 +EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_size test2 "262144" +EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_mode test2 "-rwxrwxrwx"  TEST ls -al dir1 -EXPECT_WITHIN $HEAL_TIMEOUT "1" eval "if [ -d ${brick[0]}/dir1 ]; then echo 1; fi" -EXPECT_WITHIN $HEAL_TIMEOUT "1" eval "if [ -d ${brick[1]}/dir1 ]; then echo 1; fi" +EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_dir  TEST [ -h test3 ] -EXPECT_WITHIN $HEAL_TIMEOUT "1" eval "if [ -h ${brick[0]}/test3 ]; then echo 1; fi" -EXPECT_WITHIN $HEAL_TIMEOUT "1" eval "if [ -h ${brick[1]}/test3 ]; then echo 1; fi" +EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_soft_link  EXPECT "2" stat -c "%h" test4 -EXPECT_WITHIN $HEAL_TIMEOUT "3" stat -c "%h" ${brick[0]}/test4 -EXPECT_WITHIN $HEAL_TIMEOUT "3" stat -c "%h" ${brick[1]}/test4 +EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_hard_link -rm -rf $tmp +TEST rm -rf $tmp  cleanup  | 
