From 50d72373fadf1174918e754e31bae3806aa4c214 Mon Sep 17 00:00:00 2001 From: Sanoj Unnikrishnan Date: Thu, 29 Jun 2017 15:39:35 +0530 Subject: Scripts to identify quota accouting issues The patch contains 2 scripts: log_accounting.sh does a du -h on the FS hierarchy and a quota list on the hierarchy and interleaves the two output. We can then identify which directory(s) in FS has caused the accounting to go bad and try to investigate what fops happened on those directories. We can also limit the set of directories on which we need to set dirty xattr to correct accounting. xattr_analysis.py reads all the xattr of a brick and dumps it a human readable form to ease debugging. Change-Id: I2155561d10c08dc3ab9e8b09dbd258f0592b4d33 BUG: 1466188 Signed-off-by: Sanoj Unnikrishnan Reviewed-on: https://review.gluster.org/17649 CentOS-regression: Gluster Build System Smoke: Gluster Build System Reviewed-by: Raghavendra G --- extras/contri-add.sh | 73 ------------------------------------------ extras/quota/contri-add.sh | 73 ++++++++++++++++++++++++++++++++++++++++++ extras/quota/log_accounting.sh | 26 +++++++++++++++ extras/quota/xattr_analysis.py | 72 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 171 insertions(+), 73 deletions(-) delete mode 100755 extras/contri-add.sh create mode 100755 extras/quota/contri-add.sh create mode 100755 extras/quota/log_accounting.sh create mode 100755 extras/quota/xattr_analysis.py diff --git a/extras/contri-add.sh b/extras/contri-add.sh deleted file mode 100755 index 7db5edd5d20..00000000000 --- a/extras/contri-add.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# This script adds contributions of files/directories in backend to volume -# size. -# It can also be used to debug by passing dir as first argument, in which case -# it will just add contributions from immediate children of a directory and -# displays only if added contributions from immediate children is different -# from size stored in directory. 
-# For Eg., find -type d -exec ./contri-add.sh dir \{} \; -# will list all the directories which have descrepancies in their -# size/contributions. - -usage () -{ - echo >&2 "usage: $0 " -} - -add_contributions () -{ - local var=0 - local count=0 - - SIZE=`getfattr -h -e hex -n trusted.glusterfs.quota.size $2 2>&1 | sed -e '/^#/d' | sed -e '/^getfattr/d' | sed -e '/^$/d' | cut -d'=' -f 2` - CONTRI=`getfattr -h -e hex -d -m trusted.glusterfs.quota.*.contri $2 2>&1 | sed -e '/^#/d' | sed -e '/^getfattr/d' | sed -e '/^$/d' | cut -d'=' -f 2` - - if [ $1 == "file" ]; then - PATHS=`find $2 ! -type d | sed -e "\|^$2$|d" | sed -e '/^[ \t]*$/d'` - else - PATHS=`find $2 -maxdepth 1 | sed -e "\|^$2$|d" | sed -e '/^[ \t]*$/d'` - fi - - if [ -z "$PATHS" ]; then - return 0 - fi - - CONTRIBUTIONS=`echo $PATHS | xargs getfattr -h -e hex -d -m trusted.glusterfs.quota.*.contri 2>&1 | sed -e '/^#/d' | sed -e '/^getfattr/d' | sed -e '/^$/d' | cut -d'=' -f 2 | sed -e 's/^[ \t]*\([^ \t]*\)/\1/g'` - - if [ -n "$CONTRIBUTIONS" ]; then - for i in $CONTRIBUTIONS; do - count=$(($count + 1)) - var=$(($var + $i)) - done - fi - - if [ $1 == "file" ] || [ $var -ne $(($SIZE)) ] || [ $(($SIZE)) -ne $(($CONTRI)) ]; then - if [ $1 == "dir" ]; then - TMP_PATH=`echo $2 | sed -e "s/\/home\/export\/[0-9]*/\/mnt\/raghu/g"` - stat $TMP_PATH > /dev/null - fi - - echo "file count $count" - echo "added contribution of $2=$var" - echo "size stored in xattrs on $2=$(($SIZE))" - echo "contribution of $2 to its parent directory=$(($CONTRI))" - echo "==============================================================" - fi -} - - -main () -{ - [ $# -lt 1 ] && usage - - TYPE=$1 - - shift 1 - - for i in $@; do - add_contributions $TYPE $i - done -} - -main $@ \ No newline at end of file diff --git a/extras/quota/contri-add.sh b/extras/quota/contri-add.sh new file mode 100755 index 00000000000..7db5edd5d20 --- /dev/null +++ b/extras/quota/contri-add.sh @@ -0,0 +1,73 @@ +#!/bin/bash + +# This script adds contributions of 
files/directories in backend to volume +# size. +# It can also be used to debug by passing dir as first argument, in which case +# it will just add contributions from immediate children of a directory and +# displays only if added contributions from immediate children is different +# from size stored in directory. +# For Eg., find -type d -exec ./contri-add.sh dir \{} \; +# will list all the directories which have descrepancies in their +# size/contributions. + +usage () +{ + echo >&2 "usage: $0 " +} + +add_contributions () +{ + local var=0 + local count=0 + + SIZE=`getfattr -h -e hex -n trusted.glusterfs.quota.size $2 2>&1 | sed -e '/^#/d' | sed -e '/^getfattr/d' | sed -e '/^$/d' | cut -d'=' -f 2` + CONTRI=`getfattr -h -e hex -d -m trusted.glusterfs.quota.*.contri $2 2>&1 | sed -e '/^#/d' | sed -e '/^getfattr/d' | sed -e '/^$/d' | cut -d'=' -f 2` + + if [ $1 == "file" ]; then + PATHS=`find $2 ! -type d | sed -e "\|^$2$|d" | sed -e '/^[ \t]*$/d'` + else + PATHS=`find $2 -maxdepth 1 | sed -e "\|^$2$|d" | sed -e '/^[ \t]*$/d'` + fi + + if [ -z "$PATHS" ]; then + return 0 + fi + + CONTRIBUTIONS=`echo $PATHS | xargs getfattr -h -e hex -d -m trusted.glusterfs.quota.*.contri 2>&1 | sed -e '/^#/d' | sed -e '/^getfattr/d' | sed -e '/^$/d' | cut -d'=' -f 2 | sed -e 's/^[ \t]*\([^ \t]*\)/\1/g'` + + if [ -n "$CONTRIBUTIONS" ]; then + for i in $CONTRIBUTIONS; do + count=$(($count + 1)) + var=$(($var + $i)) + done + fi + + if [ $1 == "file" ] || [ $var -ne $(($SIZE)) ] || [ $(($SIZE)) -ne $(($CONTRI)) ]; then + if [ $1 == "dir" ]; then + TMP_PATH=`echo $2 | sed -e "s/\/home\/export\/[0-9]*/\/mnt\/raghu/g"` + stat $TMP_PATH > /dev/null + fi + + echo "file count $count" + echo "added contribution of $2=$var" + echo "size stored in xattrs on $2=$(($SIZE))" + echo "contribution of $2 to its parent directory=$(($CONTRI))" + echo "==============================================================" + fi +} + + +main () +{ + [ $# -lt 1 ] && usage + + TYPE=$1 + + shift 1 + + for i in $@; do 
+ add_contributions $TYPE $i + done +} + +main $@ \ No newline at end of file diff --git a/extras/quota/log_accounting.sh b/extras/quota/log_accounting.sh new file mode 100755 index 00000000000..9c2381f6a95 --- /dev/null +++ b/extras/quota/log_accounting.sh @@ -0,0 +1,26 @@ +#!/usr/bin/bash +# The script does an accounting of all directories using command 'du' and +# using gluster. We can then compare the two to identify accounting mismatch +# THere can be minor mismatch because gluster only accounts for the size of +# files. Direcotries can take up upto 4kB space on FS per directory. THis +# size is accounted by du and not by gluster. However the difference would +# not be significant. + +mountpoint=$1 +volname=$2 + +usage () +{ + echo >&2 "usage: $0 " + exit +} + +[ $# -lt 2 ] && usage + +cd $mountpoint +du -h | head -n -1 | tr -d '.' |awk '{ for (i = 2; i <= NF; i++) { printf("%s ", $i);} print "" }' > /tmp/gluster_quota_1 +cat /tmp/gluster_quota_1 | sed 's/ $//' | sed 's/ /\\ /g' | sed 's/(/\\(/g' | sed 's/)/\\)/g' |xargs gluster v quota $volname list > /tmp/gluster_quota_2 +du -h | head -n -1 |awk '{ for (i = 2; i <= NF; i++) { printf("%s %s", $i, $1);} print "" }' | tr -d '.' > /tmp/gluster_quota_3 +cat /tmp/gluster_quota_2 /tmp/gluster_quota_3 | sort > /tmp/gluster_quota_4 +find . -type d > /tmp/gluster_quota_5 +tar -cvf /tmp/gluster_quota_files.tar /tmp/gluster_quota_* diff --git a/extras/quota/xattr_analysis.py b/extras/quota/xattr_analysis.py new file mode 100755 index 00000000000..d3d1a74170b --- /dev/null +++ b/extras/quota/xattr_analysis.py @@ -0,0 +1,72 @@ +#!/usr/bin/python +# Below script has to purpose +# 1. Display xattr of entire FS tree in a human readable form +# 2. Display all the directory where contri and size mismatch. 
# (If there are any directories with a contri and size mismatch that are not
# dirty, then that highlights a propagation issue.)
# The script takes only one input, the LOG_FILE generated from the command,
# find | xargs getfattr -d -m. -e hex > log_gluster_xattr

import re
import subprocess
import sys
from hurry.filesize import size

if len(sys.argv) < 2:
    sys.exit('Usage: %s log_gluster_xattr \n'
             'to genereate log_gluster_xattr use: \n'
             'find | xargs getfattr -d -m. -e hex > log_gluster_xattr'
             % sys.argv[0])
LOG_FILE = sys.argv[1]


def _parse_quota_xattr(key, value, xdict, raw):
    """Decode one ``key=value`` line of the getfattr dump into *xdict*.

    *value* is the hex string exactly as it appears in the log (including the
    leading ``0x``).  Fields are sliced out of it as 16-hex-digit integers.
    Human-readable sizes are stored in *xdict*; the exact byte counts are
    stored in *raw* so that the caller can compare them without rounding.
    Keys that match none of the known quota patterns are ignored.
    """
    if "contri" in key:
        if len(value) == 34:
            # for files size is obtained in iatt, file count should be 1,
            # dir count=0
            xdict['contri_file_count'] = int(value[18:34], 16)
            xdict['contri_dir_count'] = 0
        else:
            raw['contri_size'] = int(value[2:18], 16)
            xdict['contri_size'] = size(raw['contri_size'])
            xdict['contri_file_count'] = int(value[18:34], 16)
            xdict['contri_dir_count'] = int(value[34:], 16)
    elif "size" in key:
        raw['size'] = int(value[2:18], 16)
        xdict['size'] = size(raw['size'])
        xdict['file_count'] = int(value[18:34], 16)
        xdict['dir_count'] = int(value[34:], 16)
    elif "dirty" in key:
        # 0x30 / 0x31 are ASCII '0' / '1'; any other value leaves 'dirty'
        # unset, as before.
        if value == '0x3000':
            xdict['dirty'] = False
        elif value == '0x3100':
            xdict['dirty'] = True
    # NOTE(review): GlusterFS appears to spell the limit xattrs with hyphens
    # (limit-set / limit-objects), in which case the original underscore
    # patterns never matched.  Both spellings are accepted here -- confirm
    # against a real getfattr dump.
    elif "limit_objects" in key or "limit-objects" in key:
        xdict['limit_objects'] = int(value[2:18], 16)
    elif "limit_set" in key or "limit-set" in key:
        xdict['limit_set'] = size(int(value[2:18], 16))


def _close_record(filename, xdict, raw, mismatch_size):
    """Print the decoded xattrs of one file and record a size mismatch.

    The comparison uses the exact byte counts in *raw*; comparing the rounded
    human-readable strings would hide differences smaller than one unit.
    """
    print(xdict)
    if ('size' in raw and 'contri_size' in raw
            and raw['size'] != raw['contri_size']):
        mismatch_size.append(
            ('%s (%d bytes)' % (xdict['contri_size'], raw['contri_size']),
             '%s (%d bytes)' % (xdict['size'], raw['size']),
             filename))


def get_quota_xattr_brick():
    """Dump the quota xattrs recorded in LOG_FILE in human readable form.

    Every non-empty line of the log is echoed.  For each ``# file:`` section
    the decoded quota xattrs are printed as a dict once the section ends
    (including the last section of the log).  Finally the list of files whose
    ``size`` and ``contri`` byte counts differ is printed, one entry per
    file.
    """
    # Read the log directly instead of spawning a hard-coded /usr/bin/cat.
    with open(LOG_FILE) as log:
        pairs = log.read().splitlines()

    xdict = {}
    raw = {}
    filename = None
    mismatch_size = [('====contri_size===', '====size====')]

    for xattr in pairs:
        if xattr.startswith("# file:"):
            # A new section starts: flush the previous one first.
            if filename is not None or xdict:
                _close_record(filename, xdict, raw, mismatch_size)
            # Keep the whole header line so paths containing '=' survive.
            filename = xattr
            print("=====" + filename + "=======")
            xdict = {}
            raw = {}
            continue

        k, sep, v = xattr.partition("=")
        if not k:
            continue

        print(xattr)
        if sep:
            # Lines without '=' carry no value to decode; they are only
            # echoed.
            _parse_quota_xattr(k, v, xdict, raw)

    # The last section has no following header to trigger the flush.
    if filename is not None or xdict:
        _close_record(filename, xdict, raw, mismatch_size)

    for values in mismatch_size:
        print(values)


if __name__ == '__main__':
    get_quota_xattr_brick()