diff options
| -rwxr-xr-x | extras/quota/quota_fsck.py | 378 | 
1 files changed, 378 insertions, 0 deletions
diff --git a/extras/quota/quota_fsck.py b/extras/quota/quota_fsck.py new file mode 100755 index 00000000000..0ca93a4b7d9 --- /dev/null +++ b/extras/quota/quota_fsck.py @@ -0,0 +1,378 @@ +#!/usr/bin/python +# The following script enables, Detecting, Reporting and Fixing +# anomalies in quota accounting. Run this script with -h option +# for further details. + +''' +   Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com> +   This file is part of GlusterFS. + +   This file is licensed to you under your choice of the GNU Lesser +   General Public License, version 3 or any later version (LGPLv3 or +   later), or the GNU General Public License, version 2 (GPLv2), in all +   cases as published by the Free Software Foundation. +''' +import os, sys, re +from stat import * +import subprocess +import argparse +import xattr + +aggr_size = {} +verbose_mode = False +mnt_path = None +brick_path = None +obj_fix_count = 0 +file_count = 0 +dir_count = 0 + +#CONSTANTS +KB = 1024 +MB = 1048576 +GB = 1048576 * 1024 +TB = 1048576 * 1048576 + +QUOTA_VERBOSE = 0 +QUOTA_META_ABSENT = 1 +QUOTA_SIZE_MISMATCH = 2 + +IS_DIRTY ='0x3100' +IS_CLEAN ='0x3000' + + +epilog_msg=''' +            The script attempts to find any gluster accounting issues in the +            filesystem at the given subtree. The script crawls the given +            subdirectory tree doing a stat for all files and compares the +            size reported by gluster quota with the size reported by stat +            calls. Any mismatch is reported. In addition integrity of marker +            xattrs are verified. +            ''' + +def print_msg(log_type, path, xattr_dict = {}, stbuf = "", dir_size = None): +    if log_type == QUOTA_VERBOSE: +        print '%-24s %-60s\nxattr_values: %s\n%s\n' % ("Verbose", path ,  xattr_dict, stbuf) +    elif log_type == QUOTA_META_ABSENT: +        print '%-24s %-60s\n%s\n' % ("Quota-Meta Absent", path , xattr_dict) +    elif log_type == QUOTA_SIZE_MISMATCH: +        print "mismatch" +        if dir_size is not None: +            print '%24s %60s %12s %12s' % ("Size Mismatch",path , xattr_dict['contri_size'], +                   dir_size) +        else: +            print '%-24s %-60s %-12i %-12i' % ("Size Mismatch",path , xattr_dict['contri_size'], +                   stbuf.st_size) + +def size_differs_lot(s1, s2): +    ''' +    There could be minor accounting differences between the stat based +    accounting and gluster accounting. To avoid these from throwing lot +    of false positives in our logs. using a threshold of 1M for now. +    TODO: For a deeply nested directory, at higher levels in hierarchy +    differences may not be significant, hence this check needs to be improved. +    ''' +    if abs(s1-s2) > 0: +        return True +    else: +        return False + +def fix_hardlink_accounting(curr_dict, accounted_dict, curr_size): +    ''' +            Hard links are messy.. we have to account them for their parent +            directory. But, stop accounting at the most common ancestor. +            Eg: +                say we have 3 hardlinks : /d1/d2/h1, /d1/d3/h2 and /d1/h3 + +            suppose we encounter the hard links h1 first , then h2 and then h3. +            while accounting for h1, we account the size until root(d2->d1->/) +            while accounting for h2, we need to account only till d3. (as d1 +            and / are accounted for this inode). +            while accounting for h3 we should not account at all.. as all +            its ancestors are already accounted for same inode. + +            curr_dict                : dict of hardlinks that were seen and +                                       accounted by the current iteration. +            accounted_dict           : dict of hardlinks that has already been +                                       accounted for. + +            size                     : size of the object as accounted by the +                                       curr_iteration. + +            Return vale: +            curr_size                : size reduced by hardlink sizes for those +                                       hardlinks that has already been accounted +                                       in current subtree. +            Also delete the duplicate link from curr_dict. +    ''' + +    dual_accounted_links = set(curr_dict.keys()) & set(accounted_dict.keys()) +    for link in dual_accounted_links: +        curr_size = curr_size - curr_dict[link] +        del curr_dict[link] +    return curr_size + + +def fix_xattr(file_name, mark_dirty): +    global obj_fix_count +    global mnt_path + +    if mnt_path is None: +        return +    if mark_dirty: +        print "MARKING DIRTY: " + file_name +        out = subprocess.check_output (["/usr/bin/setfattr", "-n", +                                       "trusted.glusterfs.quota.dirty", +                                       "-v", IS_DIRTY, file_name]) +    rel_path = os.path.relpath(file_name, brick_path) +    print "stat on "  + mnt_path + "/" + rel_path +    stbuf = os.lstat(mnt_path + "/" + rel_path) + +    obj_fix_count += 1 + +def get_quota_xattr_brick(dpath): +    out = subprocess.check_output (["/usr/bin/getfattr", "--no-dereference", +                                    "-d", "-m.", "-e", "hex", dpath]) +    pairs = out.splitlines() + +    ''' +    Sample output to be parsed: +    [root@dhcp35-100 mnt]# getfattr -d -m. -e hex /export/b1/B0/d14/d13/ +    # file: export/b1/B0/d14/d13/ +    security.selinux=0x756e636f6e66696e65645f753a6f626a6563745f723a7573725f743a733000 +    trusted.gfid=0xbae5e0d2d05043de9fd851d91ecf63e8 +    trusted.glusterfs.dht=0x000000010000000000000000ffffffff +    trusted.glusterfs.dht.mds=0x00000000 +    trusted.glusterfs.quota.6a7675a3-b85a-40c5-830b-de9229d702ce.contri.39=0x00000000000000000000000000000000000000000000000e +    trusted.glusterfs.quota.dirty=0x3000 +    trusted.glusterfs.quota.size.39=0x00000000000000000000000000000000000000000000000e +    ''' + +    ''' +    xattr_dict dictionary holds quota related xattrs +    eg: +    ''' + +    xattr_dict = {} +    xattr_dict['parents'] = {} + +    for xattr in pairs: +        xattr_key = xattr.split("=")[0] +        if re.search("# file:",xattr_key): +            # skip the file comment +            continue +        elif xattr_key is "": +            # skip any empty lines +            continue +        elif not re.search("quota",xattr_key): +            # skip all non quota xattr. +            continue + +        xattr_value = xattr.split("=")[1] +        if re.search("contri",xattr_key): + +            xattr_version = xattr_key.split(".")[5] +            if 'version' not in xattr_dict: +                xattr_dict['version'] = xattr_version +            else: +                if xattr_version != xattr_dict['version']: +                   print "Multiple xattr version found" + + +            cur_parent = xattr_key.split(".")[3] +            if cur_parent not in xattr_dict['parents']: +                xattr_dict['parents'][cur_parent] = {} + +            contri_dict = xattr_dict['parents'][cur_parent] +            if len(xattr_value) == 34: +                # 34 bytes implies file contri xattr +                # contri format =0x< 16bytes file size><16bytes file count> +                # size is obtained in iatt, file count = 1, dir count=0 +                contri_dict['contri_size'] = int(xattr_value[2:18], 16) +                contri_dict['contri_file_count'] = int(xattr_value[18:34], 16) +                contri_dict['contri_dir_count'] = 0 +            else: +                # This is a directory contri. +                contri_dict['contri_size'] = int(xattr_value[2:18], 16) +                contri_dict['contri_file_count'] = int(xattr_value[18:34], 16) +                contri_dict['contri_dir_count'] = int(xattr_value[34:], 16) + +        elif re.search("size",xattr_key): +            xattr_dict['size'] = int(xattr_value[2:18], 16) +            xattr_dict['file_count'] = int(xattr_value[18:34], 16) +            xattr_dict['dir_count'] = int(xattr_value[34:], 16) +        elif re.search("dirty",xattr_key): +            if xattr_value == IS_CLEAN: +                xattr_dict['dirty'] = False +            elif xattr_value == IS_DIRTY: +                xattr_dict['dirty'] = True +        elif re.search("limit_objects",xattr_key): +            xattr_dict['limit_objects'] = int(xattr_value[2:18], 16) +        elif re.search("limit_set",xattr_key): +            xattr_dict['limit_set'] = int(xattr_value[2:18], 16) + +    return xattr_dict + +def verify_file_xattr(path, stbuf = None): + +    global file_count +    file_count += 1 + +    if stbuf is None: +        stbuf = os.lstat(path) + +    xattr_dict = get_quota_xattr_brick(path) + +    for parent in xattr_dict['parents']: +        contri_dict = xattr_dict['parents'][parent] + +        if 'contri_size' not in contri_dict or \ +           'contri_file_count' not in contri_dict or \ +           'contri_dir_count' not in contri_dict: +            print_msg(QUOTA_META_ABSENT, path, xattr_dict, stbuf) +            fix_xattr(path, False) +            return +        elif size_differs_lot(contri_dict['contri_size'] , stbuf.st_size): +            print_msg(QUOTA_SIZE_MISMATCH, path, xattr_dict, stbuf) +            fix_xattr(path, False) +            return + +    if verbose_mode is True: +        print_msg(QUOTA_VERBOSE, path, xattr_dict, stbuf) + + +def verify_dir_xattr(path, dir_size): + +    global dir_count +    dir_count += 1 +    xattr_dict = get_quota_xattr_brick(path) + +    stbuf = os.lstat(path) + +    for parent in xattr_dict['parents']: +        contri_dict = xattr_dict['parents'][parent] + +        if 'size' not in xattr_dict or 'contri_size' not in contri_dict: +            print_msg(QUOTA_META_ABSENT, path) +            fix_xattr(path, True) +            return +        elif size_differs_lot(dir_size, xattr_dict['size']) or \ +             size_differs_lot(contri_dict['contri_size'] , xattr_dict['size']): +            print_msg(QUOTA_SIZE_MISMATCH, path, xattr_dict, stbuf, dir_size) +            fix_xattr(path, True) +            return + +    if verbose_mode is True: +        print_msg("VERBOSE", path, xattr_dict, stbuf, dir_size) + + +def walktree(t_dir, hard_link_dict): +    '''recursively descend the directory tree rooted at dir, +       aggregating the size +       t_dir            : directory to walk over. +       hard_link_dict   : dict of inodes with multiple hard_links under t_dir +    ''' +    global aggr_size +    aggr_size[t_dir] = 0 + +    for entry in os.listdir(t_dir): +        pathname = os.path.join(t_dir, entry) +        stbuf = os.lstat(pathname) +        if S_ISDIR(stbuf.st_mode): +            # It's a directory, recurse into it +            if entry == '.glusterfs': +                print "skipping " + pathname +                continue +            descendent_hardlinks = {} +            subtree_size = walktree(pathname, descendent_hardlinks) + +            subtree_size = fix_hardlink_accounting(descendent_hardlinks, +                                                   hard_link_dict, +                                                   subtree_size) + +            aggr_size[t_dir] = aggr_size[t_dir] + subtree_size + +        elif S_ISREG(stbuf.st_mode) or S_ISLNK(stbuf.st_mode): +            # Even a symbolic link file may have multiple hardlinks. + +            file_size = stbuf.st_size +            if stbuf.st_nlink > 2: +                # send a single element dict to check if file is accounted. +                file_size = fix_hardlink_accounting({stbuf.st_ino:stbuf.st_size}, +                                                    hard_link_dict, +                                                    stbuf.st_size) + +                if file_size == 0: +                    print_msg("HARD_LINK (skipped)", pathname, "", +                                stbuf) +                else: +                    print_msg("HARD_LINK (accounted)", pathname, "", +                               stbuf) +                    hard_link_dict[stbuf.st_ino] = stbuf.st_size + +            if t_dir in aggr_size: +                aggr_size[t_dir] = aggr_size[t_dir] + file_size +            else: +                aggr_size[t_dir] = file_size +            verify_file_xattr(pathname, stbuf) + +        else: +            # Unknown file type, print a message +            print 'Skipping %s, due to file mode' % pathname + +    if t_dir not in aggr_size: +        aggr_size[t_dir] = 0 + +    verify_dir_xattr(t_dir, aggr_size[t_dir]) +    # du also accounts for t_directory sizes +    # aggr_size[t_dir] += 4096 + +    #cleanup +    ret = aggr_size[t_dir] +    del aggr_size[t_dir] +    return ret + + +if __name__ == '__main__': + +    parser = argparse.ArgumentParser(description='Diagnose quota accounting issues.', epilog=epilog_msg) +    parser.add_argument('brick_path', nargs=1, +                        help='The brick path (or any descendent sub-directory of brick path)', +                        ) +    parser.add_argument('--full-logs', dest='verbose', action='store_true', +                   help=''' +                         log all the xattr values and stat values reported +                         for analysis. [CAUTION: This can give lot of output +                         depending on FS depth. So one has to make sure enough +                         disk space exists if redirecting to file] +                        ''' +                        ) +    parser.add_argument('--fix-issues',metavar='mount_path', dest='mnt', action='store', +                   help=''' +                         fix accounting issues where the xattr values disagree +                         with stat sizes reported by gluster. A mount is also +                         required for this option to be used. +                         [CAUTION: This will directly modify backend xattr] +                        ''' +                        ) +    parser.add_argument('--sub-dir',metavar='sub_dir', dest='sub_dir', action='store', +                   help=''' +                         limit the crawling and accounting verification/correction +                         to a specific subdirectory. +                        ''' +                        ) + +    args = parser.parse_args() +    verbose_mode = args.verbose +    brick_path = args.brick_path[0] +    sub_dir = args.sub_dir +    mnt_path = args.mnt +    hard_link_dict = {} +    if sub_dir is not None: +        walktree(os.path.join(brick_path, sub_dir), hard_link_dict) +    else: +        walktree(brick_path, hard_link_dict) + +    print "Files verified : " + str(file_count) +    print "Directories verified : " + str(dir_count) +    if mnt_path is not None: +        print "Objects Fixed : " + str(obj_fix_count)  | 
