diff options
author | Kotresh HR <khiremat@redhat.com> | 2015-01-29 15:53:19 +0530 |
---|---|---|
committer | Vijay Bellur <vbellur@redhat.com> | 2015-03-15 21:20:03 -0700 |
commit | 7a9a66cc5fb7f06118fab1fc2ae1c43cfbb1178f (patch) | |
tree | 11a1b53b1410c7bd9b9cf2424b2e75118bd86d18 /tools/gfind_missing_files/gfid_to_path.py | |
parent | 38e342ca4a2167720bea82d3cee7fca08baba666 (diff) |
tools: Finds missing files in gluster volume given backend brickpath
The tool finds the missing files in a geo-replication slave volume.
The tool crawls backend .glusterfs of the brickpath, which is passed
as a parameter and stats each entry on slave volume mount to check
the presence of file. The mount used is aux-gfid-mount, hence no path
conversion is required and is fast. The tool needs to be run on every
node in cluster for each brickpath of geo-rep master volume to find
missing files on slave volume. The tool is generic enough and can be
used in non geo-replication context as well.
Most of the crawler code is leverged from Avati's xfind and is modified
to crawl only .glusterfs (https://github.com/avati/xsync)
Thanks Aravinda for scripts to convert gfid to path.
Change-Id: I84deaaaf638f7c571ff1319b67a3440fe27da810
BUG: 1187140
Signed-off-by: Aravinda VK <avishwan@redhat.com>
Signed-off-by: Kotresh HR <khiremat@redhat.com>
Reviewed-on: http://review.gluster.org/9503
Tested-by: Gluster Build System <jenkins@build.gluster.com>
Reviewed-by: Vijay Bellur <vbellur@redhat.com>
Diffstat (limited to 'tools/gfind_missing_files/gfid_to_path.py')
-rw-r--r-- | tools/gfind_missing_files/gfid_to_path.py | 162 |
1 files changed, 162 insertions, 0 deletions
diff --git a/tools/gfind_missing_files/gfid_to_path.py b/tools/gfind_missing_files/gfid_to_path.py new file mode 100644 index 00000000000..8362f68b955 --- /dev/null +++ b/tools/gfind_missing_files/gfid_to_path.py @@ -0,0 +1,162 @@ +#!/usr/bin/env python + +# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> +# This file is part of GlusterFS. +# +# This file is licensed to you under your choice of the GNU Lesser +# General Public License, version 3 or any later version (LGPLv3 or +# later), or the GNU General Public License, version 2 (GPLv2), in all +# cases as published by the Free Software Foundation. + +import sys +import os +import xattr +import uuid +import re +import errno + +CHANGELOG_SEARCH_MAX_TRY = 31 +DEC_CTIME_START = 5 +ROOT_GFID = "00000000-0000-0000-0000-000000000001" +MAX_NUM_CHANGELOGS_TRY = 2 + + +def output_not_found(gfid): + # Write GFID to stderr + sys.stderr.write("%s\n" % gfid) + + +def output_success(path): + # Write converted Path to Stdout + sys.stdout.write("%s\n" % path) + + +def full_dir_path(gfid): + out_path = "" + while True: + path = os.path.join(".glusterfs", gfid[0:2], gfid[2:4], gfid) + path_readlink = os.readlink(path) + pgfid = os.path.dirname(path_readlink) + out_path = os.path.join(os.path.basename(path_readlink), out_path) + if pgfid == "../../00/00/%s" % ROOT_GFID: + out_path = os.path.join("./", out_path) + break + gfid = os.path.basename(pgfid) + return out_path + + +def find_path_from_changelog(fd, gfid): + """ + In given Changelog File, finds using following pattern + <T><GFID>\x00<TYPE>\x00<MODE>\x00<UID>\x00<GID>\x00<PARGFID>/<BASENAME> + Pattern search finds PARGFID and BASENAME, Convert PARGFID to Path + Using readlink and add basename to form Full path. + """ + content = fd.read() + + pattern = "E%s" % gfid + pattern += "\x00(3|23)\x00\d+\x00\d+\x00\d+\x00([^\x00]+)/([^\x00]+)" + pat = re.compile(pattern) + match = pat.search(content) + + if match: + pgfid = match.group(2) + basename = match.group(3) + if pgfid == ROOT_GFID: + return os.path.join("./", basename) + else: + full_path_parent = full_dir_path(pgfid) + if full_path_parent: + return os.path.join(full_path_parent, basename) + + return None + + +def gfid_to_path(gfid): + """ + Try readlink, if it is directory it succeeds. + Get ctime of the GFID file, Decrement by 5 sec + Search for Changelog filename, Since Changelog file generated + every 15 sec, Search and get immediate next Changelog after the file + Creation. Get the Path by searching in Changelog file. + Get the resultant file's GFID and Compare with the input, If these + GFIDs are different then Some thing is changed(May be Rename) + """ + gfid = gfid.strip() + gpath = os.path.join(".glusterfs", gfid[0:2], gfid[2:4], gfid) + try: + output_success(full_dir_path(gfid)) + return + except OSError: + # Not an SymLink + pass + + try: + ctime = int(os.stat(gpath).st_ctime) + ctime -= DEC_CTIME_START + except (OSError, IOError): + output_not_found(gfid) + return + + path = None + found_changelog = False + changelog_parse_try = 0 + for i in range(CHANGELOG_SEARCH_MAX_TRY): + cl = os.path.join(".glusterfs/changelogs", "CHANGELOG.%s" % ctime) + + try: + with open(cl, "rb") as f: + changelog_parse_try += 1 + found_changelog = True + path = find_path_from_changelog(f, gfid) + if not path and changelog_parse_try < MAX_NUM_CHANGELOGS_TRY: + ctime += 1 + continue + break + except (IOError, OSError) as e: + if e.errno == errno.ENOENT: + ctime += 1 + else: + break + + if not found_changelog: + output_not_found(gfid) + return + + if not path: + output_not_found(gfid) + return + gfid1 = str(uuid.UUID(bytes=xattr.get(path, "trusted.gfid"))) + if gfid != gfid1: + output_not_found(gfid) + return + + output_success(path) + + +def main(): + num_arguments = 3 + if not sys.stdin.isatty(): + num_arguments = 2 + + if len(sys.argv) != num_arguments: + sys.stderr.write("Invalid arguments\nUsage: " + "%s <BRICK_PATH> <GFID_FILE>\n" % sys.argv[0]) + sys.exit(1) + + path = sys.argv[1] + + if sys.stdin.isatty(): + gfid_list = os.path.abspath(sys.argv[2]) + os.chdir(path) + with open(gfid_list) as f: + for gfid in f: + gfid_to_path(gfid) + else: + os.chdir(path) + for gfid in sys.stdin: + gfid_to_path(gfid) + + +if __name__ == "__main__": + main() |