diff options
Diffstat (limited to 'tools/glusterfind/src/changelog.py')
| -rw-r--r-- | tools/glusterfind/src/changelog.py | 105 |
1 files changed, 83 insertions, 22 deletions
diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py index 721b8d0ca3a..a5e9ea4288f 100644 --- a/tools/glusterfind/src/changelog.py +++ b/tools/glusterfind/src/changelog.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/python3 # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> @@ -14,9 +14,13 @@ import sys import time import xattr import logging +from gfind_py2py3 import bytearray_to_str from argparse import ArgumentParser, RawDescriptionHelpFormatter import hashlib -import urllib +try: + import urllib.parse as urllib +except ImportError: + import urllib import codecs import libgfchangelog @@ -40,8 +44,6 @@ history_turn_time = 0 logger = logging.getLogger() - - def pgfid_to_path(brick, changelog_data): """ For all the pgfids in table, converts into path using recursive @@ -49,7 +51,7 @@ def pgfid_to_path(brick, changelog_data): """ # pgfid1 to path1 in case of CREATE/MKNOD/MKDIR/LINK/SYMLINK for row in changelog_data.gfidpath_get_distinct("pgfid1", {"path1": ""}): - # In case of Data/Metadata only, pgfid1 will not be their + # In case of Data/Metadata only, pgfid1 will not be there if row[0] == "": continue @@ -94,7 +96,7 @@ def populate_pgfid_and_inodegfid(brick, changelog_data): path = symlink_gfid_to_path(brick, gfid) path = output_path_prepare(path, args) changelog_data.gfidpath_update({"path1": path}, - {"gfid": gfid}) + {"gfid": gfid}) except (IOError, OSError) as e: logger.warn("Error converting to path: %s" % e) continue @@ -104,15 +106,55 @@ def populate_pgfid_and_inodegfid(brick, changelog_data): changelog_data.inodegfid_add(os.stat(p).st_ino, gfid) file_xattrs = xattr.list(p) for x in file_xattrs: - if x.startswith("trusted.pgfid."): + x_str = bytearray_to_str(x) + if x_str.startswith("trusted.pgfid."): # PGFID in pgfid table - changelog_data.pgfid_add(x.split(".")[-1]) + changelog_data.pgfid_add(x_str.split(".")[-1]) except (IOError, OSError): # All OS Errors ignored, since failures will be logged # in End. All GFIDs present in gfidpath table continue +def enum_hard_links_using_gfid2path(brick, gfid, args): + hardlinks = [] + p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid) + if not os.path.isdir(p): + # we have a symlink or a normal file + try: + file_xattrs = xattr.list(p) + for x in file_xattrs: + x_str = bytearray_to_str(x) + if x_str.startswith("trusted.gfid2path."): + # get the value for the xattr i.e. <PGFID>/<BN> + v = xattr.getxattr(p, x_str) + v_str = bytearray_to_str(v) + pgfid, bn = v_str.split(os.sep) + try: + path = symlink_gfid_to_path(brick, pgfid) + fullpath = os.path.join(path, bn) + fullpath = output_path_prepare(fullpath, args) + hardlinks.append(fullpath) + except (IOError, OSError) as e: + logger.warn("Error converting to path: %s" % e) + continue + except (IOError, OSError): + pass + return hardlinks + + +def gfid_to_all_paths_using_gfid2path(brick, changelog_data, args): + path = "" + for row in changelog_data.gfidpath_get({"path1": "", "type": "MODIFY"}): + gfid = row[3].strip() + logger.debug("Processing gfid %s" % gfid) + hardlinks = enum_hard_links_using_gfid2path(brick, gfid, args) + + path = ",".join(hardlinks) + + changelog_data.gfidpath_update({"path1": path}, {"gfid": gfid}) + + def gfid_to_path_using_pgfid(brick, changelog_data, args): """ For all the pgfids collected, Converts to Path and @@ -160,10 +202,10 @@ def gfid_to_path_using_pgfid(brick, changelog_data, args): try: path = symlink_gfid_to_path(brick, row[0]) find(os.path.join(brick, path), - callback_func=output_callback, - filter_func=inode_filter, - ignore_dirs=ignore_dirs, - subdirs_crawl=False) + callback_func=output_callback, + filter_func=inode_filter, + ignore_dirs=ignore_dirs, + subdirs_crawl=False) except (IOError, OSError) as e: logger.warn("Error converting to path: %s" % e) continue @@ -245,7 +287,7 @@ def get_changes(brick, hash_dir, log_file, start, end, args): session_dir = os.path.join(conf.get_opt("session_dir"), args.session) status_file = os.path.join(session_dir, args.volume, - "%s.status" % urllib.quote_plus(args.brick)) + "%s.status" % urllib.quote_plus(args.brick)) # Get previous session try: @@ -272,9 +314,10 @@ def get_changes(brick, hash_dir, log_file, start, end, args): actual_end = libgfchangelog.cl_history_changelog( cl_path, start, end, CHANGELOGAPI_NUM_WORKERS) except libgfchangelog.ChangelogException as e: - fail("%s Historical Changelogs not available: %s" % (brick, e), - logger=logger) + fail("%s: %s Historical Changelogs not available: %s" % + (args.node, brick, e), logger=logger) + logger.info("[1/4] Starting changelog parsing ...") try: # scan followed by getchanges till scan returns zero. # history_scan() is blocking call, till it gets the number @@ -296,25 +339,34 @@ def get_changes(brick, hash_dir, log_file, start, end, args): libgfchangelog.cl_history_done(change) except IOError as e: logger.warn("Error parsing changelog file %s: %s" % - (change, e)) + (change, e)) changelog_data.commit() except libgfchangelog.ChangelogException as e: fail("%s Error during Changelog Crawl: %s" % (brick, e), logger=logger) + logger.info("[1/4] Finished changelog parsing.") + # Convert all pgfid available from Changelogs + logger.info("[2/4] Starting 'pgfid to path' conversions ...") pgfid_to_path(brick, changelog_data) changelog_data.commit() + logger.info("[2/4] Finished 'pgfid to path' conversions.") - # Convert all GFIDs for which no other additional details available - gfid_to_path_using_pgfid(brick, changelog_data, args) + # Convert all gfids recorded for data and metadata to all hardlink paths + logger.info("[3/4] Starting 'gfid2path' conversions ...") + gfid_to_all_paths_using_gfid2path(brick, changelog_data, args) changelog_data.commit() + logger.info("[3/4] Finished 'gfid2path' conversions.") # If some GFIDs fail to get converted from previous step, # convert using find + logger.info("[4/4] Starting 'gfid to path using batchfind' " + "conversions ...") gfid_to_path_using_batchfind(brick, changelog_data) changelog_data.commit() + logger.info("[4/4] Finished 'gfid to path using batchfind' conversions.") return actual_end @@ -328,7 +380,7 @@ def changelog_crawl(brick, start, end, args): # WORKING_DIR/BRICKHASH/OUTFILE working_dir = os.path.dirname(args.outfile) - brickhash = hashlib.sha1(brick) + brickhash = hashlib.sha1(brick.encode()) brickhash = str(brickhash.hexdigest()) working_dir = os.path.join(working_dir, brickhash) @@ -350,9 +402,11 @@ def _get_args(): parser.add_argument("session", help="Session Name") parser.add_argument("volume", help="Volume Name") + parser.add_argument("node", help="Node Name") parser.add_argument("brick", help="Brick Name") parser.add_argument("outfile", help="Output File") parser.add_argument("start", help="Start Time", type=int) + parser.add_argument("end", help="End Time", type=int) parser.add_argument("--only-query", help="Query mode only (no session)", action="store_true") parser.add_argument("--debug", help="Debug", action="store_true") @@ -361,6 +415,7 @@ def _get_args(): action="store_true") parser.add_argument("--output-prefix", help="File prefix in output", default=".") + parser.add_argument("--type",default="both") parser.add_argument("-N", "--only-namespace-changes", help="List only namespace changes", action="store_true") @@ -380,13 +435,15 @@ if __name__ == "__main__": session_dir = os.path.join(conf.get_opt("session_dir"), args.session) status_file = os.path.join(session_dir, args.volume, - "%s.status" % urllib.quote_plus(args.brick)) + "%s.status" % urllib.quote_plus(args.brick)) status_file_pre = status_file + ".pre" mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True, logger=logger) + end = -1 if args.only_query: start = args.start + end = args.end else: try: with open(status_file) as f: @@ -394,13 +451,17 @@ if __name__ == "__main__": except (ValueError, OSError, IOError): start = args.start - end = int(time.time()) - get_changelog_rollover_time(args.volume) + # end time is optional; so a -1 may be sent to use the default method of + # identifying the end time + if end == -1: + end = int(time.time()) - get_changelog_rollover_time(args.volume) + logger.info("%s Started Changelog Crawl - Start: %s End: %s" % (args.brick, start, end)) actual_end = changelog_crawl(args.brick, start, end, args) if not args.only_query: - with open(status_file_pre, "w", buffering=0) as f: + with open(status_file_pre, "w") as f: f.write(str(actual_end)) logger.info("%s Finished Changelog Crawl - End: %s" % (args.brick, |
