summaryrefslogtreecommitdiffstats
path: root/tools/glusterfind/src/changelog.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/glusterfind/src/changelog.py')
-rw-r--r--tools/glusterfind/src/changelog.py128
1 files changed, 97 insertions, 31 deletions
diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py
index d6f3dc188ac..a5e9ea4288f 100644
--- a/tools/glusterfind/src/changelog.py
+++ b/tools/glusterfind/src/changelog.py
@@ -1,4 +1,5 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
# This file is part of GlusterFS.
@@ -13,9 +14,14 @@ import sys
import time
import xattr
import logging
+from gfind_py2py3 import bytearray_to_str
from argparse import ArgumentParser, RawDescriptionHelpFormatter
import hashlib
-import urllib
+try:
+ import urllib.parse as urllib
+except ImportError:
+ import urllib
+import codecs
import libgfchangelog
from utils import mkdirp, symlink_gfid_to_path
@@ -38,8 +44,6 @@ history_turn_time = 0
logger = logging.getLogger()
-
-
def pgfid_to_path(brick, changelog_data):
"""
For all the pgfids in table, converts into path using recursive
@@ -47,13 +51,13 @@ def pgfid_to_path(brick, changelog_data):
"""
# pgfid1 to path1 in case of CREATE/MKNOD/MKDIR/LINK/SYMLINK
for row in changelog_data.gfidpath_get_distinct("pgfid1", {"path1": ""}):
- # In case of Data/Metadata only, pgfid1 will not be their
+ # In case of Data/Metadata only, pgfid1 will not be there
if row[0] == "":
continue
try:
path = symlink_gfid_to_path(brick, row[0])
- path = output_path_prepare(path, args.output_prefix)
+ path = output_path_prepare(path, args)
changelog_data.gfidpath_set_path1(path, row[0])
except (IOError, OSError) as e:
logger.warn("Error converting to path: %s" % e)
@@ -69,7 +73,7 @@ def pgfid_to_path(brick, changelog_data):
try:
path = symlink_gfid_to_path(brick, row[0])
- path = output_path_prepare(path, args.output_prefix)
+ path = output_path_prepare(path, args)
changelog_data.gfidpath_set_path2(path, row[0])
except (IOError, OSError) as e:
logger.warn("Error converting to path: %s" % e)
@@ -90,9 +94,9 @@ def populate_pgfid_and_inodegfid(brick, changelog_data):
# It is a Directory if GFID backend path is symlink
try:
path = symlink_gfid_to_path(brick, gfid)
- path = output_path_prepare(path, args.output_prefix)
+ path = output_path_prepare(path, args)
changelog_data.gfidpath_update({"path1": path},
- {"gfid": gfid})
+ {"gfid": gfid})
except (IOError, OSError) as e:
logger.warn("Error converting to path: %s" % e)
continue
@@ -102,15 +106,55 @@ def populate_pgfid_and_inodegfid(brick, changelog_data):
changelog_data.inodegfid_add(os.stat(p).st_ino, gfid)
file_xattrs = xattr.list(p)
for x in file_xattrs:
- if x.startswith("trusted.pgfid."):
+ x_str = bytearray_to_str(x)
+ if x_str.startswith("trusted.pgfid."):
# PGFID in pgfid table
- changelog_data.pgfid_add(x.split(".")[-1])
+ changelog_data.pgfid_add(x_str.split(".")[-1])
except (IOError, OSError):
# All OS Errors ignored, since failures will be logged
# in End. All GFIDs present in gfidpath table
continue
+def enum_hard_links_using_gfid2path(brick, gfid, args):
+ hardlinks = []
+ p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
+ if not os.path.isdir(p):
+ # we have a symlink or a normal file
+ try:
+ file_xattrs = xattr.list(p)
+ for x in file_xattrs:
+ x_str = bytearray_to_str(x)
+ if x_str.startswith("trusted.gfid2path."):
+ # get the value for the xattr i.e. <PGFID>/<BN>
+ v = xattr.getxattr(p, x_str)
+ v_str = bytearray_to_str(v)
+ pgfid, bn = v_str.split(os.sep)
+ try:
+ path = symlink_gfid_to_path(brick, pgfid)
+ fullpath = os.path.join(path, bn)
+ fullpath = output_path_prepare(fullpath, args)
+ hardlinks.append(fullpath)
+ except (IOError, OSError) as e:
+ logger.warn("Error converting to path: %s" % e)
+ continue
+ except (IOError, OSError):
+ pass
+ return hardlinks
+
+
+def gfid_to_all_paths_using_gfid2path(brick, changelog_data, args):
+ path = ""
+ for row in changelog_data.gfidpath_get({"path1": "", "type": "MODIFY"}):
+ gfid = row[3].strip()
+ logger.debug("Processing gfid %s" % gfid)
+ hardlinks = enum_hard_links_using_gfid2path(brick, gfid, args)
+
+ path = ",".join(hardlinks)
+
+ changelog_data.gfidpath_update({"path1": path}, {"gfid": gfid})
+
+
def gfid_to_path_using_pgfid(brick, changelog_data, args):
"""
For all the pgfids collected, Converts to Path and
@@ -145,7 +189,7 @@ def gfid_to_path_using_pgfid(brick, changelog_data, args):
path = path.strip()
path = path[brick_path_len+1:]
- path = output_path_prepare(path, args.output_prefix)
+ path = output_path_prepare(path, args)
changelog_data.append_path1(path, inode)
changelog_data.inodegfid_update({"converted": 1}, {"inode": inode})
@@ -158,10 +202,10 @@ def gfid_to_path_using_pgfid(brick, changelog_data, args):
try:
path = symlink_gfid_to_path(brick, row[0])
find(os.path.join(brick, path),
- callback_func=output_callback,
- filter_func=inode_filter,
- ignore_dirs=ignore_dirs,
- subdirs_crawl=False)
+ callback_func=output_callback,
+ filter_func=inode_filter,
+ ignore_dirs=ignore_dirs,
+ subdirs_crawl=False)
except (IOError, OSError) as e:
logger.warn("Error converting to path: %s" % e)
continue
@@ -193,7 +237,7 @@ def gfid_to_path_using_batchfind(brick, changelog_data):
# Also updates converted flag in inodegfid table as 1
path = path.strip()
path = path[brick_path_len+1:]
- path = output_path_prepare(path, args.output_prefix)
+ path = output_path_prepare(path, args)
changelog_data.append_path1(path, inode)
@@ -211,7 +255,7 @@ def parse_changelog_to_db(changelog_data, filename, args):
"""
Parses a Changelog file and populates data in gfidpath table
"""
- with open(filename) as f:
+ with codecs.open(filename, encoding="utf-8") as f:
changelogfile = os.path.basename(filename)
for line in f:
data = line.strip().split(" ")
@@ -230,7 +274,7 @@ def parse_changelog_to_db(changelog_data, filename, args):
changelog_data.when_rename(changelogfile, data)
elif data[0] == "E" and data[2] in ["UNLINK", "RMDIR"]:
# UNLINK/RMDIR
- changelog_data.when_unlink_rmdir(changelogfile, data, args)
+ changelog_data.when_unlink_rmdir(changelogfile, data)
def get_changes(brick, hash_dir, log_file, start, end, args):
@@ -243,7 +287,7 @@ def get_changes(brick, hash_dir, log_file, start, end, args):
session_dir = os.path.join(conf.get_opt("session_dir"),
args.session)
status_file = os.path.join(session_dir, args.volume,
- "%s.status" % urllib.quote_plus(args.brick))
+ "%s.status" % urllib.quote_plus(args.brick))
# Get previous session
try:
@@ -260,7 +304,7 @@ def get_changes(brick, hash_dir, log_file, start, end, args):
fail("%s Changelog register failed: %s" % (brick, e), logger=logger)
# Output files to record GFIDs and GFID to Path failure GFIDs
- changelog_data = ChangelogData(args.outfile)
+ changelog_data = ChangelogData(args.outfile, args)
# Changelogs path(Hard coded to BRICK/.glusterfs/changelogs
cl_path = os.path.join(brick, ".glusterfs/changelogs")
@@ -270,9 +314,10 @@ def get_changes(brick, hash_dir, log_file, start, end, args):
actual_end = libgfchangelog.cl_history_changelog(
cl_path, start, end, CHANGELOGAPI_NUM_WORKERS)
except libgfchangelog.ChangelogException as e:
- fail("%s Historical Changelogs not available: %s" % (brick, e),
- logger=logger)
+ fail("%s: %s Historical Changelogs not available: %s" %
+ (args.node, brick, e), logger=logger)
+ logger.info("[1/4] Starting changelog parsing ...")
try:
# scan followed by getchanges till scan returns zero.
# history_scan() is blocking call, till it gets the number
@@ -282,7 +327,7 @@ def get_changes(brick, hash_dir, log_file, start, end, args):
# history_getchanges()
changes = []
while libgfchangelog.cl_history_scan() > 0:
- changes += libgfchangelog.cl_history_getchanges()
+ changes = libgfchangelog.cl_history_getchanges()
for change in changes:
# Ignore if last processed changelog comes
@@ -294,25 +339,34 @@ def get_changes(brick, hash_dir, log_file, start, end, args):
libgfchangelog.cl_history_done(change)
except IOError as e:
logger.warn("Error parsing changelog file %s: %s" %
- (change, e))
+ (change, e))
changelog_data.commit()
except libgfchangelog.ChangelogException as e:
fail("%s Error during Changelog Crawl: %s" % (brick, e),
logger=logger)
+ logger.info("[1/4] Finished changelog parsing.")
+
# Convert all pgfid available from Changelogs
+ logger.info("[2/4] Starting 'pgfid to path' conversions ...")
pgfid_to_path(brick, changelog_data)
changelog_data.commit()
+ logger.info("[2/4] Finished 'pgfid to path' conversions.")
- # Convert all GFIDs for which no other additional details available
- gfid_to_path_using_pgfid(brick, changelog_data, args)
+ # Convert all gfids recorded for data and metadata to all hardlink paths
+ logger.info("[3/4] Starting 'gfid2path' conversions ...")
+ gfid_to_all_paths_using_gfid2path(brick, changelog_data, args)
changelog_data.commit()
+ logger.info("[3/4] Finished 'gfid2path' conversions.")
# If some GFIDs fail to get converted from previous step,
# convert using find
+ logger.info("[4/4] Starting 'gfid to path using batchfind' "
+ "conversions ...")
gfid_to_path_using_batchfind(brick, changelog_data)
changelog_data.commit()
+ logger.info("[4/4] Finished 'gfid to path using batchfind' conversions.")
return actual_end
@@ -326,7 +380,7 @@ def changelog_crawl(brick, start, end, args):
# WORKING_DIR/BRICKHASH/OUTFILE
working_dir = os.path.dirname(args.outfile)
- brickhash = hashlib.sha1(brick)
+ brickhash = hashlib.sha1(brick.encode())
brickhash = str(brickhash.hexdigest())
working_dir = os.path.join(working_dir, brickhash)
@@ -348,14 +402,20 @@ def _get_args():
parser.add_argument("session", help="Session Name")
parser.add_argument("volume", help="Volume Name")
+ parser.add_argument("node", help="Node Name")
parser.add_argument("brick", help="Brick Name")
parser.add_argument("outfile", help="Output File")
parser.add_argument("start", help="Start Time", type=int)
+ parser.add_argument("end", help="End Time", type=int)
parser.add_argument("--only-query", help="Query mode only (no session)",
action="store_true")
parser.add_argument("--debug", help="Debug", action="store_true")
+ parser.add_argument("--no-encode",
+ help="Do not encode path in outfile",
+ action="store_true")
parser.add_argument("--output-prefix", help="File prefix in output",
default=".")
+ parser.add_argument("--type",default="both")
parser.add_argument("-N", "--only-namespace-changes",
help="List only namespace changes",
action="store_true")
@@ -375,13 +435,15 @@ if __name__ == "__main__":
session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
status_file = os.path.join(session_dir, args.volume,
- "%s.status" % urllib.quote_plus(args.brick))
+ "%s.status" % urllib.quote_plus(args.brick))
status_file_pre = status_file + ".pre"
mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
logger=logger)
+ end = -1
if args.only_query:
start = args.start
+ end = args.end
else:
try:
with open(status_file) as f:
@@ -389,13 +451,17 @@ if __name__ == "__main__":
except (ValueError, OSError, IOError):
start = args.start
- end = int(time.time()) - get_changelog_rollover_time(args.volume)
+ # end time is optional; so a -1 may be sent to use the default method of
+ # identifying the end time
+ if end == -1:
+ end = int(time.time()) - get_changelog_rollover_time(args.volume)
+
logger.info("%s Started Changelog Crawl - Start: %s End: %s" % (args.brick,
start,
end))
actual_end = changelog_crawl(args.brick, start, end, args)
if not args.only_query:
- with open(status_file_pre, "w", buffering=0) as f:
+ with open(status_file_pre, "w") as f:
f.write(str(actual_end))
logger.info("%s Finished Changelog Crawl - End: %s" % (args.brick,