diff options
author | Aravinda VK <avishwan@redhat.com> | 2017-07-03 14:51:21 +0530 |
---|---|---|
committer | Aravinda VK <avishwan@redhat.com> | 2017-07-21 08:41:13 +0000 |
commit | df85ed48e5e94449cdcc77de3b86e10ccea49f1e (patch) | |
tree | e657add312f453e35ad60268ba768caaff6dd8a6 /tools/glusterfind | |
parent | 08ee8541cfc9096a7f1cb40db7d7df763256d535 (diff) |
tools/glusterfind: Fix encoding to encode only space, newline and percent chars
libgfchangelog was encoding paths as per RFC 3986, but encoding is only
required for the SPACE, NEWLINE and PERCENT chars: the NEWLINE char is
used as the record separator and SPACE as the field separator in the parsed
changelog output, and PERCENT introduces the escape sequences themselves.
Changed the encoding function to encode only the SPACE, NEWLINE and PERCENT chars.
BUG: 1451724
Change-Id: Ic1dea824d23493dedcf3db45f353f90572f4e046
Signed-off-by: Aravinda VK <avishwan@redhat.com>
Reviewed-on: https://review.gluster.org/17788
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Milind Changire <mchangir@redhat.com>
Diffstat (limited to 'tools/glusterfind')
-rw-r--r-- | tools/glusterfind/src/__init__.py | 1 | ||||
-rw-r--r-- | tools/glusterfind/src/changelogdata.py | 52 | ||||
-rw-r--r-- | tools/glusterfind/src/conf.py | 1 | ||||
-rw-r--r-- | tools/glusterfind/src/libgfchangelog.py | 1 | ||||
-rw-r--r-- | tools/glusterfind/src/main.py | 7 | ||||
-rw-r--r-- | tools/glusterfind/src/utils.py | 21 |
6 files changed, 33 insertions, 50 deletions
diff --git a/tools/glusterfind/src/__init__.py b/tools/glusterfind/src/__init__.py index 0ffb3f7432d..1753698b5fa 100644 --- a/tools/glusterfind/src/__init__.py +++ b/tools/glusterfind/src/__init__.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> diff --git a/tools/glusterfind/src/changelogdata.py b/tools/glusterfind/src/changelogdata.py index b4a97093aa8..3140d945b49 100644 --- a/tools/glusterfind/src/changelogdata.py +++ b/tools/glusterfind/src/changelogdata.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> @@ -10,10 +9,9 @@ # cases as published by the Free Software Foundation. import sqlite3 -import urllib import os -from utils import RecordType +from utils import RecordType, unquote_plus_space_newline from utils import output_path_prepare @@ -92,7 +90,7 @@ class ChangelogData(object): self._create_table_pgfid() self._create_table_inodegfid() self.args = args - self.path_sep = "/" if args.no_encode else "%2F" + self.path_sep = "/" def _create_table_gfidpath(self): drop_table = "DROP TABLE IF EXISTS gfidpath" @@ -323,36 +321,21 @@ class ChangelogData(object): def when_create_mknod_mkdir(self, changelogfile, data): # E <GFID> <MKNOD|CREATE|MKDIR> <MODE> <USER> <GRP> <PGFID>/<BNAME> # Add the Entry to DB - # urllib.unquote_plus will not handle unicode so, encode Unicode to - # represent in 8 bit format and then unquote - pgfid1, bn1 = urllib.unquote_plus( - data[6].encode("utf-8")).split("/", 1) + pgfid1, bn1 = data[6].split("/", 1) if self.args.no_encode: - # No urlencode since no_encode is set, so convert again to Unicode - # format from previously encoded. 
- bn1 = bn1.decode("utf-8").strip() - else: - # Quote again the basename - bn1 = urllib.quote_plus(bn1.strip()) + bn1 = unquote_plus_space_newline(bn1).strip() self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1) def when_rename(self, changelogfile, data): # E <GFID> RENAME <OLD_PGFID>/<BNAME> <PGFID>/<BNAME> - pgfid1, bn1 = urllib.unquote_plus( - data[3].encode("utf-8")).split("/", 1) - pgfid2, bn2 = urllib.unquote_plus( - data[4].encode("utf-8")).split("/", 1) + pgfid1, bn1 = data[3].split("/", 1) + pgfid2, bn2 = data[4].split("/", 1) if self.args.no_encode: - # Quote again the basename - bn1 = bn1.decode("utf-8").strip() - bn2 = bn2.decode("utf-8").strip() - else: - # Quote again the basename - bn1 = urllib.quote_plus(bn1.strip()) - bn2 = urllib.quote_plus(bn2.strip()) + bn1 = unquote_plus_space_newline(bn1).strip() + bn2 = unquote_plus_space_newline(bn2).strip() if self.gfidpath_exists({"gfid": data[1], "type": "NEW", "pgfid1": pgfid1, "bn1": bn1}): @@ -392,14 +375,9 @@ class ChangelogData(object): def when_link_symlink(self, changelogfile, data): # E <GFID> <LINK|SYMLINK> <PGFID>/<BASENAME> # Add as New record in Db as Type NEW - pgfid1, bn1 = urllib.unquote_plus( - data[3].encode("utf-8")).split("/", 1) + pgfid1, bn1 = data[3].split("/", 1) if self.args.no_encode: - # Quote again the basename - bn1 = bn1.decode("utf-8").strip() - else: - # Quote again the basename - bn1 = urllib.quote_plus(bn1.strip()) + bn1 = unquote_plus_space_newline(bn1).strip() self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1) @@ -411,18 +389,14 @@ class ChangelogData(object): def when_unlink_rmdir(self, changelogfile, data): # E <GFID> <UNLINK|RMDIR> <PGFID>/<BASENAME> - pgfid1, bn1 = urllib.unquote_plus( - data[3].encode("utf-8")).split("/", 1) + pgfid1, bn1 = data[3].split("/", 1) if self.args.no_encode: - bn1 = bn1.decode("utf-8").strip() - else: - # Quote again the basename - bn1 = urllib.quote_plus(bn1.strip()) + bn1 = 
unquote_plus_space_newline(bn1).strip() deleted_path = data[4] if len(data) == 5 else "" if deleted_path != "": - deleted_path = urllib.unquote_plus(deleted_path.encode("utf-8")) + deleted_path = unquote_plus_space_newline(deleted_path) deleted_path = output_path_prepare(deleted_path, self.args) if self.gfidpath_exists({"gfid": data[1], "type": "NEW", diff --git a/tools/glusterfind/src/conf.py b/tools/glusterfind/src/conf.py index d73fee42aad..d91746bda13 100644 --- a/tools/glusterfind/src/conf.py +++ b/tools/glusterfind/src/conf.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> diff --git a/tools/glusterfind/src/libgfchangelog.py b/tools/glusterfind/src/libgfchangelog.py index dd8153e4e61..0f6b40d6c9c 100644 --- a/tools/glusterfind/src/libgfchangelog.py +++ b/tools/glusterfind/src/libgfchangelog.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py index 3d0f02a65d4..e7e9889569c 100644 --- a/tools/glusterfind/src/main.py +++ b/tools/glusterfind/src/main.py @@ -21,13 +21,13 @@ import shutil import tempfile import signal from datetime import datetime +import codecs from utils import execute, is_host_local, mkdirp, fail from utils import setup_logger, human_time, handle_rm_error from utils import get_changelog_rollover_time, cache_output, create_file import conf from changelogdata import OutputMerger -import codecs PROG_DESCRIPTION = """ GlusterFS Incremental API @@ -481,10 +481,9 @@ def write_output(outfile, outfilemerger, field_separator): for p in paths: if p == "": continue - p_rep = p.replace("%2F%2F", "%2F").replace("//", "/") + p_rep = p.replace("//", "/") if not row_2_rep: - row_2_rep = row[2].replace("%2F%2F", "%2F").replace("//", - "/") + row_2_rep = row[2].replace("//", "/") if p_rep == row_2_rep: continue diff --git 
a/tools/glusterfind/src/utils.py b/tools/glusterfind/src/utils.py index b08233e4a9f..c24258e6ef8 100644 --- a/tools/glusterfind/src/utils.py +++ b/tools/glusterfind/src/utils.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/> @@ -16,10 +15,12 @@ import xml.etree.cElementTree as etree import logging import os from datetime import datetime -import urllib ROOT_GFID = "00000000-0000-0000-0000-000000000001" DEFAULT_CHANGELOG_INTERVAL = 15 +SPACE_ESCAPE_CHAR = "%20" +NEWLINE_ESCAPE_CHAR = "%0A" +PERCENTAGE_ESCAPE_CHAR = "%25" ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError cache_data = {} @@ -84,7 +85,7 @@ def output_write(f, path, prefix=".", encode=False, tag="", path = os.path.join(prefix, path) if encode: - path = urllib.quote_plus(path) + path = quote_plus_space_newline(path) # set the field separator FS = "" if tag == "" else field_separator @@ -246,4 +247,16 @@ def output_path_prepare(path, args): if args.no_encode: return path else: - return urllib.quote_plus(path.encode("utf-8")) + return quote_plus_space_newline(path) + + +def unquote_plus_space_newline(s): + return s.replace(SPACE_ESCAPE_CHAR, " ")\ + .replace(NEWLINE_ESCAPE_CHAR, "\n")\ + .replace(PERCENTAGE_ESCAPE_CHAR, "%") + + +def quote_plus_space_newline(s): + return s.replace("%", PERCENTAGE_ESCAPE_CHAR)\ + .replace(" ", SPACE_ESCAPE_CHAR)\ + .replace("\n", NEWLINE_ESCAPE_CHAR) |