tools/glusterfind: Fix encoding to encode only space, newline and percent chars

libgfchangelog was encoding paths using the RFC 3986 spec, but encoding is only required for the SPACE, NEWLINE and PERCENT chars, since the NEWLINE char is used as the record separator and SPACE as the field separator in the parsed changelog output. Changed the encoding function to encode only the SPACE, NEWLINE and PERCENT chars. BUG: 1451724 Change-Id: Ic1dea824d23493dedcf3db45f353f90572f4e046 Signed-off-by: Aravinda VK <avishwan@redhat.com> Reviewed-on: https://review.gluster.org/17788 CentOS-regression: Gluster Build System <jenkins@build.gluster.org> Smoke: Gluster Build System <jenkins@build.gluster.org> Reviewed-by: Milind Changire <mchangir@redhat.com>
author: Aravinda VK <avishwan@redhat.com> 2017-07-03 14:51:21 +0530
committer: Aravinda VK <avishwan@redhat.com> 2017-07-21 08:41:13 +0000
commit: df85ed48e5e94449cdcc77de3b86e10ccea49f1e (patch)
tree: e657add312f453e35ad60268ba768caaff6dd8a6
parent: 08ee8541cfc9096a7f1cb40db7d7df763256d535 (diff)
6 files changed, 33 insertions, 50 deletions
diff --git a/tools/glusterfind/src/__init__.py b/tools/glusterfind/src/__init__.py
index 0ffb3f7432d..1753698b5fa 100644
--- a/tools/glusterfind/src/__init__.py
+++ b/tools/glusterfind/src/__init__.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
 # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
diff --git a/tools/glusterfind/src/changelogdata.py b/tools/glusterfind/src/changelogdata.py
index b4a97093aa8..3140d945b49 100644
--- a/tools/glusterfind/src/changelogdata.py
+++ b/tools/glusterfind/src/changelogdata.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
 # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
@@ -10,10 +9,9 @@
 # cases as published by the Free Software Foundation.
 
 import sqlite3
-import urllib
 import os
 
-from utils import RecordType
+from utils import RecordType, unquote_plus_space_newline
 from utils import output_path_prepare
 
 
@@ -92,7 +90,7 @@ class ChangelogData(object):
         self._create_table_pgfid()
         self._create_table_inodegfid()
         self.args = args
-        self.path_sep = "/" if args.no_encode else "%2F"
+        self.path_sep = "/"
 
     def _create_table_gfidpath(self):
         drop_table = "DROP TABLE IF EXISTS gfidpath"
@@ -323,36 +321,21 @@ class ChangelogData(object):
     def when_create_mknod_mkdir(self, changelogfile, data):
         # E <GFID> <MKNOD|CREATE|MKDIR> <MODE> <USER> <GRP> <PGFID>/<BNAME>
         # Add the Entry to DB
-        # urllib.unquote_plus will not handle unicode so, encode Unicode to
-        # represent in 8 bit format and then unquote
-        pgfid1, bn1 = urllib.unquote_plus(
-            data[6].encode("utf-8")).split("/", 1)
+        pgfid1, bn1 = data[6].split("/", 1)
 
         if self.args.no_encode:
-            # No urlencode since no_encode is set, so convert again to Unicode
-            # format from previously encoded.
-            bn1 = bn1.decode("utf-8").strip()
-        else:
-            # Quote again the basename
-            bn1 = urllib.quote_plus(bn1.strip())
+            bn1 = unquote_plus_space_newline(bn1).strip()
 
         self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1)
 
     def when_rename(self, changelogfile, data):
         # E <GFID> RENAME <OLD_PGFID>/<BNAME> <PGFID>/<BNAME>
-        pgfid1, bn1 = urllib.unquote_plus(
-            data[3].encode("utf-8")).split("/", 1)
-        pgfid2, bn2 = urllib.unquote_plus(
-            data[4].encode("utf-8")).split("/", 1)
+        pgfid1, bn1 = data[3].split("/", 1)
+        pgfid2, bn2 = data[4].split("/", 1)
 
         if self.args.no_encode:
-            # Quote again the basename
-            bn1 = bn1.decode("utf-8").strip()
-            bn2 = bn2.decode("utf-8").strip()
-        else:
-            # Quote again the basename
-            bn1 = urllib.quote_plus(bn1.strip())
-            bn2 = urllib.quote_plus(bn2.strip())
+            bn1 = unquote_plus_space_newline(bn1).strip()
+            bn2 = unquote_plus_space_newline(bn2).strip()
 
         if self.gfidpath_exists({"gfid": data[1], "type": "NEW",
                                  "pgfid1": pgfid1, "bn1": bn1}):
@@ -392,14 +375,9 @@ class ChangelogData(object):
     def when_link_symlink(self, changelogfile, data):
         # E <GFID> <LINK|SYMLINK> <PGFID>/<BASENAME>
         # Add as New record in Db as Type NEW
-        pgfid1, bn1 = urllib.unquote_plus(
-            data[3].encode("utf-8")).split("/", 1)
+        pgfid1, bn1 = data[3].split("/", 1)
         if self.args.no_encode:
-            # Quote again the basename
-            bn1 = bn1.decode("utf-8").strip()
-        else:
-            # Quote again the basename
-            bn1 = urllib.quote_plus(bn1.strip())
+            bn1 = unquote_plus_space_newline(bn1).strip()
 
         self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1)
 
@@ -411,18 +389,14 @@ class ChangelogData(object):
 
     def when_unlink_rmdir(self, changelogfile, data):
         # E <GFID> <UNLINK|RMDIR> <PGFID>/<BASENAME>
-        pgfid1, bn1 = urllib.unquote_plus(
-            data[3].encode("utf-8")).split("/", 1)
+        pgfid1, bn1 = data[3].split("/", 1)
 
         if self.args.no_encode:
-            bn1 = bn1.decode("utf-8").strip()
-        else:
-            # Quote again the basename
-            bn1 = urllib.quote_plus(bn1.strip())
+            bn1 = unquote_plus_space_newline(bn1).strip()
 
         deleted_path = data[4] if len(data) == 5 else ""
         if deleted_path != "":
-            deleted_path = urllib.unquote_plus(deleted_path.encode("utf-8"))
+            deleted_path = unquote_plus_space_newline(deleted_path)
             deleted_path = output_path_prepare(deleted_path, self.args)
 
         if self.gfidpath_exists({"gfid": data[1], "type": "NEW",
diff --git a/tools/glusterfind/src/conf.py b/tools/glusterfind/src/conf.py
index d73fee42aad..d91746bda13 100644
--- a/tools/glusterfind/src/conf.py
+++ b/tools/glusterfind/src/conf.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
 # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
diff --git a/tools/glusterfind/src/libgfchangelog.py b/tools/glusterfind/src/libgfchangelog.py
index dd8153e4e61..0f6b40d6c9c 100644
--- a/tools/glusterfind/src/libgfchangelog.py
+++ b/tools/glusterfind/src/libgfchangelog.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
 # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
index 3d0f02a65d4..e7e9889569c 100644
--- a/tools/glusterfind/src/main.py
+++ b/tools/glusterfind/src/main.py
@@ -21,13 +21,13 @@ import shutil
 import tempfile
 import signal
 from datetime import datetime
+import codecs
 
 from utils import execute, is_host_local, mkdirp, fail
 from utils import setup_logger, human_time, handle_rm_error
 from utils import get_changelog_rollover_time, cache_output, create_file
 import conf
 from changelogdata import OutputMerger
-import codecs
 
 PROG_DESCRIPTION = """
 GlusterFS Incremental API
@@ -481,10 +481,9 @@ def write_output(outfile, outfilemerger, field_separator):
             for p in paths:
                 if p == "":
                     continue
-                p_rep = p.replace("%2F%2F", "%2F").replace("//", "/")
+                p_rep = p.replace("//", "/")
                 if not row_2_rep:
-                    row_2_rep = row[2].replace("%2F%2F", "%2F").replace("//",
-                                                                        "/")
+                    row_2_rep = row[2].replace("//", "/")
                 if p_rep == row_2_rep:
                     continue
 
diff --git a/tools/glusterfind/src/utils.py b/tools/glusterfind/src/utils.py
index b08233e4a9f..c24258e6ef8 100644
--- a/tools/glusterfind/src/utils.py
+++ b/tools/glusterfind/src/utils.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 
 # Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
@@ -16,10 +15,12 @@ import xml.etree.cElementTree as etree
 import logging
 import os
 from datetime import datetime
-import urllib
 
 ROOT_GFID = "00000000-0000-0000-0000-000000000001"
 DEFAULT_CHANGELOG_INTERVAL = 15
+SPACE_ESCAPE_CHAR = "%20"
+NEWLINE_ESCAPE_CHAR = "%0A"
+PERCENTAGE_ESCAPE_CHAR = "%25"
 
 ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError
 cache_data = {}
@@ -84,7 +85,7 @@ def output_write(f, path, prefix=".", encode=False, tag="",
         path = os.path.join(prefix, path)
 
     if encode:
-        path = urllib.quote_plus(path)
+        path = quote_plus_space_newline(path)
 
     # set the field separator
     FS = "" if tag == "" else field_separator
@@ -246,4 +247,16 @@ def output_path_prepare(path, args):
     if args.no_encode:
         return path
     else:
-        return urllib.quote_plus(path.encode("utf-8"))
+        return quote_plus_space_newline(path)
+
+
+def unquote_plus_space_newline(s):
+    return s.replace(SPACE_ESCAPE_CHAR, " ")\
+            .replace(NEWLINE_ESCAPE_CHAR, "\n")\
+            .replace(PERCENTAGE_ESCAPE_CHAR, "%")
+
+
+def quote_plus_space_newline(s):
+    return s.replace("%", PERCENTAGE_ESCAPE_CHAR)\
+            .replace(" ", SPACE_ESCAPE_CHAR)\
+            .replace("\n", NEWLINE_ESCAPE_CHAR)
author	Aravinda VK <avishwan@redhat.com>	2017-07-03 14:51:21 +0530
committer	Aravinda VK <avishwan@redhat.com>	2017-07-21 08:41:13 +0000
commit	df85ed48e5e94449cdcc77de3b86e10ccea49f1e (patch)
tree	e657add312f453e35ad60268ba768caaff6dd8a6
parent	08ee8541cfc9096a7f1cb40db7d7df763256d535 (diff)