summaryrefslogtreecommitdiffstats
path: root/tools/glusterfind
diff options
context:
space:
mode:
author: Aravinda VK <avishwan@redhat.com> 2017-07-03 14:51:21 +0530
committer: Aravinda VK <avishwan@redhat.com> 2017-07-21 08:41:13 +0000
commit: df85ed48e5e94449cdcc77de3b86e10ccea49f1e (patch)
tree: e657add312f453e35ad60268ba768caaff6dd8a6 /tools/glusterfind
parent: 08ee8541cfc9096a7f1cb40db7d7df763256d535 (diff)
tools/glusterfind: Fix encoding to encode only space, newline and percent chars
libgfchangelog was encoding paths according to RFC 3986, but encoding is only required for the SPACE, NEWLINE and PERCENT chars, since the NEWLINE char is used as the record separator and SPACE as the field separator in the parsed changelogs output. Changed the encoding function to encode only the SPACE, NEWLINE and PERCENT chars.
BUG: 1451724
Change-Id: Ic1dea824d23493dedcf3db45f353f90572f4e046
Signed-off-by: Aravinda VK <avishwan@redhat.com>
Reviewed-on: https://review.gluster.org/17788
CentOS-regression: Gluster Build System <jenkins@build.gluster.org>
Smoke: Gluster Build System <jenkins@build.gluster.org>
Reviewed-by: Milind Changire <mchangir@redhat.com>
Diffstat (limited to 'tools/glusterfind')
-rw-r--r-- tools/glusterfind/src/__init__.py 1
-rw-r--r-- tools/glusterfind/src/changelogdata.py 52
-rw-r--r-- tools/glusterfind/src/conf.py 1
-rw-r--r-- tools/glusterfind/src/libgfchangelog.py 1
-rw-r--r-- tools/glusterfind/src/main.py 7
-rw-r--r-- tools/glusterfind/src/utils.py 21
6 files changed, 33 insertions, 50 deletions
diff --git a/tools/glusterfind/src/__init__.py b/tools/glusterfind/src/__init__.py
index 0ffb3f7432d..1753698b5fa 100644
--- a/tools/glusterfind/src/__init__.py
+++ b/tools/glusterfind/src/__init__.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
diff --git a/tools/glusterfind/src/changelogdata.py b/tools/glusterfind/src/changelogdata.py
index b4a97093aa8..3140d945b49 100644
--- a/tools/glusterfind/src/changelogdata.py
+++ b/tools/glusterfind/src/changelogdata.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
@@ -10,10 +9,9 @@
# cases as published by the Free Software Foundation.
import sqlite3
-import urllib
import os
-from utils import RecordType
+from utils import RecordType, unquote_plus_space_newline
from utils import output_path_prepare
@@ -92,7 +90,7 @@ class ChangelogData(object):
self._create_table_pgfid()
self._create_table_inodegfid()
self.args = args
- self.path_sep = "/" if args.no_encode else "%2F"
+ self.path_sep = "/"
def _create_table_gfidpath(self):
drop_table = "DROP TABLE IF EXISTS gfidpath"
@@ -323,36 +321,21 @@ class ChangelogData(object):
def when_create_mknod_mkdir(self, changelogfile, data):
# E <GFID> <MKNOD|CREATE|MKDIR> <MODE> <USER> <GRP> <PGFID>/<BNAME>
# Add the Entry to DB
- # urllib.unquote_plus will not handle unicode so, encode Unicode to
- # represent in 8 bit format and then unquote
- pgfid1, bn1 = urllib.unquote_plus(
- data[6].encode("utf-8")).split("/", 1)
+ pgfid1, bn1 = data[6].split("/", 1)
if self.args.no_encode:
- # No urlencode since no_encode is set, so convert again to Unicode
- # format from previously encoded.
- bn1 = bn1.decode("utf-8").strip()
- else:
- # Quote again the basename
- bn1 = urllib.quote_plus(bn1.strip())
+ bn1 = unquote_plus_space_newline(bn1).strip()
self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1)
def when_rename(self, changelogfile, data):
# E <GFID> RENAME <OLD_PGFID>/<BNAME> <PGFID>/<BNAME>
- pgfid1, bn1 = urllib.unquote_plus(
- data[3].encode("utf-8")).split("/", 1)
- pgfid2, bn2 = urllib.unquote_plus(
- data[4].encode("utf-8")).split("/", 1)
+ pgfid1, bn1 = data[3].split("/", 1)
+ pgfid2, bn2 = data[4].split("/", 1)
if self.args.no_encode:
- # Quote again the basename
- bn1 = bn1.decode("utf-8").strip()
- bn2 = bn2.decode("utf-8").strip()
- else:
- # Quote again the basename
- bn1 = urllib.quote_plus(bn1.strip())
- bn2 = urllib.quote_plus(bn2.strip())
+ bn1 = unquote_plus_space_newline(bn1).strip()
+ bn2 = unquote_plus_space_newline(bn2).strip()
if self.gfidpath_exists({"gfid": data[1], "type": "NEW",
"pgfid1": pgfid1, "bn1": bn1}):
@@ -392,14 +375,9 @@ class ChangelogData(object):
def when_link_symlink(self, changelogfile, data):
# E <GFID> <LINK|SYMLINK> <PGFID>/<BASENAME>
# Add as New record in Db as Type NEW
- pgfid1, bn1 = urllib.unquote_plus(
- data[3].encode("utf-8")).split("/", 1)
+ pgfid1, bn1 = data[3].split("/", 1)
if self.args.no_encode:
- # Quote again the basename
- bn1 = bn1.decode("utf-8").strip()
- else:
- # Quote again the basename
- bn1 = urllib.quote_plus(bn1.strip())
+ bn1 = unquote_plus_space_newline(bn1).strip()
self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1)
@@ -411,18 +389,14 @@ class ChangelogData(object):
def when_unlink_rmdir(self, changelogfile, data):
# E <GFID> <UNLINK|RMDIR> <PGFID>/<BASENAME>
- pgfid1, bn1 = urllib.unquote_plus(
- data[3].encode("utf-8")).split("/", 1)
+ pgfid1, bn1 = data[3].split("/", 1)
if self.args.no_encode:
- bn1 = bn1.decode("utf-8").strip()
- else:
- # Quote again the basename
- bn1 = urllib.quote_plus(bn1.strip())
+ bn1 = unquote_plus_space_newline(bn1).strip()
deleted_path = data[4] if len(data) == 5 else ""
if deleted_path != "":
- deleted_path = urllib.unquote_plus(deleted_path.encode("utf-8"))
+ deleted_path = unquote_plus_space_newline(deleted_path)
deleted_path = output_path_prepare(deleted_path, self.args)
if self.gfidpath_exists({"gfid": data[1], "type": "NEW",
diff --git a/tools/glusterfind/src/conf.py b/tools/glusterfind/src/conf.py
index d73fee42aad..d91746bda13 100644
--- a/tools/glusterfind/src/conf.py
+++ b/tools/glusterfind/src/conf.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
diff --git a/tools/glusterfind/src/libgfchangelog.py b/tools/glusterfind/src/libgfchangelog.py
index dd8153e4e61..0f6b40d6c9c 100644
--- a/tools/glusterfind/src/libgfchangelog.py
+++ b/tools/glusterfind/src/libgfchangelog.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
index 3d0f02a65d4..e7e9889569c 100644
--- a/tools/glusterfind/src/main.py
+++ b/tools/glusterfind/src/main.py
@@ -21,13 +21,13 @@ import shutil
import tempfile
import signal
from datetime import datetime
+import codecs
from utils import execute, is_host_local, mkdirp, fail
from utils import setup_logger, human_time, handle_rm_error
from utils import get_changelog_rollover_time, cache_output, create_file
import conf
from changelogdata import OutputMerger
-import codecs
PROG_DESCRIPTION = """
GlusterFS Incremental API
@@ -481,10 +481,9 @@ def write_output(outfile, outfilemerger, field_separator):
for p in paths:
if p == "":
continue
- p_rep = p.replace("%2F%2F", "%2F").replace("//", "/")
+ p_rep = p.replace("//", "/")
if not row_2_rep:
- row_2_rep = row[2].replace("%2F%2F", "%2F").replace("//",
- "/")
+ row_2_rep = row[2].replace("//", "/")
if p_rep == row_2_rep:
continue
diff --git a/tools/glusterfind/src/utils.py b/tools/glusterfind/src/utils.py
index b08233e4a9f..c24258e6ef8 100644
--- a/tools/glusterfind/src/utils.py
+++ b/tools/glusterfind/src/utils.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
@@ -16,10 +15,12 @@ import xml.etree.cElementTree as etree
import logging
import os
from datetime import datetime
-import urllib
ROOT_GFID = "00000000-0000-0000-0000-000000000001"
DEFAULT_CHANGELOG_INTERVAL = 15
+SPACE_ESCAPE_CHAR = "%20"
+NEWLINE_ESCAPE_CHAR = "%0A"
+PERCENTAGE_ESCAPE_CHAR = "%25"
ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError
cache_data = {}
@@ -84,7 +85,7 @@ def output_write(f, path, prefix=".", encode=False, tag="",
path = os.path.join(prefix, path)
if encode:
- path = urllib.quote_plus(path)
+ path = quote_plus_space_newline(path)
# set the field separator
FS = "" if tag == "" else field_separator
@@ -246,4 +247,16 @@ def output_path_prepare(path, args):
if args.no_encode:
return path
else:
- return urllib.quote_plus(path.encode("utf-8"))
+ return quote_plus_space_newline(path)
+
+
+def unquote_plus_space_newline(s):
+ return s.replace(SPACE_ESCAPE_CHAR, " ")\
+ .replace(NEWLINE_ESCAPE_CHAR, "\n")\
+ .replace(PERCENTAGE_ESCAPE_CHAR, "%")
+
+
+def quote_plus_space_newline(s):
+ return s.replace("%", PERCENTAGE_ESCAPE_CHAR)\
+ .replace(" ", SPACE_ESCAPE_CHAR)\
+ .replace("\n", NEWLINE_ESCAPE_CHAR)