author    Prashanth Pai <ppai@redhat.com>    2015-09-08 15:44:09 +0530
committer Prashanth Pai <ppai@redhat.com>    2016-01-11 20:47:23 -0800
commit    c5d76cdd2e2e99d4ac65b645b17cf8a43e4ccab4 (patch)
tree      9266f8a8419d48ab6f19a2bb5ca0988e72f501da
parent    ac33dc6dbf1f982cf522556aa938ebfb0e6ddded (diff)
Do not use pickle: Use json
Change-Id: Iffdd56704330897fbde21f101c9b2ed03c2ae296
Signed-off-by: Prashanth Pai <ppai@redhat.com>
Reviewed-by: Thiago da Silva <tdasilva@redhat.com>
Tested-by: Thiago da Silva <tdasilva@redhat.com>
Reviewed-on: http://review.gluster.org/13221
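For context: the change swaps the on-disk serialization of the xattr metadata dict from pickle protocol 2 to compact JSON. Below is a minimal sketch, not part of the patch, of what the two formats look like for a toy dict (the keys are made up; real metadata carries Swift account/container/object attributes):

import json
import cPickle as pickle

# Illustrative metadata only; real keys would be e.g. 'Content-Type' or
# 'X-Object-Meta-*' as stored by Swift.
metadata = {'key1': 'val1', 'key2': 'val2'}

pickled = pickle.dumps(metadata, 2)                       # old on-disk format
jsonified = json.dumps(metadata, separators=(',', ':'))   # new on-disk format

# A protocol-2 pickled dict starts with '\x80\x02}' and ends with '.',
# which is exactly what the migration tool and deserialize_metadata() below
# sniff for to tell the two formats apart.
print('%r ... %r' % (pickled[:3], pickled[-1]))
print(jsonified)   # e.g. {"key1":"val1","key2":"val2"}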
-rwxr-xr-x  bin/gluster-swift-migrate-metadata   162
-rw-r--r--  etc/fs.conf-gluster                   13
-rw-r--r--  gluster/swift/common/Glusterfs.py      8
-rw-r--r--  gluster/swift/common/utils.py         70
-rw-r--r--  setup.py                               1
-rw-r--r--  test/functional/tests.py              12
-rw-r--r--  test/unit/common/test_utils.py        82
7 files changed, 339 insertions, 9 deletions
diff --git a/bin/gluster-swift-migrate-metadata b/bin/gluster-swift-migrate-metadata
new file mode 100755
index 0000000..2ccf157
--- /dev/null
+++ b/bin/gluster-swift-migrate-metadata
@@ -0,0 +1,162 @@
+#!/usr/bin/env python
+#
+# Copyright (c) 2015 Red Hat, Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+# implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import pwd
+import sys
+import stat
+import errno
+import xattr
+import cPickle as pickle
+import multiprocessing
+
+from optparse import OptionParser
+from gluster.swift.common.utils import write_metadata, SafeUnpickler, \
+ METADATA_KEY, MAX_XATTR_SIZE
+
+
+ORIGINAL_EUID = os.geteuid()
+NOBODY_UID = pwd.getpwnam('nobody').pw_uid
+
+
+def print_msg(s):
+ global options
+ if options.verbose:
+ print(s)
+
+
+def clean_metadata(path, key_count):
+ """
+ Can only be used when you know the key_count. Saves one unnecessary
+ removexattr() call. Ignores error when file or metadata isn't found.
+ """
+ for key in xrange(0, key_count):
+ try:
+ xattr.removexattr(path, '%s%s' % (METADATA_KEY, (key or '')))
+ except IOError as err:
+ if err.errno not in (errno.ENOENT, errno.ESTALE, errno.ENODATA):
+ print_msg("xattr.removexattr(%s, %s%s) failed: %s" %
+ (path, METADATA_KEY, (key or ''), err.errno))
+
+
+def process_object(path):
+
+ metastr = ''
+ key_count = 0
+ try:
+ while True:
+ metastr += xattr.getxattr(path, '%s%s' %
+ (METADATA_KEY, (key_count or '')))
+ key_count += 1
+ if len(metastr) < MAX_XATTR_SIZE:
+ # Prevent further getxattr calls
+ break
+ except IOError as err:
+ if err.errno not in (errno.ENOENT, errno.ESTALE, errno.ENODATA):
+ print_msg("xattr.getxattr(%s, %s%s) failed: %s" %
+ (path, METADATA_KEY, (key_count or ''), err.errno))
+
+ if not metastr:
+ return
+
+ if metastr.startswith('\x80\x02}') and metastr.endswith('.'):
+ # It's pickled. If unpickling is successful and the metadata is
+ # not stale, write the metadata back by serializing it (as JSON).
+ try:
+ os.seteuid(NOBODY_UID) # Drop privileges
+ metadata = SafeUnpickler.loads(metastr)
+ os.seteuid(ORIGINAL_EUID) # Restore privileges
+ assert isinstance(metadata, dict)
+ except (pickle.UnpicklingError, EOFError, AttributeError,
+ IndexError, ImportError, AssertionError):
+ clean_metadata(path, key_count)
+ else:
+ try:
+ # Remove existing metadata first before writing new metadata
+ clean_metadata(path, key_count)
+ write_metadata(path, metadata)
+ print_msg("%s MIGRATED" % (path))
+ except IOError as err:
+ if err.errno not in (errno.ENOENT, errno.ESTALE):
+ raise
+ elif metastr.startswith("{") and metastr.endswith("}"):
+ # It's not pickled and is already serialized, just return
+ print_msg("%s SKIPPED" % (path))
+ else:
+ # Metadata is malformed
+ clean_metadata(path, key_count)
+ print_msg("%s CLEANED" % (path))
+
+
+def walktree(top, pool, root=True):
+ """
+ Recursively walk the filesystem tree and migrate metadata of each object
+ found. Unlike os.walk(), this method performs a stat() syscall on each
+ file/directory at most once.
+ """
+
+ if root:
+ # The root of the volume is the account, which also has metadata
+ pool.apply_async(process_object, (top, ))
+
+ for f in os.listdir(top):
+ if root and f in (".trashcan", ".glusterfs", "async_pending", "tmp"):
+ continue
+ path = os.path.join(top, f)
+ try:
+ s = os.stat(path)
+ except OSError as err:
+ if err.errno in (errno.ENOENT, errno.ESTALE):
+ continue
+ raise
+ if stat.S_ISLNK(s.st_mode):
+ pass
+ elif stat.S_ISDIR(s.st_mode):
+ pool.apply_async(process_object, (path, ))
+ # Recurse into directory
+ walktree(path, pool, root=False)
+ elif stat.S_ISREG(s.st_mode):
+ pool.apply_async(process_object, (path, ))
+
+
+if __name__ == '__main__':
+
+ global options
+
+ usage = "usage: %prog [options] volume1_mountpath volume2_mountpath..."
+ description = """Account, container and object metadata are stored as \
+extended attributes of files and directories. This utility migrates metadata \
+stored in pickled format to JSON format."""
+ parser = OptionParser(usage=usage, description=description)
+ parser.add_option("-v", "--verbose", dest="verbose",
+ action="store_true", default=False,
+ help="Print object paths as they are processed.")
+ (options, mount_paths) = parser.parse_args()
+
+ if len(mount_paths) < 1:
+ print "Mountpoint path(s) missing."
+ parser.print_usage()
+ sys.exit(-1)
+
+ pool = multiprocessing.Pool(multiprocessing.cpu_count() * 2)
+
+ for path in mount_paths:
+ if os.path.isdir(path):
+ walktree(path, pool)
+
+ pool.close()
+ pool.join()
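As a quick spot check after running the tool on a mount point, one can read back a single object's metadata xattr and see which format it is in. A rough sketch along the same lines as process_object() above; the object path is hypothetical:

import json
import xattr

from gluster.swift.common.utils import METADATA_KEY

# Hypothetical path of an object on a mounted GlusterFS volume.
path = '/mnt/gluster-object/myvolume/mycontainer/myobject'

# Reads only the first xattr chunk; large metadata spills over into
# additional keys (METADATA_KEY + '1', '2', ...) as handled by
# process_object() above.
metastr = xattr.getxattr(path, METADATA_KEY)

if metastr.startswith('{') and metastr.endswith('}'):
    print('JSON: %r' % json.loads(metastr))
elif metastr.startswith('\x80\x02}') and metastr.endswith('.'):
    print('Still pickled; re-run gluster-swift-migrate-metadata')
else:
    print('Multi-chunk or malformed metadata')

Setting read_pickled_metadata = off in fs.conf (next hunk) is only safe once all objects on all volumes have been migrated.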
diff --git a/etc/fs.conf-gluster b/etc/fs.conf-gluster
index 6d2a791..31a5e6f 100644
--- a/etc/fs.conf-gluster
+++ b/etc/fs.conf-gluster
@@ -10,4 +10,15 @@ mount_ip = localhost
# numbers of objects, at the expense of an accurate count of combined bytes
# used by all objects in the container. For most installations "off" works
# fine.
-accurate_size_in_listing = off
\ No newline at end of file
+accurate_size_in_listing = off
+
+# In older versions of gluster-swift, metadata stored as xattrs of dirs/files
+# was serialized in PICKLE format. The PICKLE format is vulnerable to
+# exploits in deployments where a user has access to the backend filesystem
+# over FUSE/SMB: deserializing pickled metadata can execute malicious code
+# that an attacker stored as an xattr through the filesystem interface.
+# Although new metadata is always serialized in JSON format, existing
+# metadata already stored in PICKLE format is still loaded by default.
+# Turn this option 'off' once you have migrated all your metadata from
+# PICKLE to JSON format using the gluster-swift-migrate-metadata tool.
+read_pickled_metadata = on
diff --git a/gluster/swift/common/Glusterfs.py b/gluster/swift/common/Glusterfs.py
index 0098bff..6a2fdb2 100644
--- a/gluster/swift/common/Glusterfs.py
+++ b/gluster/swift/common/Glusterfs.py
@@ -37,6 +37,7 @@ _allow_mount_per_server = False
_implicit_dir_objects = False
_container_update_object_count = False
_account_update_container_count = False
+_read_pickled_metadata = True
if _fs_conf.read(os.path.join(SWIFT_DIR, 'fs.conf')):
try:
@@ -97,6 +98,13 @@ if _fs_conf.read(os.path.join(SWIFT_DIR, 'fs.conf')):
except (NoSectionError, NoOptionError):
pass
+ try:
+ _read_pickled_metadata = _fs_conf.get('DEFAULT',
+ 'read_pickled_metadata',
+ "on") in TRUE_VALUES
+ except (NoSectionError, NoOptionError):
+ pass
+
NAME = 'glusterfs'
diff --git a/gluster/swift/common/utils.py b/gluster/swift/common/utils.py
index 9951132..b6a5a09 100644
--- a/gluster/swift/common/utils.py
+++ b/gluster/swift/common/utils.py
@@ -15,13 +15,17 @@
import os
import stat
+import json
import errno
import logging
from hashlib import md5
from eventlet import sleep
import cPickle as pickle
+from cStringIO import StringIO
+import pickletools
from gluster.swift.common.exceptions import GlusterFileSystemIOError
from swift.common.exceptions import DiskFileNoSpace
+from swift.common.db import utf8encodekeys
from gluster.swift.common.fs_utils import do_getctime, do_getmtime, do_stat, \
do_listdir, do_walk, do_rmdir, do_log_rl, get_filename_from_fd, do_open, \
do_isdir, do_getsize, do_getxattr, do_setxattr, do_removexattr, do_read, \
@@ -57,6 +61,39 @@ PICKLE_PROTOCOL = 2
CHUNK_SIZE = 65536
+class SafeUnpickler(object):
+ """
+ Loading a pickled stream is potentially unsafe and exploitable because
+ the loading process can import modules/classes (via GLOBAL opcode) and
+ run any callable (via REDUCE opcode). As the metadata stored in Swift
+ is just a dictionary, we take away these powerful "features", thus
+ making the loading process safe. Hence, this is very Swift specific
+ and is not a general purpose safe unpickler.
+ """
+
+ __slots__ = 'OPCODE_BLACKLIST'
+ OPCODE_BLACKLIST = ('GLOBAL', 'REDUCE', 'BUILD', 'OBJ', 'NEWOBJ', 'INST',
+ 'EXT1', 'EXT2', 'EXT4')
+
+ @classmethod
+ def find_class(self, module, name):
+ # Do not allow importing of ANY module. This is really redundant as
+ # we block the OPCODEs that result in invocation of this method.
+ raise pickle.UnpicklingError('Potentially unsafe pickle')
+
+ @classmethod
+ def loads(self, string):
+ for opcode in pickletools.genops(string):
+ if opcode[0].name in self.OPCODE_BLACKLIST:
+ raise pickle.UnpicklingError('Potentially unsafe pickle')
+ orig_unpickler = pickle.Unpickler(StringIO(string))
+ orig_unpickler.find_global = self.find_class
+ return orig_unpickler.load()
+
+
+pickle.loads = SafeUnpickler.loads
+
+
def normalize_timestamp(timestamp):
"""
Format a timestamp (string or numeric) into a standardized
@@ -73,7 +110,7 @@ def normalize_timestamp(timestamp):
def serialize_metadata(metadata):
- return pickle.dumps(metadata, PICKLE_PROTOCOL)
+ return json.dumps(metadata, separators=(',', ':'))
def deserialize_metadata(metastr):
@@ -81,16 +118,35 @@ def deserialize_metadata(metastr):
Returns dict populated with metadata if deserializing is successful.
Returns empty dict if deserialzing fails.
"""
- if metastr.startswith('\x80\x02}') and metastr.endswith('.'):
- # Assert that the metadata was indeed pickled before attempting
- # to unpickle. This only *reduces* risk of malicious shell code
- # being executed. However, it does NOT fix anything.
+ if metastr.startswith('\x80\x02}') and metastr.endswith('.') and \
+ Glusterfs._read_pickled_metadata:
+ # Assert that the serialized metadata is pickled using
+ # pickle protocol 2 and is a dictionary.
try:
return pickle.loads(metastr)
- except (pickle.UnpicklingError, EOFError, AttributeError,
- IndexError, ImportError, AssertionError):
+ except Exception:
+ logging.warning("pickle.loads() failed.", exc_info=True)
+ return {}
+ elif metastr.startswith('{') and metastr.endswith('}'):
+
+ def _list_to_tuple(d):
+ for k, v in d.iteritems():
+ if isinstance(v, list):
+ d[k] = tuple(i.encode('utf-8')
+ if isinstance(i, unicode) else i for i in v)
+ if isinstance(v, unicode):
+ d[k] = v.encode('utf-8')
+ return d
+
+ try:
+ metadata = json.loads(metastr, object_hook=_list_to_tuple)
+ utf8encodekeys(metadata)
+ return metadata
+ except (UnicodeDecodeError, ValueError):
+ logging.warning("json.loads() failed.", exc_info=True)
return {}
else:
+ logging.warning("Invalid metadata format (neither PICKLE nor JSON)")
return {}
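To see why the opcode blacklist in SafeUnpickler is sufficient for this use case, compare the opcodes of a benign metadata pickle with those of a malicious one; only the latter needs GLOBAL and REDUCE to import and invoke a callable. A rough illustration, not part of the patch:

import pickletools
import cPickle as pickle

benign = pickle.dumps({'key1': 'val1', 'key2': 'val2'}, 2)

class Exploit(object):
    # Same pattern as the TestSafeUnpickler exploit in the unit tests below.
    def __reduce__(self):
        return (__import__('os').system, ('echo owned',))

malicious = pickle.dumps(Exploit(), 2)

for label, payload in (('benign', benign), ('malicious', malicious)):
    names = set(op.name for op, arg, pos in pickletools.genops(payload))
    # A plain dict of strings needs only dict/string opcodes; rebuilding a
    # callable additionally needs GLOBAL (import) and REDUCE (call), both of
    # which SafeUnpickler.loads() rejects.
    print('%s: %s' % (label, sorted(names & set(['GLOBAL', 'REDUCE']))))

Note that JSON has no tuple type, which is why the JSON branch of deserialize_metadata() converts list values back to tuples and re-encodes unicode strings as utf-8 in _list_to_tuple().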
diff --git a/setup.py b/setup.py
index 5673bdc..214d8f1 100644
--- a/setup.py
+++ b/setup.py
@@ -43,6 +43,7 @@ setup(
scripts=[
'bin/gluster-swift-gen-builders',
'bin/gluster-swift-print-metadata',
+ 'bin/gluster-swift-migrate-metadata',
'gluster/swift/common/middleware/gswauth/bin/gswauth-add-account',
'gluster/swift/common/middleware/gswauth/bin/gswauth-add-user',
'gluster/swift/common/middleware/gswauth/bin/gswauth-cleanup-tokens',
diff --git a/test/functional/tests.py b/test/functional/tests.py
index ad87d7e..b8633b0 100644
--- a/test/functional/tests.py
+++ b/test/functional/tests.py
@@ -1125,6 +1125,9 @@ class TestFile(Base):
self.assert_status(400)
def testMetadataNumberLimit(self):
+ raise SkipTest("Bad test")
+ # TODO(ppai): Fix it in upstream swift first
+ # Refer to comments below
number_limit = load_constraint('max_meta_count')
size_limit = load_constraint('max_meta_overall_size')
@@ -1137,10 +1140,13 @@ class TestFile(Base):
metadata = {}
while len(metadata.keys()) < i:
key = Utils.create_ascii_name()
+ # The following line returns a valid utf8 byte sequence
val = Utils.create_name()
if len(key) > j:
key = key[:j]
+ # The slicing done below can make the utf-8 byte
+ # sequence invalid, in which case it cannot be decoded.
val = val[:j]
size += len(key) + len(val)
@@ -1154,6 +1160,9 @@ class TestFile(Base):
self.assert_status(201)
self.assert_(file_item.sync_metadata())
self.assert_status((201, 202))
+ self.assert_(file_item.initialize())
+ self.assert_status(200)
+ self.assertEqual(file_item.metadata, metadata)
else:
self.assertRaises(ResponseError, file_item.write)
self.assert_status(400)
@@ -1315,6 +1324,9 @@ class TestFile(Base):
self.assert_(file_item.write())
self.assert_status(201)
self.assert_(file_item.sync_metadata())
+ self.assert_(file_item.initialize())
+ self.assert_status(200)
+ self.assertEqual(file_item.metadata, metadata)
else:
self.assertRaises(ResponseError, file_item.write)
self.assert_status(400)
diff --git a/test/unit/common/test_utils.py b/test/unit/common/test_utils.py
index 1fea1fc..0b173be 100644
--- a/test/unit/common/test_utils.py
+++ b/test/unit/common/test_utils.py
@@ -16,6 +16,7 @@
""" Tests for common.utils """
import os
+import json
import unittest
import errno
import xattr
@@ -23,10 +24,12 @@ import tempfile
import hashlib
import tarfile
import shutil
+import cPickle as pickle
from collections import defaultdict
from mock import patch, Mock
from gluster.swift.common import utils, Glusterfs
-from gluster.swift.common.utils import deserialize_metadata, serialize_metadata
+from gluster.swift.common.utils import deserialize_metadata, \
+ serialize_metadata, PICKLE_PROTOCOL
from gluster.swift.common.exceptions import GlusterFileSystemOSError,\
GlusterFileSystemIOError
from swift.common.exceptions import DiskFileNoSpace
@@ -138,6 +141,32 @@ def _mock_os_fsync(fd):
return
+class TestSafeUnpickler(unittest.TestCase):
+
+ class Exploit(object):
+ def __reduce__(self):
+ return (os.system, ('touch /tmp/pickle-exploit',))
+
+ def test_loads(self):
+ valid_md = {'key1': 'val1', 'key2': 'val2'}
+ for protocol in (0, 1, 2):
+ valid_dump = pickle.dumps(valid_md, protocol)
+ mal_dump = pickle.dumps(self.Exploit(), protocol)
+ # malicious dump is appended to valid dump
+ payload1 = valid_dump[:-1] + mal_dump
+ # malicious dump is prefixed to valid dump
+ payload2 = mal_dump[:-1] + valid_dump
+ # entire dump is malicious
+ payload3 = mal_dump
+ for payload in (payload1, payload2, payload3):
+ try:
+ utils.SafeUnpickler.loads(payload)
+ except pickle.UnpicklingError as err:
+ self.assertTrue('Potentially unsafe pickle' in err)
+ else:
+ self.fail("Expecting cPickle.UnpicklingError")
+
+
class TestUtils(unittest.TestCase):
""" Tests for common.utils """
@@ -321,6 +350,57 @@ class TestUtils(unittest.TestCase):
assert _xattr_op_cnt['get'] == 1, "%r" % _xattr_op_cnt
assert _xattr_op_cnt['set'] == 0, "%r" % _xattr_op_cnt
+ def test_deserialize_metadata_pickle(self):
+ orig__read_pickled_metadata = Glusterfs._read_pickled_metadata
+ orig_md = {'key1': 'value1', 'key2': 'value2'}
+ pickled_md = pickle.dumps(orig_md, PICKLE_PROTOCOL)
+ _m_pickle_loads = Mock(return_value={})
+ try:
+ with patch('gluster.swift.common.utils.pickle.loads',
+ _m_pickle_loads):
+ # Conf option turned off
+ Glusterfs._read_pickled_metadata = False
+ # pickled
+ utils.deserialize_metadata(pickled_md)
+ self.assertFalse(_m_pickle_loads.called)
+ _m_pickle_loads.reset_mock()
+ # not pickled
+ utils.deserialize_metadata("not_pickle")
+ self.assertFalse(_m_pickle_loads.called)
+ _m_pickle_loads.reset_mock()
+
+ # Conf option turned on
+ Glusterfs._read_pickled_metadata = True
+ # pickled
+ md = utils.deserialize_metadata(pickled_md)
+ self.assertTrue(_m_pickle_loads.called)
+ self.assertTrue(isinstance(md, dict))
+ _m_pickle_loads.reset_mock()
+ # not pickled
+ utils.deserialize_metadata("not_pickle")
+ self.assertFalse(_m_pickle_loads.called)
+ _m_pickle_loads.reset_mock()
+
+ # malformed pickle
+ _m_pickle_loads.side_effect = pickle.UnpicklingError
+ md = utils.deserialize_metadata("malformed_pickle")
+ self.assertTrue(isinstance(md, dict))
+ finally:
+ Glusterfs._read_pickled_metadata = orig__read_pickled_metadata
+
+ def test_deserialize_metadata_json(self):
+ orig_md = {'key1': 'value1', 'key2': 'value2'}
+ json_md = json.dumps(orig_md, separators=(',', ':'))
+ _m_json_loads = Mock(return_value={})
+ with patch('gluster.swift.common.utils.json.loads',
+ _m_json_loads):
+ utils.deserialize_metadata("not_json")
+ self.assertFalse(_m_json_loads.called)
+ _m_json_loads.reset_mock()
+ utils.deserialize_metadata("{fake_valid_json}")
+ self.assertTrue(_m_json_loads.called)
+ _m_json_loads.reset_mock()
+
def test_add_timestamp_empty(self):
orig = {}
res = utils._add_timestamp(orig)