From 43da7927560811c55838a6a1c2d0ee1a52aada40 Mon Sep 17 00:00:00 2001 From: Prashanth Pai Date: Wed, 2 Mar 2016 12:51:22 +0530 Subject: Don't fetch metadata for plain object listing Fetch metadata (xattr) for objects in a container ONLY when the client asks for it (using content-type indicating JSON or XML response). This avoids a lot of unnecessarry stat() and getxattr() calls whose results would anyways be unused. The performance gain is obvious in this case. Change-Id: I4c8b0516dedd04553a5ed450bc855cafbfabada9 Signed-off-by: Prashanth Pai Reviewed-on: http://review.gluster.org/13573 Reviewed-by: Thiago da Silva Tested-by: Thiago da Silva --- gluster/swift/common/DiskDir.py | 14 +++++++++- gluster/swift/container/server.py | 54 ++++++++++++++++++++++++++++++++++++++- test/unit/common/test_diskdir.py | 42 ++++++++++++++++++++++++++++++ 3 files changed, 108 insertions(+), 2 deletions(-) diff --git a/gluster/swift/common/DiskDir.py b/gluster/swift/common/DiskDir.py index 31b923a..36504a6 100644 --- a/gluster/swift/common/DiskDir.py +++ b/gluster/swift/common/DiskDir.py @@ -368,7 +368,8 @@ class DiskDir(DiskCommon): def list_objects_iter(self, limit, marker, end_marker, prefix, delimiter, path=None, - storage_policy_index=0): + storage_policy_index=0, + out_content_type=None): """ Returns tuple of name, created_at, size, content_type, etag. """ @@ -390,6 +391,7 @@ class DiskDir(DiskCommon): if objects: objects.sort() else: + # No objects in container , return empty list return container_list if end_marker: @@ -417,6 +419,16 @@ class DiskDir(DiskCommon): objects = filter_delimiter(objects, delimiter, prefix, marker, path) + if out_content_type == 'text/plain': + # The client is only asking for a plain list of objects and NOT + # asking for any extended information about objects such as + # bytes used or etag. + for obj in objects: + container_list.append((obj, 0, 0, 0, 0)) + if len(container_list) >= limit: + break + return container_list + count = 0 for obj in objects: obj_path = os.path.join(self.datadir, obj) diff --git a/gluster/swift/container/server.py b/gluster/swift/container/server.py index 173976a..e62076a 100644 --- a/gluster/swift/container/server.py +++ b/gluster/swift/container/server.py @@ -23,7 +23,13 @@ from swift.container import server from gluster.swift.common.DiskDir import DiskDir from swift.common.utils import public, timing_stats from swift.common.exceptions import DiskFileNoSpace -from swift.common.swob import HTTPInsufficientStorage +from swift.common.swob import HTTPInsufficientStorage, HTTPNotFound, \ + HTTPPreconditionFailed +from swift.common.request_helpers import get_param, get_listing_content_type, \ + split_and_validate_path +from swift.common.constraints import check_mount +from swift.container.server import gen_resp_headers +from swift.common import constraints class ContainerController(server.ContainerController): @@ -77,6 +83,52 @@ class ContainerController(server.ContainerController): drive = req.split_path(1, 1, True) return HTTPInsufficientStorage(drive=drive, request=req) + @public + @timing_stats() + def GET(self, req): + """ + Handle HTTP GET request. + + This method is exact copy of swift.container.server.GET() except + that this version of it passes 'out_content_type' information to + broker.list_objects_iter() + """ + drive, part, account, container, obj = split_and_validate_path( + req, 4, 5, True) + path = get_param(req, 'path') + prefix = get_param(req, 'prefix') + delimiter = get_param(req, 'delimiter') + if delimiter and (len(delimiter) > 1 or ord(delimiter) > 254): + # delimiters can be made more flexible later + return HTTPPreconditionFailed(body='Bad delimiter') + marker = get_param(req, 'marker', '') + end_marker = get_param(req, 'end_marker') + limit = constraints.CONTAINER_LISTING_LIMIT + given_limit = get_param(req, 'limit') + if given_limit and given_limit.isdigit(): + limit = int(given_limit) + if limit > constraints.CONTAINER_LISTING_LIMIT: + return HTTPPreconditionFailed( + request=req, + body='Maximum limit is %d' + % constraints.CONTAINER_LISTING_LIMIT) + out_content_type = get_listing_content_type(req) + if self.mount_check and not check_mount(self.root, drive): + return HTTPInsufficientStorage(drive=drive, request=req) + broker = self._get_container_broker(drive, part, account, container, + pending_timeout=0.1, + stale_reads_ok=True) + info, is_deleted = broker.get_info_is_deleted() + resp_headers = gen_resp_headers(info, is_deleted=is_deleted) + if is_deleted: + return HTTPNotFound(request=req, headers=resp_headers) + container_list = broker.list_objects_iter( + limit, marker, end_marker, prefix, delimiter, path, + storage_policy_index=info['storage_policy_index'], + out_content_type=out_content_type) + return self.create_listing(req, out_content_type, info, resp_headers, + broker.metadata, container_list, container) + def app_factory(global_conf, **local_conf): """paste.deploy app factory for creating WSGI container server apps.""" diff --git a/test/unit/common/test_diskdir.py b/test/unit/common/test_diskdir.py index 0ad607b..3c91016 100644 --- a/test/unit/common/test_diskdir.py +++ b/test/unit/common/test_diskdir.py @@ -801,6 +801,48 @@ class TestContainerBroker(unittest.TestCase): self.assertEquals([row[0] for row in listing], ['pets/fish/a', 'pets/fish/b']) + def test_list_objects_iter_plain_listing(self): + broker = self._get_broker(account='a', container='c') + broker.initialize(self.initial_ts) + for obj1 in xrange(10): + for obj2 in xrange(10): + # Create 100 objects + self._create_file('dir%d/obj%d' % (obj1, obj2)) + + # Check and assert that only name is fetched. + listing = broker.list_objects_iter(500, '', None, None, '', + out_content_type="text/plain") + self.assertEquals(len(listing), 100) + for (name, ts, clen, ctype, etag) in listing: + self.assertEqual(ts, 0) + self.assertEqual(clen, 0) + self.assertEqual(ctype, 0) + self.assertEqual(etag, 0) + + # Check that limit is still honored. + listing = broker.list_objects_iter(25, '', None, None, '', + out_content_type="text/plain") + self.assertEquals(len(listing), 25) + + # Confirm that metadata of objects (xattrs) are not fetched when + # out_content_type is text/plain + _m_r_md = Mock(return_value={}) + with patch('gluster.swift.common.utils.read_metadata', _m_r_md): + listing = broker.list_objects_iter(500, '', None, None, '', + out_content_type="text/plain") + self.assertEquals(len(listing), 100) + # 10 getxattr() calls for 10 directories, no getxattr() on objects + self.assertEqual(_m_r_md.call_count, 10) + + # Confirm that metadata of objects (xattrs) are still fetched when + # out_content_type is NOT text/plain + _m_r_md.reset_mock() + with patch('gluster.swift.common.utils.read_metadata', _m_r_md): + listing = broker.list_objects_iter(500, '', None, None, '') + self.assertEquals(len(listing), 100) + # 10 getxattr() calls for 10 directories and 100 more for 100 objects + self.assertEqual(_m_r_md.call_count, 110) + def test_double_check_trailing_delimiter(self): # Test swift.common.db.ContainerBroker.list_objects_iter for a # container that has an odd file with a trailing delimiter -- cgit