From fddb5d4a918affe7837d523b56e53e33f3ae5408 Mon Sep 17 00:00:00 2001 From: Prashanth Pai Date: Fri, 11 Mar 2016 17:56:46 +0530 Subject: Open object only if it's going to be read Open()ing an object is necessarry only in two cases: * Serving a GET request * Recalculating etag when metadata is stale (can be triggered by any type of request) This change ensures that for requests other than GET, a file is not opened if the metadata is valid (size and etag accurate). Note that if metadata is stale, the file is still opened and read to compute etag. This patch does not change the behaviour of triggering metadata validation and regeneration for non-GET requests. Change-Id: Icefa4dec7d715ec9e6dd68ae7fe89a0d90fe71b3 Signed-off-by: Prashanth Pai Reviewed-on: http://review.gluster.org/13684 Reviewed-by: Thiago da Silva Tested-by: Thiago da Silva --- gluster/swift/obj/diskfile.py | 44 ++++++++++++++++++++++++++++++++------ test/unit/obj/test_diskfile.py | 48 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 6 deletions(-) diff --git a/gluster/swift/obj/diskfile.py b/gluster/swift/obj/diskfile.py index 14625d9..2e0837e 100644 --- a/gluster/swift/obj/diskfile.py +++ b/gluster/swift/obj/diskfile.py @@ -613,11 +613,13 @@ class DiskFile(object): raise DiskFileNotExist raise try: - self._stat = do_fstat(self._fd) + if not self._stat: + self._stat = do_fstat(self._fd) self._is_dir = stat.S_ISDIR(self._stat.st_mode) obj_size = self._stat.st_size - self._metadata = read_metadata(self._fd) + if not self._metadata: + self._metadata = read_metadata(self._fd) if not validate_object(self._metadata, self._stat): self._metadata = create_object_metadata(self._fd, self._stat, self._metadata) @@ -731,12 +733,42 @@ class DiskFile(object): This method is invoked by Swift code in POST, PUT, HEAD and DELETE path metadata = disk_file.read_metadata() + The operations performed here is very similar to those made in open(). + This is to avoid opening and closing of file (two syscalls over + network). IOW, this optimization addresses the case where the fd + returned by open() isn't going to be used i.e the file is not read (GET + or metadata recalculation) + :returns: metadata dictionary for an object :raises DiskFileError: this implementation will raise the same errors as the `open()` method. """ - with self.open(): - return self.get_metadata() + try: + self._metadata = read_metadata(self._data_file) + except (OSError, IOError) as err: + if err.errno in (errno.ENOENT, errno.ESTALE): + raise DiskFileNotExist + raise err + + if self._metadata and self._is_object_expired(self._metadata): + raise DiskFileExpired(metadata=self._metadata) + + try: + self._stat = do_stat(self._data_file) + except (OSError, IOError) as err: + if err.errno in (errno.ENOENT, errno.ESTALE): + raise DiskFileNotExist + raise err + + if not validate_object(self._metadata, self._stat): + # Metadata is stale/invalid. So open the object for reading + # to update Etag and other metadata. + with self.open(): + return self._metadata + else: + # Metadata is valid. Don't have to open the file. + self._filter_metadata() + return self._metadata def reader(self, iter_hook=None, keep_cache=False): """ @@ -1046,9 +1078,9 @@ class DiskFile(object): errors as the `create()` method. """ try: - metadata = read_metadata(self._data_file) + metadata = self._metadata or read_metadata(self._data_file) except (IOError, OSError) as err: - if err.errno != errno.ENOENT: + if err.errno not in (errno.ESTALE, errno.ENOENT): raise else: if metadata[X_TIMESTAMP] >= timestamp: diff --git a/test/unit/obj/test_diskfile.py b/test/unit/obj/test_diskfile.py index 9ef186b..b190526 100644 --- a/test/unit/obj/test_diskfile.py +++ b/test/unit/obj/test_diskfile.py @@ -205,6 +205,54 @@ class TestDiskFile(unittest.TestCase): self.assertRaises(DiskFileNotOpen, gdf.reader) self.assertRaises(DiskFileNotOpen, gdf.__enter__) + def test_read_metadata_optimize_open_close(self): + the_path = os.path.join(self.td, "vol0", "bar") + the_file = os.path.join(the_path, "z") + os.makedirs(the_path) + with open(the_file, "wb") as fd: + fd.write("1234") + init_md = { + 'X-Type': 'Object', + 'X-Object-Type': 'file', + 'Content-Length': 4, + 'ETag': md5("1234").hexdigest(), + 'X-Timestamp': normalize_timestamp(os.stat(the_file).st_ctime), + 'Content-Type': 'application/octet-stream'} + _metadata[_mapit(the_file)] = init_md + gdf = self._get_diskfile("vol0", "p57", "ufo47", "bar", "z") + assert gdf._obj == "z" + assert gdf._fd is None + assert gdf._disk_file_open is False + assert gdf._metadata is None + assert not gdf._is_dir + + # Case 1 + # Ensure that reading metadata for non-GET requests + # does not incur opening and closing the file when + # metadata is NOT stale. + mock_open = Mock() + mock_close = Mock() + with mock.patch("gluster.swift.obj.diskfile.do_open", mock_open): + with mock.patch("gluster.swift.obj.diskfile.do_close", mock_close): + md = gdf.read_metadata() + self.assertEqual(md, init_md) + self.assertFalse(mock_open.called) + self.assertFalse(mock_close.called) + + # Case 2 + # Ensure that reading metadata for non-GET requests + # still opens and reads the file when metadata is stale + with open(the_file, "a") as fd: + # Append to the existing file to make the stored metadata + # invalid/stale. + fd.write("5678") + md = gdf.read_metadata() + # Check that the stale metadata is recalculated to account for + # change in file content + self.assertNotEqual(md, init_md) + self.assertEqual(md['Content-Length'], 8) + self.assertEqual(md['ETag'], md5("12345678").hexdigest()) + def test_open_and_close(self): mock_close = Mock() -- cgit