From 6478b569c379dd0520ac3a46789284af5eb6cb4d Mon Sep 17 00:00:00 2001 From: Peter Portante Date: Tue, 23 Oct 2012 11:47:44 -0400 Subject: object-storeage: refactor to use one memcache key Address BZ 868087: https://bugzilla.redhat.com/show_bug.cgi?id=868087 Store all of the data needed to generate the correct set of container and account details in one object, respectively, rather than using three seperate memcache keys. Change-Id: I46bf60c405b37cdb22727965bfd67bc5c410e77c BUG: 868087 Signed-off-by: Peter Portante Reviewed-on: http://review.gluster.org/4139 Reviewed-by: Kaleb KEITHLEY Tested-by: Gluster Build System --- swift/1.4.8/plugins/utils.py | 139 ++++----- .../test/unit/plugins/data/account_tree.tar.bz2 | Bin 0 -> 228 bytes .../test/unit/plugins/data/container_tree.tar.bz2 | Bin 0 -> 282 bytes swift/1.4.8/test/unit/plugins/test_utils.py | 320 +++++++++++++++++++++ 4 files changed, 394 insertions(+), 65 deletions(-) create mode 100644 swift/1.4.8/test/unit/plugins/data/account_tree.tar.bz2 create mode 100644 swift/1.4.8/test/unit/plugins/data/container_tree.tar.bz2 (limited to 'swift/1.4.8') diff --git a/swift/1.4.8/plugins/utils.py b/swift/1.4.8/plugins/utils.py index 1e666d709..c011b681f 100644 --- a/swift/1.4.8/plugins/utils.py +++ b/swift/1.4.8/plugins/utils.py @@ -49,6 +49,9 @@ DEFAULT_UID = -1 DEFAULT_GID = -1 PICKLE_PROTOCOL = 2 CHUNK_SIZE = 65536 +MEMCACHE_KEY_PREFIX = 'gluster.swift.' +MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX = MEMCACHE_KEY_PREFIX + 'account.details.' +MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX = MEMCACHE_KEY_PREFIX + 'container.details.' def mkdirs(path): @@ -421,108 +424,114 @@ def update_list(path, const_path, dirs=[], files=[], object_count=0, obj_list) return object_count, bytes_used -def get_container_details_from_fs(cont_path, const_path, - memcache=None): + +class ContainerDetails(object): + def __init__(self, bytes_used, object_count, obj_list, dir_list): + self.bytes_used = bytes_used + self.object_count = object_count + self.obj_list = obj_list + self.dir_list = dir_list + + +def _get_container_details_from_fs(cont_path): """ get container details by traversing the filesystem """ bytes_used = 0 object_count = 0 - obj_list=[] + obj_list = [] dir_list = [] if os.path.isdir(cont_path): for (path, dirs, files) in os.walk(cont_path): - object_count, bytes_used = update_list(path, const_path, dirs, files, + object_count, bytes_used = update_list(path, cont_path, dirs, files, object_count, bytes_used, obj_list) - dir_list.append(path + ':' + str(do_stat(path).st_mtime)) - - if memcache: - memcache.set(strip_obj_storage_path(cont_path), obj_list) - memcache.set(strip_obj_storage_path(cont_path) + '-dir_list', - ','.join(dir_list)) - memcache.set(strip_obj_storage_path(cont_path) + '-cont_meta', - [object_count, bytes_used]) - - return obj_list, object_count, bytes_used - -def get_container_details_from_memcache(cont_path, const_path, - memcache): - """ - get container details stored in memcache - """ - - bytes_used = 0 - object_count = 0 - obj_list=[] - - dir_contents = memcache.get(strip_obj_storage_path(cont_path) + '-dir_list') - if not dir_contents: - return get_container_details_from_fs(cont_path, const_path, - memcache=memcache) - - for i in dir_contents.split(','): - path, mtime = i.split(':') - if mtime != str(do_stat(path).st_mtime): - return get_container_details_from_fs(cont_path, const_path, - memcache=memcache) + dir_list.append((path, do_stat(path).st_mtime)) - obj_list = memcache.get(strip_obj_storage_path(cont_path)) - - object_count, bytes_used = memcache.get(strip_obj_storage_path(cont_path) + '-cont_meta') - - return obj_list, object_count, bytes_used + return ContainerDetails(bytes_used, object_count, obj_list, dir_list) def get_container_details(cont_path, memcache=None): """ Return object_list, object_count and bytes_used. """ + mkey = '' if memcache: - object_list, object_count, bytes_used = get_container_details_from_memcache(cont_path, cont_path, - memcache) + mkey = MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + strip_obj_storage_path(cont_path) + cd = memcache.get(mkey) + if cd: + if not cd.dir_list: + cd = None + else: + for (path, mtime) in cd.dir_list: + if mtime != do_stat(path).st_mtime: + cd = None else: - object_list, object_count, bytes_used = get_container_details_from_fs(cont_path, cont_path) + cd = None + if not cd: + cd = _get_container_details_from_fs(cont_path) + if memcache: + memcache.set(mkey, cd) + return cd.obj_list, cd.object_count, cd.bytes_used + + +class AccountDetails(object): + """ A simple class to store the three pieces of information associated + with an account: + + 1. The last known modification time + 2. The count of containers in the following list + 3. The list of containers + """ + def __init__(self, mtime, container_count, container_list): + self.mtime = mtime + self.container_count = container_count + self.container_list = container_list - return object_list, object_count, bytes_used -def get_account_details_from_fs(acc_path, memcache=None): +def _get_account_details_from_fs(acc_path, acc_stats): container_list = [] container_count = 0 - if os.path.isdir(acc_path): + if not acc_stats: + acc_stats = do_stat(acc_path) + is_dir = (acc_stats.st_mode & 0040000) != 0 + if is_dir: for name in do_listdir(acc_path): - if not os.path.isdir(acc_path + '/' + name) or \ - name.lower() == 'tmp' or name.lower() == 'async_pending': + if name.lower() == 'tmp' \ + or name.lower() == 'async_pending' \ + or not os.path.isdir(os.path.join(acc_path, name)): continue container_count += 1 container_list.append(name) - if memcache: - memcache.set(strip_obj_storage_path(acc_path) + '_container_list', container_list) - memcache.set(strip_obj_storage_path(acc_path)+'_mtime', str(do_stat(acc_path).st_mtime)) - memcache.set(strip_obj_storage_path(acc_path)+'_container_count', container_count) - - return container_list, container_count - -def get_account_details_from_memcache(acc_path, memcache=None): - if memcache: - mtime = memcache.get(strip_obj_storage_path(acc_path)+'_mtime') - if not mtime or mtime != str(do_stat(acc_path).st_mtime): - return get_account_details_from_fs(acc_path, memcache) - container_list = memcache.get(strip_obj_storage_path(acc_path) + '_container_list') - container_count = memcache.get(strip_obj_storage_path(acc_path)+'_container_count') - return container_list, container_count + return AccountDetails(acc_stats.st_mtime, container_count, container_list) def get_account_details(acc_path, memcache=None): """ Return container_list and container_count. """ + acc_stats = None + mkey = '' if memcache: - return get_account_details_from_memcache(acc_path, memcache) + mkey = MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + strip_obj_storage_path(acc_path) + ad = memcache.get(mkey) + if ad: + # FIXME: Do we really need to stat the file? If we are object + # only, then we can track the other Swift HTTP APIs that would + # modify the account and invalidate the cached entry there. If we + # are not object only, are we even called on this path? + acc_stats = do_stat(acc_path) + if ad.mtime != acc_stats.st_mtime: + ad = None else: - return get_account_details_from_fs(acc_path, memcache) + ad = None + if not ad: + ad = _get_account_details_from_fs(acc_path, acc_stats) + if memcache: + memcache.set(mkey, ad) + return ad.container_list, ad.container_count def _get_etag(path): etag = md5() diff --git a/swift/1.4.8/test/unit/plugins/data/account_tree.tar.bz2 b/swift/1.4.8/test/unit/plugins/data/account_tree.tar.bz2 new file mode 100644 index 000000000..cb23e4dd7 Binary files /dev/null and b/swift/1.4.8/test/unit/plugins/data/account_tree.tar.bz2 differ diff --git a/swift/1.4.8/test/unit/plugins/data/container_tree.tar.bz2 b/swift/1.4.8/test/unit/plugins/data/container_tree.tar.bz2 new file mode 100644 index 000000000..b4a149285 Binary files /dev/null and b/swift/1.4.8/test/unit/plugins/data/container_tree.tar.bz2 differ diff --git a/swift/1.4.8/test/unit/plugins/test_utils.py b/swift/1.4.8/test/unit/plugins/test_utils.py index 0731c7865..fb0cb1f2c 100644 --- a/swift/1.4.8/test/unit/plugins/test_utils.py +++ b/swift/1.4.8/test/unit/plugins/test_utils.py @@ -22,6 +22,8 @@ import xattr import cPickle as pickle import tempfile import hashlib +import tarfile +import shutil from swift.common.utils import normalize_timestamp from collections import defaultdict from plugins import utils @@ -105,6 +107,17 @@ def _destroyxattr(): global _xattrs; _xattrs = None +class SimMemcache(object): + def __init__(self): + self._d = {} + + def get(self, key): + return self._d.get(key, None) + + def set(self, key, value): + self._d[key] = value + + class TestUtils(unittest.TestCase): """ Tests for plugins.utils """ @@ -496,3 +509,310 @@ class TestUtils(unittest.TestCase): assert md[utils.X_CONTAINER_COUNT] == (0, 0) finally: os.rmdir(td) + + def test_container_details_uncached(self): + the_path = "/tmp/bar" + def mock_get_container_details_from_fs(cont_path): + bu = 5 + oc = 1 + ol = ['foo',] + dl = [('a',100),] + return utils.ContainerDetails(bu, oc, ol, dl) + orig_gcdff = utils._get_container_details_from_fs + utils._get_container_details_from_fs = mock_get_container_details_from_fs + try: + retval = utils.get_container_details(the_path) + cd = mock_get_container_details_from_fs(the_path) + assert retval == (cd.obj_list, cd.object_count, cd.bytes_used) + finally: + utils._get_container_details_from_fs = orig_gcdff + + def test_container_details_cached_hit(self): + mc = SimMemcache() + the_path = "/tmp/bar" + def mock_get_container_details_from_fs(cont_path, bu_p=5): + bu = bu_p + oc = 1 + ol = ['foo',] + dl = [('a',100),] + return utils.ContainerDetails(bu, oc, ol, dl) + def mock_do_stat(path): + class MockStat(object): + def __init__(self, mtime): + self.st_mtime = mtime + return MockStat(100) + cd = mock_get_container_details_from_fs(the_path, bu_p=6) + mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path), cd) + orig_gcdff = utils._get_container_details_from_fs + utils._get_container_details_from_fs = mock_get_container_details_from_fs + orig_ds = utils.do_stat + utils.do_stat = mock_do_stat + try: + retval = utils.get_container_details(the_path, memcache=mc) + # If it did not properly use memcache, the default mocked version + # of get details from fs would return 5 bytes used instead of the + # 6 we specified above. + cd = mock_get_container_details_from_fs(the_path, bu_p=6) + assert retval == (cd.obj_list, cd.object_count, cd.bytes_used) + finally: + utils._get_container_details_from_fs = orig_gcdff + utils.do_stat = orig_ds + + def test_container_details_cached_miss_key(self): + mc = SimMemcache() + the_path = "/tmp/bar" + def mock_get_container_details_from_fs(cont_path, bu_p=5): + bu = bu_p + oc = 1 + ol = ['foo',] + dl = [('a',100),] + return utils.ContainerDetails(bu, oc, ol, dl) + def mock_do_stat(path): + # Be sure we don't miss due to mtimes not matching + self.fail("do_stat should not have been called") + cd = mock_get_container_details_from_fs(the_path + "u", bu_p=6) + mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path + "u"), cd) + orig_gcdff = utils._get_container_details_from_fs + utils._get_container_details_from_fs = mock_get_container_details_from_fs + orig_ds = utils.do_stat + utils.do_stat = mock_do_stat + try: + retval = utils.get_container_details(the_path, memcache=mc) + cd = mock_get_container_details_from_fs(the_path) + assert retval == (cd.obj_list, cd.object_count, cd.bytes_used) + mkey = utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path) + assert mkey in mc._d + finally: + utils._get_container_details_from_fs = orig_gcdff + utils.do_stat = orig_ds + + def test_container_details_cached_miss_dir_list(self): + mc = SimMemcache() + the_path = "/tmp/bar" + def mock_get_container_details_from_fs(cont_path, bu_p=5): + bu = bu_p + oc = 1 + ol = ['foo',] + dl = [] + return utils.ContainerDetails(bu, oc, ol, dl) + def mock_do_stat(path): + # Be sure we don't miss due to mtimes not matching + self.fail("do_stat should not have been called") + cd = mock_get_container_details_from_fs(the_path, bu_p=6) + mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path), cd) + orig_gcdff = utils._get_container_details_from_fs + utils._get_container_details_from_fs = mock_get_container_details_from_fs + orig_ds = utils.do_stat + utils.do_stat = mock_do_stat + try: + retval = utils.get_container_details(the_path, memcache=mc) + cd = mock_get_container_details_from_fs(the_path) + assert retval == (cd.obj_list, cd.object_count, cd.bytes_used) + mkey = utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path) + assert mkey in mc._d + assert 5 == mc._d[mkey].bytes_used + finally: + utils._get_container_details_from_fs = orig_gcdff + utils.do_stat = orig_ds + + def test_container_details_cached_miss_mtime(self): + mc = SimMemcache() + the_path = "/tmp/bar" + def mock_get_container_details_from_fs(cont_path, bu_p=5): + bu = bu_p + oc = 1 + ol = ['foo',] + dl = [('a',100),] + return utils.ContainerDetails(bu, oc, ol, dl) + def mock_do_stat(path): + # Be sure we miss due to mtimes not matching + class MockStat(object): + def __init__(self, mtime): + self.st_mtime = mtime + return MockStat(200) + cd = mock_get_container_details_from_fs(the_path, bu_p=6) + mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path), cd) + orig_gcdff = utils._get_container_details_from_fs + utils._get_container_details_from_fs = mock_get_container_details_from_fs + orig_ds = utils.do_stat + utils.do_stat = mock_do_stat + try: + retval = utils.get_container_details(the_path, memcache=mc) + cd = mock_get_container_details_from_fs(the_path) + assert retval == (cd.obj_list, cd.object_count, cd.bytes_used) + mkey = utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path) + assert mkey in mc._d + assert 5 == mc._d[mkey].bytes_used + finally: + utils._get_container_details_from_fs = orig_gcdff + utils.do_stat = orig_ds + + def test_account_details_uncached(self): + the_path = "/tmp/bar" + def mock_get_account_details_from_fs(acc_path, acc_stats): + mt = 100 + cc = 2 + cl = ['a', 'b'] + return utils.AccountDetails(mt, cc, cl) + orig_gcdff = utils._get_account_details_from_fs + utils._get_account_details_from_fs = mock_get_account_details_from_fs + try: + retval = utils.get_account_details(the_path) + ad = mock_get_account_details_from_fs(the_path, None) + assert retval == (ad.container_list, ad.container_count) + finally: + utils._get_account_details_from_fs = orig_gcdff + + def test_account_details_cached_hit(self): + mc = SimMemcache() + the_path = "/tmp/bar" + def mock_get_account_details_from_fs(acc_path, acc_stats): + mt = 100 + cc = 2 + cl = ['a', 'b'] + return utils.AccountDetails(mt, cc, cl) + def mock_do_stat(path): + class MockStat(object): + def __init__(self, mtime): + self.st_mtime = mtime + return MockStat(100) + ad = mock_get_account_details_from_fs(the_path, None) + ad.container_list = ['x', 'y'] + mc.set(utils.MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path), ad) + orig_gcdff = utils._get_account_details_from_fs + orig_ds = utils.do_stat + utils._get_account_details_from_fs = mock_get_account_details_from_fs + utils.do_stat = mock_do_stat + try: + retval = utils.get_account_details(the_path, memcache=mc) + assert retval == (ad.container_list, ad.container_count) + wrong_ad = mock_get_account_details_from_fs(the_path, None) + assert wrong_ad != ad + finally: + utils._get_account_details_from_fs = orig_gcdff + utils.do_stat = orig_ds + + def test_account_details_cached_miss(self): + mc = SimMemcache() + the_path = "/tmp/bar" + def mock_get_account_details_from_fs(acc_path, acc_stats): + mt = 100 + cc = 2 + cl = ['a', 'b'] + return utils.AccountDetails(mt, cc, cl) + def mock_do_stat(path): + class MockStat(object): + def __init__(self, mtime): + self.st_mtime = mtime + return MockStat(100) + ad = mock_get_account_details_from_fs(the_path, None) + ad.container_list = ['x', 'y'] + mc.set(utils.MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path + 'u'), ad) + orig_gcdff = utils._get_account_details_from_fs + orig_ds = utils.do_stat + utils._get_account_details_from_fs = mock_get_account_details_from_fs + utils.do_stat = mock_do_stat + try: + retval = utils.get_account_details(the_path, memcache=mc) + correct_ad = mock_get_account_details_from_fs(the_path, None) + assert retval == (correct_ad.container_list, correct_ad.container_count) + assert correct_ad != ad + finally: + utils._get_account_details_from_fs = orig_gcdff + utils.do_stat = orig_ds + + def test_account_details_cached_miss_mtime(self): + mc = SimMemcache() + the_path = "/tmp/bar" + def mock_get_account_details_from_fs(acc_path, acc_stats): + mt = 100 + cc = 2 + cl = ['a', 'b'] + return utils.AccountDetails(mt, cc, cl) + def mock_do_stat(path): + class MockStat(object): + def __init__(self, mtime): + self.st_mtime = mtime + return MockStat(100) + ad = mock_get_account_details_from_fs(the_path, None) + ad.container_list = ['x', 'y'] + ad.mtime = 200 + mc.set(utils.MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path), ad) + orig_gcdff = utils._get_account_details_from_fs + orig_ds = utils.do_stat + utils._get_account_details_from_fs = mock_get_account_details_from_fs + utils.do_stat = mock_do_stat + try: + retval = utils.get_account_details(the_path, memcache=mc) + correct_ad = mock_get_account_details_from_fs(the_path, None) + assert retval == (correct_ad.container_list, correct_ad.container_count) + assert correct_ad != ad + finally: + utils._get_account_details_from_fs = orig_gcdff + utils.do_stat = orig_ds + + def test_get_container_details_from_fs(self): + td = tempfile.mkdtemp() + tf = tarfile.open("plugins/data/account_tree.tar.bz2", "r:bz2") + orig_cwd = os.getcwd() + os.chdir(td) + tf.extractall() + try: + ad = utils._get_account_details_from_fs(td, None) + assert ad.mtime == os.path.getmtime(td) + assert ad.container_count == 3 + assert set(ad.container_list) == set(['c1', 'c2', 'c3']) + finally: + os.chdir(orig_cwd) + shutil.rmtree(td) + + def test_get_container_details_from_fs_notadir(self): + tf = tempfile.NamedTemporaryFile() + cd = utils._get_container_details_from_fs(tf.name) + assert cd.bytes_used == 0 + assert cd.object_count == 0 + assert cd.obj_list == [] + assert cd.dir_list == [] + + def test_get_account_details_from_fs(self): + td = tempfile.mkdtemp() + tf = tarfile.open("plugins/data/container_tree.tar.bz2", "r:bz2") + orig_cwd = os.getcwd() + os.chdir(td) + tf.extractall() + try: + cd = utils._get_container_details_from_fs(td) + assert cd.bytes_used == 30, repr(cd.bytes_used) + assert cd.object_count == 8, repr(cd.object_count) + assert cd.obj_list == ['file1', 'file3', 'file2', + 'dir3', 'dir1', 'dir2', + 'dir1/file1', 'dir1/file2' + ], repr(cd.obj_list) + full_dir1 = os.path.join(td, 'dir1') + full_dir2 = os.path.join(td, 'dir2') + full_dir3 = os.path.join(td, 'dir3') + exp_dir_dict = { td: os.path.getmtime(td), + full_dir1: os.path.getmtime(full_dir1), + full_dir2: os.path.getmtime(full_dir2), + full_dir3: os.path.getmtime(full_dir3), + } + for d,m in cd.dir_list: + assert d in exp_dir_dict + assert exp_dir_dict[d] == m + finally: + os.chdir(orig_cwd) + shutil.rmtree(td) + + def test_get_account_details_from_fs_notadir_w_stats(self): + tf = tempfile.NamedTemporaryFile() + ad = utils._get_account_details_from_fs(tf.name, os.stat(tf.name)) + assert ad.mtime == os.path.getmtime(tf.name) + assert ad.container_count == 0 + assert ad.container_list == [] + + def test_get_account_details_from_fs_notadir(self): + tf = tempfile.NamedTemporaryFile() + ad = utils._get_account_details_from_fs(tf.name, None) + assert ad.mtime == os.path.getmtime(tf.name) + assert ad.container_count == 0 + assert ad.container_list == [] -- cgit