summaryrefslogtreecommitdiffstats
path: root/swift
diff options
context:
space:
mode:
authorPeter Portante <peter.portante@redhat.com>2012-10-23 11:47:44 -0400
committerVijay Bellur <vbellur@redhat.com>2012-11-03 04:37:15 -0700
commit6478b569c379dd0520ac3a46789284af5eb6cb4d (patch)
treef4de56eb5122b1d909e8e65a46f40ee09a36cca6 /swift
parentabc8e5b01bff976e1f6b3c1e2747a2a341bf0f64 (diff)
object-storeage: refactor to use one memcache key
Address BZ 868087: https://bugzilla.redhat.com/show_bug.cgi?id=868087 Store all of the data needed to generate the correct set of container and account details in one object, respectively, rather than using three seperate memcache keys. Change-Id: I46bf60c405b37cdb22727965bfd67bc5c410e77c BUG: 868087 Signed-off-by: Peter Portante <peter.portante@redhat.com> Reviewed-on: http://review.gluster.org/4139 Reviewed-by: Kaleb KEITHLEY <kkeithle@redhat.com> Tested-by: Gluster Build System <jenkins@build.gluster.com>
Diffstat (limited to 'swift')
-rw-r--r--swift/1.4.8/plugins/utils.py139
-rw-r--r--swift/1.4.8/test/unit/plugins/data/account_tree.tar.bz2bin0 -> 228 bytes
-rw-r--r--swift/1.4.8/test/unit/plugins/data/container_tree.tar.bz2bin0 -> 282 bytes
-rw-r--r--swift/1.4.8/test/unit/plugins/test_utils.py320
4 files changed, 394 insertions, 65 deletions
diff --git a/swift/1.4.8/plugins/utils.py b/swift/1.4.8/plugins/utils.py
index 1e666d70999..c011b681f0b 100644
--- a/swift/1.4.8/plugins/utils.py
+++ b/swift/1.4.8/plugins/utils.py
@@ -49,6 +49,9 @@ DEFAULT_UID = -1
DEFAULT_GID = -1
PICKLE_PROTOCOL = 2
CHUNK_SIZE = 65536
+MEMCACHE_KEY_PREFIX = 'gluster.swift.'
+MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX = MEMCACHE_KEY_PREFIX + 'account.details.'
+MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX = MEMCACHE_KEY_PREFIX + 'container.details.'
def mkdirs(path):
@@ -421,108 +424,114 @@ def update_list(path, const_path, dirs=[], files=[], object_count=0,
obj_list)
return object_count, bytes_used
-def get_container_details_from_fs(cont_path, const_path,
- memcache=None):
+
+class ContainerDetails(object):
+ def __init__(self, bytes_used, object_count, obj_list, dir_list):
+ self.bytes_used = bytes_used
+ self.object_count = object_count
+ self.obj_list = obj_list
+ self.dir_list = dir_list
+
+
+def _get_container_details_from_fs(cont_path):
"""
get container details by traversing the filesystem
"""
bytes_used = 0
object_count = 0
- obj_list=[]
+ obj_list = []
dir_list = []
if os.path.isdir(cont_path):
for (path, dirs, files) in os.walk(cont_path):
- object_count, bytes_used = update_list(path, const_path, dirs, files,
+ object_count, bytes_used = update_list(path, cont_path, dirs, files,
object_count, bytes_used,
obj_list)
- dir_list.append(path + ':' + str(do_stat(path).st_mtime))
-
- if memcache:
- memcache.set(strip_obj_storage_path(cont_path), obj_list)
- memcache.set(strip_obj_storage_path(cont_path) + '-dir_list',
- ','.join(dir_list))
- memcache.set(strip_obj_storage_path(cont_path) + '-cont_meta',
- [object_count, bytes_used])
-
- return obj_list, object_count, bytes_used
-
-def get_container_details_from_memcache(cont_path, const_path,
- memcache):
- """
- get container details stored in memcache
- """
-
- bytes_used = 0
- object_count = 0
- obj_list=[]
-
- dir_contents = memcache.get(strip_obj_storage_path(cont_path) + '-dir_list')
- if not dir_contents:
- return get_container_details_from_fs(cont_path, const_path,
- memcache=memcache)
-
- for i in dir_contents.split(','):
- path, mtime = i.split(':')
- if mtime != str(do_stat(path).st_mtime):
- return get_container_details_from_fs(cont_path, const_path,
- memcache=memcache)
+ dir_list.append((path, do_stat(path).st_mtime))
- obj_list = memcache.get(strip_obj_storage_path(cont_path))
-
- object_count, bytes_used = memcache.get(strip_obj_storage_path(cont_path) + '-cont_meta')
-
- return obj_list, object_count, bytes_used
+ return ContainerDetails(bytes_used, object_count, obj_list, dir_list)
def get_container_details(cont_path, memcache=None):
"""
Return object_list, object_count and bytes_used.
"""
+ mkey = ''
if memcache:
- object_list, object_count, bytes_used = get_container_details_from_memcache(cont_path, cont_path,
- memcache)
+ mkey = MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + strip_obj_storage_path(cont_path)
+ cd = memcache.get(mkey)
+ if cd:
+ if not cd.dir_list:
+ cd = None
+ else:
+ for (path, mtime) in cd.dir_list:
+ if mtime != do_stat(path).st_mtime:
+ cd = None
else:
- object_list, object_count, bytes_used = get_container_details_from_fs(cont_path, cont_path)
+ cd = None
+ if not cd:
+ cd = _get_container_details_from_fs(cont_path)
+ if memcache:
+ memcache.set(mkey, cd)
+ return cd.obj_list, cd.object_count, cd.bytes_used
+
+
+class AccountDetails(object):
+ """ A simple class to store the three pieces of information associated
+ with an account:
+
+ 1. The last known modification time
+ 2. The count of containers in the following list
+ 3. The list of containers
+ """
+ def __init__(self, mtime, container_count, container_list):
+ self.mtime = mtime
+ self.container_count = container_count
+ self.container_list = container_list
- return object_list, object_count, bytes_used
-def get_account_details_from_fs(acc_path, memcache=None):
+def _get_account_details_from_fs(acc_path, acc_stats):
container_list = []
container_count = 0
- if os.path.isdir(acc_path):
+ if not acc_stats:
+ acc_stats = do_stat(acc_path)
+ is_dir = (acc_stats.st_mode & 0040000) != 0
+ if is_dir:
for name in do_listdir(acc_path):
- if not os.path.isdir(acc_path + '/' + name) or \
- name.lower() == 'tmp' or name.lower() == 'async_pending':
+ if name.lower() == 'tmp' \
+ or name.lower() == 'async_pending' \
+ or not os.path.isdir(os.path.join(acc_path, name)):
continue
container_count += 1
container_list.append(name)
- if memcache:
- memcache.set(strip_obj_storage_path(acc_path) + '_container_list', container_list)
- memcache.set(strip_obj_storage_path(acc_path)+'_mtime', str(do_stat(acc_path).st_mtime))
- memcache.set(strip_obj_storage_path(acc_path)+'_container_count', container_count)
-
- return container_list, container_count
-
-def get_account_details_from_memcache(acc_path, memcache=None):
- if memcache:
- mtime = memcache.get(strip_obj_storage_path(acc_path)+'_mtime')
- if not mtime or mtime != str(do_stat(acc_path).st_mtime):
- return get_account_details_from_fs(acc_path, memcache)
- container_list = memcache.get(strip_obj_storage_path(acc_path) + '_container_list')
- container_count = memcache.get(strip_obj_storage_path(acc_path)+'_container_count')
- return container_list, container_count
+ return AccountDetails(acc_stats.st_mtime, container_count, container_list)
def get_account_details(acc_path, memcache=None):
"""
Return container_list and container_count.
"""
+ acc_stats = None
+ mkey = ''
if memcache:
- return get_account_details_from_memcache(acc_path, memcache)
+ mkey = MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + strip_obj_storage_path(acc_path)
+ ad = memcache.get(mkey)
+ if ad:
+ # FIXME: Do we really need to stat the file? If we are object
+ # only, then we can track the other Swift HTTP APIs that would
+ # modify the account and invalidate the cached entry there. If we
+ # are not object only, are we even called on this path?
+ acc_stats = do_stat(acc_path)
+ if ad.mtime != acc_stats.st_mtime:
+ ad = None
else:
- return get_account_details_from_fs(acc_path, memcache)
+ ad = None
+ if not ad:
+ ad = _get_account_details_from_fs(acc_path, acc_stats)
+ if memcache:
+ memcache.set(mkey, ad)
+ return ad.container_list, ad.container_count
def _get_etag(path):
etag = md5()
diff --git a/swift/1.4.8/test/unit/plugins/data/account_tree.tar.bz2 b/swift/1.4.8/test/unit/plugins/data/account_tree.tar.bz2
new file mode 100644
index 00000000000..cb23e4dd71b
--- /dev/null
+++ b/swift/1.4.8/test/unit/plugins/data/account_tree.tar.bz2
Binary files differ
diff --git a/swift/1.4.8/test/unit/plugins/data/container_tree.tar.bz2 b/swift/1.4.8/test/unit/plugins/data/container_tree.tar.bz2
new file mode 100644
index 00000000000..b4a14928570
--- /dev/null
+++ b/swift/1.4.8/test/unit/plugins/data/container_tree.tar.bz2
Binary files differ
diff --git a/swift/1.4.8/test/unit/plugins/test_utils.py b/swift/1.4.8/test/unit/plugins/test_utils.py
index 0731c78656b..fb0cb1f2c8f 100644
--- a/swift/1.4.8/test/unit/plugins/test_utils.py
+++ b/swift/1.4.8/test/unit/plugins/test_utils.py
@@ -22,6 +22,8 @@ import xattr
import cPickle as pickle
import tempfile
import hashlib
+import tarfile
+import shutil
from swift.common.utils import normalize_timestamp
from collections import defaultdict
from plugins import utils
@@ -105,6 +107,17 @@ def _destroyxattr():
global _xattrs; _xattrs = None
+class SimMemcache(object):
+ def __init__(self):
+ self._d = {}
+
+ def get(self, key):
+ return self._d.get(key, None)
+
+ def set(self, key, value):
+ self._d[key] = value
+
+
class TestUtils(unittest.TestCase):
""" Tests for plugins.utils """
@@ -496,3 +509,310 @@ class TestUtils(unittest.TestCase):
assert md[utils.X_CONTAINER_COUNT] == (0, 0)
finally:
os.rmdir(td)
+
+ def test_container_details_uncached(self):
+ the_path = "/tmp/bar"
+ def mock_get_container_details_from_fs(cont_path):
+ bu = 5
+ oc = 1
+ ol = ['foo',]
+ dl = [('a',100),]
+ return utils.ContainerDetails(bu, oc, ol, dl)
+ orig_gcdff = utils._get_container_details_from_fs
+ utils._get_container_details_from_fs = mock_get_container_details_from_fs
+ try:
+ retval = utils.get_container_details(the_path)
+ cd = mock_get_container_details_from_fs(the_path)
+ assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
+ finally:
+ utils._get_container_details_from_fs = orig_gcdff
+
+ def test_container_details_cached_hit(self):
+ mc = SimMemcache()
+ the_path = "/tmp/bar"
+ def mock_get_container_details_from_fs(cont_path, bu_p=5):
+ bu = bu_p
+ oc = 1
+ ol = ['foo',]
+ dl = [('a',100),]
+ return utils.ContainerDetails(bu, oc, ol, dl)
+ def mock_do_stat(path):
+ class MockStat(object):
+ def __init__(self, mtime):
+ self.st_mtime = mtime
+ return MockStat(100)
+ cd = mock_get_container_details_from_fs(the_path, bu_p=6)
+ mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path), cd)
+ orig_gcdff = utils._get_container_details_from_fs
+ utils._get_container_details_from_fs = mock_get_container_details_from_fs
+ orig_ds = utils.do_stat
+ utils.do_stat = mock_do_stat
+ try:
+ retval = utils.get_container_details(the_path, memcache=mc)
+ # If it did not properly use memcache, the default mocked version
+ # of get details from fs would return 5 bytes used instead of the
+ # 6 we specified above.
+ cd = mock_get_container_details_from_fs(the_path, bu_p=6)
+ assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
+ finally:
+ utils._get_container_details_from_fs = orig_gcdff
+ utils.do_stat = orig_ds
+
+ def test_container_details_cached_miss_key(self):
+ mc = SimMemcache()
+ the_path = "/tmp/bar"
+ def mock_get_container_details_from_fs(cont_path, bu_p=5):
+ bu = bu_p
+ oc = 1
+ ol = ['foo',]
+ dl = [('a',100),]
+ return utils.ContainerDetails(bu, oc, ol, dl)
+ def mock_do_stat(path):
+ # Be sure we don't miss due to mtimes not matching
+ self.fail("do_stat should not have been called")
+ cd = mock_get_container_details_from_fs(the_path + "u", bu_p=6)
+ mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path + "u"), cd)
+ orig_gcdff = utils._get_container_details_from_fs
+ utils._get_container_details_from_fs = mock_get_container_details_from_fs
+ orig_ds = utils.do_stat
+ utils.do_stat = mock_do_stat
+ try:
+ retval = utils.get_container_details(the_path, memcache=mc)
+ cd = mock_get_container_details_from_fs(the_path)
+ assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
+ mkey = utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path)
+ assert mkey in mc._d
+ finally:
+ utils._get_container_details_from_fs = orig_gcdff
+ utils.do_stat = orig_ds
+
+ def test_container_details_cached_miss_dir_list(self):
+ mc = SimMemcache()
+ the_path = "/tmp/bar"
+ def mock_get_container_details_from_fs(cont_path, bu_p=5):
+ bu = bu_p
+ oc = 1
+ ol = ['foo',]
+ dl = []
+ return utils.ContainerDetails(bu, oc, ol, dl)
+ def mock_do_stat(path):
+ # Be sure we don't miss due to mtimes not matching
+ self.fail("do_stat should not have been called")
+ cd = mock_get_container_details_from_fs(the_path, bu_p=6)
+ mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path), cd)
+ orig_gcdff = utils._get_container_details_from_fs
+ utils._get_container_details_from_fs = mock_get_container_details_from_fs
+ orig_ds = utils.do_stat
+ utils.do_stat = mock_do_stat
+ try:
+ retval = utils.get_container_details(the_path, memcache=mc)
+ cd = mock_get_container_details_from_fs(the_path)
+ assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
+ mkey = utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path)
+ assert mkey in mc._d
+ assert 5 == mc._d[mkey].bytes_used
+ finally:
+ utils._get_container_details_from_fs = orig_gcdff
+ utils.do_stat = orig_ds
+
+ def test_container_details_cached_miss_mtime(self):
+ mc = SimMemcache()
+ the_path = "/tmp/bar"
+ def mock_get_container_details_from_fs(cont_path, bu_p=5):
+ bu = bu_p
+ oc = 1
+ ol = ['foo',]
+ dl = [('a',100),]
+ return utils.ContainerDetails(bu, oc, ol, dl)
+ def mock_do_stat(path):
+ # Be sure we miss due to mtimes not matching
+ class MockStat(object):
+ def __init__(self, mtime):
+ self.st_mtime = mtime
+ return MockStat(200)
+ cd = mock_get_container_details_from_fs(the_path, bu_p=6)
+ mc.set(utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path), cd)
+ orig_gcdff = utils._get_container_details_from_fs
+ utils._get_container_details_from_fs = mock_get_container_details_from_fs
+ orig_ds = utils.do_stat
+ utils.do_stat = mock_do_stat
+ try:
+ retval = utils.get_container_details(the_path, memcache=mc)
+ cd = mock_get_container_details_from_fs(the_path)
+ assert retval == (cd.obj_list, cd.object_count, cd.bytes_used)
+ mkey = utils.MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path)
+ assert mkey in mc._d
+ assert 5 == mc._d[mkey].bytes_used
+ finally:
+ utils._get_container_details_from_fs = orig_gcdff
+ utils.do_stat = orig_ds
+
+ def test_account_details_uncached(self):
+ the_path = "/tmp/bar"
+ def mock_get_account_details_from_fs(acc_path, acc_stats):
+ mt = 100
+ cc = 2
+ cl = ['a', 'b']
+ return utils.AccountDetails(mt, cc, cl)
+ orig_gcdff = utils._get_account_details_from_fs
+ utils._get_account_details_from_fs = mock_get_account_details_from_fs
+ try:
+ retval = utils.get_account_details(the_path)
+ ad = mock_get_account_details_from_fs(the_path, None)
+ assert retval == (ad.container_list, ad.container_count)
+ finally:
+ utils._get_account_details_from_fs = orig_gcdff
+
+ def test_account_details_cached_hit(self):
+ mc = SimMemcache()
+ the_path = "/tmp/bar"
+ def mock_get_account_details_from_fs(acc_path, acc_stats):
+ mt = 100
+ cc = 2
+ cl = ['a', 'b']
+ return utils.AccountDetails(mt, cc, cl)
+ def mock_do_stat(path):
+ class MockStat(object):
+ def __init__(self, mtime):
+ self.st_mtime = mtime
+ return MockStat(100)
+ ad = mock_get_account_details_from_fs(the_path, None)
+ ad.container_list = ['x', 'y']
+ mc.set(utils.MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path), ad)
+ orig_gcdff = utils._get_account_details_from_fs
+ orig_ds = utils.do_stat
+ utils._get_account_details_from_fs = mock_get_account_details_from_fs
+ utils.do_stat = mock_do_stat
+ try:
+ retval = utils.get_account_details(the_path, memcache=mc)
+ assert retval == (ad.container_list, ad.container_count)
+ wrong_ad = mock_get_account_details_from_fs(the_path, None)
+ assert wrong_ad != ad
+ finally:
+ utils._get_account_details_from_fs = orig_gcdff
+ utils.do_stat = orig_ds
+
+ def test_account_details_cached_miss(self):
+ mc = SimMemcache()
+ the_path = "/tmp/bar"
+ def mock_get_account_details_from_fs(acc_path, acc_stats):
+ mt = 100
+ cc = 2
+ cl = ['a', 'b']
+ return utils.AccountDetails(mt, cc, cl)
+ def mock_do_stat(path):
+ class MockStat(object):
+ def __init__(self, mtime):
+ self.st_mtime = mtime
+ return MockStat(100)
+ ad = mock_get_account_details_from_fs(the_path, None)
+ ad.container_list = ['x', 'y']
+ mc.set(utils.MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path + 'u'), ad)
+ orig_gcdff = utils._get_account_details_from_fs
+ orig_ds = utils.do_stat
+ utils._get_account_details_from_fs = mock_get_account_details_from_fs
+ utils.do_stat = mock_do_stat
+ try:
+ retval = utils.get_account_details(the_path, memcache=mc)
+ correct_ad = mock_get_account_details_from_fs(the_path, None)
+ assert retval == (correct_ad.container_list, correct_ad.container_count)
+ assert correct_ad != ad
+ finally:
+ utils._get_account_details_from_fs = orig_gcdff
+ utils.do_stat = orig_ds
+
+ def test_account_details_cached_miss_mtime(self):
+ mc = SimMemcache()
+ the_path = "/tmp/bar"
+ def mock_get_account_details_from_fs(acc_path, acc_stats):
+ mt = 100
+ cc = 2
+ cl = ['a', 'b']
+ return utils.AccountDetails(mt, cc, cl)
+ def mock_do_stat(path):
+ class MockStat(object):
+ def __init__(self, mtime):
+ self.st_mtime = mtime
+ return MockStat(100)
+ ad = mock_get_account_details_from_fs(the_path, None)
+ ad.container_list = ['x', 'y']
+ ad.mtime = 200
+ mc.set(utils.MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + utils.strip_obj_storage_path(the_path), ad)
+ orig_gcdff = utils._get_account_details_from_fs
+ orig_ds = utils.do_stat
+ utils._get_account_details_from_fs = mock_get_account_details_from_fs
+ utils.do_stat = mock_do_stat
+ try:
+ retval = utils.get_account_details(the_path, memcache=mc)
+ correct_ad = mock_get_account_details_from_fs(the_path, None)
+ assert retval == (correct_ad.container_list, correct_ad.container_count)
+ assert correct_ad != ad
+ finally:
+ utils._get_account_details_from_fs = orig_gcdff
+ utils.do_stat = orig_ds
+
+ def test_get_container_details_from_fs(self):
+ td = tempfile.mkdtemp()
+ tf = tarfile.open("plugins/data/account_tree.tar.bz2", "r:bz2")
+ orig_cwd = os.getcwd()
+ os.chdir(td)
+ tf.extractall()
+ try:
+ ad = utils._get_account_details_from_fs(td, None)
+ assert ad.mtime == os.path.getmtime(td)
+ assert ad.container_count == 3
+ assert set(ad.container_list) == set(['c1', 'c2', 'c3'])
+ finally:
+ os.chdir(orig_cwd)
+ shutil.rmtree(td)
+
+ def test_get_container_details_from_fs_notadir(self):
+ tf = tempfile.NamedTemporaryFile()
+ cd = utils._get_container_details_from_fs(tf.name)
+ assert cd.bytes_used == 0
+ assert cd.object_count == 0
+ assert cd.obj_list == []
+ assert cd.dir_list == []
+
+ def test_get_account_details_from_fs(self):
+ td = tempfile.mkdtemp()
+ tf = tarfile.open("plugins/data/container_tree.tar.bz2", "r:bz2")
+ orig_cwd = os.getcwd()
+ os.chdir(td)
+ tf.extractall()
+ try:
+ cd = utils._get_container_details_from_fs(td)
+ assert cd.bytes_used == 30, repr(cd.bytes_used)
+ assert cd.object_count == 8, repr(cd.object_count)
+ assert cd.obj_list == ['file1', 'file3', 'file2',
+ 'dir3', 'dir1', 'dir2',
+ 'dir1/file1', 'dir1/file2'
+ ], repr(cd.obj_list)
+ full_dir1 = os.path.join(td, 'dir1')
+ full_dir2 = os.path.join(td, 'dir2')
+ full_dir3 = os.path.join(td, 'dir3')
+ exp_dir_dict = { td: os.path.getmtime(td),
+ full_dir1: os.path.getmtime(full_dir1),
+ full_dir2: os.path.getmtime(full_dir2),
+ full_dir3: os.path.getmtime(full_dir3),
+ }
+ for d,m in cd.dir_list:
+ assert d in exp_dir_dict
+ assert exp_dir_dict[d] == m
+ finally:
+ os.chdir(orig_cwd)
+ shutil.rmtree(td)
+
+ def test_get_account_details_from_fs_notadir_w_stats(self):
+ tf = tempfile.NamedTemporaryFile()
+ ad = utils._get_account_details_from_fs(tf.name, os.stat(tf.name))
+ assert ad.mtime == os.path.getmtime(tf.name)
+ assert ad.container_count == 0
+ assert ad.container_list == []
+
+ def test_get_account_details_from_fs_notadir(self):
+ tf = tempfile.NamedTemporaryFile()
+ ad = utils._get_account_details_from_fs(tf.name, None)
+ assert ad.mtime == os.path.getmtime(tf.name)
+ assert ad.container_count == 0
+ assert ad.container_list == []