diff options
Diffstat (limited to 'ufo/gluster/swift/common/utils.py')
-rw-r--r-- | ufo/gluster/swift/common/utils.py | 496 |
1 files changed, 0 insertions, 496 deletions
# Source: ufo/gluster/swift/common/utils.py (blob f2cd8dea10c, shown in the
# diff as a deleted file).
#
# Copyright (c) 2012 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import os
import errno
import stat
import xattr
import random
from hashlib import md5
from eventlet import sleep
import cPickle as pickle
from ConfigParser import ConfigParser, NoSectionError, NoOptionError
from swift.common.utils import normalize_timestamp, TRUE_VALUES
from gluster.swift.common.fs_utils import *
from gluster.swift.common import Glusterfs

X_CONTENT_TYPE = 'Content-Type'
X_CONTENT_LENGTH = 'Content-Length'
X_TIMESTAMP = 'X-Timestamp'
X_PUT_TIMESTAMP = 'X-PUT-Timestamp'
X_TYPE = 'X-Type'
X_ETAG = 'ETag'
X_OBJECTS_COUNT = 'X-Object-Count'
X_BYTES_USED = 'X-Bytes-Used'
X_CONTAINER_COUNT = 'X-Container-Count'
X_OBJECT_TYPE = 'X-Object-Type'
DIR_TYPE = 'application/directory'
ACCOUNT = 'Account'
METADATA_KEY = 'user.swift.metadata'
MAX_XATTR_SIZE = 65536
CONTAINER = 'container'
DIR = 'dir'
MARKER_DIR = 'marker_dir'
TEMP_DIR = 'tmp'
ASYNCDIR = 'async_pending'  # Keep in sync with swift.obj.server.ASYNCDIR
FILE = 'file'
FILE_TYPE = 'application/octet-stream'
OBJECT = 'Object'
OBJECT_TYPE = 'application/octet-stream'
DEFAULT_UID = -1
DEFAULT_GID = -1
PICKLE_PROTOCOL = 2
CHUNK_SIZE = 65536
MEMCACHE_KEY_PREFIX = 'gluster.swift.'
MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX = MEMCACHE_KEY_PREFIX + 'account.details.'
MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX = \
    MEMCACHE_KEY_PREFIX + 'container.details.'


def read_metadata(path):
    """
    Helper function to read the pickled metadata from a File/Directory.

    The pickle may be split over several xattr keys (METADATA_KEY,
    METADATA_KEY + '1', METADATA_KEY + '2', ...); chunks are concatenated
    until the accumulated string unpickles successfully.

    :param path: File/Directory to read metadata from.

    :returns: dictionary of metadata (empty if none could be recovered)
    :raises IOError: if fetching an xattr fails for a reason other than
                     the key being absent (ENODATA)
    """
    metadata = None
    metadata_s = ''
    key = 0
    while metadata is None:
        try:
            metadata_s += xattr.getxattr(path,
                                         '%s%s' % (METADATA_KEY, (key or '')))
        except IOError as err:
            if err.errno == errno.ENODATA:
                if key > 0:
                    # No errors reading the xattr keys, but since we have not
                    # been able to find enough chunks to get a successful
                    # unpickle operation, we consider the metadata lost, and
                    # drop the existing data so that the internal state can be
                    # recreated.
                    clean_metadata(path)
                # We either could not find any metadata key, or we could find
                # some keys, but were not successful in performing the
                # unpickling (missing keys perhaps)? Either way, just report
                # to the caller we have no metadata.
                metadata = {}
            else:
                logging.exception("xattr.getxattr failed on %s key %s err: %s",
                                  path, key, str(err))
                # Note that we don't touch the keys on errors fetching the
                # data since it could be a transient state.
                raise
        else:
            try:
                # If this key provides all or the remaining part of the pickle
                # data, we don't need to keep searching for more keys. This
                # means if we only need to store data in N xattr key/value
                # pair, we only need to invoke xattr get N times. With large
                # keys sizes we are shooting for N = 1.
                #
                # NOTE(review): pickle.loads() executes whatever the xattr
                # contains; this is only safe if nothing untrusted can write
                # the user.swift.metadata xattrs -- confirm.
                metadata = pickle.loads(metadata_s)
                assert isinstance(metadata, dict)
            except (EOFError, pickle.UnpicklingError):
                # The tuple is required: "except EOFError, X" would catch
                # only EOFError and rebind X to the exception instance.
                #
                # We still are not able recognize this existing data collected
                # as a pickled object. Make sure we loop around to try to get
                # more from another xattr key.
                metadata = None
                key += 1
    return metadata


def write_metadata(path, metadata):
    """
    Helper function to write pickled metadata for a File/Directory.

    The pickled string is split into MAX_XATTR_SIZE-sized chunks stored
    under successive xattr keys (METADATA_KEY, METADATA_KEY + '1', ...).

    :param path: File/Directory path to write the metadata
    :param metadata: dictionary of metadata to write
    :raises IOError: if setting any xattr chunk fails
    """
    assert isinstance(metadata, dict)
    metastr = pickle.dumps(metadata, PICKLE_PROTOCOL)
    key = 0
    while metastr:
        try:
            xattr.setxattr(path, '%s%s' % (METADATA_KEY, key or ''),
                           metastr[:MAX_XATTR_SIZE])
        except IOError as err:
            logging.exception("setxattr failed on %s key %s err: %s",
                              path, key, str(err))
            raise
        metastr = metastr[MAX_XATTR_SIZE:]
        key += 1


def clean_metadata(path):
    """
    Remove every metadata xattr chunk from a File/Directory.

    Keys are removed in order (METADATA_KEY, METADATA_KEY + '1', ...) until
    a removal reports ENODATA, i.e. the next chunk does not exist.

    :param path: File/Directory whose metadata xattrs are removed
    :raises IOError: on any removal failure other than ENODATA
    """
    key = 0
    while True:
        try:
            xattr.removexattr(path, '%s%s' % (METADATA_KEY, (key or '')))
        except IOError as err:
            if err.errno == errno.ENODATA:
                break
            raise
        key += 1


def check_user_xattr(path):
    """
    Check that user xattrs can be set on the given path.

    :param path: path to probe
    :returns: False if the path does not exist, True if a test xattr could
              be set
    :raises IOError: if setting the test xattr fails
    """
    if not os_path.exists(path):
        return False
    try:
        xattr.setxattr(path, 'user.test.key1', 'value1')
    except IOError as err:
        logging.exception("check_user_xattr: set failed on %s err: %s",
                          path, str(err))
        raise
    try:
        xattr.removexattr(path, 'user.test.key1')
    except IOError as err:
        logging.exception("check_user_xattr: remove failed on %s err: %s",
                          path, str(err))
        # Remove xattr may fail in case of concurrent remove.
    return True


def validate_container(metadata):
    """
    Tell whether the metadata describes a container.

    :param metadata: dictionary of key -> (value, timestamp) pairs
    :returns: True if all required container keys are present and the
              X-Type value is CONTAINER, False otherwise
    """
    if not metadata:
        logging.warn('validate_container: No metadata')
        return False

    required = (X_TYPE, X_TIMESTAMP, X_PUT_TIMESTAMP, X_OBJECTS_COUNT,
                X_BYTES_USED)
    if not all(name in metadata for name in required):
        return False

    (value, timestamp) = metadata[X_TYPE]
    if value == CONTAINER:
        return True

    logging.warn('validate_container: metadata type is not CONTAINER (%r)' % (value,))
    return False


def validate_account(metadata):
    """
    Tell whether the metadata describes an account.

    :param metadata: dictionary of key -> (value, timestamp) pairs
    :returns: True if all required account keys are present and the
              X-Type value is ACCOUNT, False otherwise
    """
    if not metadata:
        logging.warn('validate_account: No metadata')
        return False

    required = (X_TYPE, X_TIMESTAMP, X_PUT_TIMESTAMP, X_OBJECTS_COUNT,
                X_BYTES_USED, X_CONTAINER_COUNT)
    if not all(name in metadata for name in required):
        return False

    (value, timestamp) = metadata[X_TYPE]
    if value == ACCOUNT:
        return True

    logging.warn('validate_account: metadata type is not ACCOUNT (%r)' % (value,))
    return False


def validate_object(metadata):
    """
    Tell whether the metadata describes an object.

    Unlike container/account metadata, object values are compared directly
    (not unpacked from (value, timestamp) pairs).

    :param metadata: dictionary of object metadata
    :returns: True if all required object keys are present and X-Type is
              OBJECT, False otherwise
    """
    if not metadata:
        logging.warn('validate_object: No metadata')
        return False

    required = (X_TIMESTAMP, X_CONTENT_TYPE, X_ETAG, X_CONTENT_LENGTH,
                X_TYPE, X_OBJECT_TYPE)
    if not all(name in metadata for name in required):
        return False

    if metadata[X_TYPE] == OBJECT:
        return True

    logging.warn('validate_object: metadata type is not OBJECT (%r)' % (metadata[X_TYPE],))
    return False


def is_marker(metadata):
    """
    Tell whether object metadata marks a marker directory.

    :param metadata: dictionary of object metadata
    :returns: True if X-Object-Type equals MARKER_DIR, False otherwise
              (including when metadata is empty or lacks the key)
    """
    if not metadata:
        logging.warn('is_marker: No metadata')
        return False

    if X_OBJECT_TYPE not in metadata:
        logging.warn('is_marker: X_OBJECT_TYPE missing from metadata: %s' % metadata)
        return False

    return metadata[X_OBJECT_TYPE] == MARKER_DIR


def _update_list(path, cont_path, src_list, reg_file=True, object_count=0,
                 bytes_used=0, obj_list=None):
    """
    Append the entries of one directory to obj_list and update the totals.

    :param path: directory the entries of src_list live in
    :param cont_path: container root; stripped from path to build the
                      container-relative object names
    :param src_list: entry names found directly under path
    :param reg_file: True if the entries are regular files (only those
                     contribute to bytes_used)
    :param object_count: running object count to add to
    :param bytes_used: running byte count to add to
    :param obj_list: list extended in place with the object names; a fresh
                     list is used when omitted
    :returns: (object_count, bytes_used) tuple with the updated totals
    """
    # A None default avoids sharing one list object across all calls.
    if obj_list is None:
        obj_list = []

    # strip the prefix off, also stripping the leading and trailing slashes
    obj_path = path.replace(cont_path, '').strip(os.path.sep)

    for obj_name in src_list:
        if obj_path:
            obj_list.append(os.path.join(obj_path, obj_name))
        else:
            obj_list.append(obj_name)

        object_count += 1

        if Glusterfs._do_getsize and reg_file:
            bytes_used += os_path.getsize(os.path.join(path, obj_name))
            # NOTE(review): the yield is assumed to pair with the getsize
            # call; the flattened source does not show its nesting -- confirm.
            sleep()

    return object_count, bytes_used


def update_list(path, cont_path, dirs=None, files=None, object_count=0,
                bytes_used=0, obj_list=None):
    """
    Account for the files and sub-directories of one directory.

    :param path: directory being processed
    :param cont_path: container root path
    :param dirs: names of sub-directories directly under path
    :param files: names of regular files directly under path
    :param object_count: running object count to add to
    :param bytes_used: running byte count to add to
    :param obj_list: list extended in place with the object names; a fresh
                     list is used when omitted
    :returns: (object_count, bytes_used) tuple with the updated totals
    """
    # A None default avoids sharing one list object across all calls.
    if obj_list is None:
        obj_list = []

    if files:
        object_count, bytes_used = _update_list(path, cont_path, files, True,
                                                object_count, bytes_used,
                                                obj_list)
    if dirs:
        object_count, bytes_used = _update_list(path, cont_path, dirs, False,
                                                object_count, bytes_used,
                                                obj_list)
    return object_count, bytes_used


class ContainerDetails(object):
    """
    Simple record of what a container walk found: bytes used, object
    count, the object name list, and a list of (directory path, mtime)
    pairs for the directories visited.
    """
    def __init__(self, bytes_used, object_count, obj_list, dir_list):
        self.bytes_used = bytes_used
        self.object_count = object_count
        self.obj_list = obj_list
        self.dir_list = dir_list


def _get_container_details_from_fs(cont_path):
    """
    get container details by traversing the filesystem

    :param cont_path: container root path
    :returns: ContainerDetails; all-empty if cont_path is not a directory
    """
    bytes_used = 0
    object_count = 0
    obj_list = []
    dir_list = []

    if os_path.isdir(cont_path):
        for (path, dirs, files) in do_walk(cont_path):
            object_count, bytes_used = update_list(path, cont_path, dirs,
                                                   files, object_count,
                                                   bytes_used, obj_list)

            # Remember each directory's mtime so a cached copy can later
            # be checked for staleness.
            dir_list.append((path, do_stat(path).st_mtime))
            sleep()

    return ContainerDetails(bytes_used, object_count, obj_list, dir_list)


def get_container_details(cont_path, memcache=None):
    """
    Return object_list, object_count and bytes_used.

    A cached ContainerDetails is reused only if it has a non-empty
    dir_list and every recorded directory mtime still matches the
    filesystem; otherwise the container is re-walked and the cache
    refreshed.

    :param cont_path: container root path
    :param memcache: optional cache client providing get(key)/set(key, val)
    :returns: (obj_list, object_count, bytes_used) tuple
    """
    mkey = ''
    if memcache:
        mkey = MEMCACHE_CONTAINER_DETAILS_KEY_PREFIX + cont_path
        cd = memcache.get(mkey)
        if cd:
            if not cd.dir_list:
                cd = None
            else:
                for (path, mtime) in cd.dir_list:
                    if mtime != do_stat(path).st_mtime:
                        cd = None
                        # One stale directory invalidates the entry; no
                        # need to stat the remaining ones.
                        break
    else:
        cd = None
    if not cd:
        cd = _get_container_details_from_fs(cont_path)
        if memcache:
            memcache.set(mkey, cd)
    return cd.obj_list, cd.object_count, cd.bytes_used


class AccountDetails(object):
    """ A simple class to store the three pieces of information associated
        with an account:

        1. The last known modification time
        2. The count of containers in the following list
        3. The list of containers
    """
    def __init__(self, mtime, container_count, container_list):
        self.mtime = mtime
        self.container_count = container_count
        self.container_list = container_list


def _get_account_details_from_fs(acc_path, acc_stats):
    """
    Build account details by listing the account directory.

    Entries named TEMP_DIR or ASYNCDIR (case-insensitively) and entries
    that are not directories are not counted as containers.

    :param acc_path: account root path
    :param acc_stats: stat result for acc_path, or a false value to have
                      it fetched here
    :returns: AccountDetails
    """
    container_list = []
    container_count = 0

    if not acc_stats:
        acc_stats = do_stat(acc_path)
    # S_ISDIR masks with S_IFMT first; a bare "& 0040000" test would also
    # match sockets and block devices.
    is_dir = stat.S_ISDIR(acc_stats.st_mode)
    if is_dir:
        for name in do_listdir(acc_path):
            if name.lower() == TEMP_DIR \
                    or name.lower() == ASYNCDIR \
                    or not os_path.isdir(os.path.join(acc_path, name)):
                continue
            container_count += 1
            container_list.append(name)

    return AccountDetails(acc_stats.st_mtime, container_count, container_list)


def get_account_details(acc_path, memcache=None):
    """
    Return container_list and container_count.

    A cached AccountDetails is reused only while its recorded mtime
    matches the account directory's current mtime.

    :param acc_path: account root path
    :param memcache: optional cache client providing get(key)/set(key, val)
    :returns: (container_list, container_count) tuple
    """
    acc_stats = None
    mkey = ''
    if memcache:
        mkey = MEMCACHE_ACCOUNT_DETAILS_KEY_PREFIX + acc_path
        ad = memcache.get(mkey)
        if ad:
            # FIXME: Do we really need to stat the file? If we are object
            # only, then we can track the other Swift HTTP APIs that would
            # modify the account and invalidate the cached entry there. If we
            # are not object only, are we even called on this path?
            acc_stats = do_stat(acc_path)
            if ad.mtime != acc_stats.st_mtime:
                ad = None
    else:
        ad = None
    if not ad:
        ad = _get_account_details_from_fs(acc_path, acc_stats)
        if memcache:
            memcache.set(mkey, ad)
    return ad.container_list, ad.container_count


def _get_etag(path):
    """
    Compute the MD5 hex digest of a file's contents.

    :param path: file to hash; read in CHUNK_SIZE pieces
    :returns: hex digest string
    """
    etag = md5()
    with open(path, 'rb') as fp:
        while True:
            chunk = fp.read(CHUNK_SIZE)
            if chunk:
                etag.update(chunk)
            else:
                break
    return etag.hexdigest()


def get_object_metadata(obj_path):
    """
    Return metadata of object.

    :param obj_path: path of the file or directory backing the object
    :returns: metadata dictionary derived from stat(); empty if the path
              does not exist
    :raises OSError: if stat fails for a reason other than ENOENT
    """
    try:
        stats = do_stat(obj_path)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise
        metadata = {}
    else:
        # S_ISDIR masks with S_IFMT first; a bare "& 0040000" test would
        # also match sockets and block devices.
        is_dir = stat.S_ISDIR(stats.st_mode)
        metadata = {
            X_TYPE: OBJECT,
            X_TIMESTAMP: normalize_timestamp(stats.st_ctime),
            X_CONTENT_TYPE: DIR_TYPE if is_dir else FILE_TYPE,
            X_OBJECT_TYPE: DIR if is_dir else FILE,
            X_CONTENT_LENGTH: 0 if is_dir else stats.st_size,
            # Directories get the digest of empty content.
            X_ETAG: md5().hexdigest() if is_dir else _get_etag(obj_path),
        }
    return metadata


def _add_timestamp(metadata_i):
    """
    Convert a key/value dictionary into key/(value, timestamp) pairs.

    Values that are already tuples are kept as they are; every other
    value is paired with a timestamp of 0.

    :param metadata_i: input metadata dictionary
    :returns: new dictionary; the input is not modified
    """
    # At this point we have a simple key/value dictionary, turn it into
    # key/(value,timestamp) pairs.
    timestamp = 0
    metadata = {}
    for key, value_i in metadata_i.items():
        if not isinstance(value_i, tuple):
            metadata[key] = (value_i, timestamp)
        else:
            metadata[key] = value_i
    return metadata


def get_container_metadata(cont_path, memcache=None):
    """
    Build container metadata from the filesystem (or cached details).

    :param cont_path: container root path
    :param memcache: optional cache client passed to get_container_details
    :returns: dictionary of key -> (value, timestamp) pairs
    """
    objects, object_count, bytes_used = get_container_details(cont_path,
                                                              memcache)
    metadata = {X_TYPE: CONTAINER,
                X_TIMESTAMP: normalize_timestamp(
                    os_path.getctime(cont_path)),
                X_PUT_TIMESTAMP: normalize_timestamp(
                    os_path.getmtime(cont_path)),
                X_OBJECTS_COUNT: object_count,
                X_BYTES_USED: bytes_used}
    return _add_timestamp(metadata)


def get_account_metadata(acc_path, memcache=None):
    """
    Build account metadata from the filesystem (or cached details).

    Object count and bytes used are always reported as 0 here.

    :param acc_path: account root path
    :param memcache: optional cache client passed to get_account_details
    :returns: dictionary of key -> (value, timestamp) pairs
    """
    containers, container_count = get_account_details(acc_path, memcache)
    metadata = {X_TYPE: ACCOUNT,
                X_TIMESTAMP: normalize_timestamp(os_path.getctime(acc_path)),
                X_PUT_TIMESTAMP: normalize_timestamp(
                    os_path.getmtime(acc_path)),
                X_OBJECTS_COUNT: 0,
                X_BYTES_USED: 0,
                X_CONTAINER_COUNT: container_count}
    return _add_timestamp(metadata)


def restore_metadata(path, metadata):
    """
    Merge the given metadata over what is stored on path and persist it.

    Existing keys are kept unless overridden by metadata. The xattrs are
    rewritten only if the merged result differs from what was read.

    :param path: File/Directory whose metadata is updated
    :param metadata: dictionary of metadata to apply
    :returns: the merged metadata dictionary
    """
    meta_orig = read_metadata(path)
    if meta_orig:
        meta_new = meta_orig.copy()
        meta_new.update(metadata)
    else:
        meta_new = metadata
    if meta_orig != meta_new:
        write_metadata(path, meta_new)
    return meta_new


def create_object_metadata(obj_path):
    """
    Generate object metadata from the filesystem and store it on obj_path.

    :returns: the resulting metadata dictionary
    """
    metadata = get_object_metadata(obj_path)
    return restore_metadata(obj_path, metadata)


def create_container_metadata(cont_path, memcache=None):
    """
    Generate container metadata and store it on cont_path.

    :returns: the resulting metadata dictionary
    """
    metadata = get_container_metadata(cont_path, memcache)
    return restore_metadata(cont_path, metadata)


def create_account_metadata(acc_path, memcache=None):
    """
    Generate account metadata and store it on acc_path.

    :returns: the resulting metadata dictionary
    """
    metadata = get_account_metadata(acc_path, memcache)
    return restore_metadata(acc_path, metadata)


def write_pickle(obj, dest, tmp=None, pickle_protocol=0):
    """
    Ensure that a pickle file gets written to disk.  The file is first written
    to a tmp file location in the destination directory path, ensured it is
    synced to disk, then moved to its final destination name.

    This version takes advantage of Gluster's dot-prefix-dot-suffix naming
    where a file named ".thefile.name.9a7aasv" is hashed to the same
    Gluster node as "thefile.name". This ensures the renaming of a temp file
    once written does not move it to another Gluster node.

    :param obj: python object to be pickled
    :param dest: path of final destination file
    :param tmp: path to tmp to use, defaults to None (ignored)
    :param pickle_protocol: protocol to pickle the obj with, defaults to 0
    """
    dirname = os.path.dirname(dest)
    basename = os.path.basename(dest)
    # The random suffix only has to make the temp name unique.
    tmpname = '.' + basename + '.' + \
        md5(basename + str(random.random())).hexdigest()
    tmppath = os.path.join(dirname, tmpname)
    with open(tmppath, 'wb') as fo:
        pickle.dump(obj, fo, pickle_protocol)
        # TODO: This flush() method call turns into a flush() system call
        # We'll need to wrap this as well, but we would do this by writing
        # a context manager for our own open() method which returns an object
        # in fo which makes the gluster API call.
        fo.flush()
        do_fsync(fo)
    do_rename(tmppath, dest)


# Over-ride Swift's utils.write_pickle with ours
import swift.common.utils
swift.common.utils.write_pickle = write_pickle