diff options
Diffstat (limited to 'gluster/swift/common/DiskFile.py')
-rw-r--r-- | gluster/swift/common/DiskFile.py | 338 |
1 files changed, 338 insertions, 0 deletions
# Copyright (c) 2012 Red Hat, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import errno
import random
from hashlib import md5
from contextlib import contextmanager
from swift.common.utils import normalize_timestamp, renamer
from swift.common.exceptions import DiskFileNotExist
from gluster.swift.common.exceptions import AlreadyExistsAsDir
from gluster.swift.common.utils import mkdirs, rmdirs, validate_object, \
    create_object_metadata, do_open, do_close, do_unlink, do_chown, \
    do_listdir, read_metadata, write_metadata, os_path, do_fsync
from gluster.swift.common.utils import X_CONTENT_TYPE, X_CONTENT_LENGTH, \
    X_TIMESTAMP, X_PUT_TIMESTAMP, X_TYPE, X_ETAG, X_OBJECTS_COUNT, \
    X_BYTES_USED, X_OBJECT_TYPE, FILE, DIR, MARKER_DIR, OBJECT, DIR_TYPE, \
    FILE_TYPE, DEFAULT_UID, DEFAULT_GID

import logging
from swift.obj.server import DiskFile


DEFAULT_DISK_CHUNK_SIZE = 65536
# keep these lower-case
DISALLOWED_HEADERS = set('content-length content-type deleted etag'.split())


def _adjust_metadata(metadata):
    """
    Normalize object metadata in place before it is written to disk.

    Ensures 'Content-Type' has a non-empty value (defaulting to FILE_TYPE)
    and sets the X_TYPE and X_OBJECT_TYPE entries. The object is tagged as
    a MARKER_DIR when the content type, compared case-insensitively, equals
    DIR_TYPE; otherwise it is tagged as a FILE.

    :param metadata: dictionary of metadata; modified in place
    :returns: the same dictionary that was passed in
    :raises KeyError: if metadata has no 'Content-Type' key
    """
    content_type = metadata['Content-Type']
    if not content_type:
        # FIXME: How can this be that our caller supplied us with metadata
        # that has a content type that evaluates to False?
        #
        # FIXME: If the file exists, we would already know it is a
        # directory. So why are we assuming it is a file object?
        metadata['Content-Type'] = FILE_TYPE
        x_object_type = FILE
    else:
        x_object_type = MARKER_DIR if content_type.lower() == DIR_TYPE \
            else FILE
    metadata[X_TYPE] = OBJECT
    metadata[X_OBJECT_TYPE] = x_object_type
    return metadata


class Gluster_DiskFile(DiskFile):
    """
    Manage object files on disk.

    The object name is mapped directly onto the file system: the object
    lives at <path>/<device>/<container>/<obj>, and any '/' separated
    prefix of the object name becomes a chain of sub-directories.

    Note that the constructor does not call the parent class constructor;
    it initializes every attribute itself.

    :param path: path to devices on the node/mount path for UFO.
    :param device: device name/account_name for UFO.
    :param partition: partition on the device the object lives in
    :param account: account name for the object
    :param container: container name for the object
    :param obj: object name for the object
    :param logger: logger object for writing out log file messages
    :param keep_data_fp: if True, don't close the fp, otherwise close it
    :param disk_chunk_size: size of chunks on file reads
    :param uid: user ID disk object should assume (file or directory)
    :param gid: group ID disk object should assume (file or directory)
    :param iter_hook: stored on the instance as ``iter_hook``; not used by
                      any method defined in this class
    """

    def __init__(self, path, device, partition, account, container, obj,
                 logger, keep_data_fp=False,
                 disk_chunk_size=DEFAULT_DISK_CHUNK_SIZE,
                 uid=DEFAULT_UID, gid=DEFAULT_GID, iter_hook=None):
        self.disk_chunk_size = disk_chunk_size
        self.iter_hook = iter_hook
        # Don't support obj_name ending/beginning with '/', like /a, a/,
        # /a/b/, etc.
        obj = obj.strip(os.path.sep)
        if os.path.sep in obj:
            self._obj_path, self._obj = os.path.split(obj)
        else:
            self._obj_path = ''
            self._obj = obj

        if self._obj_path:
            self.name = os.path.join(container, self._obj_path)
        else:
            self.name = container
        # Absolute path for object directory.
        self.datadir = os.path.join(path, device, self.name)
        self.device_path = os.path.join(path, device)
        self._container_path = os.path.join(path, device, container)
        self._is_dir = False
        self.tmppath = None
        self.logger = logger
        self.metadata = {}
        self.meta_file = None
        self.fp = None
        self.iter_etag = None
        self.started_at_0 = False
        self.read_to_eof = False
        self.quarantined_dir = None
        self.keep_cache = False
        self.uid = int(uid)
        self.gid = int(gid)

        # Don't store a value for data_file until we know it exists.
        self.data_file = None
        data_file = os.path.join(self.datadir, self._obj)
        if not os_path.exists(data_file):
            return

        self.data_file = data_file
        self.metadata = read_metadata(data_file)
        if not self.metadata:
            # Nothing stored yet: generate metadata and read it back.
            create_object_metadata(data_file)
            self.metadata = read_metadata(data_file)

        if not validate_object(self.metadata):
            # Stored metadata failed validation: regenerate it.
            create_object_metadata(data_file)
            self.metadata = read_metadata(data_file)

        self.filter_metadata()

        if os_path.isdir(data_file):
            self._is_dir = True
        elif keep_data_fp:
            # The caller has an assumption that the "fp" field of this
            # object is a file object if keep_data_fp is set. However,
            # this implementation of the DiskFile object does not need to
            # open the file for internal operations. So if the caller
            # requests it, we'll just open the file for them.
            self.fp = do_open(data_file, 'rb')

    def close(self, verify_file=True):
        """
        Close the file if it is open.

        Nothing is done for a marker directory. This implementation performs
        no verification or quarantining.

        :param verify_file: accepted for interface compatibility; ignored
                            by this implementation.
        """
        # Marker directory
        if self._is_dir:
            return
        if self.fp:
            do_close(self.fp)
            self.fp = None

    def is_deleted(self):
        """
        Check if the file is deleted.

        :returns: True if no data file is recorded for this object, i.e. it
                  did not exist when this instance was created or it has
                  since been removed by unlinkold().
        """
        return not self.data_file

    def _create_dir_object(self, dir_path):
        """
        Make sure dir_path exists as a directory object.

        A non-directory already present at dir_path is deleted first. The
        directory is then created, chown'ed to this object's uid/gid, and
        given object metadata via create_object_metadata().

        :param dir_path: absolute path of the directory to create
        """
        # TODO: if object already exists???
        if os_path.exists(dir_path) and not os_path.isdir(dir_path):
            self.logger.error("Deleting file %s", dir_path)
            do_unlink(dir_path)
        # If dir already exists just override metadata.
        mkdirs(dir_path)
        do_chown(dir_path, self.uid, self.gid)
        create_object_metadata(dir_path)

    def put_metadata(self, metadata, tombstone=False):
        """
        Write metadata for the existing data file.

        The metadata is adjusted by _adjust_metadata() (which modifies the
        passed dictionary), written with write_metadata(), and then kept as
        self.metadata with the X_TYPE and X_OBJECT_TYPE entries removed.

        :param metadata: dictionary of metadata to be written
        :param tombstone: whether or not we are writing a tombstone
        """
        if tombstone:
            # We don't write tombstone files. So do nothing.
            return
        assert self.data_file is not None, \
            "put_metadata: no file to put metadata into"
        metadata = _adjust_metadata(metadata)
        write_metadata(self.data_file, metadata)
        self.metadata = metadata
        self.filter_metadata()

    def put(self, fd, metadata, extension='.data'):
        """
        Finalize writing the file on disk, and renames it from the temp file
        to the real location. This should be called after the data has been
        written to the temp file.

        When the adjusted metadata describes a marker directory, the
        directory is created (if no data file is recorded yet) and only its
        metadata is written; the temp file is not renamed.

        :param fd: file descriptor of the temp file
        :param metadata: dictionary of metadata to be written
        :param extension: ignored. Our caller will use '.data' here, but we
                          map the URL directly to the file system.
        :raises AlreadyExistsAsDir: if a file object is being put where a
                                    directory already exists
        """
        metadata = _adjust_metadata(metadata)

        if metadata[X_OBJECT_TYPE] == MARKER_DIR:
            if not self.data_file:
                self.data_file = os.path.join(self.datadir, self._obj)
                self._create_dir_object(self.data_file)
            self.put_metadata(metadata)
            return

        # Check if directory already exists.
        if self._is_dir:
            # FIXME: How can we have a directory and it not be marked as a
            # MARKER_DIR (see above)?
            msg = 'File object exists as a directory: %s' % self.data_file
            raise AlreadyExistsAsDir(msg)

        # The result is not used; the call is kept so that a missing
        # X_TIMESTAMP entry still raises before anything is written.
        normalize_timestamp(metadata[X_TIMESTAMP])
        write_metadata(self.tmppath, metadata)
        if X_CONTENT_LENGTH in metadata:
            # drop_cache is not defined in this class.
            # NOTE(review): presumably inherited from DiskFile - confirm.
            self.drop_cache(fd, 0, int(metadata[X_CONTENT_LENGTH]))
        do_fsync(fd)
        if self._obj_path:
            # (Re)create every intermediate directory object between the
            # container and the object itself.
            tmp_path = self._container_path
            for dir_name in self._obj_path.split(os.path.sep):
                tmp_path = os.path.join(tmp_path, dir_name)
                self._create_dir_object(tmp_path)

        newpath = os.path.join(self.datadir, self._obj)
        renamer(self.tmppath, newpath)
        do_chown(newpath, self.uid, self.gid)
        self.metadata = metadata
        self.data_file = newpath
        self.filter_metadata()
        return

    def unlinkold(self, timestamp):
        """
        Remove the object if it is older than the given timestamp.

        Nothing happens when there is no metadata or when the stored
        'X-Timestamp' value compares greater than or equal to timestamp.
        Otherwise a marker directory is removed with rmdirs() and a file
        with do_unlink(); on success the in-memory metadata and data_file
        are cleared. If the directory cannot be removed, an error is logged
        and the in-memory state is left untouched.

        :param timestamp: timestamp to compare with the stored one
        """
        if not self.metadata or self.metadata['X-Timestamp'] >= timestamp:
            return

        assert self.data_file, \
            "Have metadata, %r, but no data_file" % self.metadata

        if self._is_dir:
            # Marker directory object
            if not rmdirs(self.data_file):
                self.logger.error('Unable to delete dir object: %s',
                                  self.data_file)
                return
        else:
            # File object
            do_unlink(self.data_file)

        self.metadata = {}
        self.data_file = None

    def get_data_file_size(self):
        """
        Returns the os_path.getsize for the file.

        If the size on disk differs from the Content-Length stored in the
        metadata, the stored value is replaced by the actual size and the
        metadata is written back. A marker directory always reports 0.

        :returns: file size as an int
        :raises DiskFileNotExist: on file not existing (including deleted)
        """
        # Marker directory.
        if self._is_dir:
            return 0
        try:
            if self.data_file:
                file_size = os_path.getsize(self.data_file)
                if X_CONTENT_LENGTH in self.metadata:
                    metadata_size = int(self.metadata[X_CONTENT_LENGTH])
                    if file_size != metadata_size:
                        self.metadata[X_CONTENT_LENGTH] = file_size
                        write_metadata(self.data_file, self.metadata)

                return file_size
        except OSError as err:
            # A vanished file is reported as DiskFileNotExist below; any
            # other OS error is propagated unchanged.
            if err.errno != errno.ENOENT:
                raise
        raise DiskFileNotExist('Data File does not exist.')

    def filter_metadata(self):
        """
        Remove the X_TYPE and X_OBJECT_TYPE entries from self.metadata, if
        present.
        """
        self.metadata.pop(X_TYPE, None)
        self.metadata.pop(X_OBJECT_TYPE, None)

    @contextmanager
    def mkstemp(self):
        """
        Contextmanager to make a temporary file.

        Yields the descriptor returned by do_open() for a new, exclusively
        created file inside the object's directory, and records its path in
        self.tmppath. On exit the descriptor is closed (errors from closing
        are ignored), self.tmppath is reset to None, and the temp file is
        unlinked; it is not an error if the file is already gone.
        """

        # Creating intermediate directories and corresponding metadata.
        # For optimization, check if the subdirectory already exists,
        # if exists, then it means that it also has its metadata.
        # Not checking for container, since the container should already
        # exist for the call to come here.
        if not os_path.exists(self.datadir):
            path = self._container_path
            for subdir in self._obj_path.split(os.path.sep):
                path = os.path.join(path, subdir)
                if not os_path.exists(path):
                    self._create_dir_object(path)

        # NOTE(review): md5() is fed a str here, which looks like it
        # assumes Python 2 string semantics - confirm before porting.
        tmpfile = '.' + self._obj + '.' + \
            md5(self._obj + str(random.random())).hexdigest()

        self.tmppath = os.path.join(self.datadir, tmpfile)
        fd = do_open(self.tmppath, os.O_RDWR | os.O_CREAT | os.O_EXCL)
        try:
            yield fd
        finally:
            try:
                do_close(fd)
            except OSError:
                # Best effort: closing must not prevent the cleanup below.
                pass
            tmppath, self.tmppath = self.tmppath, None
            try:
                do_unlink(tmppath)
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise