From 2014cdb9066e273cf791f38b1c8247427c76cfa9 Mon Sep 17 00:00:00 2001
From: Prashanth Pai
Date: Tue, 28 Jan 2014 12:13:33 +0530
Subject: Add support for Object Expiration feature

Preventing access to expired objects
------------------------------------
Re-enabled accepting X-Delete-At and X-Delete-After headers. During a
GET on an expired object, DiskFileExpired is raised by the DiskFile
class. This will result in object-server returning HTTPNotFound (404)
to the client.

Tracking objects to be deleted
------------------------------
Objects to be deleted are tracked using "tracker objects". These are
PUT into a special account (a volume, for now). These zero-size
"tracker objects" have names that contain:
* Expiration timestamp
* Path of the actual object to be deleted

Deleting actual objects from GlusterFS volume
---------------------------------------------
The object-expirer daemon runs a pass once every X seconds. For every
pass it makes, it queries the special account for "tracker objects".
Based on the (timestamp, path) present in the name of each "tracker
object", object-expirer then deletes the actual object and the
corresponding tracker object.

To run object-expirer forever:
    swift-init object-expirer start

To run just once:
    swift-object-expirer -o -v /etc/swift/object-expirer.conf

Caveat/Limitation:
Object-expirer needs a separate account (volume) that is not used by
other services like gswauth. By default, this volume is named
"gsexpiring" and is configurable.

More info about object expiration: http://docs.openstack.org/developer/swift/overview_expiring_objects.html Change-Id: I876995bf4f16ef4bfdff901561e0558ecf1dc38f Signed-off-by: Prashanth Pai Reviewed-on: http://review.gluster.org/6891 Tested-by: Chetan Risbud Reviewed-by: pushpesh sharma Tested-by: pushpesh sharma Reviewed-by: Chetan Risbud --- gluster/swift/common/constraints.py | 2 +- gluster/swift/obj/diskfile.py | 22 +++++++- gluster/swift/obj/server.py | 102 ++++++++++++++++++++++++++++++++---- 3 files changed, 113 insertions(+), 13 deletions(-) (limited to 'gluster/swift') diff --git a/gluster/swift/common/constraints.py b/gluster/swift/common/constraints.py index 7681c49..80616f2 100644 --- a/gluster/swift/common/constraints.py +++ b/gluster/swift/common/constraints.py @@ -23,7 +23,7 @@ import swift.common.ring as _ring from gluster.swift.common import Glusterfs, ring MAX_OBJECT_NAME_COMPONENT_LENGTH = 255 -UNSUPPORTED_HEADERS = ['x-delete-at', 'x-delete-after'] +UNSUPPORTED_HEADERS = [] def set_object_name_component_length(len=None): diff --git a/gluster/swift/obj/diskfile.py b/gluster/swift/obj/diskfile.py index 852f69f..b3e91bc 100644 --- a/gluster/swift/obj/diskfile.py +++ b/gluster/swift/obj/diskfile.py @@ -23,6 +23,7 @@ try: except ImportError: import random import logging +import time from collections import defaultdict from socket import gethostname from hashlib import md5 @@ -33,7 +34,8 @@ from gluster.swift.common.exceptions import AlreadyExistsAsFile, \ AlreadyExistsAsDir from swift.common.utils import TRUE_VALUES, ThreadPool, config_true_value from swift.common.exceptions import DiskFileNotExist, DiskFileError, \ - DiskFileNoSpace, DiskFileDeviceUnavailable, DiskFileNotOpen + DiskFileNoSpace, DiskFileDeviceUnavailable, DiskFileNotOpen, \ + DiskFileExpired from swift.common.swob import multi_range_iterator from gluster.swift.common.exceptions import GlusterFileSystemOSError @@ -701,6 +703,7 @@ class DiskFile(object): the object representation does 
not exist. :raises DiskFileNotExist: if the object does not exist + :raises DiskFileExpired: if the object has expired :returns: itself for use as a context manager """ # Writes are always performed to a temporary file @@ -731,11 +734,28 @@ class DiskFile(object): obj_size = 0 self._fd = -1 else: + if self._is_object_expired(self._metadata): + raise DiskFileExpired(metadata=self._metadata) self._fd = fd self._obj_size = obj_size return self + def _is_object_expired(self, metadata): + try: + x_delete_at = int(metadata['X-Delete-At']) + except KeyError: + pass + except ValueError: + # x-delete-at key is present but not an integer. + # TODO: Openstack Swift "quarrantines" the object. + # We just let it pass + pass + else: + if x_delete_at <= time.time(): + return True + return False + def _filter_metadata(self): if X_TYPE in self._metadata: self._metadata.pop(X_TYPE) diff --git a/gluster/swift/obj/server.py b/gluster/swift/obj/server.py index 3cdd3c0..8bc080a 100644 --- a/gluster/swift/obj/server.py +++ b/gluster/swift/obj/server.py @@ -28,6 +28,14 @@ from swift.obj import server from gluster.swift.obj.diskfile import OnDiskManager +import os +from swift.common.exceptions import ConnectionTimeout +from swift.common.bufferedhttp import http_connect +from eventlet import Timeout +from swift.common.http import is_success +from gluster.swift.common.ring import Ring +from swift import gettext_ as _ + class ObjectController(server.ObjectController): """ @@ -44,16 +52,10 @@ class ObjectController(server.ObjectController): :param conf: WSGI configuration parameter """ - # FIXME: Gluster currently does not support x-delete-at, as there is - # no mechanism in GlusterFS itself to expire an object, or an external - # process that will cull expired objects. - try: - self.allowed_headers.remove('x-delete-at') - except KeyError: - pass # Common on-disk hierarchy shared across account, container and object # servers. 
self._ondisk_mgr = OnDiskManager(conf, self.logger) + self.swift_dir = conf.get('swift_dir', '/etc/swift') def get_diskfile(self, device, partition, account, container, obj, **kwargs): @@ -77,12 +79,90 @@ class ObjectController(server.ObjectController): """ return - def delete_at_update(self, *args, **kwargs): - """ - Update the expiring objects container when objects are updated. + def get_object_ring(self): + if hasattr(self, 'object_ring'): + if not self.object_ring: + self.object_ring = Ring(self.swift_dir, ring_name='object') + else: + self.object_ring = Ring(self.swift_dir, ring_name='object') + return self.object_ring - FIXME: Gluster currently does not support delete_at headers. + def async_update(self, op, account, container, obj, host, partition, + contdevice, headers_out, objdevice): """ + In Openstack Swift, this method is called by: + * container_update (a no-op in gluster-swift) + * delete_at_update (to PUT objects into .expiring_objects account) + + The Swift's version of async_update only sends the request to + container-server to PUT the object. The container-server calls + container_update method which makes an entry for the object in it's + database. No actual object is created on disk. + + But in gluster-swift container_update is a no-op, so we'll + have to PUT an actual object. We override async_update to create a + container first and then the corresponding "tracker object" which + tracks expired objects scheduled for deletion. + """ + + headers_out['user-agent'] = 'obj-server %s' % os.getpid() + if all([host, partition, contdevice]): + # PUT the container. 
Send request directly to container-server + container_path = '/%s/%s' % (account, container) + try: + with ConnectionTimeout(self.conn_timeout): + ip, port = host.rsplit(':', 1) + conn = http_connect(ip, port, contdevice, partition, op, + container_path, headers_out) + with Timeout(self.node_timeout): + response = conn.getresponse() + response.read() + if not is_success(response.status): + self.logger.error(_( + 'async_update : ' + 'ERROR Container update failed :%(status)d ' + 'response from %(ip)s:%(port)s/%(dev)s'), + {'status': response.status, 'ip': ip, 'port': port, + 'dev': contdevice}) + return + except (Exception, Timeout): + self.logger.exception(_( + 'async_update : ' + 'ERROR Container update failed :%(ip)s:%(port)s/%(dev)s'), + {'ip': ip, 'port': port, 'dev': contdevice}) + + # PUT the tracker object. Send request directly to object-server + object_path = '/%s/%s/%s' % (account, container, obj) + headers_out['Content-Length'] = 0 + headers_out['Content-Type'] = 'text/plain' + try: + with ConnectionTimeout(self.conn_timeout): + # FIXME: Assuming that get_nodes returns single node + part, nodes = self.get_object_ring().get_nodes(account, + container, + obj) + ip = nodes[0]['ip'] + port = nodes[0]['port'] + objdevice = nodes[0]['device'] + conn = http_connect(ip, port, objdevice, partition, op, + object_path, headers_out) + with Timeout(self.node_timeout): + response = conn.getresponse() + response.read() + if is_success(response.status): + return + else: + self.logger.error(_( + 'async_update : ' + 'ERROR Object PUT failed : %(status)d ' + 'response from %(ip)s:%(port)s/%(dev)s'), + {'status': response.status, 'ip': ip, 'port': port, + 'dev': objdevice}) + except (Exception, Timeout): + self.logger.exception(_( + 'async_update : ' + 'ERROR Object PUT failed :%(ip)s:%(port)s/%(dev)s'), + {'ip': ip, 'port': port, 'dev': objdevice}) return @public -- cgit