diff options
Diffstat (limited to 'xlators/features/qemu-block')
| -rw-r--r-- | xlators/features/qemu-block/Makefile.am | 1 | ||||
| -rw-r--r-- | xlators/features/qemu-block/src/Makefile.am | 155 | ||||
| -rw-r--r-- | xlators/features/qemu-block/src/bdrv-xlator.c | 397 | ||||
| -rw-r--r-- | xlators/features/qemu-block/src/bh-syncop.c | 48 | ||||
| -rw-r--r-- | xlators/features/qemu-block/src/clock-timer.c | 60 | ||||
| -rw-r--r-- | xlators/features/qemu-block/src/coroutine-synctask.c | 116 | ||||
| -rw-r--r-- | xlators/features/qemu-block/src/monitor-logging.c | 50 | ||||
| -rw-r--r-- | xlators/features/qemu-block/src/qb-coroutines.c | 662 | ||||
| -rw-r--r-- | xlators/features/qemu-block/src/qb-coroutines.h | 30 | ||||
| -rw-r--r-- | xlators/features/qemu-block/src/qemu-block-memory-types.h | 25 | ||||
| -rw-r--r-- | xlators/features/qemu-block/src/qemu-block.c | 1140 | ||||
| -rw-r--r-- | xlators/features/qemu-block/src/qemu-block.h | 109 |
12 files changed, 2793 insertions, 0 deletions
diff --git a/xlators/features/qemu-block/Makefile.am b/xlators/features/qemu-block/Makefile.am new file mode 100644 index 000000000..af437a64d --- /dev/null +++ b/xlators/features/qemu-block/Makefile.am @@ -0,0 +1 @@ +SUBDIRS = src diff --git a/xlators/features/qemu-block/src/Makefile.am b/xlators/features/qemu-block/src/Makefile.am new file mode 100644 index 000000000..08a7b62a0 --- /dev/null +++ b/xlators/features/qemu-block/src/Makefile.am @@ -0,0 +1,155 @@ +if ENABLE_QEMU_BLOCK +xlator_LTLIBRARIES = qemu-block.la +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features + +qemu_block_la_LDFLAGS = -module -avoid-version +qemu_block_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(GLIB_LIBS) -lz -lrt + +qemu_block_la_SOURCES_qemu = \ + $(CONTRIBDIR)/qemu/qemu-coroutine.c \ + $(CONTRIBDIR)/qemu/qemu-coroutine-lock.c \ + $(CONTRIBDIR)/qemu/qemu-coroutine-sleep.c \ + $(CONTRIBDIR)/qemu/coroutine-ucontext.c \ + $(CONTRIBDIR)/qemu/block.c \ + $(CONTRIBDIR)/qemu/nop-symbols.c + +qemu_block_la_SOURCES_qemu_util = \ + $(CONTRIBDIR)/qemu/util/aes.c \ + $(CONTRIBDIR)/qemu/util/bitmap.c \ + $(CONTRIBDIR)/qemu/util/bitops.c \ + $(CONTRIBDIR)/qemu/util/cutils.c \ + $(CONTRIBDIR)/qemu/util/error.c \ + $(CONTRIBDIR)/qemu/util/hbitmap.c \ + $(CONTRIBDIR)/qemu/util/iov.c \ + $(CONTRIBDIR)/qemu/util/module.c \ + $(CONTRIBDIR)/qemu/util/oslib-posix.c \ + $(CONTRIBDIR)/qemu/util/qemu-option.c \ + $(CONTRIBDIR)/qemu/util/qemu-error.c \ + $(CONTRIBDIR)/qemu/util/qemu-thread-posix.c \ + $(CONTRIBDIR)/qemu/util/unicode.c \ + $(CONTRIBDIR)/qemu/util/hexdump.c + +qemu_block_la_SOURCES_qemu_block = \ + $(CONTRIBDIR)/qemu/block/snapshot.c \ + $(CONTRIBDIR)/qemu/block/qcow2-cache.c \ + $(CONTRIBDIR)/qemu/block/qcow2-cluster.c \ + $(CONTRIBDIR)/qemu/block/qcow2-refcount.c \ + $(CONTRIBDIR)/qemu/block/qcow2-snapshot.c \ + $(CONTRIBDIR)/qemu/block/qcow2.c \ + $(CONTRIBDIR)/qemu/block/qed-check.c \ + $(CONTRIBDIR)/qemu/block/qed-cluster.c \ + $(CONTRIBDIR)/qemu/block/qed-gencb.c \ + $(CONTRIBDIR)/qemu/block/qed-l2-cache.c \ + $(CONTRIBDIR)/qemu/block/qed-table.c \ + $(CONTRIBDIR)/qemu/block/qed.c + +qemu_block_la_SOURCES_qemu_qobject = \ + $(CONTRIBDIR)/qemu/qobject/json-lexer.c \ + $(CONTRIBDIR)/qemu/qobject/json-parser.c \ + $(CONTRIBDIR)/qemu/qobject/json-streamer.c \ + $(CONTRIBDIR)/qemu/qobject/qbool.c \ + $(CONTRIBDIR)/qemu/qobject/qdict.c \ + $(CONTRIBDIR)/qemu/qobject/qerror.c \ + $(CONTRIBDIR)/qemu/qobject/qfloat.c \ + $(CONTRIBDIR)/qemu/qobject/qint.c \ + $(CONTRIBDIR)/qemu/qobject/qjson.c \ + $(CONTRIBDIR)/qemu/qobject/qlist.c \ + $(CONTRIBDIR)/qemu/qobject/qstring.c + +qemu_block_la_SOURCES = \ + $(qemu_block_la_SOURCES_qemu) \ + $(qemu_block_la_SOURCES_qemu_util) \ + $(qemu_block_la_SOURCES_qemu_block) \ + $(qemu_block_la_SOURCES_qemu_qobject) \ + bdrv-xlator.c \ + coroutine-synctask.c \ + bh-syncop.c \ + monitor-logging.c \ + clock-timer.c \ + qemu-block.c \ + qb-coroutines.c + +noinst_HEADERS_qemu = \ + $(CONTRIBDIR)/qemu/config-host.h \ + $(CONTRIBDIR)/qemu/qapi-types.h \ + $(CONTRIBDIR)/qemu/qmp-commands.h \ + $(CONTRIBDIR)/qemu/trace/generated-tracers.h \ + $(CONTRIBDIR)/qemu/include/config.h \ + $(CONTRIBDIR)/qemu/include/glib-compat.h \ + $(CONTRIBDIR)/qemu/include/qemu-common.h \ + $(CONTRIBDIR)/qemu/include/trace.h \ + $(CONTRIBDIR)/qemu/include/block/coroutine.h \ + $(CONTRIBDIR)/qemu/include/block/aio.h \ + $(CONTRIBDIR)/qemu/include/block/block.h \ + $(CONTRIBDIR)/qemu/include/block/block_int.h \ + $(CONTRIBDIR)/qemu/include/block/blockjob.h \ + $(CONTRIBDIR)/qemu/include/block/coroutine.h \ + $(CONTRIBDIR)/qemu/include/block/coroutine_int.h \ + $(CONTRIBDIR)/qemu/include/block/snapshot.h \ + $(CONTRIBDIR)/qemu/include/exec/cpu-common.h \ + $(CONTRIBDIR)/qemu/include/exec/hwaddr.h \ + $(CONTRIBDIR)/qemu/include/exec/poison.h \ + $(CONTRIBDIR)/qemu/include/fpu/softfloat.h \ + $(CONTRIBDIR)/qemu/include/migration/migration.h \ + $(CONTRIBDIR)/qemu/include/migration/qemu-file.h \ + $(CONTRIBDIR)/qemu/include/migration/vmstate.h \ + $(CONTRIBDIR)/qemu/include/monitor/monitor.h \ + $(CONTRIBDIR)/qemu/include/monitor/readline.h \ + $(CONTRIBDIR)/qemu/include/qapi/error.h \ + $(CONTRIBDIR)/qemu/include/qapi/qmp/json-lexer.h \ + $(CONTRIBDIR)/qemu/include/qapi/qmp/json-parser.h \ + $(CONTRIBDIR)/qemu/include/qapi/qmp/json-streamer.h \ + $(CONTRIBDIR)/qemu/include/qapi/qmp/qbool.h \ + $(CONTRIBDIR)/qemu/include/qapi/qmp/qdict.h \ + $(CONTRIBDIR)/qemu/include/qapi/qmp/qerror.h \ + $(CONTRIBDIR)/qemu/include/qapi/qmp/qfloat.h \ + $(CONTRIBDIR)/qemu/include/qapi/qmp/qint.h \ + $(CONTRIBDIR)/qemu/include/qapi/qmp/qjson.h \ + $(CONTRIBDIR)/qemu/include/qapi/qmp/qlist.h \ + $(CONTRIBDIR)/qemu/include/qapi/qmp/qobject.h \ + $(CONTRIBDIR)/qemu/include/qapi/qmp/qstring.h \ + $(CONTRIBDIR)/qemu/include/qapi/qmp/types.h \ + $(CONTRIBDIR)/qemu/include/qemu/aes.h \ + $(CONTRIBDIR)/qemu/include/qemu/atomic.h \ + $(CONTRIBDIR)/qemu/include/qemu/bitmap.h \ + $(CONTRIBDIR)/qemu/include/qemu/bitops.h \ + $(CONTRIBDIR)/qemu/include/qemu/bswap.h \ + $(CONTRIBDIR)/qemu/include/qemu/compiler.h \ + $(CONTRIBDIR)/qemu/include/qemu/error-report.h \ + $(CONTRIBDIR)/qemu/include/qemu/event_notifier.h \ + $(CONTRIBDIR)/qemu/include/qemu/hbitmap.h \ + $(CONTRIBDIR)/qemu/include/qemu/host-utils.h \ + $(CONTRIBDIR)/qemu/include/qemu/iov.h \ + $(CONTRIBDIR)/qemu/include/qemu/main-loop.h \ + $(CONTRIBDIR)/qemu/include/qemu/module.h \ + $(CONTRIBDIR)/qemu/include/qemu/notify.h \ + $(CONTRIBDIR)/qemu/include/qemu/option.h \ + $(CONTRIBDIR)/qemu/include/qemu/option_int.h \ + $(CONTRIBDIR)/qemu/include/qemu/osdep.h \ + $(CONTRIBDIR)/qemu/include/qemu/queue.h \ + $(CONTRIBDIR)/qemu/include/qemu/sockets.h \ + $(CONTRIBDIR)/qemu/include/qemu/thread-posix.h \ + $(CONTRIBDIR)/qemu/include/qemu/thread.h \ + $(CONTRIBDIR)/qemu/include/qemu/timer.h \ + $(CONTRIBDIR)/qemu/include/qemu/typedefs.h \ + $(CONTRIBDIR)/qemu/include/sysemu/sysemu.h \ + $(CONTRIBDIR)/qemu/include/sysemu/os-posix.h \ + $(CONTRIBDIR)/qemu/block/qcow2.h \ + $(CONTRIBDIR)/qemu/block/qed.h + +noinst_HEADERS = \ + $(noinst_HEADERS_qemu) \ + qemu-block.h \ + qemu-block-memory-types.h \ + qb-coroutines.h + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(CONTRIBDIR)/qemu \ + -I$(CONTRIBDIR)/qemu/include \ + -DGLUSTER_XLATOR + +AM_CFLAGS = -fno-strict-aliasing -Wall $(GF_CFLAGS) $(GLIB_CFLAGS) + +CLEANFILES = + +endif diff --git a/xlators/features/qemu-block/src/bdrv-xlator.c b/xlators/features/qemu-block/src/bdrv-xlator.c new file mode 100644 index 000000000..106c59775 --- /dev/null +++ b/xlators/features/qemu-block/src/bdrv-xlator.c @@ -0,0 +1,397 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "inode.h" +#include "syncop.h" +#include "qemu-block.h" +#include "block/block_int.h" + +typedef struct BDRVGlusterState { + inode_t *inode; +} BDRVGlusterState; + +static QemuOptsList runtime_opts = { + .name = "gluster", + .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head), + .desc = { + { + .name = "filename", + .type = QEMU_OPT_STRING, + .help = "GFID of file", + }, + { /* end of list */ } + }, +}; + +inode_t * +qb_inode_from_filename (const char *filename) +{ + const char *iptr = NULL; + inode_t *inode = NULL; + + iptr = filename + 17; + sscanf (iptr, "%p", &inode); + + return inode; +} + + +int +qb_inode_to_filename (inode_t *inode, char *filename, int size) +{ + return snprintf (filename, size, "gluster://inodep:%p", inode); +} + + +static fd_t * +fd_from_bs (BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + + return fd_anonymous (s->inode); +} + + +static int +qemu_gluster_open (BlockDriverState *bs, QDict *options, int bdrv_flags) +{ + inode_t *inode = NULL; + BDRVGlusterState *s = bs->opaque; + QemuOpts *opts = NULL; + Error *local_err = NULL; + const char *filename = NULL; + char gfid_str[128]; + int ret; + qb_conf_t *conf = THIS->private; + + opts = qemu_opts_create_nofail(&runtime_opts); + qemu_opts_absorb_qdict(opts, options, &local_err); + if (error_is_set(&local_err)) { + qerror_report_err(local_err); + error_free(local_err); + return -EINVAL; + } + + filename = qemu_opt_get(opts, "filename"); + + /* + * gfid:<gfid> format means we're opening a backing image. + */ + ret = sscanf(filename, "gluster://gfid:%s", gfid_str); + if (ret) { + loc_t loc = {0,}; + struct iatt buf = {0,}; + uuid_t gfid; + + uuid_parse(gfid_str, gfid); + + loc.inode = inode_find(conf->root_inode->table, gfid); + if (!loc.inode) { + loc.inode = inode_new(conf->root_inode->table); + uuid_copy(loc.inode->gfid, gfid); + } + + uuid_copy(loc.gfid, loc.inode->gfid); + ret = syncop_lookup(FIRST_CHILD(THIS), &loc, NULL, &buf, NULL, + NULL); + if (ret) { + loc_wipe(&loc); + return -errno; + } + + s->inode = inode_ref(loc.inode); + loc_wipe(&loc); + } else { + inode = qb_inode_from_filename (filename); + if (!inode) + return -EINVAL; + + s->inode = inode_ref(inode); + } + + return 0; +} + + +static int +qemu_gluster_create (const char *filename, QEMUOptionParameter *options) +{ + uint64_t total_size = 0; + inode_t *inode = NULL; + fd_t *fd = NULL; + struct iatt stat = {0, }; + int ret = 0; + + inode = qb_inode_from_filename (filename); + if (!inode) + return -EINVAL; + + while (options && options->name) { + if (!strcmp(options->name, BLOCK_OPT_SIZE)) { + total_size = options->value.n / BDRV_SECTOR_SIZE; + } + options++; + } + + fd = fd_anonymous (inode); + if (!fd) + return -ENOMEM; + + ret = syncop_fstat (FIRST_CHILD(THIS), fd, &stat); + if (ret) { + fd_unref (fd); + return -errno; + } + + if (stat.ia_size) { + /* format ONLY if the filesize is 0 bytes */ + fd_unref (fd); + return -EFBIG; + } + + if (total_size) { + ret = syncop_ftruncate (FIRST_CHILD(THIS), fd, total_size); + if (ret) { + fd_unref (fd); + return -errno; + } + } + + fd_unref (fd); + return 0; +} + + +static int +qemu_gluster_co_readv (BlockDriverState *bs, int64_t sector_num, int nb_sectors, + QEMUIOVector *qiov) +{ + fd_t *fd = NULL; + off_t offset = 0; + size_t size = 0; + struct iovec *iov = NULL; + int count = 0; + struct iobref *iobref = NULL; + int ret = 0; + + fd = fd_from_bs (bs); + if (!fd) + return -EIO; + + offset = sector_num * BDRV_SECTOR_SIZE; + size = nb_sectors * BDRV_SECTOR_SIZE; + + ret = syncop_readv (FIRST_CHILD(THIS), fd, size, offset, 0, + &iov, &count, &iobref); + if (ret < 0) { + ret = -errno; + goto out; + } + + iov_copy (qiov->iov, qiov->niov, iov, count); /* *choke!* */ + +out: + GF_FREE (iov); + if (iobref) + iobref_unref (iobref); + fd_unref (fd); + return ret; +} + + +static int +qemu_gluster_co_writev (BlockDriverState *bs, int64_t sector_num, int nb_sectors, + QEMUIOVector *qiov) +{ + fd_t *fd = NULL; + off_t offset = 0; + size_t size = 0; + struct iobref *iobref = NULL; + struct iobuf *iobuf = NULL; + struct iovec iov = {0, }; + int ret = -ENOMEM; + + fd = fd_from_bs (bs); + if (!fd) + return -EIO; + + offset = sector_num * BDRV_SECTOR_SIZE; + size = nb_sectors * BDRV_SECTOR_SIZE; + + iobuf = iobuf_get2 (THIS->ctx->iobuf_pool, size); + if (!iobuf) + goto out; + + iobref = iobref_new (); + if (!iobref) { + iobuf_unref (iobuf); + goto out; + } + + iobref_add (iobref, iobuf); + + iov_unload (iobuf_ptr (iobuf), qiov->iov, qiov->niov); /* *choke!* */ + + iov.iov_base = iobuf_ptr (iobuf); + iov.iov_len = size; + + ret = syncop_writev (FIRST_CHILD(THIS), fd, &iov, 1, offset, iobref, 0); + if (ret < 0) + ret = -errno; + +out: + if (iobuf) + iobuf_unref (iobuf); + if (iobref) + iobref_unref (iobref); + fd_unref (fd); + return ret; +} + + +static int +qemu_gluster_co_flush (BlockDriverState *bs) +{ + fd_t *fd = NULL; + int ret = 0; + + fd = fd_from_bs (bs); + + ret = syncop_flush (FIRST_CHILD(THIS), fd); + + fd_unref (fd); + + return ret; +} + + +static int +qemu_gluster_co_fsync (BlockDriverState *bs) +{ + fd_t *fd = NULL; + int ret = 0; + + fd = fd_from_bs (bs); + + ret = syncop_fsync (FIRST_CHILD(THIS), fd, 0); + + fd_unref (fd); + + return ret; +} + + +static int +qemu_gluster_truncate (BlockDriverState *bs, int64_t offset) +{ + fd_t *fd = NULL; + int ret = 0; + + fd = fd_from_bs (bs); + + ret = syncop_ftruncate (FIRST_CHILD(THIS), fd, offset); + + fd_unref (fd); + + if (ret < 0) + return ret; + + return ret; +} + + +static int64_t +qemu_gluster_getlength (BlockDriverState *bs) +{ + fd_t *fd = NULL; + int ret = 0; + struct iatt iatt = {0, }; + + fd = fd_from_bs (bs); + + ret = syncop_fstat (FIRST_CHILD(THIS), fd, &iatt); + if (ret < 0) + return -1; + + return iatt.ia_size; +} + + +static int64_t +qemu_gluster_allocated_file_size (BlockDriverState *bs) +{ + fd_t *fd = NULL; + int ret = 0; + struct iatt iatt = {0, }; + + fd = fd_from_bs (bs); + + ret = syncop_fstat (FIRST_CHILD(THIS), fd, &iatt); + if (ret < 0) + return -1; + + return iatt.ia_blocks * 512; +} + + +static void +qemu_gluster_close (BlockDriverState *bs) +{ + BDRVGlusterState *s = NULL; + + s = bs->opaque; + + inode_unref (s->inode); + + return; +} + + +static QEMUOptionParameter qemu_gluster_create_options[] = { + { + .name = BLOCK_OPT_SIZE, + .type = OPT_SIZE, + .help = "Virtual disk size" + }, + { NULL } +}; + + +static BlockDriver bdrv_gluster = { + .format_name = "gluster", + .protocol_name = "gluster", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_co_readv = qemu_gluster_co_readv, + .bdrv_co_writev = qemu_gluster_co_writev, + .bdrv_co_flush_to_os = qemu_gluster_co_flush, + .bdrv_co_flush_to_disk = qemu_gluster_co_fsync, + .bdrv_truncate = qemu_gluster_truncate, + .create_options = qemu_gluster_create_options, +}; + + +static void bdrv_gluster_init(void) +{ + bdrv_register(&bdrv_gluster); +} + + +block_init(bdrv_gluster_init); diff --git a/xlators/features/qemu-block/src/bh-syncop.c b/xlators/features/qemu-block/src/bh-syncop.c new file mode 100644 index 000000000..e8686f6d4 --- /dev/null +++ b/xlators/features/qemu-block/src/bh-syncop.c @@ -0,0 +1,48 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "syncop.h" +#include "qemu-block-memory-types.h" + +#include "block/aio.h" + +void +qemu_bh_schedule (QEMUBH *bh) +{ + return; +} + +void +qemu_bh_cancel (QEMUBH *bh) +{ + return; +} + +void +qemu_bh_delete (QEMUBH *bh) +{ + +} + +QEMUBH * +qemu_bh_new (QEMUBHFunc *cb, void *opaque) +{ + return NULL; +} diff --git a/xlators/features/qemu-block/src/clock-timer.c b/xlators/features/qemu-block/src/clock-timer.c new file mode 100644 index 000000000..fcbec6ad1 --- /dev/null +++ b/xlators/features/qemu-block/src/clock-timer.c @@ -0,0 +1,60 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "syncop.h" +#include "qemu-block-memory-types.h" + +#include "qemu/timer.h" + +QEMUClock *vm_clock; +int use_rt_clock = 0; + +QEMUTimer *qemu_new_timer (QEMUClock *clock, int scale, + QEMUTimerCB *cb, void *opaque) +{ + return NULL; +} + +int64_t qemu_get_clock_ns (QEMUClock *clock) +{ + return 0; +} + +void qemu_mod_timer (QEMUTimer *ts, int64_t expire_time) +{ + return; +} + +void qemu_free_timer (QEMUTimer *ts) +{ + +} + +void qemu_del_timer (QEMUTimer *ts) +{ + +} + +bool qemu_aio_wait() +{ + synctask_wake (synctask_get()); + synctask_yield (synctask_get()); + return 0; +} diff --git a/xlators/features/qemu-block/src/coroutine-synctask.c b/xlators/features/qemu-block/src/coroutine-synctask.c new file mode 100644 index 000000000..e43988a95 --- /dev/null +++ b/xlators/features/qemu-block/src/coroutine-synctask.c @@ -0,0 +1,116 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "syncop.h" +#include "qemu-block-memory-types.h" + +#include "qemu-block.h" + +/* + * This code serves as the bridge from the main glusterfs context to the qemu + * coroutine context via synctask. We create a single threaded syncenv with a + * single synctask responsible for processing a queue of coroutines. The qemu + * code invoked from within the synctask function handlers uses the ucontext + * coroutine implementation and scheduling logic internal to qemu. This + * effectively donates a thread of execution to qemu and its internal coroutine + * management. + * + * NOTE: The existence of concurrent synctasks has proven quite racy with regard + * to qemu coroutine management, particularly related to the lifecycle + * differences with top-level synctasks and internally created coroutines and + * interactions with qemu-internal queues (and locks, in turn). We explicitly + * disallow this scenario, via the queue, until it is more well supported. + */ + +static struct { + struct list_head queue; + gf_lock_t lock; + struct synctask *task; +} qb_co; + +static void +init_qbco() +{ + INIT_LIST_HEAD(&qb_co.queue); + LOCK_INIT(&qb_co.lock); +} + +static int +synctask_nop_cbk (int ret, call_frame_t *frame, void *opaque) +{ + return 0; +} + +static int +qb_synctask_wrap (void *opaque) +{ + qb_local_t *qb_local, *tmp; + + LOCK(&qb_co.lock); + + while (!list_empty(&qb_co.queue)) { + list_for_each_entry_safe(qb_local, tmp, &qb_co.queue, list) { + list_del_init(&qb_local->list); + break; + } + + UNLOCK(&qb_co.lock); + + qb_local->synctask_fn(qb_local); + /* qb_local is now unwound and gone! */ + + LOCK(&qb_co.lock); + } + + qb_co.task = NULL; + + UNLOCK(&qb_co.lock); + + return 0; +} + +int +qb_coroutine (call_frame_t *frame, synctask_fn_t fn) +{ + qb_local_t *qb_local = NULL; + qb_conf_t *qb_conf = NULL; + static int init = 0; + + qb_local = frame->local; + qb_local->synctask_fn = fn; + qb_conf = frame->this->private; + + if (!init) { + init = 1; + init_qbco(); + } + + LOCK(&qb_co.lock); + + if (!qb_co.task) + qb_co.task = synctask_create(qb_conf->env, qb_synctask_wrap, + synctask_nop_cbk, frame, NULL); + + list_add_tail(&qb_local->list, &qb_co.queue); + + UNLOCK(&qb_co.lock); + + return 0; +} diff --git a/xlators/features/qemu-block/src/monitor-logging.c b/xlators/features/qemu-block/src/monitor-logging.c new file mode 100644 index 000000000..d37c37f0f --- /dev/null +++ b/xlators/features/qemu-block/src/monitor-logging.c @@ -0,0 +1,50 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "qemu-block-memory-types.h" + +#include "block/block_int.h" + +Monitor *cur_mon; + +int +monitor_cur_is_qmp() +{ + /* No QMP support here */ + return 0; +} + +void +monitor_set_error (Monitor *mon, QError *qerror) +{ + /* NOP here */ + return; +} + + +void +monitor_vprintf(Monitor *mon, const char *fmt, va_list ap) +{ + char buf[4096]; + + vsnprintf(buf, sizeof(buf), fmt, ap); + + gf_log (THIS->name, GF_LOG_ERROR, "%s", buf); +} diff --git a/xlators/features/qemu-block/src/qb-coroutines.c b/xlators/features/qemu-block/src/qb-coroutines.c new file mode 100644 index 000000000..7c52adb21 --- /dev/null +++ b/xlators/features/qemu-block/src/qb-coroutines.c @@ -0,0 +1,662 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "inode.h" +#include "call-stub.h" +#include "defaults.h" +#include "qemu-block-memory-types.h" +#include "qemu-block.h" +#include "qb-coroutines.h" + + +int +qb_format_and_resume (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + call_stub_t *stub = NULL; + inode_t *inode = NULL; + char filename[64]; + char base_filename[128]; + int use_base = 0; + qb_inode_t *qb_inode = NULL; + Error *local_err = NULL; + fd_t *fd = NULL; + dict_t *xattr = NULL; + qb_conf_t *qb_conf = NULL; + int ret = -1; + + local = opaque; + frame = local->frame; + stub = local->stub; + inode = local->inode; + qb_conf = frame->this->private; + + qb_inode_to_filename (inode, filename, 64); + + qb_inode = qb_inode_ctx_get (frame->this, inode); + + /* + * See if the caller specified a backing image. + */ + if (!uuid_is_null(qb_inode->backing_gfid) || qb_inode->backing_fname) { + loc_t loc = {0,}; + char gfid_str[64]; + struct iatt buf; + + if (!uuid_is_null(qb_inode->backing_gfid)) { + loc.inode = inode_find(qb_conf->root_inode->table, + qb_inode->backing_gfid); + if (!loc.inode) { + loc.inode = inode_new(qb_conf->root_inode->table); + uuid_copy(loc.inode->gfid, + qb_inode->backing_gfid); + } + uuid_copy(loc.gfid, loc.inode->gfid); + } else if (qb_inode->backing_fname) { + loc.inode = inode_new(qb_conf->root_inode->table); + loc.name = qb_inode->backing_fname; + loc.parent = inode_parent(inode, NULL, NULL); + loc_path(&loc, loc.name); + } + + /* + * Lookup the backing image. Verify existence and/or get the + * gfid if we don't already have it. + */ + ret = syncop_lookup(FIRST_CHILD(frame->this), &loc, NULL, &buf, + NULL, NULL); + GF_FREE(qb_inode->backing_fname); + if (ret) { + loc_wipe(&loc); + ret = errno; + goto err; + } + + uuid_copy(qb_inode->backing_gfid, buf.ia_gfid); + loc_wipe(&loc); + + /* + * We pass the filename of the backing image into the qemu block + * subsystem as the associated gfid. This is embedded into the + * clone image and passed along to the gluster bdrv backend when + * the block subsystem needs to operate on the backing image on + * behalf of the clone. + */ + uuid_unparse(qb_inode->backing_gfid, gfid_str); + snprintf(base_filename, sizeof(base_filename), + "gluster://gfid:%s", gfid_str); + use_base = 1; + } + + bdrv_img_create (filename, qb_inode->fmt, + use_base ? base_filename : NULL, 0, 0, qb_inode->size, + 0, &local_err, true); + + if (error_is_set (&local_err)) { + gf_log (frame->this->name, GF_LOG_ERROR, "%s", + error_get_pretty (local_err)); + error_free (local_err); + QB_STUB_UNWIND (stub, -1, EIO); + return 0; + } + + fd = fd_anonymous (inode); + if (!fd) { + gf_log (frame->this->name, GF_LOG_ERROR, + "could not create anonymous fd for %s", + uuid_utoa (inode->gfid)); + QB_STUB_UNWIND (stub, -1, ENOMEM); + return 0; + } + + xattr = dict_new (); + if (!xattr) { + gf_log (frame->this->name, GF_LOG_ERROR, + "could not allocate xattr dict for %s", + uuid_utoa (inode->gfid)); + QB_STUB_UNWIND (stub, -1, ENOMEM); + fd_unref (fd); + return 0; + } + + ret = dict_set_str (xattr, qb_conf->qb_xattr_key, local->fmt); + if (ret) { + gf_log (frame->this->name, GF_LOG_ERROR, + "could not dict_set for %s", + uuid_utoa (inode->gfid)); + QB_STUB_UNWIND (stub, -1, ENOMEM); + fd_unref (fd); + dict_unref (xattr); + return 0; + } + + ret = syncop_fsetxattr (FIRST_CHILD(THIS), fd, xattr, 0); + if (ret) { + ret = errno; + gf_log (frame->this->name, GF_LOG_ERROR, + "failed to setxattr for %s", + uuid_utoa (inode->gfid)); + QB_STUB_UNWIND (stub, -1, ret); + fd_unref (fd); + dict_unref (xattr); + return 0; + } + + fd_unref (fd); + dict_unref (xattr); + + QB_STUB_UNWIND (stub, 0, 0); + + return 0; + +err: + QB_STUB_UNWIND(stub, -1, ret); + return 0; +} + + +static BlockDriverState * +qb_bs_create (inode_t *inode, const char *fmt) +{ + char filename[64]; + BlockDriverState *bs = NULL; + BlockDriver *drv = NULL; + int op_errno = 0; + int ret = 0; + + bs = bdrv_new (uuid_utoa (inode->gfid)); + if (!bs) { + op_errno = ENOMEM; + gf_log (THIS->name, GF_LOG_ERROR, + "could not allocate @bdrv for gfid:%s", + uuid_utoa (inode->gfid)); + goto err; + } + + drv = bdrv_find_format (fmt); + if (!drv) { + op_errno = EINVAL; + gf_log (THIS->name, GF_LOG_ERROR, + "Unknown file format: %s for gfid:%s", + fmt, uuid_utoa (inode->gfid)); + goto err; + } + + qb_inode_to_filename (inode, filename, 64); + + ret = bdrv_open (bs, filename, NULL, BDRV_O_RDWR, drv); + if (ret < 0) { + op_errno = -ret; + gf_log (THIS->name, GF_LOG_ERROR, + "Unable to bdrv_open() gfid:%s (%s)", + uuid_utoa (inode->gfid), strerror (op_errno)); + goto err; + } + + return bs; +err: + errno = op_errno; + return NULL; +} + + +int +qb_co_open (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + call_stub_t *stub = NULL; + inode_t *inode = NULL; + qb_inode_t *qb_inode = NULL; + + local = opaque; + frame = local->frame; + stub = local->stub; + inode = local->inode; + + qb_inode = qb_inode_ctx_get (frame->this, inode); + if (!qb_inode->bs) { + /* FIXME: we need locks around this when + enabling multithreaded syncop/coroutine + for qemu-block + */ + + qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); + if (!qb_inode->bs) { + QB_STUB_UNWIND (stub, -1, errno); + return 0; + } + } + qb_inode->refcnt++; + + QB_STUB_RESUME (stub); + + return 0; +} + + +int +qb_co_writev (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + call_stub_t *stub = NULL; + inode_t *inode = NULL; + qb_inode_t *qb_inode = NULL; + QEMUIOVector qiov = {0, }; + int ret = 0; + + local = opaque; + frame = local->frame; + stub = local->stub; + inode = local->inode; + + qb_inode = qb_inode_ctx_get (frame->this, inode); + if (!qb_inode->bs) { + /* FIXME: we need locks around this when + enabling multithreaded syncop/coroutine + for qemu-block + */ + + qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); + if (!qb_inode->bs) { + QB_STUB_UNWIND (stub, -1, errno); + return 0; + } + } + + qemu_iovec_init_external (&qiov, stub->args.vector, stub->args.count); + + ret = bdrv_pwritev (qb_inode->bs, stub->args.offset, &qiov); + + if (ret < 0) { + QB_STUB_UNWIND (stub, -1, -ret); + } else { + QB_STUB_UNWIND (stub, ret, 0); + } + + return 0; +} + + +int +qb_co_readv (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + call_stub_t *stub = NULL; + inode_t *inode = NULL; + qb_inode_t *qb_inode = NULL; + struct iobuf *iobuf = NULL; + struct iobref *iobref = NULL; + struct iovec iov = {0, }; + int ret = 0; + + local = opaque; + frame = local->frame; + stub = local->stub; + inode = local->inode; + + qb_inode = qb_inode_ctx_get (frame->this, inode); + if (!qb_inode->bs) { + /* FIXME: we need locks around this when + enabling multithreaded syncop/coroutine + for qemu-block + */ + + qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); + if (!qb_inode->bs) { + QB_STUB_UNWIND (stub, -1, errno); + return 0; + } + } + + if (stub->args.offset >= qb_inode->size) { + QB_STUB_UNWIND (stub, 0, 0); + return 0; + } + + iobuf = iobuf_get2 (frame->this->ctx->iobuf_pool, stub->args.size); + if (!iobuf) { + QB_STUB_UNWIND (stub, -1, ENOMEM); + return 0; + } + + iobref = iobref_new (); + if (!iobref) { + QB_STUB_UNWIND (stub, -1, ENOMEM); + iobuf_unref (iobuf); + return 0; + } + + if (iobref_add (iobref, iobuf) < 0) { + iobuf_unref (iobuf); + iobref_unref (iobref); + QB_STUB_UNWIND (stub, -1, ENOMEM); + return 0; + } + + ret = bdrv_pread (qb_inode->bs, stub->args.offset, iobuf_ptr (iobuf), + stub->args.size); + + if (ret < 0) { + QB_STUB_UNWIND (stub, -1, -ret); + iobref_unref (iobref); + return 0; + } + + iov.iov_base = iobuf_ptr (iobuf); + iov.iov_len = ret; + + stub->args_cbk.vector = iov_dup (&iov, 1); + stub->args_cbk.count = 1; + stub->args_cbk.iobref = iobref; + + QB_STUB_UNWIND (stub, ret, 0); + + return 0; +} + + +int +qb_co_fsync (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + call_stub_t *stub = NULL; + inode_t *inode = NULL; + qb_inode_t *qb_inode = NULL; + int ret = 0; + + local = opaque; + frame = local->frame; + stub = local->stub; + inode = local->inode; + + qb_inode = qb_inode_ctx_get (frame->this, inode); + if (!qb_inode->bs) { + /* FIXME: we need locks around this when + enabling multithreaded syncop/coroutine + for qemu-block + */ + + qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); + if (!qb_inode->bs) { + QB_STUB_UNWIND (stub, -1, errno); + return 0; + } + } + + ret = bdrv_flush (qb_inode->bs); + + if (ret < 0) { + QB_STUB_UNWIND (stub, -1, -ret); + } else { + QB_STUB_UNWIND (stub, ret, 0); + } + + return 0; +} + + +static void +qb_update_size_xattr (xlator_t *this, fd_t *fd, const char *fmt, off_t offset) +{ + char val[QB_XATTR_VAL_MAX]; + qb_conf_t *qb_conf = NULL; + dict_t *xattr = NULL; + + qb_conf = this->private; + + snprintf (val, QB_XATTR_VAL_MAX, "%s:%llu", + fmt, (long long unsigned) offset); + + xattr = dict_new (); + if (!xattr) + return; + + if (dict_set_str (xattr, qb_conf->qb_xattr_key, val) != 0) { + dict_unref (xattr); + return; + } + + syncop_fsetxattr (FIRST_CHILD(this), fd, xattr, 0); + dict_unref (xattr); +} + + +int +qb_co_truncate (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + call_stub_t *stub = NULL; + inode_t *inode = NULL; + qb_inode_t *qb_inode = NULL; + int ret = 0; + off_t offset = 0; + xlator_t *this = NULL; + + this = THIS; + + local = opaque; + frame = local->frame; + stub = local->stub; + inode = local->inode; + + qb_inode = qb_inode_ctx_get (frame->this, inode); + if (!qb_inode->bs) { + /* FIXME: we need locks around this when + enabling multithreaded syncop/coroutine + for qemu-block + */ + + qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); + if (!qb_inode->bs) { + QB_STUB_UNWIND (stub, -1, errno); + return 0; + } + } + + syncop_fstat (FIRST_CHILD(this), local->fd, &stub->args_cbk.prestat); + stub->args_cbk.prestat.ia_size = qb_inode->size; + + ret = bdrv_truncate (qb_inode->bs, stub->args.offset); + if (ret < 0) + goto out; + + offset = bdrv_getlength (qb_inode->bs); + + qb_inode->size = offset; + + syncop_fstat (FIRST_CHILD(this), local->fd, &stub->args_cbk.poststat); + stub->args_cbk.poststat.ia_size = qb_inode->size; + + qb_update_size_xattr (this, local->fd, qb_inode->fmt, qb_inode->size); + +out: + if (ret < 0) { + QB_STUB_UNWIND (stub, -1, -ret); + } else { + QB_STUB_UNWIND (stub, ret, 0); + } + + return 0; +} + + +int +qb_co_close (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + inode_t *inode = NULL; + qb_inode_t *qb_inode = NULL; + BlockDriverState *bs = NULL; + + local = opaque; + inode = local->inode; + + qb_inode = qb_inode_ctx_get (THIS, inode); + + if (!--qb_inode->refcnt) { + bs = qb_inode->bs; + qb_inode->bs = NULL; + bdrv_delete (bs); + } + + frame = local->frame; + frame->local = NULL; + qb_local_free (THIS, local); + STACK_DESTROY (frame->root); + + return 0; +} + + +int +qb_snapshot_create (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + call_stub_t *stub = NULL; + inode_t *inode = NULL; + qb_inode_t *qb_inode = NULL; + QEMUSnapshotInfo sn; + struct timeval tv = {0, }; + int ret = 0; + + local = opaque; + frame = local->frame; + stub = local->stub; + inode = local->inode; + + qb_inode = qb_inode_ctx_get (frame->this, inode); + if (!qb_inode->bs) { + /* FIXME: we need locks around this when + enabling multithreaded syncop/coroutine + for qemu-block + */ + + qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); + if (!qb_inode->bs) { + QB_STUB_UNWIND (stub, -1, errno); + return 0; + } + } + + memset (&sn, 0, sizeof (sn)); + pstrcpy (sn.name, sizeof(sn.name), local->name); + gettimeofday (&tv, NULL); + sn.date_sec = tv.tv_sec; + sn.date_nsec = tv.tv_usec * 1000; + + ret = bdrv_snapshot_create (qb_inode->bs, &sn); + if (ret < 0) { + QB_STUB_UNWIND (stub, -1, -ret); + } else { + QB_STUB_UNWIND (stub, ret, 0); + } + + return 0; +} + + +int +qb_snapshot_delete (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + call_stub_t *stub = NULL; + inode_t *inode = NULL; + qb_inode_t *qb_inode = NULL; + int ret = 0; + + local = opaque; + frame = local->frame; + stub = local->stub; + inode = local->inode; + + qb_inode = qb_inode_ctx_get (frame->this, inode); + if (!qb_inode->bs) { + /* FIXME: we need locks around this when + enabling multithreaded syncop/coroutine + for qemu-block + */ + + qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); + if (!qb_inode->bs) { + QB_STUB_UNWIND (stub, -1, errno); + return 0; + } + } + + ret = bdrv_snapshot_delete (qb_inode->bs, local->name); + + if (ret < 0) { + QB_STUB_UNWIND (stub, -1, -ret); + } else { + QB_STUB_UNWIND (stub, ret, 0); + } + + return 0; +} + + +int +qb_snapshot_goto (void *opaque) +{ + qb_local_t *local = NULL; + call_frame_t *frame = NULL; + call_stub_t *stub = NULL; + inode_t *inode = NULL; + qb_inode_t *qb_inode = NULL; + int ret = 0; + + local = opaque; + frame = local->frame; + stub = local->stub; + inode = local->inode; + + qb_inode = qb_inode_ctx_get (frame->this, inode); + if (!qb_inode->bs) { + /* FIXME: we need locks around this when + enabling multithreaded syncop/coroutine + for qemu-block + */ + + qb_inode->bs = qb_bs_create (inode, qb_inode->fmt); + if (!qb_inode->bs) { + QB_STUB_UNWIND (stub, -1, errno); + return 0; + } + } + + ret = bdrv_snapshot_goto (qb_inode->bs, local->name); + + if (ret < 0) { + QB_STUB_UNWIND (stub, -1, -ret); + } else { + QB_STUB_UNWIND (stub, ret, 0); + } + + return 0; +} diff --git a/xlators/features/qemu-block/src/qb-coroutines.h b/xlators/features/qemu-block/src/qb-coroutines.h new file mode 100644 index 000000000..583319f3b --- /dev/null +++ b/xlators/features/qemu-block/src/qb-coroutines.h @@ -0,0 +1,30 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __QB_COROUTINES_H +#define __QB_COROUTINES_H + +#include "syncop.h" +#include "call-stub.h" +#include "block/block_int.h" +#include "monitor/monitor.h" + +int qb_format_and_resume (void *opaque); +int qb_snapshot_create (void *opaque); +int qb_snapshot_delete (void *opaque); +int qb_snapshot_goto (void *opaque); +int qb_co_open (void *opaque); +int qb_co_close (void *opaque); +int qb_co_writev (void *opaque); +int qb_co_readv (void *opaque); +int qb_co_fsync (void *opaque); +int qb_co_truncate (void *opaque); + +#endif /* __QB_COROUTINES_H */ diff --git a/xlators/features/qemu-block/src/qemu-block-memory-types.h b/xlators/features/qemu-block/src/qemu-block-memory-types.h new file mode 100644 index 000000000..267b3893f --- /dev/null +++ b/xlators/features/qemu-block/src/qemu-block-memory-types.h @@ -0,0 +1,25 @@ +/* + Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + + +#ifndef __QB_MEM_TYPES_H__ +#define __QB_MEM_TYPES_H__ + +#include "mem-types.h" + +enum gf_qb_mem_types_ { + gf_qb_mt_qb_conf_t = gf_common_mt_end + 1, + gf_qb_mt_qb_inode_t, + gf_qb_mt_qb_local_t, + gf_qb_mt_coroutinesynctask_t, + gf_qb_mt_end +}; +#endif + diff --git a/xlators/features/qemu-block/src/qemu-block.c b/xlators/features/qemu-block/src/qemu-block.c new file mode 100644 index 000000000..48bbf3140 --- /dev/null +++ b/xlators/features/qemu-block/src/qemu-block.c @@ -0,0 +1,1140 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "logging.h" +#include "dict.h" +#include "xlator.h" +#include "inode.h" +#include "call-stub.h" +#include "defaults.h" +#include "qemu-block-memory-types.h" +#include "qemu-block.h" +#include "qb-coroutines.h" + + +qb_inode_t * +__qb_inode_ctx_get (xlator_t *this, inode_t *inode) +{ + uint64_t value = 0; + qb_inode_t *qb_inode = NULL; + + __inode_ctx_get (inode, this, &value); + qb_inode = (qb_inode_t *)(unsigned long) value; + + return qb_inode; +} + + +qb_inode_t * +qb_inode_ctx_get (xlator_t *this, inode_t *inode) +{ + qb_inode_t *qb_inode = NULL; + + LOCK (&inode->lock); + { + qb_inode = __qb_inode_ctx_get (this, inode); + } + UNLOCK (&inode->lock); + + return qb_inode; +} + + +qb_inode_t * +qb_inode_ctx_del (xlator_t *this, inode_t *inode) +{ + uint64_t value = 0; + qb_inode_t *qb_inode = NULL; + + inode_ctx_del (inode, this, &value); + qb_inode = (qb_inode_t *)(unsigned long) value; + + return qb_inode; +} + + +int +qb_inode_cleanup (xlator_t *this, inode_t *inode, int warn) +{ + qb_inode_t *qb_inode = NULL; + + qb_inode = qb_inode_ctx_del (this, inode); + + if (!qb_inode) + return 0; + + if (warn) + gf_log (this->name, GF_LOG_WARNING, + "inode %s no longer block formatted", + uuid_utoa (inode->gfid)); + + /* free (qb_inode->bs); */ + + GF_FREE (qb_inode); + + return 0; +} + + +int +qb_iatt_fixup (xlator_t *this, inode_t *inode, struct iatt *iatt) +{ + qb_inode_t *qb_inode = NULL; + + qb_inode = qb_inode_ctx_get (this, inode); + if (!qb_inode) + return 0; + + iatt->ia_size = qb_inode->size; + + return 0; +} + + +int +qb_format_extract (xlator_t *this, char *format, inode_t *inode) +{ + char *s, *save; + uint64_t size = 0; + char fmt[QB_XATTR_VAL_MAX+1] = {0, }; + qb_inode_t *qb_inode = NULL; + char *formatstr = NULL; + uuid_t gfid = {0,}; + char gfid_str[64] = {0,}; + int ret; + + strncpy(fmt, format, QB_XATTR_VAL_MAX); + + s = strtok_r(fmt, ":", &save); + if (!s) + goto invalid; + formatstr = gf_strdup(s); + + s = strtok_r(NULL, ":", &save); + if (!s) + goto invalid; + if (gf_string2bytesize (s, &size)) + goto invalid; + if (!size) + goto invalid; + + s = strtok_r(NULL, "\0", &save); + if (s && !strncmp(s, "<gfid:", strlen("<gfid:"))) { + /* + * Check for valid gfid backing image specifier. + */ + if (strlen(s) + 1 > sizeof(gfid_str)) + goto invalid; + ret = sscanf(s, "<gfid:%[^>]s", gfid_str); + if (ret == 1) { + ret = uuid_parse(gfid_str, gfid); + if (ret < 0) + goto invalid; + } + } + + qb_inode = qb_inode_ctx_get (this, inode); + if (!qb_inode) + qb_inode = GF_CALLOC (1, sizeof (*qb_inode), + gf_qb_mt_qb_inode_t); + if (!qb_inode) { + GF_FREE(formatstr); + return ENOMEM; + } + + strncpy(qb_inode->fmt, formatstr, QB_XATTR_VAL_MAX); + qb_inode->size = size; + + /* + * If a backing gfid was not specified, interpret any remaining bytes + * associated with a backing image as a filename local to the parent + * directory. The format processing will validate further. + */ + if (!uuid_is_null(gfid)) + uuid_copy(qb_inode->backing_gfid, gfid); + else if (s) + qb_inode->backing_fname = gf_strdup(s); + + inode_ctx_set (inode, this, (void *)&qb_inode); + + GF_FREE(formatstr); + + return 0; + +invalid: + GF_FREE(formatstr); + + gf_log (this->name, GF_LOG_WARNING, + "invalid format '%s' in inode %s", format, + uuid_utoa (inode->gfid)); + return EINVAL; +} + + +void +qb_local_free (xlator_t *this, qb_local_t *local) +{ + if (local->inode) + inode_unref (local->inode); + if (local->fd) + fd_unref (local->fd); + GF_FREE (local); +} + + +int +qb_local_init (call_frame_t *frame) +{ + qb_local_t *qb_local = NULL; + + qb_local = GF_CALLOC (1, sizeof (*qb_local), gf_qb_mt_qb_local_t); + if (!qb_local) + return -1; + INIT_LIST_HEAD(&qb_local->list); + + qb_local->frame = frame; + frame->local = qb_local; + + return 0; +} + + +int +qb_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, inode_t *inode, struct iatt *buf, + dict_t *xdata, struct iatt *postparent) +{ + char *format = NULL; + qb_conf_t *conf = NULL; + + conf = this->private; + + if (op_ret == -1) + goto out; + + /* + * Cache the root inode for dealing with backing images. The format + * coroutine and the gluster qemu backend driver both use the root inode + * table to verify and/or redirect I/O to the backing image via + * anonymous fd's. + */ + if (!conf->root_inode && __is_root_gfid(inode->gfid)) + conf->root_inode = inode_ref(inode); + + if (!xdata) + goto out; + + if (dict_get_str (xdata, conf->qb_xattr_key, &format)) + goto out; + + if (!format) { + qb_inode_cleanup (this, inode, 1); + goto out; + } + + op_errno = qb_format_extract (this, format, inode); + if (op_errno) + op_ret = -1; + + qb_iatt_fixup (this, inode, buf); +out: + QB_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf, + xdata, postparent); + return 0; +} + + +int +qb_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + qb_conf_t *conf = NULL; + + conf = this->private; + + xdata = xdata ? dict_ref (xdata) : dict_new (); + + if (!xdata) + goto enomem; + + if (dict_set_int32 (xdata, conf->qb_xattr_key, 0)) + goto enomem; + + STACK_WIND (frame, qb_lookup_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lookup, loc, xdata); + dict_unref (xdata); + return 0; +enomem: + QB_STACK_UNWIND (lookup, frame, -1, ENOMEM, 0, 0, 0, 0); + if (xdata) + dict_unref (xdata); + return 0; +} + + +int +qb_setxattr_format (call_frame_t *frame, xlator_t *this, call_stub_t *stub, + dict_t *xattr, inode_t *inode) +{ + char *format = NULL; + int op_errno = 0; + qb_local_t *qb_local = NULL; + data_t *data = NULL; + qb_inode_t *qb_inode; + + if (!(data = dict_get (xattr, "trusted.glusterfs.block-format"))) { + QB_STUB_RESUME (stub); + return 0; + } + + format = alloca (data->len + 1); + memcpy (format, data->data, data->len); + format[data->len] = 0; + + op_errno = qb_format_extract (this, format, inode); + if (op_errno) { + QB_STUB_UNWIND (stub, -1, op_errno); + return 0; + } + qb_inode = qb_inode_ctx_get(this, inode); + + qb_local = frame->local; + + qb_local->stub = stub; + qb_local->inode = inode_ref (inode); + + snprintf(qb_local->fmt, QB_XATTR_VAL_MAX, "%s:%lu", qb_inode->fmt, + qb_inode->size); + + qb_coroutine (frame, qb_format_and_resume); + + return 0; +} + + +int +qb_setxattr_snapshot_create (call_frame_t *frame, xlator_t *this, + call_stub_t *stub, dict_t *xattr, inode_t *inode) +{ + qb_local_t *qb_local = NULL; + char *name = NULL; + data_t *data = NULL; + + if (!(data = dict_get (xattr, "trusted.glusterfs.block-snapshot-create"))) { + QB_STUB_RESUME (stub); + return 0; + } + + name = alloca (data->len + 1); + memcpy (name, data->data, data->len); + name[data->len] = 0; + + qb_local = frame->local; + + qb_local->stub = stub; + qb_local->inode = inode_ref (inode); + strncpy (qb_local->name, name, 128); + + qb_coroutine (frame, qb_snapshot_create); + + return 0; +} + + +int +qb_setxattr_snapshot_delete (call_frame_t *frame, xlator_t *this, + call_stub_t *stub, dict_t *xattr, inode_t *inode) +{ + qb_local_t *qb_local = NULL; + char *name = NULL; + data_t *data = NULL; + + if (!(data = dict_get (xattr, "trusted.glusterfs.block-snapshot-delete"))) { + QB_STUB_RESUME (stub); + return 0; + } + + name = alloca (data->len + 1); + memcpy (name, data->data, data->len); + name[data->len] = 0; + + qb_local = frame->local; + + qb_local->stub = stub; + qb_local->inode = inode_ref (inode); + strncpy (qb_local->name, name, 128); + + qb_coroutine (frame, qb_snapshot_delete); + + return 0; +} + +int +qb_setxattr_snapshot_goto (call_frame_t *frame, xlator_t *this, + call_stub_t *stub, dict_t *xattr, inode_t *inode) +{ + qb_local_t *qb_local = NULL; + char *name = NULL; + data_t *data = NULL; + + if (!(data = dict_get (xattr, "trusted.glusterfs.block-snapshot-goto"))) { + QB_STUB_RESUME (stub); + return 0; + } + + name = alloca (data->len + 1); + memcpy (name, data->data, data->len); + name[data->len] = 0; + + qb_local = frame->local; + + qb_local->stub = stub; + qb_local->inode = inode_ref (inode); + strncpy (qb_local->name, name, 128); + + qb_coroutine (frame, qb_snapshot_goto); + + return 0; +} + + +int +qb_setxattr_common (call_frame_t *frame, xlator_t *this, call_stub_t *stub, + dict_t *xattr, inode_t *inode) +{ + data_t *data = NULL; + + if ((data = dict_get (xattr, "trusted.glusterfs.block-format"))) { + qb_setxattr_format (frame, this, stub, xattr, inode); + return 0; + } + + if ((data = dict_get (xattr, "trusted.glusterfs.block-snapshot-create"))) { + qb_setxattr_snapshot_create (frame, this, stub, xattr, inode); + return 0; + } + + if ((data = dict_get (xattr, "trusted.glusterfs.block-snapshot-delete"))) { + qb_setxattr_snapshot_delete (frame, this, stub, xattr, inode); + return 0; + } + + if ((data = dict_get (xattr, "trusted.glusterfs.block-snapshot-goto"))) { + qb_setxattr_snapshot_goto (frame, this, stub, xattr, inode); + return 0; + } + + QB_STUB_RESUME (stub); + + return 0; +} + + +int +qb_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr, + int flags, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + if (qb_local_init (frame) != 0) + goto enomem; + + stub = fop_setxattr_stub (frame, default_setxattr_resume, loc, xattr, + flags, xdata); + if (!stub) + goto enomem; + + qb_setxattr_common (frame, this, stub, xattr, loc->inode); + + return 0; +enomem: + QB_STACK_UNWIND (setxattr, frame, -1, ENOMEM, 0); + return 0; +} + + +int +qb_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr, + int flags, dict_t *xdata) +{ + call_stub_t *stub = NULL; + + if (qb_local_init (frame) != 0) + goto enomem; + + stub = fop_fsetxattr_stub (frame, default_fsetxattr_resume, fd, xattr, + flags, xdata); + if (!stub) + goto enomem; + + qb_setxattr_common (frame, this, stub, xattr, fd->inode); + + return 0; +enomem: + QB_STACK_UNWIND (fsetxattr, frame, -1, ENOMEM, 0); + return 0; +} + + +int +qb_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, fd_t *fd, dict_t *xdata) +{ + call_stub_t *stub = NULL; + qb_local_t *qb_local = NULL; + + qb_local = frame->local; + + if (op_ret < 0) + goto unwind; + + if (!qb_inode_ctx_get (this, qb_local->inode)) + goto unwind; + + stub = fop_open_cbk_stub (frame, NULL, op_ret, op_errno, fd, xdata); + if (!stub) { + op_ret = -1; + op_errno = ENOMEM; + goto unwind; + } + + qb_local->stub = stub; + + qb_coroutine (frame, qb_co_open); + + return 0; +unwind: + QB_STACK_UNWIND (open, frame, op_ret, op_errno, fd, xdata); + return 0; +} + + +int +qb_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, + fd_t *fd, dict_t *xdata) +{ + qb_local_t *qb_local = NULL; + qb_inode_t *qb_inode = NULL; + + qb_inode = qb_inode_ctx_get (this, loc->inode); + if (!qb_inode) { + STACK_WIND (frame, default_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, + xdata); + return 0; + } + + if (qb_local_init (frame) != 0) + goto enomem; + + qb_local = frame->local; + + qb_local->inode = inode_ref (loc->inode); + qb_local->fd = fd_ref (fd); + + STACK_WIND (frame, qb_open_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata); + return 0; +enomem: + QB_STACK_UNWIND (open, frame, -1, ENOMEM, 0, 0); + return 0; +} + + +int +qb_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector, + int count, off_t offset, uint32_t flags, struct iobref *iobref, + dict_t *xdata) +{ + qb_local_t *qb_local = NULL; + qb_inode_t *qb_inode = NULL; + + qb_inode = qb_inode_ctx_get (this, fd->inode); + if (!qb_inode) { + STACK_WIND (frame, default_writev_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->writev, fd, vector, count, + offset, flags, iobref, xdata); + return 0; + } + + if (qb_local_init (frame) != 0) + goto enomem; + + qb_local = frame->local; + + qb_local->inode = inode_ref (fd->inode); + qb_local->fd = fd_ref (fd); + + qb_local->stub = fop_writev_stub (frame, NULL, fd, vector, count, + offset, flags, iobref, xdata); + if (!qb_local->stub) + goto enomem; + + qb_coroutine (frame, qb_co_writev); + + return 0; +enomem: + QB_STACK_UNWIND (writev, frame, -1, ENOMEM, 0, 0, 0); + return 0; +} + + +int +qb_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t offset, uint32_t flags, dict_t *xdata) +{ + qb_local_t *qb_local = NULL; + qb_inode_t *qb_inode = NULL; + + qb_inode = qb_inode_ctx_get (this, fd->inode); + if (!qb_inode) { + STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readv, fd, size, offset, + flags, xdata); + return 0; + } + + if (qb_local_init (frame) != 0) + goto enomem; + + qb_local = frame->local; + + qb_local->inode = inode_ref (fd->inode); + qb_local->fd = fd_ref (fd); + + qb_local->stub = fop_readv_stub (frame, NULL, fd, size, offset, + flags, xdata); + if (!qb_local->stub) + goto enomem; + + qb_coroutine (frame, qb_co_readv); + + return 0; +enomem: + QB_STACK_UNWIND (readv, frame, -1, ENOMEM, 0, 0, 0, 0, 0); + return 0; +} + + +int +qb_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int dsync, + dict_t *xdata) +{ + qb_local_t *qb_local = NULL; + qb_inode_t *qb_inode = NULL; + + qb_inode = qb_inode_ctx_get (this, fd->inode); + if (!qb_inode) { + STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, fd, dsync, xdata); + return 0; + } + + if (qb_local_init (frame) != 0) + goto enomem; + + qb_local = frame->local; + + qb_local->inode = inode_ref (fd->inode); + qb_local->fd = fd_ref (fd); + + qb_local->stub = fop_fsync_stub (frame, NULL, fd, dsync, xdata); + + if (!qb_local->stub) + goto enomem; + + qb_coroutine (frame, qb_co_fsync); + + return 0; +enomem: + QB_STACK_UNWIND (fsync, frame, -1, ENOMEM, 0, 0, 0); + return 0; +} + + +int +qb_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + qb_local_t *qb_local = NULL; + qb_inode_t *qb_inode = NULL; + + qb_inode = qb_inode_ctx_get (this, fd->inode); + if (!qb_inode) { + STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->flush, fd, xdata); + return 0; + } + + if (qb_local_init (frame) != 0) + goto enomem; + + qb_local = frame->local; + + qb_local->inode = inode_ref (fd->inode); + qb_local->fd = fd_ref (fd); + + qb_local->stub = fop_flush_stub (frame, NULL, fd, xdata); + + if (!qb_local->stub) + goto enomem; + + qb_coroutine (frame, qb_co_fsync); + + return 0; +enomem: + QB_STACK_UNWIND (flush, frame, -1, ENOMEM, 0); + return 0; +} + +static int32_t +qb_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, + dict_t *xdata) +{ + qb_conf_t *conf = this->private; + gf_dirent_t *entry; + char *format; + + list_for_each_entry(entry, &entries->list, list) { + if (!entry->inode || !entry->dict) + continue; + + format = NULL; + if (dict_get_str(entry->dict, conf->qb_xattr_key, &format)) + continue; + + if (!format) { + qb_inode_cleanup(this, entry->inode, 1); + continue; + } + + if (qb_format_extract(this, format, entry->inode)) + continue; + + qb_iatt_fixup(this, entry->inode, &entry->d_stat); + } + + STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata); + return 0; +} + +static int32_t +qb_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, + off_t off, dict_t *xdata) +{ + qb_conf_t *conf = this->private; + + xdata = xdata ? dict_ref(xdata) : dict_new(); + if (!xdata) + goto enomem; + + if (dict_set_int32 (xdata, conf->qb_xattr_key, 0)) + goto enomem; + + STACK_WIND(frame, qb_readdirp_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata); + + dict_unref(xdata); + return 0; + +enomem: + QB_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL); + if (xdata) + dict_unref(xdata); + return 0; +} + +int +qb_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, + dict_t *xdata) +{ + qb_local_t *qb_local = NULL; + qb_inode_t *qb_inode = NULL; + + qb_inode = qb_inode_ctx_get (this, loc->inode); + if (!qb_inode) { + STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->truncate, loc, offset, + xdata); + return 0; + } + + if (qb_local_init (frame) != 0) + goto enomem; + + qb_local = frame->local; + + qb_local->inode = inode_ref (loc->inode); + qb_local->fd = fd_anonymous (loc->inode); + + qb_local->stub = fop_truncate_stub (frame, NULL, loc, offset, xdata); + + if (!qb_local->stub) + goto enomem; + + qb_coroutine (frame, qb_co_truncate); + + return 0; +enomem: + QB_STACK_UNWIND (truncate, frame, -1, ENOMEM, 0, 0, 0); + return 0; +} + + +int +qb_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, + dict_t *xdata) +{ + qb_local_t *qb_local = NULL; + qb_inode_t *qb_inode = NULL; + + qb_inode = qb_inode_ctx_get (this, fd->inode); + if (!qb_inode) { + STACK_WIND (frame, default_ftruncate_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ftruncate, fd, offset, + xdata); + return 0; + } + + if (qb_local_init (frame) != 0) + goto enomem; + + qb_local = frame->local; + + qb_local->inode = inode_ref (fd->inode); + qb_local->fd = fd_ref (fd); + + qb_local->stub = fop_ftruncate_stub (frame, NULL, fd, offset, xdata); + + if (!qb_local->stub) + goto enomem; + + qb_coroutine (frame, qb_co_truncate); + + return 0; +enomem: + QB_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, 0, 0, 0); + return 0; +} + + +int +qb_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *iatt, dict_t *xdata) +{ + inode_t *inode = NULL; + + inode = frame->local; + frame->local = NULL; + + if (inode) { + qb_iatt_fixup (this, inode, iatt); + inode_unref (inode); + } + + QB_STACK_UNWIND (stat, frame, op_ret, op_errno, iatt, xdata); + + return 0; +} + +int +qb_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata) +{ + if (qb_inode_ctx_get (this, loc->inode)) + frame->local = inode_ref (loc->inode); + + STACK_WIND (frame, qb_stat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->stat, loc, xdata); + return 0; +} + + +int +qb_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *iatt, dict_t *xdata) +{ + inode_t *inode = NULL; + + inode = frame->local; + frame->local = NULL; + + if (inode) { + qb_iatt_fixup (this, inode, iatt); + inode_unref (inode); + } + + QB_STACK_UNWIND (fstat, frame, op_ret, op_errno, iatt, xdata); + + return 0; +} + + +int +qb_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata) +{ + if (qb_inode_ctx_get (this, fd->inode)) + frame->local = inode_ref (fd->inode); + + STACK_WIND (frame, qb_fstat_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fstat, fd, xdata); + return 0; +} + + +int +qb_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *pre, struct iatt *post, + dict_t *xdata) +{ + inode_t *inode = NULL; + + inode = frame->local; + frame->local = NULL; + + if (inode) { + qb_iatt_fixup (this, inode, pre); + qb_iatt_fixup (this, inode, post); + inode_unref (inode); + } + + QB_STACK_UNWIND (setattr, frame, op_ret, op_errno, pre, post, xdata); + + return 0; +} + + +int +qb_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf, + int valid, dict_t *xdata) +{ + if (qb_inode_ctx_get (this, loc->inode)) + frame->local = inode_ref (loc->inode); + + STACK_WIND (frame, qb_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->setattr, loc, buf, valid, xdata); + return 0; +} + + +int +qb_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int op_ret, int op_errno, struct iatt *pre, struct iatt *post, + dict_t *xdata) +{ + inode_t *inode = NULL; + + inode = frame->local; + frame->local = NULL; + + if (inode) { + qb_iatt_fixup (this, inode, pre); + qb_iatt_fixup (this, inode, post); + inode_unref (inode); + } + + QB_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, pre, post, xdata); + + return 0; +} + + +int +qb_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf, + int valid, dict_t *xdata) +{ + if (qb_inode_ctx_get (this, fd->inode)) + frame->local = inode_ref (fd->inode); + + STACK_WIND (frame, qb_setattr_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsetattr, fd, buf, valid, xdata); + return 0; +} + + +int +qb_forget (xlator_t *this, inode_t *inode) +{ + return qb_inode_cleanup (this, inode, 0); +} + + +int +qb_release (xlator_t *this, fd_t *fd) +{ + call_frame_t *frame = NULL; + + frame = create_frame (this, this->ctx->pool); + if (!frame) { + gf_log (this->name, GF_LOG_ERROR, + "Could not allocate frame. " + "Leaking QEMU BlockDriverState"); + return -1; + } + + if (qb_local_init (frame) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "Could not allocate local. " + "Leaking QEMU BlockDriverState"); + STACK_DESTROY (frame->root); + return -1; + } + + if (qb_coroutine (frame, qb_co_close) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "Could not allocate coroutine. " + "Leaking QEMU BlockDriverState"); + qb_local_free (this, frame->local); + frame->local = NULL; + STACK_DESTROY (frame->root); + } + + return 0; +} + +int +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + ret = xlator_mem_acct_init (this, gf_qb_mt_end + 1); + + if (ret) + gf_log (this->name, GF_LOG_ERROR, "Memory accounting init " + "failed"); + return ret; +} + + +int +reconfigure (xlator_t *this, dict_t *options) +{ + return 0; +} + + +int +init (xlator_t *this) +{ + qb_conf_t *conf = NULL; + int32_t ret = -1; + static int bdrv_inited = 0; + + if (!this->children || this->children->next) { + gf_log (this->name, GF_LOG_ERROR, + "FATAL: qemu-block (%s) not configured with exactly " + "one child", this->name); + goto out; + } + + conf = GF_CALLOC (1, sizeof (*conf), gf_qb_mt_qb_conf_t); + if (!conf) + goto out; + + /* configure 'option window-size <size>' */ + GF_OPTION_INIT ("default-password", conf->default_password, str, out); + + /* qemu coroutines use "co_mutex" for synchronizing among themselves. + However "co_mutex" itself is not threadsafe if the coroutine framework + is multithreaded (which usually is not). However synctasks are + fundamentally multithreaded, so for now create a syncenv which has + scaling limits set to max 1 thread so that the qemu coroutines can + execute "safely". + + Future work: provide an implementation of "co_mutex" which is + threadsafe and use the global multithreaded ctx->env syncenv. + */ + conf->env = syncenv_new (0, 1, 1); + + this->private = conf; + + ret = 0; + + snprintf (conf->qb_xattr_key, QB_XATTR_KEY_MAX, QB_XATTR_KEY_FMT, + this->name); + + cur_mon = (void *) 1; + + if (!bdrv_inited) { + bdrv_init (); + bdrv_inited = 1; + } + +out: + if (ret) + GF_FREE (conf); + + return ret; +} + + +void +fini (xlator_t *this) +{ + qb_conf_t *conf = NULL; + + conf = this->private; + + this->private = NULL; + + if (conf->root_inode) + inode_unref(conf->root_inode); + GF_FREE (conf); + + return; +} + + +struct xlator_fops fops = { + .lookup = qb_lookup, + .fsetxattr = qb_fsetxattr, + .setxattr = qb_setxattr, + .open = qb_open, + .writev = qb_writev, + .readv = qb_readv, + .fsync = qb_fsync, + .truncate = qb_truncate, + .ftruncate = qb_ftruncate, + .stat = qb_stat, + .fstat = qb_fstat, + .setattr = qb_setattr, + .fsetattr = qb_fsetattr, + .flush = qb_flush, +/* + .getxattr = qb_getxattr, + .fgetxattr = qb_fgetxattr +*/ + .readdirp = qb_readdirp, +}; + + +struct xlator_cbks cbks = { + .forget = qb_forget, + .release = qb_release, +}; + + +struct xlator_dumpops dumpops = { +}; + + +struct volume_options options[] = { + { .key = {"default-password"}, + .type = GF_OPTION_TYPE_STR, + .default_value = "", + .description = "Default password for the AES encrypted block images." + }, + { .key = {NULL} }, +}; diff --git a/xlators/features/qemu-block/src/qemu-block.h b/xlators/features/qemu-block/src/qemu-block.h new file mode 100644 index 000000000..c95f2799a --- /dev/null +++ b/xlators/features/qemu-block/src/qemu-block.h @@ -0,0 +1,109 @@ +/* + Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef __QEMU_BLOCK_H +#define __QEMU_BLOCK_H + +#include "syncop.h" +#include "call-stub.h" +#include "block/block_int.h" +#include "monitor/monitor.h" + +/* QB_XATTR_KEY_FMT is the on-disk xattr stored in the inode which + indicates that the file must be "interpreted" by the block format + logic. The value of the key is of the pattern: + + "format:virtual_size" + + e.g + + "qcow2:20GB" or "qed:100GB" + + The format and virtual size are colon separated. The format is + a case sensitive string which qemu recognizes. virtual_size is + specified as a size which glusterfs recognizes as size (i.e., + value accepted by gf_string2bytesize()) +*/ +#define QB_XATTR_KEY_FMT "trusted.glusterfs.%s.format" + +#define QB_XATTR_KEY_MAX 64 + +#define QB_XATTR_VAL_MAX 64 + + +typedef struct qb_inode { + char fmt[QB_XATTR_VAL_MAX]; /* this is only the format, not "format:size" */ + size_t size; /* virtual size in bytes */ + BlockDriverState *bs; + int refcnt; + uuid_t backing_gfid; + char *backing_fname; +} qb_inode_t; + + +typedef struct qb_conf { + Monitor *mon; + struct syncenv *env; + char qb_xattr_key[QB_XATTR_KEY_MAX]; + char *default_password; + inode_t *root_inode; +} qb_conf_t; + + +typedef struct qb_local { + call_frame_t *frame; /* backpointer */ + call_stub_t *stub; + inode_t *inode; + fd_t *fd; + char fmt[QB_XATTR_VAL_MAX+1]; + char name[256]; + synctask_fn_t synctask_fn; + struct list_head list; +} qb_local_t; + +void qb_local_free (xlator_t *this, qb_local_t *local); +int qb_coroutine (call_frame_t *frame, synctask_fn_t fn); +inode_t *qb_inode_from_filename (const char *filename); +int qb_inode_to_filename (inode_t *inode, char *filename, int size); +int qb_format_extract (xlator_t *this, char *format, inode_t *inode); + +qb_inode_t *qb_inode_ctx_get (xlator_t *this, inode_t *inode); + +#define QB_STACK_UNWIND(typ, frame, args ...) do { \ + qb_local_t *__local = frame->local; \ + xlator_t *__this = frame->this; \ + \ + frame->local = NULL; \ + STACK_UNWIND_STRICT (typ, frame, args); \ + if (__local) \ + qb_local_free (__this, __local); \ + } while (0) + +#define QB_STUB_UNWIND(stub, op_ret, op_errno) do { \ + qb_local_t *__local = stub->frame->local; \ + xlator_t *__this = stub->frame->this; \ + \ + stub->frame->local = NULL; \ + call_unwind_error (stub, op_ret, op_errno); \ + if (__local) \ + qb_local_free (__this, __local); \ + } while (0) + +#define QB_STUB_RESUME(stub_errno) do { \ + qb_local_t *__local = stub->frame->local; \ + xlator_t *__this = stub->frame->this; \ + \ + stub->frame->local = NULL; \ + call_resume (stub); \ + if (__local) \ + qb_local_free (__this, __local); \ + } while (0) + +#endif /* !__QEMU_BLOCK_H */ |
