diff options
Diffstat (limited to 'xlators/experimental/fdl/src/fdl-tmpl.c.in')
-rw-r--r-- | xlators/experimental/fdl/src/fdl-tmpl.c.in | 513 |
1 files changed, 0 insertions, 513 deletions
diff --git a/xlators/experimental/fdl/src/fdl-tmpl.c.in b/xlators/experimental/fdl/src/fdl-tmpl.c.in deleted file mode 100644 index c99157be957..00000000000 --- a/xlators/experimental/fdl/src/fdl-tmpl.c.in +++ /dev/null @@ -1,513 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include <fcntl.h> -#include <unistd.h> -#include <sys/mman.h> -#include "call-stub.h" -#include "iatt.h" -#include "defaults.h" -#include "syscall.h" -#include "xlator.h" -#include "fdl.h" - -/* TBD: make tunable */ -#define META_FILE_SIZE (1 << 20) -#define DATA_FILE_SIZE (1 << 24) - -enum gf_fdl { gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1, gf_fdl_mt_end }; - -typedef struct { - char *type; - off_t size; - char *path; - int fd; - void *ptr; - off_t max_offset; -} log_obj_t; - -typedef struct { - struct list_head reqs; - pthread_mutex_t req_lock; - pthread_cond_t req_cond; - char *log_dir; - pthread_t worker; - gf_boolean_t should_stop; - gf_boolean_t change_term; - log_obj_t meta_log; - log_obj_t data_log; - int term; - int first_term; -} fdl_private_t; - -int32_t -fdl_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata); - -void -fdl_enqueue(xlator_t *this, call_stub_t *stub) -{ - fdl_private_t *priv = this->private; - - pthread_mutex_lock(&priv->req_lock); - list_add_tail(&stub->list, &priv->reqs); - pthread_mutex_unlock(&priv->req_lock); - - pthread_cond_signal(&priv->req_cond); -} - -#pragma generate - -char * -fdl_open_term_log(xlator_t *this, log_obj_t *obj, int term) -{ - fdl_private_t *priv = this->private; - int ret; - char *ptr = NULL; - - /* - * Use .jnl instead of .log so that we don't get test info (mistakenly) - * appended to our journal files. - */ - if (this->ctx->cmd_args.log_ident) { - ret = gf_asprintf(&obj->path, "%s/%s-%s-%d.jnl", priv->log_dir, - this->ctx->cmd_args.log_ident, obj->type, term); - } else { - ret = gf_asprintf(&obj->path, "%s/fubar-%s-%d.jnl", priv->log_dir, - obj->type, term); - } - if ((ret <= 0) || !obj->path) { - gf_log(this->name, GF_LOG_ERROR, "failed to construct log-file path"); - goto err; - } - - gf_log(this->name, GF_LOG_INFO, "opening %s (size %" PRId64 ")", obj->path, - obj->size); - - obj->fd = open(obj->path, O_RDWR | O_CREAT | O_TRUNC, 0666); - if (obj->fd < 0) { - gf_log(this->name, GF_LOG_ERROR, "failed to open log file (%s)", - strerror(errno)); - goto err; - } - -#if !defined(GF_BSD_HOST_OS) - /* - * NetBSD can just go die in a fire. Even though it claims to support - * fallocate/posix_fallocate they don't actually *do* anything so the - * file size remains zero. Then mmap succeeds anyway, but any access - * to the mmap'ed region will segfault. It would be acceptable for - * fallocate to do what it says, for mmap to fail, or for access to - * extend the file. NetBSD managed to hit the trifecta of Getting - * Everything Wrong, and debugging in that environment to get this far - * has already been painful enough (systems I worked on in 1990 were - * better that way). We'll fall through to the lseek/write method, and - * performance will be worse, and TOO BAD. - */ - if (sys_fallocate(obj->fd, 0, 0, obj->size) < 0) -#endif - { - gf_log(this->name, GF_LOG_WARNING, - "failed to fallocate space for log file"); - /* Have to do this the ugly page-faulty way. */ - (void)sys_lseek(obj->fd, obj->size - 1, SEEK_SET); - (void)sys_write(obj->fd, "", 1); - } - - ptr = mmap(NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0); - if (ptr == MAP_FAILED) { - gf_log(this->name, GF_LOG_ERROR, "failed to mmap log (%s)", - strerror(errno)); - goto err; - } - - obj->ptr = ptr; - obj->max_offset = 0; - return ptr; - -err: - if (obj->fd >= 0) { - sys_close(obj->fd); - obj->fd = (-1); - } - if (obj->path) { - GF_FREE(obj->path); - obj->path = NULL; - } - return ptr; -} - -void -fdl_close_term_log(xlator_t *this, log_obj_t *obj) -{ - fdl_private_t *priv = this->private; - - if (obj->ptr) { - (void)munmap(obj->ptr, obj->size); - obj->ptr = NULL; - } - - if (obj->fd >= 0) { - gf_log(this->name, GF_LOG_INFO, - "truncating term %d %s journal to %" PRId64, - priv->term, obj->type, obj->max_offset); - if (sys_ftruncate(obj->fd, obj->max_offset) < 0) { - gf_log(this->name, GF_LOG_WARNING, - "failed to truncate journal (%s)", strerror(errno)); - } - sys_close(obj->fd); - obj->fd = (-1); - } - - if (obj->path) { - GF_FREE(obj->path); - obj->path = NULL; - } -} - -gf_boolean_t -fdl_change_term(xlator_t *this, char **meta_ptr, char **data_ptr) -{ - fdl_private_t *priv = this->private; - - fdl_close_term_log(this, &priv->meta_log); - fdl_close_term_log(this, &priv->data_log); - - ++(priv->term); - - *meta_ptr = fdl_open_term_log(this, &priv->meta_log, priv->term); - if (!*meta_ptr) { - return _gf_false; - } - - *data_ptr = fdl_open_term_log(this, &priv->data_log, priv->term); - if (!*data_ptr) { - return _gf_false; - } - - return _gf_true; -} - -void * -fdl_worker(void *arg) -{ - xlator_t *this = arg; - fdl_private_t *priv = this->private; - call_stub_t *stub; - char *meta_ptr = NULL; - off_t *meta_offset = &priv->meta_log.max_offset; - char *data_ptr = NULL; - off_t *data_offset = &priv->data_log.max_offset; - unsigned long base_as_ul; - void *msync_ptr; - size_t msync_len; - gf_boolean_t recycle; - void *err_label = &&err_unlocked; - - priv->meta_log.type = "meta"; - priv->meta_log.size = META_FILE_SIZE; - priv->meta_log.path = NULL; - priv->meta_log.fd = (-1); - priv->meta_log.ptr = NULL; - - priv->data_log.type = "data"; - priv->data_log.size = DATA_FILE_SIZE; - priv->data_log.path = NULL; - priv->data_log.fd = (-1); - priv->data_log.ptr = NULL; - - /* TBD: initial term should come from persistent storage (e.g. etcd) */ - priv->first_term = ++(priv->term); - meta_ptr = fdl_open_term_log(this, &priv->meta_log, priv->term); - if (!meta_ptr) { - goto *err_label; - } - data_ptr = fdl_open_term_log(this, &priv->data_log, priv->term); - if (!data_ptr) { - fdl_close_term_log(this, &priv->meta_log); - goto *err_label; - } - - for (;;) { - pthread_mutex_lock(&priv->req_lock); - err_label = &&err_locked; - while (list_empty(&priv->reqs)) { - pthread_cond_wait(&priv->req_cond, &priv->req_lock); - if (priv->should_stop) { - goto *err_label; - } - if (priv->change_term) { - if (!fdl_change_term(this, &meta_ptr, &data_ptr)) { - goto *err_label; - } - priv->change_term = _gf_false; - continue; - } - } - stub = list_entry(priv->reqs.next, call_stub_t, list); - list_del_init(&stub->list); - pthread_mutex_unlock(&priv->req_lock); - err_label = &&err_unlocked; - /* - * TBD: batch requests - * - * What we should do here is gather up *all* of the requests - * that have accumulated since we were last at this point, - * blast them all out in one big writev, and then dispatch them - * all before coming back for more. That maximizes throughput, - * at some cost to latency (due to queuing effects at the log - * stage). Note that we're likely to be above io-threads, so - * the dispatch itself will be parallelized (at further cost to - * latency). For now, we just do the simplest thing and handle - * one request all the way through before fetching the next. - * - * So, why mmap/msync instead of writev/fdatasync? Because it's - * faster. Much faster. So much faster that I half-suspect - * cheating, but it's more convenient for now than having to - * ensure that everything's page-aligned for O_DIRECT (the only - * alternative that still might avoid ridiculous levels of - * local-FS overhead). - * - * TBD: check that msync really does get our data to disk. - */ - gf_log(this->name, GF_LOG_DEBUG, "logging %u+%u bytes for op %d", - stub->jnl_meta_len, stub->jnl_data_len, stub->fop); - recycle = _gf_false; - if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) { - recycle = _gf_true; - } - if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) { - recycle = _gf_true; - } - if (recycle && !fdl_change_term(this, &meta_ptr, &data_ptr)) { - goto *err_label; - } - meta_ptr = priv->meta_log.ptr; - data_ptr = priv->data_log.ptr; - gf_log(this->name, GF_LOG_DEBUG, "serializing to %p/%p", - meta_ptr + *meta_offset, data_ptr + *data_offset); - stub->serialize(stub, meta_ptr + *meta_offset, data_ptr + *data_offset); - if (stub->jnl_meta_len > 0) { - base_as_ul = (unsigned long)(meta_ptr + *meta_offset); - msync_ptr = (void *)(base_as_ul & ~0x0fff); - msync_len = (size_t)(base_as_ul & 0x0fff); - if (msync(msync_ptr, msync_len + stub->jnl_meta_len, MS_SYNC) < 0) { - gf_log(this->name, GF_LOG_WARNING, - "failed to log request meta (%s)", strerror(errno)); - } - *meta_offset += stub->jnl_meta_len; - } - if (stub->jnl_data_len > 0) { - base_as_ul = (unsigned long)(data_ptr + *data_offset); - msync_ptr = (void *)(base_as_ul & ~0x0fff); - msync_len = (size_t)(base_as_ul & 0x0fff); - if (msync(msync_ptr, msync_len + stub->jnl_data_len, MS_SYNC) < 0) { - gf_log(this->name, GF_LOG_WARNING, - "failed to log request data (%s)", strerror(errno)); - } - *data_offset += stub->jnl_data_len; - } - call_resume(stub); - } - -err_locked: - pthread_mutex_unlock(&priv->req_lock); -err_unlocked: - fdl_close_term_log(this, &priv->meta_log); - fdl_close_term_log(this, &priv->data_log); - return NULL; -} - -int32_t -fdl_ipc_continue(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) -{ - /* - * Nothing to be done here. Just Unwind. * - */ - STACK_UNWIND_STRICT(ipc, frame, 0, 0, xdata); - - return 0; -} - -int32_t -fdl_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) -{ - call_stub_t *stub; - fdl_private_t *priv = this->private; - dict_t *tdict; - int32_t gt_err = EIO; - - switch (op) { - case FDL_IPC_CHANGE_TERM: - gf_log(this->name, GF_LOG_INFO, "got CHANGE_TERM op"); - priv->change_term = _gf_true; - pthread_cond_signal(&priv->req_cond); - STACK_UNWIND_STRICT(ipc, frame, 0, 0, NULL); - break; - - case FDL_IPC_GET_TERMS: - gf_log(this->name, GF_LOG_INFO, "got GET_TERMS op"); - tdict = dict_new(); - if (!tdict) { - gt_err = ENOMEM; - goto gt_done; - } - if (dict_set_int32(tdict, "first", priv->first_term) != 0) { - goto gt_done; - } - if (dict_set_int32(tdict, "last", priv->term) != 0) { - goto gt_done; - } - gt_err = 0; - gt_done: - if (gt_err) { - STACK_UNWIND_STRICT(ipc, frame, -1, gt_err, NULL); - } else { - STACK_UNWIND_STRICT(ipc, frame, 0, 0, tdict); - } - if (tdict) { - dict_unref(tdict); - } - break; - - case FDL_IPC_JBR_SERVER_ROLLBACK: - /* - * In case of a rollback from jbr-server, dump * - * the term and index number in the journal, * - * which will later be used to rollback the fop * - */ - stub = fop_ipc_stub(frame, fdl_ipc_continue, op, xdata); - fdl_len_ipc(stub); - stub->serialize = fdl_serialize_ipc; - fdl_enqueue(this, stub); - - break; - - default: - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ipc, op, xdata); - } - - return 0; -} - -int -fdl_init(xlator_t *this) -{ - fdl_private_t *priv = NULL; - - priv = GF_CALLOC(1, sizeof(*priv), gf_fdl_mt_fdl_private_t); - if (!priv) { - gf_log(this->name, GF_LOG_ERROR, "failed to allocate fdl_private"); - goto err; - } - - INIT_LIST_HEAD(&priv->reqs); - if (pthread_mutex_init(&priv->req_lock, NULL) != 0) { - gf_log(this->name, GF_LOG_ERROR, "failed to initialize req_lock"); - goto err; - } - if (pthread_cond_init(&priv->req_cond, NULL) != 0) { - gf_log(this->name, GF_LOG_ERROR, "failed to initialize req_cond"); - goto err; - } - - GF_OPTION_INIT("log-path", priv->log_dir, path, err); - - this->private = priv; - /* - * The rest of the fop table is automatically generated, so this is a - * bit cleaner than messing with the generation to add a hand-written - * exception. - */ - - if (gf_thread_create(&priv->worker, NULL, fdl_worker, this, "fdlwrker") != - 0) { - gf_log(this->name, GF_LOG_ERROR, "failed to start fdl_worker"); - goto err; - } - - return 0; - -err: - if (priv) { - GF_FREE(priv); - } - return -1; -} - -void -fdl_fini(xlator_t *this) -{ - fdl_private_t *priv = this->private; - - if (priv) { - priv->should_stop = _gf_true; - pthread_cond_signal(&priv->req_cond); - pthread_join(priv->worker, NULL); - GF_FREE(priv); - } -} - -int -fdl_reconfigure(xlator_t *this, dict_t *options) -{ - fdl_private_t *priv = this->private; - - GF_OPTION_RECONF("log_dir", priv->log_dir, options, path, out); - /* TBD: react if it changed */ - -out: - return 0; -} - -int32_t -mem_acct_init(xlator_t *this) -{ - int ret = -1; - - GF_VALIDATE_OR_GOTO("fdl", this, out); - - ret = xlator_mem_acct_init(this, gf_fdl_mt_end + 1); - - if (ret != 0) { - gf_log(this->name, GF_LOG_ERROR, - "Memory accounting init" - "failed"); - return ret; - } -out: - return ret; -} - -class_methods_t class_methods = { - .init = fdl_init, - .fini = fdl_fini, - .reconfigure = fdl_reconfigure, - .notify = default_notify, -}; - -struct volume_options options[] = { - {.key = {"log-path"}, - .type = GF_OPTION_TYPE_PATH, - .default_value = DEFAULT_LOG_FILE_DIRECTORY, - .description = "Directory for FDL files."}, - {.key = {NULL}}, -}; - -struct xlator_cbks cbks = { - .release = default_release, - .releasedir = default_releasedir, - .forget = default_forget, -}; |