diff options
author | Amar Tumballi <amarts@redhat.com> | 2018-09-17 17:17:54 +0530 |
---|---|---|
committer | Xavi Hernandez <xhernandez@redhat.com> | 2018-10-05 10:12:29 +0000 |
commit | 76bd93c7b889deafd4d5e1e9387714a6c1918ac7 (patch) | |
tree | 269da17c97019b79687839b02e7049ef78d6f8d3 /xlators/experimental | |
parent | c1f04098227c17bbebe286871c75524c80eb8b3a (diff) |
xlators/experimental: move template files to '.c.in' type
This is critical because we shouldn't be applying the automated
coding standard (clang-format) tool on these files.
This patchset is done by below steps:
* clang-format -i ${filename}
This creates syntax errors. Fix them using below two commands:
* sed -i -e 's/ @/@/g' ${filename}
* sed -i -e 's/,@/, @/g' ${filename}
With this, these files are having minimum changes requried to
compile, and is as close to the coding standard as possible.
* git rename ${filename} ${filename}.in
Updates: bz#1564149
Change-Id: Icf90f7f81d6fa4400be4826e094fdff8e64508d0
Signed-off-by: Amar Tumballi <amarts@redhat.com>
Diffstat (limited to 'xlators/experimental')
-rw-r--r-- | xlators/experimental/fdl/src/Makefile.am | 14 | ||||
-rw-r--r-- | xlators/experimental/fdl/src/dump-tmpl.c | 187 | ||||
-rw-r--r-- | xlators/experimental/fdl/src/dump-tmpl.c.in | 177 | ||||
-rw-r--r-- | xlators/experimental/fdl/src/fdl-tmpl.c | 536 | ||||
-rw-r--r-- | xlators/experimental/fdl/src/fdl-tmpl.c.in | 512 | ||||
-rw-r--r-- | xlators/experimental/fdl/src/recon-tmpl.c | 304 | ||||
-rw-r--r-- | xlators/experimental/fdl/src/recon-tmpl.c.in | 297 | ||||
-rw-r--r-- | xlators/experimental/jbr-client/src/Makefile.am | 2 | ||||
-rw-r--r-- | xlators/experimental/jbr-client/src/fop-template.c | 113 | ||||
-rw-r--r-- | xlators/experimental/jbr-client/src/fop-template.c.in | 102 | ||||
-rw-r--r-- | xlators/experimental/jbr-server/src/Makefile.am | 2 | ||||
-rw-r--r-- | xlators/experimental/jbr-server/src/all-templates.c | 542 | ||||
-rw-r--r-- | xlators/experimental/jbr-server/src/all-templates.c.in | 501 |
13 files changed, 1598 insertions, 1691 deletions
diff --git a/xlators/experimental/fdl/src/Makefile.am b/xlators/experimental/fdl/src/Makefile.am index f39978c3930..bdcaaf6c38d 100644 --- a/xlators/experimental/fdl/src/Makefile.am +++ b/xlators/experimental/fdl/src/Makefile.am @@ -33,16 +33,16 @@ AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ AM_CFLAGS = -Wall $(GF_CFLAGS) noinst_PYTHON = gen_fdl.py gen_dumper.py gen_recon.py -EXTRA_DIST = fdl-tmpl.c dump-tmpl.c recon-tmpl.c +EXTRA_DIST = fdl-tmpl.c.in dump-tmpl.c.in recon-tmpl.c.in CLEANFILES = $(nodist_fdl_la_SOURCES) $(nodist_gf_logdump_SOURCES) \ $(nodist_gf_recon_SOURCES) -fdl.c: fdl-tmpl.c gen_fdl.py - $(PYTHON) $(srcdir)/gen_fdl.py $(srcdir)/fdl-tmpl.c > $@ +fdl.c: fdl-tmpl.c.in gen_fdl.py + $(PYTHON) $(srcdir)/gen_fdl.py $(srcdir)/fdl-tmpl.c.in > $@ -libfdl.c: dump-tmpl.c gen_dumper.py - $(PYTHON) $(srcdir)/gen_dumper.py $(srcdir)/dump-tmpl.c > $@ +libfdl.c: dump-tmpl.c.in gen_dumper.py + $(PYTHON) $(srcdir)/gen_dumper.py $(srcdir)/dump-tmpl.c.in > $@ -librecon.c: recon-tmpl.c gen_recon.py - $(PYTHON) $(srcdir)/gen_recon.py $(srcdir)/recon-tmpl.c > $@ +librecon.c: recon-tmpl.c.in gen_recon.py + $(PYTHON) $(srcdir)/gen_recon.py $(srcdir)/recon-tmpl.c.in > $@ diff --git a/xlators/experimental/fdl/src/dump-tmpl.c b/xlators/experimental/fdl/src/dump-tmpl.c deleted file mode 100644 index 32b0fef6af3..00000000000 --- a/xlators/experimental/fdl/src/dump-tmpl.c +++ /dev/null @@ -1,187 +0,0 @@ -#pragma fragment PROLOG -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#include <ctype.h> -#endif - -#include "glfs.h" -#include "iatt.h" -#include "xlator.h" -#include "fdl.h" - -/* - * Returns 0 if the string is ASCII printable * - * and -1 if it's not ASCII printable * - */ -int str_isprint (char *s) -{ - int ret = -1; - - if (!s) - goto out; - - while (s[0] != '\0') { - if (!isprint(s[0])) - goto out; - else - s++; - } - - ret = 0; -out: - return ret; -} - -#pragma fragment DICT - { - int key_len, data_len; - char *key_ptr; - char *key_val; - printf ("@ARGNAME@ = dict {\n"); - for (;;) { - key_len = *((int *)new_meta); - new_meta += sizeof(int); - if (!key_len) { - break; - } - key_ptr = new_meta; - new_meta += key_len; - data_len = *((int *)new_meta); - key_val = new_meta + sizeof(int); - new_meta += sizeof(int) + data_len; - if (str_isprint(key_val)) - printf (" %s = <%d bytes>\n", - key_ptr, data_len); - else - printf (" %s = %s <%d bytes>\n", - key_ptr, key_val, data_len); - } - printf ("}\n"); - } - -#pragma fragment DOUBLE - printf ("@ARGNAME@ = @FORMAT@\n", *((uint64_t *)new_meta), - *((uint64_t *)new_meta)); - new_meta += sizeof(uint64_t); - -#pragma fragment GFID - printf ("@ARGNAME@ = <gfid %s>\n", uuid_utoa(*((uuid_t *)new_meta))); - new_meta += 16; - -#pragma fragment INTEGER - printf ("@ARGNAME@ = @FORMAT@\n", *((uint32_t *)new_meta), - *((uint32_t *)new_meta)); - new_meta += sizeof(uint32_t); - -#pragma fragment LOC - printf ("@ARGNAME@ = loc {\n"); - printf (" gfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); - new_meta += 16; - printf (" pargfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); - new_meta += 16; - if (*(new_meta++)) { - printf (" name = %s\n", new_meta); - new_meta += (strlen(new_meta) + 1); - } - printf ("}\n"); - -#pragma fragment STRING - if (*(new_meta++)) { - printf ("@ARGNAME@ = %s\n", new_meta); - new_meta += (strlen(new_meta) + 1); - } - -#pragma fragment VECTOR - { - size_t len = *((size_t *)new_meta); - new_meta += sizeof(len); - printf ("@ARGNAME@ = <%zu bytes>\n", len); - new_data += len; - } - -#pragma fragment IATT - { - ia_prot_t *myprot = ((ia_prot_t *)new_meta); - printf ("@ARGNAME@ = iatt {\n"); - printf (" ia_prot = %c%c%c", - myprot->suid ? 'S' : '-', - myprot->sgid ? 'S' : '-', - myprot->sticky ? 'T' : '-'); - printf ("%c%c%c", - myprot->owner.read ? 'r' : '-', - myprot->owner.write ? 'w' : '-', - myprot->owner.exec ? 'x' : '-'); - printf ("%c%c%c", - myprot->group.read ? 'r' : '-', - myprot->group.write ? 'w' : '-', - myprot->group.exec ? 'x' : '-'); - printf ("%c%c%c\n", - myprot->other.read ? 'r' : '-', - myprot->other.write ? 'w' : '-', - myprot->other.exec ? 'x' : '-'); - new_meta += sizeof(ia_prot_t); - uint32_t *myints = (uint32_t *)new_meta; - printf (" ia_uid = %u\n", myints[0]); - printf (" ia_gid = %u\n", myints[1]); - printf (" ia_atime = %u.%09u\n", myints[2], myints[3]); - printf (" ia_mtime = %u.%09u\n", myints[4], myints[5]); - new_meta += sizeof(*myints) * 6; - } - -#pragma fragment FOP -void -fdl_dump_@NAME@ (char **old_meta, char **old_data) -{ - char *new_meta = *old_meta; - char *new_data = *old_data; - - /* TBD: word size/endianness */ -@FUNCTION_BODY@ - - *old_meta = new_meta; - *old_data = new_data; -} - -#pragma fragment CASE - case GF_FOP_@UPNAME@: - printf ("=== GF_FOP_@UPNAME@\n"); - fdl_dump_@NAME@ (&new_meta, &new_data); - break; - -#pragma fragment EPILOG -int -fdl_dump (char **old_meta, char **old_data) -{ - char *new_meta = *old_meta; - char *new_data = *old_data; - static glfs_t *fs = NULL; - int recognized = 1; - event_header_t *eh; - - /* - * We don't really call anything else in GFAPI, but this is the most - * convenient way to satisfy all of the spurious dependencies on how it - * or glusterfsd initialize (e.g. setting up THIS). - */ - if (!fs) { - fs = glfs_new ("dummy"); - } - - eh = (event_header_t *)new_meta; - new_meta += sizeof (*eh); - - /* TBD: check event_type instead of assuming NEW_REQUEST */ - - switch (eh->fop_type) { -@SWITCH_BODY@ - - default: - printf ("unknown fop %u\n", eh->fop_type); - recognized = 0; - } - - *old_meta = new_meta; - *old_data = new_data; - return recognized; -} diff --git a/xlators/experimental/fdl/src/dump-tmpl.c.in b/xlators/experimental/fdl/src/dump-tmpl.c.in new file mode 100644 index 00000000000..97249ac3e71 --- /dev/null +++ b/xlators/experimental/fdl/src/dump-tmpl.c.in @@ -0,0 +1,177 @@ +#pragma fragment PROLOG +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#include <ctype.h> +#endif + +#include "glfs.h" +#include "iatt.h" +#include "xlator.h" +#include "fdl.h" + +/* + * Returns 0 if the string is ASCII printable * + * and -1 if it's not ASCII printable * + */ +int +str_isprint(char *s) +{ + int ret = -1; + + if (!s) + goto out; + + while (s[0] != '\0') { + if (!isprint(s[0])) + goto out; + else + s++; + } + + ret = 0; +out: + return ret; +} + +#pragma fragment DICT +{ + int key_len, data_len; + char *key_ptr; + char *key_val; + printf("@ARGNAME@ = dict {\n"); + for (;;) { + key_len = *((int *)new_meta); + new_meta += sizeof(int); + if (!key_len) { + break; + } + key_ptr = new_meta; + new_meta += key_len; + data_len = *((int *)new_meta); + key_val = new_meta + sizeof(int); + new_meta += sizeof(int) + data_len; + if (str_isprint(key_val)) + printf(" %s = <%d bytes>\n", key_ptr, data_len); + else + printf(" %s = %s <%d bytes>\n", key_ptr, key_val, data_len); + } + printf("}\n"); +} + +#pragma fragment DOUBLE +printf("@ARGNAME@ = @FORMAT@\n", *((uint64_t *)new_meta), + *((uint64_t *)new_meta)); +new_meta += sizeof(uint64_t); + +#pragma fragment GFID +printf("@ARGNAME@ = <gfid %s>\n", uuid_utoa(*((uuid_t *)new_meta))); +new_meta += 16; + +#pragma fragment INTEGER +printf("@ARGNAME@ = @FORMAT@\n", *((uint32_t *)new_meta), + *((uint32_t *)new_meta)); +new_meta += sizeof(uint32_t); + +#pragma fragment LOC +printf("@ARGNAME@ = loc {\n"); +printf(" gfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); +new_meta += 16; +printf(" pargfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); +new_meta += 16; +if (*(new_meta++)) { + printf(" name = %s\n", new_meta); + new_meta += (strlen(new_meta) + 1); +} +printf("}\n"); + +#pragma fragment STRING +if (*(new_meta++)) { + printf("@ARGNAME@ = %s\n", new_meta); + new_meta += (strlen(new_meta) + 1); +} + +#pragma fragment VECTOR +{ + size_t len = *((size_t *)new_meta); + new_meta += sizeof(len); + printf("@ARGNAME@ = <%zu bytes>\n", len); + new_data += len; +} + +#pragma fragment IATT +{ + ia_prot_t *myprot = ((ia_prot_t *)new_meta); + printf("@ARGNAME@ = iatt {\n"); + printf(" ia_prot = %c%c%c", myprot->suid ? 'S' : '-', + myprot->sgid ? 'S' : '-', myprot->sticky ? 'T' : '-'); + printf("%c%c%c", myprot->owner.read ? 'r' : '-', + myprot->owner.write ? 'w' : '-', myprot->owner.exec ? 'x' : '-'); + printf("%c%c%c", myprot->group.read ? 'r' : '-', + myprot->group.write ? 'w' : '-', myprot->group.exec ? 'x' : '-'); + printf("%c%c%c\n", myprot->other.read ? 'r' : '-', + myprot->other.write ? 'w' : '-', myprot->other.exec ? 'x' : '-'); + new_meta += sizeof(ia_prot_t); + uint32_t *myints = (uint32_t *)new_meta; + printf(" ia_uid = %u\n", myints[0]); + printf(" ia_gid = %u\n", myints[1]); + printf(" ia_atime = %u.%09u\n", myints[2], myints[3]); + printf(" ia_mtime = %u.%09u\n", myints[4], myints[5]); + new_meta += sizeof(*myints) * 6; +} + +#pragma fragment FOP +void fdl_dump_@NAME@(char **old_meta, char **old_data) +{ + char *new_meta = *old_meta; + char *new_data = *old_data; + + /* TBD: word size/endianness */ + @FUNCTION_BODY@ + + *old_meta = new_meta; + *old_data = new_data; +} + +#pragma fragment CASE +case GF_FOP_@UPNAME@: + printf("=== GF_FOP_@UPNAME@\n"); + fdl_dump_@NAME@(&new_meta, &new_data); + break; + +#pragma fragment EPILOG + int + fdl_dump(char **old_meta, char **old_data) + { + char *new_meta = *old_meta; + char *new_data = *old_data; + static glfs_t *fs = NULL; + int recognized = 1; + event_header_t *eh; + + /* + * We don't really call anything else in GFAPI, but this is the most + * convenient way to satisfy all of the spurious dependencies on how it + * or glusterfsd initialize (e.g. setting up THIS). + */ + if (!fs) { + fs = glfs_new("dummy"); + } + + eh = (event_header_t *)new_meta; + new_meta += sizeof(*eh); + + /* TBD: check event_type instead of assuming NEW_REQUEST */ + + switch (eh->fop_type) { + @SWITCH_BODY@ + + default : + printf("unknown fop %u\n", eh->fop_type); + recognized = 0; + } + + *old_meta = new_meta; + *old_data = new_data; + return recognized; + } diff --git a/xlators/experimental/fdl/src/fdl-tmpl.c b/xlators/experimental/fdl/src/fdl-tmpl.c deleted file mode 100644 index 145dad7964a..00000000000 --- a/xlators/experimental/fdl/src/fdl-tmpl.c +++ /dev/null @@ -1,536 +0,0 @@ -/* - Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> - This file is part of GlusterFS. - - This file is licensed to you under your choice of the GNU Lesser - General Public License, version 3 or any later version (LGPLv3 or - later), or the GNU General Public License, version 2 (GPLv2), in all - cases as published by the Free Software Foundation. -*/ - -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include <fcntl.h> -#include <unistd.h> -#include <sys/mman.h> -#include "call-stub.h" -#include "iatt.h" -#include "defaults.h" -#include "syscall.h" -#include "xlator.h" -#include "fdl.h" - -/* TBD: make tunable */ -#define META_FILE_SIZE (1 << 20) -#define DATA_FILE_SIZE (1 << 24) - -enum gf_fdl { - gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1, - gf_fdl_mt_end -}; - -typedef struct { - char *type; - off_t size; - char *path; - int fd; - void * ptr; - off_t max_offset; -} log_obj_t; - -typedef struct { - struct list_head reqs; - pthread_mutex_t req_lock; - pthread_cond_t req_cond; - char *log_dir; - pthread_t worker; - gf_boolean_t should_stop; - gf_boolean_t change_term; - log_obj_t meta_log; - log_obj_t data_log; - int term; - int first_term; -} fdl_private_t; - -int32_t -fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata); - -void -fdl_enqueue (xlator_t *this, call_stub_t *stub) -{ - fdl_private_t *priv = this->private; - - pthread_mutex_lock (&priv->req_lock); - list_add_tail (&stub->list, &priv->reqs); - pthread_mutex_unlock (&priv->req_lock); - - pthread_cond_signal (&priv->req_cond); -} - -#pragma generate - -char * -fdl_open_term_log (xlator_t *this, log_obj_t *obj, int term) -{ - fdl_private_t *priv = this->private; - int ret; - char * ptr = NULL; - - /* - * Use .jnl instead of .log so that we don't get test info (mistakenly) - * appended to our journal files. - */ - if (this->ctx->cmd_args.log_ident) { - ret = gf_asprintf (&obj->path, "%s/%s-%s-%d.jnl", - priv->log_dir, this->ctx->cmd_args.log_ident, - obj->type, term); - } - else { - ret = gf_asprintf (&obj->path, "%s/fubar-%s-%d.jnl", - priv->log_dir, obj->type, term); - } - if ((ret <= 0) || !obj->path) { - gf_log (this->name, GF_LOG_ERROR, - "failed to construct log-file path"); - goto err; - } - - gf_log (this->name, GF_LOG_INFO, "opening %s (size %ld)", - obj->path, obj->size); - - obj->fd = open (obj->path, O_RDWR|O_CREAT|O_TRUNC, 0666); - if (obj->fd < 0) { - gf_log (this->name, GF_LOG_ERROR, - "failed to open log file (%s)", strerror(errno)); - goto err; - } - -#if !defined(GF_BSD_HOST_OS) - /* - * NetBSD can just go die in a fire. Even though it claims to support - * fallocate/posix_fallocate they don't actually *do* anything so the - * file size remains zero. Then mmap succeeds anyway, but any access - * to the mmap'ed region will segfault. It would be acceptable for - * fallocate to do what it says, for mmap to fail, or for access to - * extend the file. NetBSD managed to hit the trifecta of Getting - * Everything Wrong, and debugging in that environment to get this far - * has already been painful enough (systems I worked on in 1990 were - * better that way). We'll fall through to the lseek/write method, and - * performance will be worse, and TOO BAD. - */ - if (sys_fallocate(obj->fd,0,0,obj->size) < 0) -#endif - { - gf_log (this->name, GF_LOG_WARNING, - "failed to fallocate space for log file"); - /* Have to do this the ugly page-faulty way. */ - (void) sys_lseek (obj->fd, obj->size-1, SEEK_SET); - (void) sys_write (obj->fd, "", 1); - } - - ptr = mmap (NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0); - if (ptr == MAP_FAILED) { - gf_log (this->name, GF_LOG_ERROR, "failed to mmap log (%s)", - strerror(errno)); - goto err; - } - - obj->ptr = ptr; - obj->max_offset = 0; - return ptr; - -err: - if (obj->fd >= 0) { - sys_close (obj->fd); - obj->fd = (-1); - } - if (obj->path) { - GF_FREE (obj->path); - obj->path = NULL; - } - return ptr; -} - -void -fdl_close_term_log (xlator_t *this, log_obj_t *obj) -{ - fdl_private_t *priv = this->private; - - if (obj->ptr) { - (void) munmap (obj->ptr, obj->size); - obj->ptr = NULL; - } - - if (obj->fd >= 0) { - gf_log (this->name, GF_LOG_INFO, - "truncating term %d %s journal to %ld", - priv->term, obj->type, obj->max_offset); - if (sys_ftruncate(obj->fd,obj->max_offset) < 0) { - gf_log (this->name, GF_LOG_WARNING, - "failed to truncate journal (%s)", - strerror(errno)); - } - sys_close (obj->fd); - obj->fd = (-1); - } - - if (obj->path) { - GF_FREE (obj->path); - obj->path = NULL; - } -} - -gf_boolean_t -fdl_change_term (xlator_t *this, char **meta_ptr, char **data_ptr) -{ - fdl_private_t *priv = this->private; - - fdl_close_term_log (this, &priv->meta_log); - fdl_close_term_log (this, &priv->data_log); - - ++(priv->term); - - *meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term); - if (!*meta_ptr) { - return _gf_false; - } - - *data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term); - if (!*data_ptr) { - return _gf_false; - } - - return _gf_true; -} - -void * -fdl_worker (void *arg) -{ - xlator_t *this = arg; - fdl_private_t *priv = this->private; - call_stub_t *stub; - char * meta_ptr = NULL; - off_t *meta_offset = &priv->meta_log.max_offset; - char * data_ptr = NULL; - off_t *data_offset = &priv->data_log.max_offset; - unsigned long base_as_ul; - void * msync_ptr; - size_t msync_len; - gf_boolean_t recycle; - void *err_label = &&err_unlocked; - - priv->meta_log.type = "meta"; - priv->meta_log.size = META_FILE_SIZE; - priv->meta_log.path = NULL; - priv->meta_log.fd = (-1); - priv->meta_log.ptr = NULL; - - priv->data_log.type = "data"; - priv->data_log.size = DATA_FILE_SIZE; - priv->data_log.path = NULL; - priv->data_log.fd = (-1); - priv->data_log.ptr = NULL; - - /* TBD: initial term should come from persistent storage (e.g. etcd) */ - priv->first_term = ++(priv->term); - meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term); - if (!meta_ptr) { - goto *err_label; - } - data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term); - if (!data_ptr) { - fdl_close_term_log (this, &priv->meta_log); - goto *err_label; - } - - for (;;) { - pthread_mutex_lock (&priv->req_lock); - err_label = &&err_locked; - while (list_empty(&priv->reqs)) { - pthread_cond_wait (&priv->req_cond, &priv->req_lock); - if (priv->should_stop) { - goto *err_label; - } - if (priv->change_term) { - if (!fdl_change_term(this, &meta_ptr, - &data_ptr)) { - goto *err_label; - } - priv->change_term = _gf_false; - continue; - } - } - stub = list_entry (priv->reqs.next, call_stub_t, list); - list_del_init (&stub->list); - pthread_mutex_unlock (&priv->req_lock); - err_label = &&err_unlocked; - /* - * TBD: batch requests - * - * What we should do here is gather up *all* of the requests - * that have accumulated since we were last at this point, - * blast them all out in one big writev, and then dispatch them - * all before coming back for more. That maximizes throughput, - * at some cost to latency (due to queuing effects at the log - * stage). Note that we're likely to be above io-threads, so - * the dispatch itself will be parallelized (at further cost to - * latency). For now, we just do the simplest thing and handle - * one request all the way through before fetching the next. - * - * So, why mmap/msync instead of writev/fdatasync? Because it's - * faster. Much faster. So much faster that I half-suspect - * cheating, but it's more convenient for now than having to - * ensure that everything's page-aligned for O_DIRECT (the only - * alternative that still might avoid ridiculous levels of - * local-FS overhead). - * - * TBD: check that msync really does get our data to disk. - */ - gf_log (this->name, GF_LOG_DEBUG, - "logging %u+%u bytes for op %d", - stub->jnl_meta_len, stub->jnl_data_len, stub->fop); - recycle = _gf_false; - if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) { - recycle = _gf_true; - } - if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) { - recycle = _gf_true; - } - if (recycle && !fdl_change_term(this,&meta_ptr,&data_ptr)) { - goto *err_label; - } - meta_ptr = priv->meta_log.ptr; - data_ptr = priv->data_log.ptr; - gf_log (this->name, GF_LOG_DEBUG, "serializing to %p/%p", - meta_ptr + *meta_offset, data_ptr + *data_offset); - stub->serialize (stub, meta_ptr + *meta_offset, - data_ptr + *data_offset); - if (stub->jnl_meta_len > 0) { - base_as_ul = (unsigned long) (meta_ptr + *meta_offset); - msync_ptr = (void *) (base_as_ul & ~0x0fff); - msync_len = (size_t) (base_as_ul & 0x0fff); - if (msync (msync_ptr, msync_len+stub->jnl_meta_len, - MS_SYNC) < 0) { - gf_log (this->name, GF_LOG_WARNING, - "failed to log request meta (%s)", - strerror(errno)); - } - *meta_offset += stub->jnl_meta_len; - } - if (stub->jnl_data_len > 0) { - base_as_ul = (unsigned long) (data_ptr + *data_offset); - msync_ptr = (void *) (base_as_ul & ~0x0fff); - msync_len = (size_t) (base_as_ul & 0x0fff); - if (msync (msync_ptr, msync_len+stub->jnl_data_len, - MS_SYNC) < 0) { - gf_log (this->name, GF_LOG_WARNING, - "failed to log request data (%s)", - strerror(errno)); - } - *data_offset += stub->jnl_data_len; - } - call_resume (stub); - } - -err_locked: - pthread_mutex_unlock (&priv->req_lock); -err_unlocked: - fdl_close_term_log (this, &priv->meta_log); - fdl_close_term_log (this, &priv->data_log); - return NULL; -} - -int32_t -fdl_ipc_continue (call_frame_t *frame, xlator_t *this, - int32_t op, dict_t *xdata) -{ - /* - * Nothing to be done here. Just Unwind. * - */ - STACK_UNWIND_STRICT (ipc, frame, 0, 0, xdata); - - return 0; -} - -int32_t -fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) -{ - call_stub_t *stub; - fdl_private_t *priv = this->private; - dict_t *tdict; - int32_t gt_err = EIO; - - switch (op) { - - case FDL_IPC_CHANGE_TERM: - gf_log (this->name, GF_LOG_INFO, "got CHANGE_TERM op"); - priv->change_term = _gf_true; - pthread_cond_signal (&priv->req_cond); - STACK_UNWIND_STRICT (ipc, frame, 0, 0, NULL); - break; - - case FDL_IPC_GET_TERMS: - gf_log (this->name, GF_LOG_INFO, "got GET_TERMS op"); - tdict = dict_new (); - if (!tdict) { - gt_err = ENOMEM; - goto gt_done; - } - if (dict_set_int32(tdict,"first",priv->first_term) != 0) { - goto gt_done; - } - if (dict_set_int32(tdict,"last",priv->term) != 0) { - goto gt_done; - } - gt_err = 0; - gt_done: - if (gt_err) { - STACK_UNWIND_STRICT (ipc, frame, -1, gt_err, NULL); - } else { - STACK_UNWIND_STRICT (ipc, frame, 0, 0, tdict); - } - if (tdict) { - dict_unref (tdict); - } - break; - - case FDL_IPC_JBR_SERVER_ROLLBACK: - /* - * In case of a rollback from jbr-server, dump * - * the term and index number in the journal, * - * which will later be used to rollback the fop * - */ - stub = fop_ipc_stub (frame, fdl_ipc_continue, - op, xdata); - fdl_len_ipc (stub); - stub->serialize = fdl_serialize_ipc; - fdl_enqueue (this, stub); - - break; - - default: - STACK_WIND_TAIL (frame, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ipc, - op, xdata); - } - - return 0; -} - -int -fdl_init (xlator_t *this) -{ - fdl_private_t *priv = NULL; - - priv = GF_CALLOC (1, sizeof (*priv), gf_fdl_mt_fdl_private_t); - if (!priv) { - gf_log (this->name, GF_LOG_ERROR, - "failed to allocate fdl_private"); - goto err; - } - - INIT_LIST_HEAD (&priv->reqs); - if (pthread_mutex_init (&priv->req_lock, NULL) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "failed to initialize req_lock"); - goto err; - } - if (pthread_cond_init (&priv->req_cond, NULL) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "failed to initialize req_cond"); - goto err; - } - - GF_OPTION_INIT ("log-path", priv->log_dir, path, err); - - this->private = priv; - /* - * The rest of the fop table is automatically generated, so this is a - * bit cleaner than messing with the generation to add a hand-written - * exception. - */ - - if (gf_thread_create (&priv->worker, NULL, fdl_worker, this, - "fdlwrker") != 0) { - gf_log (this->name, GF_LOG_ERROR, - "failed to start fdl_worker"); - goto err; - } - - return 0; - -err: - if (priv) { - GF_FREE(priv); - } - return -1; -} - -void -fdl_fini (xlator_t *this) -{ - fdl_private_t *priv = this->private; - - if (priv) { - priv->should_stop = _gf_true; - pthread_cond_signal (&priv->req_cond); - pthread_join (priv->worker, NULL); - GF_FREE(priv); - } -} - -int -fdl_reconfigure (xlator_t *this, dict_t *options) -{ - fdl_private_t *priv = this->private; - - GF_OPTION_RECONF ("log_dir", priv->log_dir, options, path, out); - /* TBD: react if it changed */ - -out: - return 0; -} - -int32_t -mem_acct_init (xlator_t *this) -{ - int ret = -1; - - GF_VALIDATE_OR_GOTO ("fdl", this, out); - - ret = xlator_mem_acct_init (this, gf_fdl_mt_end + 1); - - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); - return ret; - } -out: - return ret; -} - -class_methods_t class_methods = { - .init = fdl_init, - .fini = fdl_fini, - .reconfigure = fdl_reconfigure, - .notify = default_notify, -}; - -struct volume_options options[] = { - { .key = {"log-path"}, - .type = GF_OPTION_TYPE_PATH, - .default_value = DEFAULT_LOG_FILE_DIRECTORY, - .description = "Directory for FDL files." - }, - { .key = {NULL} }, -}; - -struct xlator_cbks cbks = { - .release = default_release, - .releasedir = default_releasedir, - .forget = default_forget, -}; diff --git a/xlators/experimental/fdl/src/fdl-tmpl.c.in b/xlators/experimental/fdl/src/fdl-tmpl.c.in new file mode 100644 index 00000000000..7388b83e0bc --- /dev/null +++ b/xlators/experimental/fdl/src/fdl-tmpl.c.in @@ -0,0 +1,512 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <fcntl.h> +#include <unistd.h> +#include <sys/mman.h> +#include "call-stub.h" +#include "iatt.h" +#include "defaults.h" +#include "syscall.h" +#include "xlator.h" +#include "fdl.h" + +/* TBD: make tunable */ +#define META_FILE_SIZE (1 << 20) +#define DATA_FILE_SIZE (1 << 24) + +enum gf_fdl { gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1, gf_fdl_mt_end }; + +typedef struct { + char *type; + off_t size; + char *path; + int fd; + void *ptr; + off_t max_offset; +} log_obj_t; + +typedef struct { + struct list_head reqs; + pthread_mutex_t req_lock; + pthread_cond_t req_cond; + char *log_dir; + pthread_t worker; + gf_boolean_t should_stop; + gf_boolean_t change_term; + log_obj_t meta_log; + log_obj_t data_log; + int term; + int first_term; +} fdl_private_t; + +int32_t +fdl_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata); + +void +fdl_enqueue(xlator_t *this, call_stub_t *stub) +{ + fdl_private_t *priv = this->private; + + pthread_mutex_lock(&priv->req_lock); + list_add_tail(&stub->list, &priv->reqs); + pthread_mutex_unlock(&priv->req_lock); + + pthread_cond_signal(&priv->req_cond); +} + +#pragma generate + +char * +fdl_open_term_log(xlator_t *this, log_obj_t *obj, int term) +{ + fdl_private_t *priv = this->private; + int ret; + char *ptr = NULL; + + /* + * Use .jnl instead of .log so that we don't get test info (mistakenly) + * appended to our journal files. + */ + if (this->ctx->cmd_args.log_ident) { + ret = gf_asprintf(&obj->path, "%s/%s-%s-%d.jnl", priv->log_dir, + this->ctx->cmd_args.log_ident, obj->type, term); + } else { + ret = gf_asprintf(&obj->path, "%s/fubar-%s-%d.jnl", priv->log_dir, + obj->type, term); + } + if ((ret <= 0) || !obj->path) { + gf_log(this->name, GF_LOG_ERROR, "failed to construct log-file path"); + goto err; + } + + gf_log(this->name, GF_LOG_INFO, "opening %s (size %ld)", obj->path, + obj->size); + + obj->fd = open(obj->path, O_RDWR | O_CREAT | O_TRUNC, 0666); + if (obj->fd < 0) { + gf_log(this->name, GF_LOG_ERROR, "failed to open log file (%s)", + strerror(errno)); + goto err; + } + +#if !defined(GF_BSD_HOST_OS) + /* + * NetBSD can just go die in a fire. Even though it claims to support + * fallocate/posix_fallocate they don't actually *do* anything so the + * file size remains zero. Then mmap succeeds anyway, but any access + * to the mmap'ed region will segfault. It would be acceptable for + * fallocate to do what it says, for mmap to fail, or for access to + * extend the file. NetBSD managed to hit the trifecta of Getting + * Everything Wrong, and debugging in that environment to get this far + * has already been painful enough (systems I worked on in 1990 were + * better that way). We'll fall through to the lseek/write method, and + * performance will be worse, and TOO BAD. + */ + if (sys_fallocate(obj->fd, 0, 0, obj->size) < 0) +#endif + { + gf_log(this->name, GF_LOG_WARNING, + "failed to fallocate space for log file"); + /* Have to do this the ugly page-faulty way. */ + (void)sys_lseek(obj->fd, obj->size - 1, SEEK_SET); + (void)sys_write(obj->fd, "", 1); + } + + ptr = mmap(NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0); + if (ptr == MAP_FAILED) { + gf_log(this->name, GF_LOG_ERROR, "failed to mmap log (%s)", + strerror(errno)); + goto err; + } + + obj->ptr = ptr; + obj->max_offset = 0; + return ptr; + +err: + if (obj->fd >= 0) { + sys_close(obj->fd); + obj->fd = (-1); + } + if (obj->path) { + GF_FREE(obj->path); + obj->path = NULL; + } + return ptr; +} + +void +fdl_close_term_log(xlator_t *this, log_obj_t *obj) +{ + fdl_private_t *priv = this->private; + + if (obj->ptr) { + (void)munmap(obj->ptr, obj->size); + obj->ptr = NULL; + } + + if (obj->fd >= 0) { + gf_log(this->name, GF_LOG_INFO, "truncating term %d %s journal to %ld", + priv->term, obj->type, obj->max_offset); + if (sys_ftruncate(obj->fd, obj->max_offset) < 0) { + gf_log(this->name, GF_LOG_WARNING, + "failed to truncate journal (%s)", strerror(errno)); + } + sys_close(obj->fd); + obj->fd = (-1); + } + + if (obj->path) { + GF_FREE(obj->path); + obj->path = NULL; + } +} + +gf_boolean_t +fdl_change_term(xlator_t *this, char **meta_ptr, char **data_ptr) +{ + fdl_private_t *priv = this->private; + + fdl_close_term_log(this, &priv->meta_log); + fdl_close_term_log(this, &priv->data_log); + + ++(priv->term); + + *meta_ptr = fdl_open_term_log(this, &priv->meta_log, priv->term); + if (!*meta_ptr) { + return _gf_false; + } + + *data_ptr = fdl_open_term_log(this, &priv->data_log, priv->term); + if (!*data_ptr) { + return _gf_false; + } + + return _gf_true; +} + +void * +fdl_worker(void *arg) +{ + xlator_t *this = arg; + fdl_private_t *priv = this->private; + call_stub_t *stub; + char *meta_ptr = NULL; + off_t *meta_offset = &priv->meta_log.max_offset; + char *data_ptr = NULL; + off_t *data_offset = &priv->data_log.max_offset; + unsigned long base_as_ul; + void *msync_ptr; + size_t msync_len; + gf_boolean_t recycle; + void *err_label = &&err_unlocked; + + priv->meta_log.type = "meta"; + priv->meta_log.size = META_FILE_SIZE; + priv->meta_log.path = NULL; + priv->meta_log.fd = (-1); + priv->meta_log.ptr = NULL; + + priv->data_log.type = "data"; + priv->data_log.size = DATA_FILE_SIZE; + priv->data_log.path = NULL; + priv->data_log.fd = (-1); + priv->data_log.ptr = NULL; + + /* TBD: initial term should come from persistent storage (e.g. etcd) */ + priv->first_term = ++(priv->term); + meta_ptr = fdl_open_term_log(this, &priv->meta_log, priv->term); + if (!meta_ptr) { + goto *err_label; + } + data_ptr = fdl_open_term_log(this, &priv->data_log, priv->term); + if (!data_ptr) { + fdl_close_term_log(this, &priv->meta_log); + goto *err_label; + } + + for (;;) { + pthread_mutex_lock(&priv->req_lock); + err_label = &&err_locked; + while (list_empty(&priv->reqs)) { + pthread_cond_wait(&priv->req_cond, &priv->req_lock); + if (priv->should_stop) { + goto *err_label; + } + if (priv->change_term) { + if (!fdl_change_term(this, &meta_ptr, &data_ptr)) { + goto *err_label; + } + priv->change_term = _gf_false; + continue; + } + } + stub = list_entry(priv->reqs.next, call_stub_t, list); + list_del_init(&stub->list); + pthread_mutex_unlock(&priv->req_lock); + err_label = &&err_unlocked; + /* + * TBD: batch requests + * + * What we should do here is gather up *all* of the requests + * that have accumulated since we were last at this point, + * blast them all out in one big writev, and then dispatch them + * all before coming back for more. That maximizes throughput, + * at some cost to latency (due to queuing effects at the log + * stage). Note that we're likely to be above io-threads, so + * the dispatch itself will be parallelized (at further cost to + * latency). For now, we just do the simplest thing and handle + * one request all the way through before fetching the next. + * + * So, why mmap/msync instead of writev/fdatasync? Because it's + * faster. Much faster. So much faster that I half-suspect + * cheating, but it's more convenient for now than having to + * ensure that everything's page-aligned for O_DIRECT (the only + * alternative that still might avoid ridiculous levels of + * local-FS overhead). + * + * TBD: check that msync really does get our data to disk. + */ + gf_log(this->name, GF_LOG_DEBUG, "logging %u+%u bytes for op %d", + stub->jnl_meta_len, stub->jnl_data_len, stub->fop); + recycle = _gf_false; + if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) { + recycle = _gf_true; + } + if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) { + recycle = _gf_true; + } + if (recycle && !fdl_change_term(this, &meta_ptr, &data_ptr)) { + goto *err_label; + } + meta_ptr = priv->meta_log.ptr; + data_ptr = priv->data_log.ptr; + gf_log(this->name, GF_LOG_DEBUG, "serializing to %p/%p", + meta_ptr + *meta_offset, data_ptr + *data_offset); + stub->serialize(stub, meta_ptr + *meta_offset, data_ptr + *data_offset); + if (stub->jnl_meta_len > 0) { + base_as_ul = (unsigned long)(meta_ptr + *meta_offset); + msync_ptr = (void *)(base_as_ul & ~0x0fff); + msync_len = (size_t)(base_as_ul & 0x0fff); + if (msync(msync_ptr, msync_len + stub->jnl_meta_len, MS_SYNC) < 0) { + gf_log(this->name, GF_LOG_WARNING, + "failed to log request meta (%s)", strerror(errno)); + } + *meta_offset += stub->jnl_meta_len; + } + if (stub->jnl_data_len > 0) { + base_as_ul = (unsigned long)(data_ptr + *data_offset); + msync_ptr = (void *)(base_as_ul & ~0x0fff); + msync_len = (size_t)(base_as_ul & 0x0fff); + if (msync(msync_ptr, msync_len + stub->jnl_data_len, MS_SYNC) < 0) { + gf_log(this->name, GF_LOG_WARNING, + "failed to log request data (%s)", strerror(errno)); + } + *data_offset += stub->jnl_data_len; + } + call_resume(stub); + } + +err_locked: + pthread_mutex_unlock(&priv->req_lock); +err_unlocked: + fdl_close_term_log(this, &priv->meta_log); + fdl_close_term_log(this, &priv->data_log); + return NULL; +} + +int32_t +fdl_ipc_continue(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +{ + /* + * Nothing to be done here. Just Unwind. * + */ + STACK_UNWIND_STRICT(ipc, frame, 0, 0, xdata); + + return 0; +} + +int32_t +fdl_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +{ + call_stub_t *stub; + fdl_private_t *priv = this->private; + dict_t *tdict; + int32_t gt_err = EIO; + + switch (op) { + case FDL_IPC_CHANGE_TERM: + gf_log(this->name, GF_LOG_INFO, "got CHANGE_TERM op"); + priv->change_term = _gf_true; + pthread_cond_signal(&priv->req_cond); + STACK_UNWIND_STRICT(ipc, frame, 0, 0, NULL); + break; + + case FDL_IPC_GET_TERMS: + gf_log(this->name, GF_LOG_INFO, "got GET_TERMS op"); + tdict = dict_new(); + if (!tdict) { + gt_err = ENOMEM; + goto gt_done; + } + if (dict_set_int32(tdict, "first", priv->first_term) != 0) { + goto gt_done; + } + if (dict_set_int32(tdict, "last", priv->term) != 0) { + goto gt_done; + } + gt_err = 0; + gt_done: + if (gt_err) { + STACK_UNWIND_STRICT(ipc, frame, -1, gt_err, NULL); + } else { + STACK_UNWIND_STRICT(ipc, frame, 0, 0, tdict); + } + if (tdict) { + dict_unref(tdict); + } + break; + + case FDL_IPC_JBR_SERVER_ROLLBACK: + /* + * In case of a rollback from jbr-server, dump * + * the term and index number in the journal, * + * which will later be used to rollback the fop * + */ + stub = fop_ipc_stub(frame, fdl_ipc_continue, op, xdata); + fdl_len_ipc(stub); + stub->serialize = fdl_serialize_ipc; + fdl_enqueue(this, stub); + + break; + + default: + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ipc, op, xdata); + } + + return 0; +} + +int +fdl_init(xlator_t *this) +{ + fdl_private_t *priv = NULL; + + priv = GF_CALLOC(1, sizeof(*priv), gf_fdl_mt_fdl_private_t); + if (!priv) { + gf_log(this->name, GF_LOG_ERROR, "failed to allocate fdl_private"); + goto err; + } + + INIT_LIST_HEAD(&priv->reqs); + if (pthread_mutex_init(&priv->req_lock, NULL) != 0) { + gf_log(this->name, GF_LOG_ERROR, "failed to initialize req_lock"); + goto err; + } + if (pthread_cond_init(&priv->req_cond, NULL) != 0) { + gf_log(this->name, GF_LOG_ERROR, "failed to initialize req_cond"); + goto err; + } + + GF_OPTION_INIT("log-path", priv->log_dir, path, err); + + this->private = priv; + /* + * The rest of the fop table is automatically generated, so this is a + * bit cleaner than messing with the generation to add a hand-written + * exception. + */ + + if (gf_thread_create(&priv->worker, NULL, fdl_worker, this, "fdlwrker") != + 0) { + gf_log(this->name, GF_LOG_ERROR, "failed to start fdl_worker"); + goto err; + } + + return 0; + +err: + if (priv) { + GF_FREE(priv); + } + return -1; +} + +void +fdl_fini(xlator_t *this) +{ + fdl_private_t *priv = this->private; + + if (priv) { + priv->should_stop = _gf_true; + pthread_cond_signal(&priv->req_cond); + pthread_join(priv->worker, NULL); + GF_FREE(priv); + } +} + +int +fdl_reconfigure(xlator_t *this, dict_t *options) +{ + fdl_private_t *priv = this->private; + + GF_OPTION_RECONF("log_dir", priv->log_dir, options, path, out); + /* TBD: react if it changed */ + +out: + return 0; +} + +int32_t +mem_acct_init(xlator_t *this) +{ + int ret = -1; + + GF_VALIDATE_OR_GOTO("fdl", this, out); + + ret = xlator_mem_acct_init(this, gf_fdl_mt_end + 1); + + if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "Memory accounting init" + "failed"); + return ret; + } +out: + return ret; +} + +class_methods_t class_methods = { + .init = fdl_init, + .fini = fdl_fini, + .reconfigure = fdl_reconfigure, + .notify = default_notify, +}; + +struct volume_options options[] = { + {.key = {"log-path"}, + .type = GF_OPTION_TYPE_PATH, + .default_value = DEFAULT_LOG_FILE_DIRECTORY, + .description = "Directory for FDL files."}, + {.key = {NULL}}, +}; + +struct xlator_cbks cbks = { + .release = default_release, + .releasedir = default_releasedir, + .forget = default_forget, +}; diff --git a/xlators/experimental/fdl/src/recon-tmpl.c b/xlators/experimental/fdl/src/recon-tmpl.c deleted file mode 100644 index 228860401ae..00000000000 --- a/xlators/experimental/fdl/src/recon-tmpl.c +++ /dev/null @@ -1,304 +0,0 @@ -#pragma fragment PROLOG -#ifndef _CONFIG_H -#define _CONFIG_H -#include "config.h" -#endif - -#include "glusterfs.h" -#include "iatt.h" -#include "syncop.h" -#include "xlator.h" -#include "glfs-internal.h" - -#include "fdl.h" - -#define GFAPI_SUCCESS 0 - -inode_t * -recon_get_inode (glfs_t *fs, uuid_t gfid) -{ - inode_t *inode; - loc_t loc = {NULL,}; - struct iatt iatt; - int ret; - inode_t *newinode; - - inode = inode_find (fs->active_subvol->itable, gfid); - if (inode) { - printf ("=== FOUND %s IN TABLE\n", uuid_utoa(gfid)); - return inode; - } - - loc.inode = inode_new (fs->active_subvol->itable); - if (!loc.inode) { - return NULL; - } - gf_uuid_copy (loc.inode->gfid, gfid); - gf_uuid_copy (loc.gfid, gfid); - - printf ("=== DOING LOOKUP FOR %s\n", uuid_utoa(gfid)); - - ret = syncop_lookup (fs->active_subvol, &loc, &iatt, - NULL, NULL, NULL); - if (ret != GFAPI_SUCCESS) { - fprintf (stderr, "syncop_lookup failed (%d)\n", ret); - return NULL; - } - - newinode = inode_link (loc.inode, NULL, NULL, &iatt); - if (newinode) { - inode_lookup (newinode); - } - - return newinode; -} - -#pragma fragment DICT - dict_t *@ARGNAME@; - - @ARGNAME@ = dict_new(); - if (!@ARGNAME@) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - - { - int key_len, data_len; - char *key_ptr; - int garbage; - for (;;) { - key_len = *((int *)new_meta); - new_meta += sizeof(int); - if (!key_len) { - break; - } - key_ptr = new_meta; - new_meta += key_len; - data_len = *((int *)new_meta); - new_meta += sizeof(int); - garbage = dict_set_static_bin (@ARGNAME@, key_ptr, - new_meta, data_len); - /* TBD: check error from dict_set_static_bin */ - (void)garbage; - new_meta += data_len; - } - } - -#pragma fragment DICT_CLEANUP -cleanup_@ARGNAME@: - dict_unref (@ARGNAME@); - -#pragma fragment DOUBLE - @ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta); - new_meta += sizeof(uint64_t); - -#pragma fragment FD - inode_t *@ARGNAME@_ino; - fd_t *@ARGNAME@; - - @ARGNAME@_ino = recon_get_inode (fs, *((uuid_t *)new_meta)); - new_meta += 16; - if (!@ARGNAME@_ino) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@_ino; - - @ARGNAME@ = fd_anonymous (@ARGNAME@_ino); - if (!@ARGNAME@) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - -#pragma fragment FD_CLEANUP -cleanup_@ARGNAME@: - fd_unref (@ARGNAME@); -cleanup_@ARGNAME@_ino: - inode_unref (@ARGNAME@_ino); - -#pragma fragment NEW_FD - /* - * This pseudo-type is only used for create, and in that case we know - * we'll be using loc.inode, so it's not worth generalizing to take an - * extra argument. - */ - fd_t *@ARGNAME@ = fd_anonymous (loc.inode); - - if (!fd) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - new_meta += 16; - -#pragma fragment NEW_FD_CLEANUP -cleanup_@ARGNAME@: - fd_unref (@ARGNAME@); - -#pragma fragment INTEGER - @ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta); - - new_meta += sizeof(@ARGTYPE@); - -#pragma fragment LOC - loc_t @ARGNAME@ = { NULL, }; - - @ARGNAME@.inode = recon_get_inode (fs, *((uuid_t *)new_meta)); - if (!@ARGNAME@.inode) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - gf_uuid_copy (@ARGNAME@.gfid, @ARGNAME@.inode->gfid); - new_meta += 16; - new_meta += 16; /* skip over pargfid */ - if (*(new_meta++)) { - @ARGNAME@.name = new_meta; - new_meta += strlen(new_meta) + 1; - } - -#pragma fragment LOC_CLEANUP -cleanup_@ARGNAME@: - loc_wipe (&@ARGNAME@); - -#pragma fragment PARENT_LOC - loc_t @ARGNAME@ = { NULL, }; - - new_meta += 16; /* skip over gfid */ - @ARGNAME@.parent = recon_get_inode (fs, *((uuid_t *)new_meta)); - if (!@ARGNAME@.parent) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - gf_uuid_copy (@ARGNAME@.pargfid, @ARGNAME@.parent->gfid); - new_meta += 16; - if (!*(new_meta++)) { - goto *err_label; - } - @ARGNAME@.name = new_meta; - new_meta += strlen(new_meta) + 1; - - @ARGNAME@.inode = inode_new (fs->active_subvol->itable); - if (!@ARGNAME@.inode) { - goto *err_label; - } - -#pragma fragment PARENT_LOC_CLEANUP -cleanup_@ARGNAME@: - loc_wipe (&@ARGNAME@); - -#pragma fragment STRING - char *@ARGNAME@; - if (*(new_meta++)) { - @ARGNAME@ = new_meta; - new_meta += (strlen(new_meta) + 1); - } - else { - goto *err_label; - } - -#pragma fragment VECTOR - struct iovec @ARGNAME@; - - @ARGNAME@.iov_len = *((size_t *)new_meta); - new_meta += sizeof(@ARGNAME@.iov_len); - @ARGNAME@.iov_base = new_data; - new_data += @ARGNAME@.iov_len; - -#pragma fragment IATT - struct iatt @ARGNAME@; - { - @ARGNAME@.ia_prot = *((ia_prot_t *)new_meta); - new_meta += sizeof(ia_prot_t); - uint32_t *myints = (uint32_t *)new_meta; - @ARGNAME@.ia_uid = myints[0]; - @ARGNAME@.ia_gid = myints[1]; - @ARGNAME@.ia_atime = myints[2]; - @ARGNAME@.ia_atime_nsec = myints[3]; - @ARGNAME@.ia_mtime = myints[4]; - @ARGNAME@.ia_mtime_nsec = myints[5]; - new_meta += sizeof(*myints) * 6; - } - -#pragma fragment IOBREF - struct iobref *@ARGNAME@; - - @ARGNAME@ = iobref_new(); - if (!@ARGNAME@) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - -#pragma fragment IOBREF_CLEANUP -cleanup_@ARGNAME@: - iobref_unref (@ARGNAME@); - -#pragma fragment LINK - /* TBD: check error */ - inode_t *new_inode = inode_link (@INODE_ARG@, NULL, NULL, @IATT_ARG@); - if (new_inode) { - inode_lookup (new_inode); - } - -#pragma fragment FOP -int -fdl_replay_@NAME@ (glfs_t *fs, char **old_meta, char **old_data) -{ - char *new_meta = *old_meta; - char *new_data = *old_data; - int ret; - int status = 0xbad; - void *err_label = &&done; - -@FUNCTION_BODY@ - - ret = syncop_@NAME@ (fs->active_subvol, @SYNCOP_ARGS@, NULL); - if (ret != @SUCCESS_VALUE@) { - fprintf (stderr, "syncop_@NAME@ returned %d", ret); - goto *err_label; - } - -@LINKS@ - - status = 0; - -@CLEANUPS@ - -done: - *old_meta = new_meta; - *old_data = new_data; - return status; -} - -#pragma fragment CASE - case GF_FOP_@UPNAME@: - printf ("=== GF_FOP_@UPNAME@\n"); - if (fdl_replay_@NAME@ (fs, &new_meta, &new_data) != 0) { - goto done; - } - recognized = 1; - break; - -#pragma fragment EPILOG -int -recon_execute (glfs_t *fs, char **old_meta, char **old_data) -{ - char *new_meta = *old_meta; - char *new_data = *old_data; - int recognized = 0; - event_header_t *eh; - - eh = (event_header_t *)new_meta; - new_meta += sizeof (*eh); - - /* TBD: check event_type instead of assuming NEW_REQUEST */ - - switch (eh->fop_type) { -@SWITCH_BODY@ - - default: - printf ("unknown fop %u\n", eh->fop_type); - } - -done: - *old_meta = new_meta; - *old_data = new_data; - return recognized; -} diff --git a/xlators/experimental/fdl/src/recon-tmpl.c.in b/xlators/experimental/fdl/src/recon-tmpl.c.in new file mode 100644 index 00000000000..5115dfd5c75 --- /dev/null +++ b/xlators/experimental/fdl/src/recon-tmpl.c.in @@ -0,0 +1,297 @@ +#pragma fragment PROLOG +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "iatt.h" +#include "syncop.h" +#include "xlator.h" +#include "glfs-internal.h" + +#include "fdl.h" + +#define GFAPI_SUCCESS 0 + +inode_t * +recon_get_inode(glfs_t *fs, uuid_t gfid) +{ + inode_t *inode; + loc_t loc = { + NULL, + }; + struct iatt iatt; + int ret; + inode_t *newinode; + + inode = inode_find(fs->active_subvol->itable, gfid); + if (inode) { + printf("=== FOUND %s IN TABLE\n", uuid_utoa(gfid)); + return inode; + } + + loc.inode = inode_new(fs->active_subvol->itable); + if (!loc.inode) { + return NULL; + } + gf_uuid_copy(loc.inode->gfid, gfid); + gf_uuid_copy(loc.gfid, gfid); + + printf("=== DOING LOOKUP FOR %s\n", uuid_utoa(gfid)); + + ret = syncop_lookup(fs->active_subvol, &loc, &iatt, NULL, NULL, NULL); + if (ret != GFAPI_SUCCESS) { + fprintf(stderr, "syncop_lookup failed (%d)\n", ret); + return NULL; + } + + newinode = inode_link(loc.inode, NULL, NULL, &iatt); + if (newinode) { + inode_lookup(newinode); + } + + return newinode; +} + +#pragma fragment DICT +dict_t *@ARGNAME@; + +@ARGNAME@ = dict_new(); +if (!@ARGNAME@) { + goto *err_label; +} +err_label = &&cleanup_@ARGNAME@; + +{ + int key_len, data_len; + char *key_ptr; + int garbage; + for (;;) { + key_len = *((int *)new_meta); + new_meta += sizeof(int); + if (!key_len) { + break; + } + key_ptr = new_meta; + new_meta += key_len; + data_len = *((int *)new_meta); + new_meta += sizeof(int); + garbage = dict_set_static_bin(@ARGNAME@, key_ptr, new_meta, data_len); + /* TBD: check error from dict_set_static_bin */ + (void)garbage; + new_meta += data_len; + } +} + +#pragma fragment DICT_CLEANUP +cleanup_@ARGNAME@ : dict_unref(@ARGNAME@); + +#pragma fragment DOUBLE +@ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta); +new_meta += sizeof(uint64_t); + +#pragma fragment FD +inode_t *@ARGNAME@_ino; +fd_t *@ARGNAME@; + +@ARGNAME@_ino = recon_get_inode(fs, *((uuid_t *)new_meta)); +new_meta += 16; +if (!@ARGNAME@_ino) { + goto *err_label; +} +err_label = &&cleanup_@ARGNAME@_ino; + +@ARGNAME@ = fd_anonymous(@ARGNAME@_ino); +if (!@ARGNAME@) { + goto *err_label; +} +err_label = &&cleanup_@ARGNAME@; + +#pragma fragment FD_CLEANUP +cleanup_@ARGNAME@ : fd_unref(@ARGNAME@); +cleanup_@ARGNAME@_ino : inode_unref(@ARGNAME@_ino); + +#pragma fragment NEW_FD +/* + * This pseudo-type is only used for create, and in that case we know + * we'll be using loc.inode, so it's not worth generalizing to take an + * extra argument. + */ +fd_t *@ARGNAME@ = fd_anonymous(loc.inode); + +if (!fd) { + goto *err_label; +} +err_label = &&cleanup_@ARGNAME@; +new_meta += 16; + +#pragma fragment NEW_FD_CLEANUP +cleanup_@ARGNAME@ : fd_unref(@ARGNAME@); + +#pragma fragment INTEGER +@ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta); + +new_meta += sizeof(@ARGTYPE@); + +#pragma fragment LOC +loc_t @ARGNAME@ = { + NULL, +}; + +@ARGNAME@.inode = recon_get_inode(fs, *((uuid_t *)new_meta)); +if (!@ARGNAME@.inode) { + goto *err_label; +} +err_label = &&cleanup_@ARGNAME@; +gf_uuid_copy(@ARGNAME@.gfid, @ARGNAME@.inode->gfid); +new_meta += 16; +new_meta += 16; /* skip over pargfid */ +if (*(new_meta++)) { + @ARGNAME@.name = new_meta; + new_meta += strlen(new_meta) + 1; +} + +#pragma fragment LOC_CLEANUP +cleanup_@ARGNAME@ : loc_wipe(&@ARGNAME@); + +#pragma fragment PARENT_LOC +loc_t @ARGNAME@ = { + NULL, +}; + +new_meta += 16; /* skip over gfid */ +@ARGNAME@.parent = recon_get_inode(fs, *((uuid_t *)new_meta)); +if (!@ARGNAME@.parent) { + goto *err_label; +} +err_label = &&cleanup_@ARGNAME@; +gf_uuid_copy(@ARGNAME@.pargfid, @ARGNAME@.parent->gfid); +new_meta += 16; +if (!*(new_meta++)) { + goto *err_label; +} +@ARGNAME@.name = new_meta; +new_meta += strlen(new_meta) + 1; + +@ARGNAME@.inode = inode_new(fs->active_subvol->itable); +if (!@ARGNAME@.inode) { + goto *err_label; +} + +#pragma fragment PARENT_LOC_CLEANUP +cleanup_@ARGNAME@ : loc_wipe(&@ARGNAME@); + +#pragma fragment STRING +char *@ARGNAME@; +if (*(new_meta++)) { + @ARGNAME@ = new_meta; + new_meta += (strlen(new_meta) + 1); +} else { + goto *err_label; +} + +#pragma fragment VECTOR +struct iovec @ARGNAME@; + +@ARGNAME@.iov_len = *((size_t *)new_meta); +new_meta += sizeof(@ARGNAME@.iov_len); +@ARGNAME@.iov_base = new_data; +new_data += @ARGNAME@.iov_len; + +#pragma fragment IATT +struct iatt @ARGNAME@; +{ + @ARGNAME@.ia_prot = *((ia_prot_t *)new_meta); + new_meta += sizeof(ia_prot_t); + uint32_t *myints = (uint32_t *)new_meta; + @ARGNAME@.ia_uid = myints[0]; + @ARGNAME@.ia_gid = myints[1]; + @ARGNAME@.ia_atime = myints[2]; + @ARGNAME@.ia_atime_nsec = myints[3]; + @ARGNAME@.ia_mtime = myints[4]; + @ARGNAME@.ia_mtime_nsec = myints[5]; + new_meta += sizeof(*myints) * 6; +} + +#pragma fragment IOBREF +struct iobref *@ARGNAME@; + +@ARGNAME@ = iobref_new(); +if (!@ARGNAME@) { + goto *err_label; +} +err_label = &&cleanup_@ARGNAME@; + +#pragma fragment IOBREF_CLEANUP +cleanup_@ARGNAME@ : iobref_unref(@ARGNAME@); + +#pragma fragment LINK +/* TBD: check error */ +inode_t *new_inode = inode_link(@INODE_ARG@, NULL, NULL, @IATT_ARG@); +if (new_inode) { + inode_lookup(new_inode); +} + +#pragma fragment FOP +int fdl_replay_@NAME@(glfs_t *fs, char **old_meta, char **old_data) +{ + char *new_meta = *old_meta; + char *new_data = *old_data; + int ret; + int status = 0xbad; + void *err_label = &&done; + + @FUNCTION_BODY@ + + ret = syncop_@NAME@(fs->active_subvol, @SYNCOP_ARGS@, NULL); + if (ret !=@SUCCESS_VALUE@) { + fprintf(stderr, "syncop_@NAME@ returned %d", ret); + goto *err_label; + } + + @LINKS@ + + status = 0; + + @CLEANUPS@ + + done : *old_meta = new_meta; + *old_data = new_data; + return status; +} + +#pragma fragment CASE +case GF_FOP_@UPNAME@: + printf("=== GF_FOP_@UPNAME@\n"); + if (fdl_replay_@NAME@(fs, &new_meta, &new_data) != 0) { + goto done; + } + recognized = 1; + break; + +#pragma fragment EPILOG + int + recon_execute(glfs_t *fs, char **old_meta, char **old_data) + { + char *new_meta = *old_meta; + char *new_data = *old_data; + int recognized = 0; + event_header_t *eh; + + eh = (event_header_t *)new_meta; + new_meta += sizeof(*eh); + + /* TBD: check event_type instead of assuming NEW_REQUEST */ + + switch (eh->fop_type) { + @SWITCH_BODY@ + + default : printf("unknown fop %u\n", eh->fop_type); + } + + done: + *old_meta = new_meta; + *old_data = new_data; + return recognized; + } diff --git a/xlators/experimental/jbr-client/src/Makefile.am b/xlators/experimental/jbr-client/src/Makefile.am index 15616d13798..c71f5ff1e83 100644 --- a/xlators/experimental/jbr-client/src/Makefile.am +++ b/xlators/experimental/jbr-client/src/Makefile.am @@ -22,7 +22,7 @@ AM_CFLAGS = -Wall $(GF_CFLAGS) JBRC_PREFIX = $(top_srcdir)/xlators/experimental/jbr-client/src JBRC_GEN_FOPS = $(JBRC_PREFIX)/gen-fops.py -JBRC_TEMPLATES = $(JBRC_PREFIX)/fop-template.c +JBRC_TEMPLATES = $(JBRC_PREFIX)/fop-template.c.in JBRC_WRAPPER = $(JBRC_PREFIX)/jbrc.c noinst_PYTHON = $(JBRC_GEN_FOPS) EXTRA_DIST = $(JBRC_TEMPLATES) $(JBRC_WRAPPER) diff --git a/xlators/experimental/jbr-client/src/fop-template.c b/xlators/experimental/jbr-client/src/fop-template.c deleted file mode 100644 index 7719f511f01..00000000000 --- a/xlators/experimental/jbr-client/src/fop-template.c +++ /dev/null @@ -1,113 +0,0 @@ -/* template-name fop */ -int32_t -jbrc_@NAME@ (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - jbrc_local_t *local = NULL; - xlator_t *target_xl = ACTIVE_CHILD(this); - - local = mem_get(this->local_pool); - if (!local) { - goto err; - } - - local->stub = fop_@NAME@_stub (frame, jbrc_@NAME@_continue, - @SHORT_ARGS@); - if (!local->stub) { - goto err; - } - local->curr_xl = target_xl; - local->scars = 0; - - frame->local = local; - STACK_WIND_COOKIE (frame, jbrc_@NAME@_cbk, target_xl, - target_xl, target_xl->fops->@NAME@, - @SHORT_ARGS@); - return 0; - -err: - if (local) { - mem_put(local); - } - STACK_UNWIND_STRICT (@NAME@, frame, -1, ENOMEM, - @ERROR_ARGS@); - return 0; -} - -/* template-name cbk */ -int32_t -jbrc_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - @LONG_ARGS@) -{ - jbrc_local_t *local = frame->local; - xlator_t *last_xl = cookie; - xlator_t *next_xl; - jbrc_private_t *priv = this->private; - struct timespec spec; - - if (op_ret != (-1)) { - if (local->scars) { - gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_RETRY_MSG, - HILITE("retried %p OK"), frame->local); - } - priv->active = last_xl; - goto unwind; - } - if ((op_errno != EREMOTE) && (op_errno != ENOTCONN)) { - goto unwind; - } - - /* TBD: get leader ID from xdata? */ - next_xl = next_xlator(this, last_xl); - /* - * We can't just give up after we've tried all bricks, because it's - * quite likely that a new leader election just hasn't finished yet. - * We also shouldn't retry endlessly, and especially not at a high - * rate, but that's good enough while we work on other things. - * - * TBD: implement slow/finite retry via a worker thread - */ - if (!next_xl || (local->scars >= SCAR_LIMIT)) { - gf_msg (this->name, GF_LOG_DEBUG, 0, J_MSG_RETRY_MSG, - HILITE("ran out of retries for %p"), frame->local); - goto unwind; - } - - local->curr_xl = next_xl; - local->scars += 1; - spec.tv_sec = 1; - spec.tv_nsec = 0; - /* - * WARNING - * - * Just calling gf_timer_call_after like this leaves open the - * possibility that writes will get reordered, if a first write is - * rescheduled and then a second comes along to find an updated - * priv->active before the first actually executes. We might need to - * implement a stricter (and more complicated) queuing mechanism to - * ensure absolute consistency in this case. - */ - if (gf_timer_call_after(this->ctx, spec, jbrc_retry_cb, local)) { - return 0; - } - -unwind: - call_stub_destroy(local->stub); - STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, - @SHORT_ARGS@); - return 0; -} - -/* template-name cont-func */ -int32_t -jbrc_@NAME@_continue (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - jbrc_local_t *local = frame->local; - - STACK_WIND_COOKIE (frame, jbrc_@NAME@_cbk, local->curr_xl, - local->curr_xl, local->curr_xl->fops->@NAME@, - @SHORT_ARGS@); - return 0; -} diff --git a/xlators/experimental/jbr-client/src/fop-template.c.in b/xlators/experimental/jbr-client/src/fop-template.c.in new file mode 100644 index 00000000000..9732badc794 --- /dev/null +++ b/xlators/experimental/jbr-client/src/fop-template.c.in @@ -0,0 +1,102 @@ +/* template-name fop */ +int32_t jbrc_@NAME@(call_frame_t *frame, xlator_t *this, @LONG_ARGS@) +{ + jbrc_local_t *local = NULL; + xlator_t *target_xl = ACTIVE_CHILD(this); + + local = mem_get(this->local_pool); + if (!local) { + goto err; + } + + local->stub = fop_@NAME@_stub(frame, jbrc_@NAME@_continue, @SHORT_ARGS@); + if (!local->stub) { + goto err; + } + local->curr_xl = target_xl; + local->scars = 0; + + frame->local = local; + STACK_WIND_COOKIE(frame, jbrc_@NAME@_cbk, target_xl, target_xl, + target_xl->fops->@NAME@, @SHORT_ARGS@); + return 0; + +err: + if (local) { + mem_put(local); + } + STACK_UNWIND_STRICT(@NAME@, frame, -1, ENOMEM, @ERROR_ARGS@); + return 0; +} + +/* template-name cbk */ +int32_t jbrc_@NAME@_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, @LONG_ARGS@) +{ + jbrc_local_t *local = frame->local; + xlator_t *last_xl = cookie; + xlator_t *next_xl; + jbrc_private_t *priv = this->private; + struct timespec spec; + + if (op_ret != (-1)) { + if (local->scars) { + gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_RETRY_MSG, + HILITE("retried %p OK"), frame->local); + } + priv->active = last_xl; + goto unwind; + } + if ((op_errno != EREMOTE) && (op_errno != ENOTCONN)) { + goto unwind; + } + + /* TBD: get leader ID from xdata? */ + next_xl = next_xlator(this, last_xl); + /* + * We can't just give up after we've tried all bricks, because it's + * quite likely that a new leader election just hasn't finished yet. + * We also shouldn't retry endlessly, and especially not at a high + * rate, but that's good enough while we work on other things. + * + * TBD: implement slow/finite retry via a worker thread + */ + if (!next_xl || (local->scars >= SCAR_LIMIT)) { + gf_msg(this->name, GF_LOG_DEBUG, 0, J_MSG_RETRY_MSG, + HILITE("ran out of retries for %p"), frame->local); + goto unwind; + } + + local->curr_xl = next_xl; + local->scars += 1; + spec.tv_sec = 1; + spec.tv_nsec = 0; + /* + * WARNING + * + * Just calling gf_timer_call_after like this leaves open the + * possibility that writes will get reordered, if a first write is + * rescheduled and then a second comes along to find an updated + * priv->active before the first actually executes. We might need to + * implement a stricter (and more complicated) queuing mechanism to + * ensure absolute consistency in this case. + */ + if (gf_timer_call_after(this->ctx, spec, jbrc_retry_cb, local)) { + return 0; + } + +unwind: + call_stub_destroy(local->stub); + STACK_UNWIND_STRICT(@NAME@, frame, op_ret, op_errno, @SHORT_ARGS@); + return 0; +} + +/* template-name cont-func */ +int32_t jbrc_@NAME@_continue(call_frame_t *frame, xlator_t *this, @LONG_ARGS@) +{ + jbrc_local_t *local = frame->local; + + STACK_WIND_COOKIE(frame, jbrc_@NAME@_cbk, local->curr_xl, local->curr_xl, + local->curr_xl->fops->@NAME@, @SHORT_ARGS@); + return 0; +} diff --git a/xlators/experimental/jbr-server/src/Makefile.am b/xlators/experimental/jbr-server/src/Makefile.am index b3ceb2d9eda..42d3c8a6c36 100644 --- a/xlators/experimental/jbr-server/src/Makefile.am +++ b/xlators/experimental/jbr-server/src/Makefile.am @@ -27,7 +27,7 @@ AM_CFLAGS = -Wall $(GF_CFLAGS) JBR_PREFIX = $(top_srcdir)/xlators/experimental/jbr-server/src JBR_GEN_FOPS = $(JBR_PREFIX)/gen-fops.py -JBR_TEMPLATES = $(JBR_PREFIX)/all-templates.c +JBR_TEMPLATES = $(JBR_PREFIX)/all-templates.c.in JBR_WRAPPER = $(JBR_PREFIX)/jbr.c noinst_PYTHON = $(JBR_GEN_FOPS) EXTRA_DIST = $(JBR_TEMPLATES) $(JBR_WRAPPER) diff --git a/xlators/experimental/jbr-server/src/all-templates.c b/xlators/experimental/jbr-server/src/all-templates.c deleted file mode 100644 index 530c4187571..00000000000 --- a/xlators/experimental/jbr-server/src/all-templates.c +++ /dev/null @@ -1,542 +0,0 @@ -/* - * You can put anything here - it doesn't even have to be a comment - and it - * will be ignored until we reach the first template-name comment. - */ - - -/* template-name read-fop */ -int32_t -jbr_@NAME@ (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - jbr_private_t *priv = NULL; - gf_boolean_t in_recon = _gf_false; - int32_t op_errno = 0; - int32_t recon_term, recon_index; - - GF_VALIDATE_OR_GOTO ("jbr", this, err); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, err); - GF_VALIDATE_OR_GOTO (this->name, frame, err); - - op_errno = EREMOTE; - - /* allow reads during reconciliation * - * TBD: allow "dirty" reads on non-leaders * - */ - if (xdata && - (dict_get_int32(xdata, RECON_TERM_XATTR, &recon_term) == 0) && - (dict_get_int32(xdata, RECON_INDEX_XATTR, &recon_index) == 0)) { - in_recon = _gf_true; - } - - if ((!priv->leader) && (in_recon == _gf_false)) { - goto err; - } - - STACK_WIND (frame, default_@NAME@_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, - @SHORT_ARGS@); - return 0; - -err: - STACK_UNWIND_STRICT (@NAME@, frame, -1, op_errno, - @ERROR_ARGS@); - return 0; -} - -/* template-name read-perform_local_op */ -/* No "perform_local_op" function needed for @NAME@ */ - -/* template-name read-dispatch */ -/* No "dispatch" function needed for @NAME@ */ - -/* template-name read-call_dispatch */ -/* No "call_dispatch" function needed for @NAME@ */ - -/* template-name read-fan-in */ -/* No "fan-in" function needed for @NAME@ */ - -/* template-name read-continue */ -/* No "continue" function needed for @NAME@ */ - -/* template-name read-complete */ -/* No "complete" function needed for @NAME@ */ - -/* template-name write-fop */ -int32_t -jbr_@NAME@ (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; - int32_t ret = -1; - int op_errno = ENOMEM; - - GF_VALIDATE_OR_GOTO ("jbr", this, err); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, err); - GF_VALIDATE_OR_GOTO (this->name, frame, err); - -#if defined(JBR_CG_NEED_FD) - ret = jbr_leader_checks_and_init (frame, this, &op_errno, xdata, fd); -#else - ret = jbr_leader_checks_and_init (frame, this, &op_errno, xdata, NULL); -#endif - if (ret) - goto err; - - local = frame->local; - - /* - * If we let it through despite not being the leader, then we just want - * to pass it on down without all of the additional xattrs, queuing, and - * so on. However, jbr_*_complete does depend on the initialization - * immediately above this. - */ - if (!priv->leader) { - STACK_WIND (frame, jbr_@NAME@_complete, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, - @SHORT_ARGS@); - return 0; - } - - ret = jbr_initialize_xdata_set_attrs (this, &xdata); - if (ret) - goto err; - - local->xdata = dict_ref(xdata); - local->stub = fop_@NAME@_stub (frame, jbr_@NAME@_continue, - @SHORT_ARGS@); - if (!local->stub) { - goto err; - } - - /* - * Can be used to just call_dispatch or be customised per fop to * - * perform ops specific to that particular fop. * - */ - ret = jbr_@NAME@_perform_local_op (frame, this, &op_errno, - @SHORT_ARGS@); - if (ret) - goto err; - - return ret; -err: - if (local) { - if (local->stub) { - call_stub_destroy(local->stub); - } - if (local->qstub) { - call_stub_destroy(local->qstub); - } - if (local->fd) { - fd_unref(local->fd); - } - mem_put(local); - } - STACK_UNWIND_STRICT (@NAME@, frame, -1, op_errno, - @ERROR_ARGS@); - return 0; -} - -/* template-name write-perform_local_op */ -int32_t -jbr_@NAME@_perform_local_op (call_frame_t *frame, xlator_t *this, int *op_errno, - @LONG_ARGS@) -{ - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, op_errno, out); - - ret = jbr_@NAME@_call_dispatch (frame, this, op_errno, - @SHORT_ARGS@); - -out: - return ret; -} - -/* template-name write-call_dispatch */ -int32_t -jbr_@NAME@_call_dispatch (call_frame_t *frame, xlator_t *this, int *op_errno, - @LONG_ARGS@) -{ - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, out); - GF_VALIDATE_OR_GOTO (this->name, op_errno, out); - -#if defined(JBR_CG_QUEUE) - jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode); - if (!ictx) { - *op_errno = EIO; - goto out; - } - - LOCK(&ictx->lock); - if (ictx->active) { - gf_msg_debug (this->name, 0, - "queuing request due to conflict"); - /* - * TBD: enqueue only for real conflict - * - * Currently we just act like all writes are in - * conflict with one another. What we should really do - * is check the active/pending queues and defer only if - * there's a conflict there. - * - * It's important to check the pending queue because we - * might have an active request X which conflicts with - * a pending request Y, and this request Z might - * conflict with Y but not X. If we checked only the - * active queue then Z could jump ahead of Y, which - * would be incorrect. - */ - local->qstub = fop_@NAME@_stub (frame, - jbr_@NAME@_dispatch, - @SHORT_ARGS@); - if (!local->qstub) { - UNLOCK(&ictx->lock); - goto out; - } - list_add_tail(&local->qlinks, &ictx->pqueue); - ++(ictx->pending); - UNLOCK(&ictx->lock); - ret = 0; - goto out; - } else { - list_add_tail(&local->qlinks, &ictx->aqueue); - ++(ictx->active); - } - UNLOCK(&ictx->lock); -#endif - ret = jbr_@NAME@_dispatch (frame, this, @SHORT_ARGS@); - -out: - return ret; -} - -/* template-name write-dispatch */ -int32_t -jbr_@NAME@_dispatch (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; - int32_t ret = -1; - xlator_list_t *trav; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, out); - - /* - * TBD: unblock pending request(s) if we fail after this point but - * before we get to jbr_@NAME@_complete (where that code currently - * resides). - */ - - local->call_count = priv->n_children - 1; - for (trav = this->children->next; trav; trav = trav->next) { - STACK_WIND (frame, jbr_@NAME@_fan_in, - trav->xlator, trav->xlator->fops->@NAME@, - @SHORT_ARGS@); - } - - /* TBD: variable Issue count */ - ret = 0; -out: - return ret; -} - -/* template-name write-fan-in */ -int32_t -jbr_@NAME@_fan_in (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - @LONG_ARGS@) -{ - jbr_local_t *local = NULL; - int32_t ret = -1; - uint8_t call_count; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, out); - - gf_msg_trace (this->name, 0, "op_ret = %d, op_errno = %d\n", - op_ret, op_errno); - - LOCK(&frame->lock); - call_count = --(local->call_count); - if (op_ret != -1) { - /* Increment the number of successful acks * - * received for the operation. * - */ - (local->successful_acks)++; - local->successful_op_ret = op_ret; - } - gf_msg_debug (this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n", - op_ret, op_errno, local->successful_acks); - UNLOCK(&frame->lock); - - /* TBD: variable Completion count */ - if (call_count == 0) { - call_resume(local->stub); - } - - ret = 0; -out: - return ret; -} - -/* template-name write-continue */ -int32_t -jbr_@NAME@_continue (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) -{ - int32_t ret = -1; - gf_boolean_t result = _gf_false; - jbr_local_t *local = NULL; - jbr_local_t *new_local = NULL; - jbr_private_t *priv = NULL; - int32_t op_errno = 0; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - priv = this->private; - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - GF_VALIDATE_OR_GOTO (this->name, local, out); - - /* Perform quorum check to see if the leader needs * - * to perform the operation. If the operation will not * - * meet quorum irrespective of the leader's result * - * there is no point in the leader performing the fop * - */ - result = fop_quorum_check (this, (double)priv->n_children, - (double)local->successful_acks + 1); - if (result == _gf_false) { - gf_msg (this->name, GF_LOG_ERROR, EROFS, - J_MSG_QUORUM_NOT_MET, "Didn't receive enough acks " - "to meet quorum. Failing the operation without trying " - "it on the leader."); - -#if defined(JBR_CG_QUEUE) - /* - * In case of a fop failure, before unwinding need to * - * remove it from queue * - */ - ret = jbr_remove_from_queue (frame, this); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_GENERIC, "Failed to remove from queue."); - } -#endif - - /* - * In this case, the quorum is not met on the followers * - * So the operation will not be performed on the leader * - * and a rollback will be sent via GF_FOP_IPC to all the * - * followers, where this particular fop's term and index * - * numbers will be journaled, and later used to rollback * - */ - call_frame_t *new_frame; - - new_frame = copy_frame (frame); - - if (new_frame) { - new_local = mem_get0(this->local_pool); - if (new_local) { - INIT_LIST_HEAD(&new_local->qlinks); - ret = dict_set_int32 (local->xdata, - "rollback-fop", - GF_FOP_@UPNAME@); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, - "failed to set rollback-fop"); - } else { - new_local->xdata = dict_ref(local->xdata); - new_frame->local = new_local; - jbr_ipc_call_dispatch (new_frame, - this, &op_errno, - FDL_IPC_JBR_SERVER_ROLLBACK, - new_local->xdata); - } - } else { - gf_log (this->name, GF_LOG_WARNING, - "Could not create local for new_frame"); - } - } else { - gf_log (this->name, GF_LOG_WARNING, - "Could not send rollback ipc"); - } - - STACK_UNWIND_STRICT (@NAME@, frame, -1, EROFS, - @ERROR_ARGS@); - } else { - STACK_WIND (frame, jbr_@NAME@_complete, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, - @SHORT_ARGS@); - } - -out: - return 0; -} - -/* template-name write-complete */ -int32_t -jbr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - @LONG_ARGS@) -{ - int32_t ret = -1; - gf_boolean_t result = _gf_false; - jbr_private_t *priv = NULL; - jbr_local_t *local = NULL; - jbr_local_t *new_local = NULL; - - GF_VALIDATE_OR_GOTO ("jbr", this, err); - GF_VALIDATE_OR_GOTO (this->name, frame, err); - priv = this->private; - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, priv, err); - GF_VALIDATE_OR_GOTO (this->name, local, err); - - /* If the fop failed on the leader, then reduce one successful ack - * before calculating the fop quorum - */ - LOCK(&frame->lock); - if (op_ret == -1) - (local->successful_acks)--; - UNLOCK(&frame->lock); - -#if defined(JBR_CG_QUEUE) - ret = jbr_remove_from_queue (frame, this); - if (ret) - goto err; -#endif - -#if defined(JBR_CG_FSYNC) - jbr_mark_fd_dirty(this, local); -#endif - -#if defined(JBR_CG_NEED_FD) - fd_unref(local->fd); -#endif - - /* After the leader completes the fop, a quorum check is * - * performed, taking into account the outcome of the fop * - * on the leader. Irrespective of the fop being successful * - * or failing on the leader, the result of the quorum will * - * determine if the overall fop is successful or not. For * - * example, a fop might have succeeded on every node except * - * the leader, in which case as quorum is being met, the fop * - * will be treated as a successful fop, even though it failed * - * on the leader. On follower nodes, no quorum check should * - * be done, and the result is returned to the leader as is. * - */ - if (priv->leader) { - result = fop_quorum_check (this, (double)priv->n_children, - (double)local->successful_acks + 1); - if (result == _gf_false) { - op_ret = -1; - op_errno = EROFS; - gf_msg (this->name, GF_LOG_ERROR, EROFS, - J_MSG_QUORUM_NOT_MET, "Quorum is not met. " - "The operation has failed."); - /* - * In this case, the quorum is not met after the * - * operation is performed on the leader. Hence a * - * rollback will be sent via GF_FOP_IPC to the leader * - * where this particular fop's term and index numbers * - * will be journaled, and later used to rollback. * - * The same will be done on all the followers * - */ - call_frame_t *new_frame; - - new_frame = copy_frame (frame); - if (new_frame) { - new_local = mem_get0(this->local_pool); - if (new_local) { - INIT_LIST_HEAD(&new_local->qlinks); - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, "op = %d", - new_frame->op); - ret = dict_set_int32 (local->xdata, - "rollback-fop", - GF_FOP_@UPNAME@); - if (ret) { - gf_msg (this->name, - GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, - "failed to set " - "rollback-fop"); - } else { - new_local->xdata = dict_ref (local->xdata); - new_frame->local = new_local; - /* - * Calling STACK_WIND instead * - * of jbr_ipc as it will not * - * unwind to the previous * - * translators like it will * - * in case of jbr_ipc. * - */ - STACK_WIND (new_frame, - jbr_ipc_complete, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ipc, - FDL_IPC_JBR_SERVER_ROLLBACK, - new_local->xdata); - } - } else { - gf_log (this->name, GF_LOG_WARNING, - "Could not create local " - "for new_frame"); - } - } else { - gf_log (this->name, GF_LOG_WARNING, - "Could not send rollback ipc"); - } - } else { -#if defined(JBR_CG_NEED_FD) - op_ret = local->successful_op_ret; -#else - op_ret = 0; -#endif - op_errno = 0; - gf_msg_debug (this->name, 0, - "Quorum has met. The operation has succeeded."); - } - } - - /* - * Unrefing the reference taken in jbr_@NAME@ () * - */ - dict_unref (local->xdata); - - STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, - @SHORT_ARGS@); - - - return 0; - -err: - STACK_UNWIND_STRICT (@NAME@, frame, -1, 0, - @SHORT_ARGS@); - - return 0; -} diff --git a/xlators/experimental/jbr-server/src/all-templates.c.in b/xlators/experimental/jbr-server/src/all-templates.c.in new file mode 100644 index 00000000000..a9d57fc646f --- /dev/null +++ b/xlators/experimental/jbr-server/src/all-templates.c.in @@ -0,0 +1,501 @@ +/* + * You can put anything here - it doesn't even have to be a comment - and it + * will be ignored until we reach the first template-name comment. + */ + +/* template-name read-fop */ +int32_t jbr_@NAME@(call_frame_t *frame, xlator_t *this, @LONG_ARGS@) +{ + jbr_private_t *priv = NULL; + gf_boolean_t in_recon = _gf_false; + int32_t op_errno = 0; + int32_t recon_term, recon_index; + + GF_VALIDATE_OR_GOTO("jbr", this, err); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, err); + GF_VALIDATE_OR_GOTO(this->name, frame, err); + + op_errno = EREMOTE; + + /* allow reads during reconciliation * + * TBD: allow "dirty" reads on non-leaders * + */ + if (xdata && (dict_get_int32(xdata, RECON_TERM_XATTR, &recon_term) == 0) && + (dict_get_int32(xdata, RECON_INDEX_XATTR, &recon_index) == 0)) { + in_recon = _gf_true; + } + + if ((!priv->leader) && (in_recon == _gf_false)) { + goto err; + } + + STACK_WIND(frame, default_@NAME@_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); + return 0; + +err: + STACK_UNWIND_STRICT(@NAME@, frame, -1, op_errno, @ERROR_ARGS@); + return 0; +} + +/* template-name read-perform_local_op */ +/* No "perform_local_op" function needed for @NAME@ */ + +/* template-name read-dispatch */ +/* No "dispatch" function needed for @NAME@ */ + +/* template-name read-call_dispatch */ +/* No "call_dispatch" function needed for @NAME@ */ + +/* template-name read-fan-in */ +/* No "fan-in" function needed for @NAME@ */ + +/* template-name read-continue */ +/* No "continue" function needed for @NAME@ */ + +/* template-name read-complete */ +/* No "complete" function needed for @NAME@ */ + +/* template-name write-fop */ +int32_t jbr_@NAME@(call_frame_t *frame, xlator_t *this, @LONG_ARGS@) +{ + jbr_local_t *local = NULL; + jbr_private_t *priv = NULL; + int32_t ret = -1; + int op_errno = ENOMEM; + + GF_VALIDATE_OR_GOTO("jbr", this, err); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, err); + GF_VALIDATE_OR_GOTO(this->name, frame, err); + +#if defined(JBR_CG_NEED_FD) + ret = jbr_leader_checks_and_init(frame, this, &op_errno, xdata, fd); +#else + ret = jbr_leader_checks_and_init(frame, this, &op_errno, xdata, NULL); +#endif + if (ret) + goto err; + + local = frame->local; + + /* + * If we let it through despite not being the leader, then we just want + * to pass it on down without all of the additional xattrs, queuing, and + * so on. However, jbr_*_complete does depend on the initialization + * immediately above this. + */ + if (!priv->leader) { + STACK_WIND(frame, jbr_@NAME@_complete, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); + return 0; + } + + ret = jbr_initialize_xdata_set_attrs(this, &xdata); + if (ret) + goto err; + + local->xdata = dict_ref(xdata); + local->stub = fop_@NAME@_stub(frame, jbr_@NAME@_continue, @SHORT_ARGS@); + if (!local->stub) { + goto err; + } + + /* + * Can be used to just call_dispatch or be customised per fop to * + * perform ops specific to that particular fop. * + */ + ret = jbr_@NAME@_perform_local_op(frame, this, &op_errno, @SHORT_ARGS@); + if (ret) + goto err; + + return ret; +err: + if (local) { + if (local->stub) { + call_stub_destroy(local->stub); + } + if (local->qstub) { + call_stub_destroy(local->qstub); + } + if (local->fd) { + fd_unref(local->fd); + } + mem_put(local); + } + STACK_UNWIND_STRICT(@NAME@, frame, -1, op_errno, @ERROR_ARGS@); + return 0; +} + +/* template-name write-perform_local_op */ +int32_t jbr_@NAME@_perform_local_op(call_frame_t *frame, xlator_t *this, + int *op_errno, @LONG_ARGS@) +{ + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("jbr", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + + ret = jbr_@NAME@_call_dispatch(frame, this, op_errno, @SHORT_ARGS@); + +out: + return ret; +} + +/* template-name write-call_dispatch */ +int32_t jbr_@NAME@_call_dispatch(call_frame_t *frame, xlator_t *this, + int *op_errno, @LONG_ARGS@) +{ + jbr_local_t *local = NULL; + jbr_private_t *priv = NULL; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("jbr", this, out); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + local = frame->local; + GF_VALIDATE_OR_GOTO(this->name, local, out); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + +#if defined(JBR_CG_QUEUE) + jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode); + if (!ictx) { + *op_errno = EIO; + goto out; + } + + LOCK(&ictx->lock); + if (ictx->active) { + gf_msg_debug(this->name, 0, "queuing request due to conflict"); + /* + * TBD: enqueue only for real conflict + * + * Currently we just act like all writes are in + * conflict with one another. What we should really do + * is check the active/pending queues and defer only if + * there's a conflict there. + * + * It's important to check the pending queue because we + * might have an active request X which conflicts with + * a pending request Y, and this request Z might + * conflict with Y but not X. If we checked only the + * active queue then Z could jump ahead of Y, which + * would be incorrect. + */ + local->qstub = fop_@NAME@_stub(frame, jbr_@NAME@_dispatch, + @SHORT_ARGS@); + if (!local->qstub) { + UNLOCK(&ictx->lock); + goto out; + } + list_add_tail(&local->qlinks, &ictx->pqueue); + ++(ictx->pending); + UNLOCK(&ictx->lock); + ret = 0; + goto out; + } else { + list_add_tail(&local->qlinks, &ictx->aqueue); + ++(ictx->active); + } + UNLOCK(&ictx->lock); +#endif + ret = jbr_@NAME@_dispatch(frame, this, @SHORT_ARGS@); + +out: + return ret; +} + +/* template-name write-dispatch */ +int32_t jbr_@NAME@_dispatch(call_frame_t *frame, xlator_t *this, @LONG_ARGS@) +{ + jbr_local_t *local = NULL; + jbr_private_t *priv = NULL; + int32_t ret = -1; + xlator_list_t *trav; + + GF_VALIDATE_OR_GOTO("jbr", this, out); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + local = frame->local; + GF_VALIDATE_OR_GOTO(this->name, local, out); + + /* + * TBD: unblock pending request(s) if we fail after this point but + * before we get to jbr_@NAME@_complete (where that code currently + * resides). + */ + + local->call_count = priv->n_children - 1; + for (trav = this->children->next; trav; trav = trav->next) { + STACK_WIND(frame, jbr_@NAME@_fan_in, trav->xlator, + trav->xlator->fops->@NAME@, @SHORT_ARGS@); + } + + /* TBD: variable Issue count */ + ret = 0; +out: + return ret; +} + +/* template-name write-fan-in */ +int32_t jbr_@NAME@_fan_in(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, @LONG_ARGS@) +{ + jbr_local_t *local = NULL; + int32_t ret = -1; + uint8_t call_count; + + GF_VALIDATE_OR_GOTO("jbr", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + local = frame->local; + GF_VALIDATE_OR_GOTO(this->name, local, out); + + gf_msg_trace(this->name, 0, "op_ret = %d, op_errno = %d\n", op_ret, + op_errno); + + LOCK(&frame->lock); + call_count = --(local->call_count); + if (op_ret != -1) { + /* Increment the number of successful acks * + * received for the operation. * + */ + (local->successful_acks)++; + local->successful_op_ret = op_ret; + } + gf_msg_debug(this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n", + op_ret, op_errno, local->successful_acks); + UNLOCK(&frame->lock); + + /* TBD: variable Completion count */ + if (call_count == 0) { + call_resume(local->stub); + } + + ret = 0; +out: + return ret; +} + +/* template-name write-continue */ +int32_t jbr_@NAME@_continue(call_frame_t *frame, xlator_t *this, @LONG_ARGS@) +{ + int32_t ret = -1; + gf_boolean_t result = _gf_false; + jbr_local_t *local = NULL; + jbr_local_t *new_local = NULL; + jbr_private_t *priv = NULL; + int32_t op_errno = 0; + + GF_VALIDATE_OR_GOTO("jbr", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + priv = this->private; + local = frame->local; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + GF_VALIDATE_OR_GOTO(this->name, local, out); + + /* Perform quorum check to see if the leader needs * + * to perform the operation. If the operation will not * + * meet quorum irrespective of the leader's result * + * there is no point in the leader performing the fop * + */ + result = fop_quorum_check(this, (double)priv->n_children, + (double)local->successful_acks + 1); + if (result == _gf_false) { + gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET, + "Didn't receive enough acks " + "to meet quorum. Failing the operation without trying " + "it on the leader."); + +#if defined(JBR_CG_QUEUE) + /* + * In case of a fop failure, before unwinding need to * + * remove it from queue * + */ + ret = jbr_remove_from_queue(frame, this); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_GENERIC, + "Failed to remove from queue."); + } +#endif + + /* + * In this case, the quorum is not met on the followers * + * So the operation will not be performed on the leader * + * and a rollback will be sent via GF_FOP_IPC to all the * + * followers, where this particular fop's term and index * + * numbers will be journaled, and later used to rollback * + */ + call_frame_t *new_frame; + + new_frame = copy_frame(frame); + + if (new_frame) { + new_local = mem_get0(this->local_pool); + if (new_local) { + INIT_LIST_HEAD(&new_local->qlinks); + ret = dict_set_int32(local->xdata, "rollback-fop", + GF_FOP_@UPNAME@); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR, + "failed to set rollback-fop"); + } else { + new_local->xdata = dict_ref(local->xdata); + new_frame->local = new_local; + jbr_ipc_call_dispatch(new_frame, this, &op_errno, + FDL_IPC_JBR_SERVER_ROLLBACK, + new_local->xdata); + } + } else { + gf_log(this->name, GF_LOG_WARNING, + "Could not create local for new_frame"); + } + } else { + gf_log(this->name, GF_LOG_WARNING, "Could not send rollback ipc"); + } + + STACK_UNWIND_STRICT(@NAME@, frame, -1, EROFS, @ERROR_ARGS@); + } else { + STACK_WIND(frame, jbr_@NAME@_complete, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@); + } + +out: + return 0; +} + +/* template-name write-complete */ +int32_t jbr_@NAME@_complete(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, @LONG_ARGS@) +{ + int32_t ret = -1; + gf_boolean_t result = _gf_false; + jbr_private_t *priv = NULL; + jbr_local_t *local = NULL; + jbr_local_t *new_local = NULL; + + GF_VALIDATE_OR_GOTO("jbr", this, err); + GF_VALIDATE_OR_GOTO(this->name, frame, err); + priv = this->private; + local = frame->local; + GF_VALIDATE_OR_GOTO(this->name, priv, err); + GF_VALIDATE_OR_GOTO(this->name, local, err); + + /* If the fop failed on the leader, then reduce one successful ack + * before calculating the fop quorum + */ + LOCK(&frame->lock); + if (op_ret == -1) + (local->successful_acks)--; + UNLOCK(&frame->lock); + +#if defined(JBR_CG_QUEUE) + ret = jbr_remove_from_queue(frame, this); + if (ret) + goto err; +#endif + +#if defined(JBR_CG_FSYNC) + jbr_mark_fd_dirty(this, local); +#endif + +#if defined(JBR_CG_NEED_FD) + fd_unref(local->fd); +#endif + + /* After the leader completes the fop, a quorum check is * + * performed, taking into account the outcome of the fop * + * on the leader. Irrespective of the fop being successful * + * or failing on the leader, the result of the quorum will * + * determine if the overall fop is successful or not. For * + * example, a fop might have succeeded on every node except * + * the leader, in which case as quorum is being met, the fop * + * will be treated as a successful fop, even though it failed * + * on the leader. On follower nodes, no quorum check should * + * be done, and the result is returned to the leader as is. * + */ + if (priv->leader) { + result = fop_quorum_check(this, (double)priv->n_children, + (double)local->successful_acks + 1); + if (result == _gf_false) { + op_ret = -1; + op_errno = EROFS; + gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET, + "Quorum is not met. " + "The operation has failed."); + /* + * In this case, the quorum is not met after the * + * operation is performed on the leader. Hence a * + * rollback will be sent via GF_FOP_IPC to the leader * + * where this particular fop's term and index numbers * + * will be journaled, and later used to rollback. * + * The same will be done on all the followers * + */ + call_frame_t *new_frame; + + new_frame = copy_frame(frame); + if (new_frame) { + new_local = mem_get0(this->local_pool); + if (new_local) { + INIT_LIST_HEAD(&new_local->qlinks); + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR, + "op = %d", new_frame->op); + ret = dict_set_int32(local->xdata, "rollback-fop", + GF_FOP_@UPNAME@); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR, + "failed to set " + "rollback-fop"); + } else { + new_local->xdata = dict_ref(local->xdata); + new_frame->local = new_local; + /* + * Calling STACK_WIND instead * + * of jbr_ipc as it will not * + * unwind to the previous * + * translators like it will * + * in case of jbr_ipc. * + */ + STACK_WIND( + new_frame, jbr_ipc_complete, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ipc, + FDL_IPC_JBR_SERVER_ROLLBACK, new_local->xdata); + } + } else { + gf_log(this->name, GF_LOG_WARNING, + "Could not create local " + "for new_frame"); + } + } else { + gf_log(this->name, GF_LOG_WARNING, + "Could not send rollback ipc"); + } + } else { +#if defined(JBR_CG_NEED_FD) + op_ret = local->successful_op_ret; +#else + op_ret = 0; +#endif + op_errno = 0; + gf_msg_debug(this->name, 0, + "Quorum has met. The operation has succeeded."); + } + } + + /* + * Unrefing the reference taken in jbr_@NAME@ () * + */ + dict_unref(local->xdata); + + STACK_UNWIND_STRICT(@NAME@, frame, op_ret, op_errno, @SHORT_ARGS@); + + return 0; + +err: + STACK_UNWIND_STRICT(@NAME@, frame, -1, 0, @SHORT_ARGS@); + + return 0; +} |