diff options
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/experimental/fdl/src/dump-tmpl.c | 223 | ||||
-rw-r--r-- | xlators/experimental/fdl/src/fdl-tmpl.c | 788 | ||||
-rw-r--r-- | xlators/experimental/fdl/src/recon-tmpl.c | 423 | ||||
-rw-r--r-- | xlators/experimental/jbr-client/src/fop-template.c | 171 | ||||
-rw-r--r-- | xlators/experimental/jbr-server/src/all-templates.c | 824 |
5 files changed, 1260 insertions, 1169 deletions
diff --git a/xlators/experimental/fdl/src/dump-tmpl.c b/xlators/experimental/fdl/src/dump-tmpl.c index d19f6589e66..32b0fef6af3 100644 --- a/xlators/experimental/fdl/src/dump-tmpl.c +++ b/xlators/experimental/fdl/src/dump-tmpl.c @@ -14,140 +14,150 @@ * Returns 0 if the string is ASCII printable * * and -1 if it's not ASCII printable * */ -int -str_isprint(char *s) +int str_isprint (char *s) { - int ret = -1; + int ret = -1; - if (!s) - goto out; + if (!s) + goto out; - while (s[0] != '\0') { - if (!isprint(s[0])) - goto out; - else - s++; - } + while (s[0] != '\0') { + if (!isprint(s[0])) + goto out; + else + s++; + } - ret = 0; + ret = 0; out: - return ret; + return ret; } #pragma fragment DICT -{ - int key_len, data_len; - char *key_ptr; - char *key_val; - printf("@ARGNAME@ = dict {\n"); - for (;;) { - key_len = *((int *)new_meta); - new_meta += sizeof(int); - if (!key_len) { - break; + { + int key_len, data_len; + char *key_ptr; + char *key_val; + printf ("@ARGNAME@ = dict {\n"); + for (;;) { + key_len = *((int *)new_meta); + new_meta += sizeof(int); + if (!key_len) { + break; + } + key_ptr = new_meta; + new_meta += key_len; + data_len = *((int *)new_meta); + key_val = new_meta + sizeof(int); + new_meta += sizeof(int) + data_len; + if (str_isprint(key_val)) + printf (" %s = <%d bytes>\n", + key_ptr, data_len); + else + printf (" %s = %s <%d bytes>\n", + key_ptr, key_val, data_len); + } + printf ("}\n"); } - key_ptr = new_meta; - new_meta += key_len; - data_len = *((int *)new_meta); - key_val = new_meta + sizeof(int); - new_meta += sizeof(int) + data_len; - if (str_isprint(key_val)) - printf(" %s = <%d bytes>\n", key_ptr, data_len); - else - printf(" %s = %s <%d bytes>\n", key_ptr, key_val, data_len); - } - printf("}\n"); -} #pragma fragment DOUBLE -printf("@ARGNAME@ = @FORMAT@\n", *((uint64_t *)new_meta), - *((uint64_t *)new_meta)); -new_meta += sizeof(uint64_t); + printf ("@ARGNAME@ = @FORMAT@\n", *((uint64_t *)new_meta), + *((uint64_t *)new_meta)); + new_meta += sizeof(uint64_t); #pragma fragment GFID -printf("@ARGNAME@ = <gfid %s>\n", uuid_utoa(*((uuid_t *)new_meta))); -new_meta += 16; + printf ("@ARGNAME@ = <gfid %s>\n", uuid_utoa(*((uuid_t *)new_meta))); + new_meta += 16; #pragma fragment INTEGER -printf("@ARGNAME@ = @FORMAT@\n", *((uint32_t *)new_meta), - *((uint32_t *)new_meta)); -new_meta += sizeof(uint32_t); + printf ("@ARGNAME@ = @FORMAT@\n", *((uint32_t *)new_meta), + *((uint32_t *)new_meta)); + new_meta += sizeof(uint32_t); #pragma fragment LOC -printf("@ARGNAME@ = loc {\n"); -printf(" gfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); -new_meta += 16; -printf(" pargfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); -new_meta += 16; -if (*(new_meta++)) { - printf(" name = %s\n", new_meta); - new_meta += (strlen(new_meta) + 1); -} -printf("}\n"); + printf ("@ARGNAME@ = loc {\n"); + printf (" gfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); + new_meta += 16; + printf (" pargfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); + new_meta += 16; + if (*(new_meta++)) { + printf (" name = %s\n", new_meta); + new_meta += (strlen(new_meta) + 1); + } + printf ("}\n"); #pragma fragment STRING -if (*(new_meta++)) { - printf("@ARGNAME@ = %s\n", new_meta); - new_meta += (strlen(new_meta) + 1); -} + if (*(new_meta++)) { + printf ("@ARGNAME@ = %s\n", new_meta); + new_meta += (strlen(new_meta) + 1); + } #pragma fragment VECTOR -{ - size_t len = *((size_t *)new_meta); - new_meta += sizeof(len); - printf("@ARGNAME@ = <%zu bytes>\n", len); - new_data += len; -} + { + size_t len = *((size_t *)new_meta); + new_meta += sizeof(len); + printf ("@ARGNAME@ = <%zu bytes>\n", len); + new_data += len; + } #pragma fragment IATT -{ - ia_prot_t *myprot = ((ia_prot_t *)new_meta); - printf("@ARGNAME@ = iatt {\n"); - printf(" ia_prot = %c%c%c", myprot->suid ? 'S' : '-', - myprot->sgid ? 'S' : '-', myprot->sticky ? 'T' : '-'); - printf("%c%c%c", myprot->owner.read ? 'r' : '-', - myprot->owner.write ? 'w' : '-', myprot->owner.exec ? 'x' : '-'); - printf("%c%c%c", myprot->group.read ? 'r' : '-', - myprot->group.write ? 'w' : '-', myprot->group.exec ? 'x' : '-'); - printf("%c%c%c\n", myprot->other.read ? 'r' : '-', - myprot->other.write ? 'w' : '-', myprot->other.exec ? 'x' : '-'); - new_meta += sizeof(ia_prot_t); - uint32_t *myints = (uint32_t *)new_meta; - printf(" ia_uid = %u\n", myints[0]); - printf(" ia_gid = %u\n", myints[1]); - printf(" ia_atime = %u.%09u\n", myints[2], myints[3]); - printf(" ia_mtime = %u.%09u\n", myints[4], myints[5]); - new_meta += sizeof(*myints) * 6; -} + { + ia_prot_t *myprot = ((ia_prot_t *)new_meta); + printf ("@ARGNAME@ = iatt {\n"); + printf (" ia_prot = %c%c%c", + myprot->suid ? 'S' : '-', + myprot->sgid ? 'S' : '-', + myprot->sticky ? 'T' : '-'); + printf ("%c%c%c", + myprot->owner.read ? 'r' : '-', + myprot->owner.write ? 'w' : '-', + myprot->owner.exec ? 'x' : '-'); + printf ("%c%c%c", + myprot->group.read ? 'r' : '-', + myprot->group.write ? 'w' : '-', + myprot->group.exec ? 'x' : '-'); + printf ("%c%c%c\n", + myprot->other.read ? 'r' : '-', + myprot->other.write ? 'w' : '-', + myprot->other.exec ? 'x' : '-'); + new_meta += sizeof(ia_prot_t); + uint32_t *myints = (uint32_t *)new_meta; + printf (" ia_uid = %u\n", myints[0]); + printf (" ia_gid = %u\n", myints[1]); + printf (" ia_atime = %u.%09u\n", myints[2], myints[3]); + printf (" ia_mtime = %u.%09u\n", myints[4], myints[5]); + new_meta += sizeof(*myints) * 6; + } #pragma fragment FOP -void fdl_dump_ @NAME @(char **old_meta, char **old_data) +void +fdl_dump_@NAME@ (char **old_meta, char **old_data) { - char *new_meta = *old_meta; - char *new_data = *old_data; + char *new_meta = *old_meta; + char *new_data = *old_data; - /* TBD: word size/endianness */ - @FUNCTION_BODY @ + /* TBD: word size/endianness */ +@FUNCTION_BODY@ *old_meta = new_meta; - *old_data = new_data; + *old_data = new_data; } #pragma fragment CASE -case GF_FOP_ @UPNAME @: - printf("=== GF_FOP_@UPNAME@\n"); - fdl_dump_ @NAME @(&new_meta, &new_data); - break; + case GF_FOP_@UPNAME@: + printf ("=== GF_FOP_@UPNAME@\n"); + fdl_dump_@NAME@ (&new_meta, &new_data); + break; #pragma fragment EPILOG - int - fdl_dump(char **old_meta, char **old_data) - { - char *new_meta = *old_meta; - char *new_data = *old_data; - static glfs_t *fs = NULL; - int recognized = 1; - event_header_t *eh; +int +fdl_dump (char **old_meta, char **old_data) +{ + char *new_meta = *old_meta; + char *new_data = *old_data; + static glfs_t *fs = NULL; + int recognized = 1; + event_header_t *eh; /* * We don't really call anything else in GFAPI, but this is the most @@ -155,22 +165,23 @@ case GF_FOP_ @UPNAME @: * or glusterfsd initialize (e.g. setting up THIS). */ if (!fs) { - fs = glfs_new("dummy"); + fs = glfs_new ("dummy"); } eh = (event_header_t *)new_meta; - new_meta += sizeof(*eh); + new_meta += sizeof (*eh); /* TBD: check event_type instead of assuming NEW_REQUEST */ switch (eh->fop_type) { - @SWITCH_BODY @ +@SWITCH_BODY@ - default : printf("unknown fop %u\n", eh->fop_type); - recognized = 0; + default: + printf ("unknown fop %u\n", eh->fop_type); + recognized = 0; } *old_meta = new_meta; *old_data = new_data; return recognized; - } +} diff --git a/xlators/experimental/fdl/src/fdl-tmpl.c b/xlators/experimental/fdl/src/fdl-tmpl.c index 7388b83e0bc..145dad7964a 100644 --- a/xlators/experimental/fdl/src/fdl-tmpl.c +++ b/xlators/experimental/fdl/src/fdl-tmpl.c @@ -24,489 +24,513 @@ #include "fdl.h" /* TBD: make tunable */ -#define META_FILE_SIZE (1 << 20) -#define DATA_FILE_SIZE (1 << 24) +#define META_FILE_SIZE (1 << 20) +#define DATA_FILE_SIZE (1 << 24) -enum gf_fdl { gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1, gf_fdl_mt_end }; +enum gf_fdl { + gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1, + gf_fdl_mt_end +}; typedef struct { - char *type; - off_t size; - char *path; - int fd; - void *ptr; - off_t max_offset; + char *type; + off_t size; + char *path; + int fd; + void * ptr; + off_t max_offset; } log_obj_t; typedef struct { - struct list_head reqs; - pthread_mutex_t req_lock; - pthread_cond_t req_cond; - char *log_dir; - pthread_t worker; - gf_boolean_t should_stop; - gf_boolean_t change_term; - log_obj_t meta_log; - log_obj_t data_log; - int term; - int first_term; + struct list_head reqs; + pthread_mutex_t req_lock; + pthread_cond_t req_cond; + char *log_dir; + pthread_t worker; + gf_boolean_t should_stop; + gf_boolean_t change_term; + log_obj_t meta_log; + log_obj_t data_log; + int term; + int first_term; } fdl_private_t; int32_t -fdl_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata); +fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata); void -fdl_enqueue(xlator_t *this, call_stub_t *stub) +fdl_enqueue (xlator_t *this, call_stub_t *stub) { - fdl_private_t *priv = this->private; + fdl_private_t *priv = this->private; - pthread_mutex_lock(&priv->req_lock); - list_add_tail(&stub->list, &priv->reqs); - pthread_mutex_unlock(&priv->req_lock); + pthread_mutex_lock (&priv->req_lock); + list_add_tail (&stub->list, &priv->reqs); + pthread_mutex_unlock (&priv->req_lock); - pthread_cond_signal(&priv->req_cond); + pthread_cond_signal (&priv->req_cond); } #pragma generate char * -fdl_open_term_log(xlator_t *this, log_obj_t *obj, int term) +fdl_open_term_log (xlator_t *this, log_obj_t *obj, int term) { - fdl_private_t *priv = this->private; - int ret; - char *ptr = NULL; - - /* - * Use .jnl instead of .log so that we don't get test info (mistakenly) - * appended to our journal files. - */ - if (this->ctx->cmd_args.log_ident) { - ret = gf_asprintf(&obj->path, "%s/%s-%s-%d.jnl", priv->log_dir, - this->ctx->cmd_args.log_ident, obj->type, term); - } else { - ret = gf_asprintf(&obj->path, "%s/fubar-%s-%d.jnl", priv->log_dir, - obj->type, term); - } - if ((ret <= 0) || !obj->path) { - gf_log(this->name, GF_LOG_ERROR, "failed to construct log-file path"); - goto err; - } - - gf_log(this->name, GF_LOG_INFO, "opening %s (size %ld)", obj->path, - obj->size); - - obj->fd = open(obj->path, O_RDWR | O_CREAT | O_TRUNC, 0666); - if (obj->fd < 0) { - gf_log(this->name, GF_LOG_ERROR, "failed to open log file (%s)", - strerror(errno)); - goto err; - } + fdl_private_t *priv = this->private; + int ret; + char * ptr = NULL; + + /* + * Use .jnl instead of .log so that we don't get test info (mistakenly) + * appended to our journal files. + */ + if (this->ctx->cmd_args.log_ident) { + ret = gf_asprintf (&obj->path, "%s/%s-%s-%d.jnl", + priv->log_dir, this->ctx->cmd_args.log_ident, + obj->type, term); + } + else { + ret = gf_asprintf (&obj->path, "%s/fubar-%s-%d.jnl", + priv->log_dir, obj->type, term); + } + if ((ret <= 0) || !obj->path) { + gf_log (this->name, GF_LOG_ERROR, + "failed to construct log-file path"); + goto err; + } + + gf_log (this->name, GF_LOG_INFO, "opening %s (size %ld)", + obj->path, obj->size); + + obj->fd = open (obj->path, O_RDWR|O_CREAT|O_TRUNC, 0666); + if (obj->fd < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to open log file (%s)", strerror(errno)); + goto err; + } #if !defined(GF_BSD_HOST_OS) - /* - * NetBSD can just go die in a fire. Even though it claims to support - * fallocate/posix_fallocate they don't actually *do* anything so the - * file size remains zero. Then mmap succeeds anyway, but any access - * to the mmap'ed region will segfault. It would be acceptable for - * fallocate to do what it says, for mmap to fail, or for access to - * extend the file. NetBSD managed to hit the trifecta of Getting - * Everything Wrong, and debugging in that environment to get this far - * has already been painful enough (systems I worked on in 1990 were - * better that way). We'll fall through to the lseek/write method, and - * performance will be worse, and TOO BAD. - */ - if (sys_fallocate(obj->fd, 0, 0, obj->size) < 0) + /* + * NetBSD can just go die in a fire. Even though it claims to support + * fallocate/posix_fallocate they don't actually *do* anything so the + * file size remains zero. Then mmap succeeds anyway, but any access + * to the mmap'ed region will segfault. It would be acceptable for + * fallocate to do what it says, for mmap to fail, or for access to + * extend the file. NetBSD managed to hit the trifecta of Getting + * Everything Wrong, and debugging in that environment to get this far + * has already been painful enough (systems I worked on in 1990 were + * better that way). We'll fall through to the lseek/write method, and + * performance will be worse, and TOO BAD. + */ + if (sys_fallocate(obj->fd,0,0,obj->size) < 0) #endif - { - gf_log(this->name, GF_LOG_WARNING, - "failed to fallocate space for log file"); - /* Have to do this the ugly page-faulty way. */ - (void)sys_lseek(obj->fd, obj->size - 1, SEEK_SET); - (void)sys_write(obj->fd, "", 1); - } - - ptr = mmap(NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0); - if (ptr == MAP_FAILED) { - gf_log(this->name, GF_LOG_ERROR, "failed to mmap log (%s)", - strerror(errno)); - goto err; - } - - obj->ptr = ptr; - obj->max_offset = 0; - return ptr; + { + gf_log (this->name, GF_LOG_WARNING, + "failed to fallocate space for log file"); + /* Have to do this the ugly page-faulty way. */ + (void) sys_lseek (obj->fd, obj->size-1, SEEK_SET); + (void) sys_write (obj->fd, "", 1); + } + + ptr = mmap (NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0); + if (ptr == MAP_FAILED) { + gf_log (this->name, GF_LOG_ERROR, "failed to mmap log (%s)", + strerror(errno)); + goto err; + } + + obj->ptr = ptr; + obj->max_offset = 0; + return ptr; err: - if (obj->fd >= 0) { - sys_close(obj->fd); - obj->fd = (-1); - } - if (obj->path) { - GF_FREE(obj->path); - obj->path = NULL; - } - return ptr; + if (obj->fd >= 0) { + sys_close (obj->fd); + obj->fd = (-1); + } + if (obj->path) { + GF_FREE (obj->path); + obj->path = NULL; + } + return ptr; } void -fdl_close_term_log(xlator_t *this, log_obj_t *obj) +fdl_close_term_log (xlator_t *this, log_obj_t *obj) { - fdl_private_t *priv = this->private; - - if (obj->ptr) { - (void)munmap(obj->ptr, obj->size); - obj->ptr = NULL; - } - - if (obj->fd >= 0) { - gf_log(this->name, GF_LOG_INFO, "truncating term %d %s journal to %ld", - priv->term, obj->type, obj->max_offset); - if (sys_ftruncate(obj->fd, obj->max_offset) < 0) { - gf_log(this->name, GF_LOG_WARNING, - "failed to truncate journal (%s)", strerror(errno)); + fdl_private_t *priv = this->private; + + if (obj->ptr) { + (void) munmap (obj->ptr, obj->size); + obj->ptr = NULL; + } + + if (obj->fd >= 0) { + gf_log (this->name, GF_LOG_INFO, + "truncating term %d %s journal to %ld", + priv->term, obj->type, obj->max_offset); + if (sys_ftruncate(obj->fd,obj->max_offset) < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to truncate journal (%s)", + strerror(errno)); + } + sys_close (obj->fd); + obj->fd = (-1); + } + + if (obj->path) { + GF_FREE (obj->path); + obj->path = NULL; } - sys_close(obj->fd); - obj->fd = (-1); - } - - if (obj->path) { - GF_FREE(obj->path); - obj->path = NULL; - } } gf_boolean_t -fdl_change_term(xlator_t *this, char **meta_ptr, char **data_ptr) +fdl_change_term (xlator_t *this, char **meta_ptr, char **data_ptr) { - fdl_private_t *priv = this->private; + fdl_private_t *priv = this->private; - fdl_close_term_log(this, &priv->meta_log); - fdl_close_term_log(this, &priv->data_log); + fdl_close_term_log (this, &priv->meta_log); + fdl_close_term_log (this, &priv->data_log); - ++(priv->term); + ++(priv->term); - *meta_ptr = fdl_open_term_log(this, &priv->meta_log, priv->term); - if (!*meta_ptr) { - return _gf_false; - } + *meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term); + if (!*meta_ptr) { + return _gf_false; + } - *data_ptr = fdl_open_term_log(this, &priv->data_log, priv->term); - if (!*data_ptr) { - return _gf_false; - } + *data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term); + if (!*data_ptr) { + return _gf_false; + } - return _gf_true; + return _gf_true; } void * -fdl_worker(void *arg) +fdl_worker (void *arg) { - xlator_t *this = arg; - fdl_private_t *priv = this->private; - call_stub_t *stub; - char *meta_ptr = NULL; - off_t *meta_offset = &priv->meta_log.max_offset; - char *data_ptr = NULL; - off_t *data_offset = &priv->data_log.max_offset; - unsigned long base_as_ul; - void *msync_ptr; - size_t msync_len; - gf_boolean_t recycle; - void *err_label = &&err_unlocked; - - priv->meta_log.type = "meta"; - priv->meta_log.size = META_FILE_SIZE; - priv->meta_log.path = NULL; - priv->meta_log.fd = (-1); - priv->meta_log.ptr = NULL; - - priv->data_log.type = "data"; - priv->data_log.size = DATA_FILE_SIZE; - priv->data_log.path = NULL; - priv->data_log.fd = (-1); - priv->data_log.ptr = NULL; - - /* TBD: initial term should come from persistent storage (e.g. etcd) */ - priv->first_term = ++(priv->term); - meta_ptr = fdl_open_term_log(this, &priv->meta_log, priv->term); - if (!meta_ptr) { - goto *err_label; - } - data_ptr = fdl_open_term_log(this, &priv->data_log, priv->term); - if (!data_ptr) { - fdl_close_term_log(this, &priv->meta_log); - goto *err_label; - } - - for (;;) { - pthread_mutex_lock(&priv->req_lock); - err_label = &&err_locked; - while (list_empty(&priv->reqs)) { - pthread_cond_wait(&priv->req_cond, &priv->req_lock); - if (priv->should_stop) { + xlator_t *this = arg; + fdl_private_t *priv = this->private; + call_stub_t *stub; + char * meta_ptr = NULL; + off_t *meta_offset = &priv->meta_log.max_offset; + char * data_ptr = NULL; + off_t *data_offset = &priv->data_log.max_offset; + unsigned long base_as_ul; + void * msync_ptr; + size_t msync_len; + gf_boolean_t recycle; + void *err_label = &&err_unlocked; + + priv->meta_log.type = "meta"; + priv->meta_log.size = META_FILE_SIZE; + priv->meta_log.path = NULL; + priv->meta_log.fd = (-1); + priv->meta_log.ptr = NULL; + + priv->data_log.type = "data"; + priv->data_log.size = DATA_FILE_SIZE; + priv->data_log.path = NULL; + priv->data_log.fd = (-1); + priv->data_log.ptr = NULL; + + /* TBD: initial term should come from persistent storage (e.g. etcd) */ + priv->first_term = ++(priv->term); + meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term); + if (!meta_ptr) { goto *err_label; - } - if (priv->change_term) { - if (!fdl_change_term(this, &meta_ptr, &data_ptr)) { - goto *err_label; - } - priv->change_term = _gf_false; - continue; - } - } - stub = list_entry(priv->reqs.next, call_stub_t, list); - list_del_init(&stub->list); - pthread_mutex_unlock(&priv->req_lock); - err_label = &&err_unlocked; - /* - * TBD: batch requests - * - * What we should do here is gather up *all* of the requests - * that have accumulated since we were last at this point, - * blast them all out in one big writev, and then dispatch them - * all before coming back for more. That maximizes throughput, - * at some cost to latency (due to queuing effects at the log - * stage). Note that we're likely to be above io-threads, so - * the dispatch itself will be parallelized (at further cost to - * latency). For now, we just do the simplest thing and handle - * one request all the way through before fetching the next. - * - * So, why mmap/msync instead of writev/fdatasync? Because it's - * faster. Much faster. So much faster that I half-suspect - * cheating, but it's more convenient for now than having to - * ensure that everything's page-aligned for O_DIRECT (the only - * alternative that still might avoid ridiculous levels of - * local-FS overhead). - * - * TBD: check that msync really does get our data to disk. - */ - gf_log(this->name, GF_LOG_DEBUG, "logging %u+%u bytes for op %d", - stub->jnl_meta_len, stub->jnl_data_len, stub->fop); - recycle = _gf_false; - if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) { - recycle = _gf_true; } - if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) { - recycle = _gf_true; - } - if (recycle && !fdl_change_term(this, &meta_ptr, &data_ptr)) { - goto *err_label; - } - meta_ptr = priv->meta_log.ptr; - data_ptr = priv->data_log.ptr; - gf_log(this->name, GF_LOG_DEBUG, "serializing to %p/%p", - meta_ptr + *meta_offset, data_ptr + *data_offset); - stub->serialize(stub, meta_ptr + *meta_offset, data_ptr + *data_offset); - if (stub->jnl_meta_len > 0) { - base_as_ul = (unsigned long)(meta_ptr + *meta_offset); - msync_ptr = (void *)(base_as_ul & ~0x0fff); - msync_len = (size_t)(base_as_ul & 0x0fff); - if (msync(msync_ptr, msync_len + stub->jnl_meta_len, MS_SYNC) < 0) { - gf_log(this->name, GF_LOG_WARNING, - "failed to log request meta (%s)", strerror(errno)); - } - *meta_offset += stub->jnl_meta_len; + data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term); + if (!data_ptr) { + fdl_close_term_log (this, &priv->meta_log); + goto *err_label; } - if (stub->jnl_data_len > 0) { - base_as_ul = (unsigned long)(data_ptr + *data_offset); - msync_ptr = (void *)(base_as_ul & ~0x0fff); - msync_len = (size_t)(base_as_ul & 0x0fff); - if (msync(msync_ptr, msync_len + stub->jnl_data_len, MS_SYNC) < 0) { - gf_log(this->name, GF_LOG_WARNING, - "failed to log request data (%s)", strerror(errno)); - } - *data_offset += stub->jnl_data_len; + + for (;;) { + pthread_mutex_lock (&priv->req_lock); + err_label = &&err_locked; + while (list_empty(&priv->reqs)) { + pthread_cond_wait (&priv->req_cond, &priv->req_lock); + if (priv->should_stop) { + goto *err_label; + } + if (priv->change_term) { + if (!fdl_change_term(this, &meta_ptr, + &data_ptr)) { + goto *err_label; + } + priv->change_term = _gf_false; + continue; + } + } + stub = list_entry (priv->reqs.next, call_stub_t, list); + list_del_init (&stub->list); + pthread_mutex_unlock (&priv->req_lock); + err_label = &&err_unlocked; + /* + * TBD: batch requests + * + * What we should do here is gather up *all* of the requests + * that have accumulated since we were last at this point, + * blast them all out in one big writev, and then dispatch them + * all before coming back for more. That maximizes throughput, + * at some cost to latency (due to queuing effects at the log + * stage). Note that we're likely to be above io-threads, so + * the dispatch itself will be parallelized (at further cost to + * latency). For now, we just do the simplest thing and handle + * one request all the way through before fetching the next. + * + * So, why mmap/msync instead of writev/fdatasync? Because it's + * faster. Much faster. So much faster that I half-suspect + * cheating, but it's more convenient for now than having to + * ensure that everything's page-aligned for O_DIRECT (the only + * alternative that still might avoid ridiculous levels of + * local-FS overhead). + * + * TBD: check that msync really does get our data to disk. + */ + gf_log (this->name, GF_LOG_DEBUG, + "logging %u+%u bytes for op %d", + stub->jnl_meta_len, stub->jnl_data_len, stub->fop); + recycle = _gf_false; + if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) { + recycle = _gf_true; + } + if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) { + recycle = _gf_true; + } + if (recycle && !fdl_change_term(this,&meta_ptr,&data_ptr)) { + goto *err_label; + } + meta_ptr = priv->meta_log.ptr; + data_ptr = priv->data_log.ptr; + gf_log (this->name, GF_LOG_DEBUG, "serializing to %p/%p", + meta_ptr + *meta_offset, data_ptr + *data_offset); + stub->serialize (stub, meta_ptr + *meta_offset, + data_ptr + *data_offset); + if (stub->jnl_meta_len > 0) { + base_as_ul = (unsigned long) (meta_ptr + *meta_offset); + msync_ptr = (void *) (base_as_ul & ~0x0fff); + msync_len = (size_t) (base_as_ul & 0x0fff); + if (msync (msync_ptr, msync_len+stub->jnl_meta_len, + MS_SYNC) < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to log request meta (%s)", + strerror(errno)); + } + *meta_offset += stub->jnl_meta_len; + } + if (stub->jnl_data_len > 0) { + base_as_ul = (unsigned long) (data_ptr + *data_offset); + msync_ptr = (void *) (base_as_ul & ~0x0fff); + msync_len = (size_t) (base_as_ul & 0x0fff); + if (msync (msync_ptr, msync_len+stub->jnl_data_len, + MS_SYNC) < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to log request data (%s)", + strerror(errno)); + } + *data_offset += stub->jnl_data_len; + } + call_resume (stub); } - call_resume(stub); - } err_locked: - pthread_mutex_unlock(&priv->req_lock); + pthread_mutex_unlock (&priv->req_lock); err_unlocked: - fdl_close_term_log(this, &priv->meta_log); - fdl_close_term_log(this, &priv->data_log); - return NULL; + fdl_close_term_log (this, &priv->meta_log); + fdl_close_term_log (this, &priv->data_log); + return NULL; } int32_t -fdl_ipc_continue(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +fdl_ipc_continue (call_frame_t *frame, xlator_t *this, + int32_t op, dict_t *xdata) { - /* - * Nothing to be done here. Just Unwind. * - */ - STACK_UNWIND_STRICT(ipc, frame, 0, 0, xdata); + /* + * Nothing to be done here. Just Unwind. * + */ + STACK_UNWIND_STRICT (ipc, frame, 0, 0, xdata); - return 0; + return 0; } int32_t -fdl_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) { - call_stub_t *stub; - fdl_private_t *priv = this->private; - dict_t *tdict; - int32_t gt_err = EIO; + call_stub_t *stub; + fdl_private_t *priv = this->private; + dict_t *tdict; + int32_t gt_err = EIO; + + switch (op) { - switch (op) { case FDL_IPC_CHANGE_TERM: - gf_log(this->name, GF_LOG_INFO, "got CHANGE_TERM op"); - priv->change_term = _gf_true; - pthread_cond_signal(&priv->req_cond); - STACK_UNWIND_STRICT(ipc, frame, 0, 0, NULL); - break; + gf_log (this->name, GF_LOG_INFO, "got CHANGE_TERM op"); + priv->change_term = _gf_true; + pthread_cond_signal (&priv->req_cond); + STACK_UNWIND_STRICT (ipc, frame, 0, 0, NULL); + break; case FDL_IPC_GET_TERMS: - gf_log(this->name, GF_LOG_INFO, "got GET_TERMS op"); - tdict = dict_new(); - if (!tdict) { - gt_err = ENOMEM; - goto gt_done; - } - if (dict_set_int32(tdict, "first", priv->first_term) != 0) { - goto gt_done; - } - if (dict_set_int32(tdict, "last", priv->term) != 0) { - goto gt_done; - } - gt_err = 0; + gf_log (this->name, GF_LOG_INFO, "got GET_TERMS op"); + tdict = dict_new (); + if (!tdict) { + gt_err = ENOMEM; + goto gt_done; + } + if (dict_set_int32(tdict,"first",priv->first_term) != 0) { + goto gt_done; + } + if (dict_set_int32(tdict,"last",priv->term) != 0) { + goto gt_done; + } + gt_err = 0; gt_done: - if (gt_err) { - STACK_UNWIND_STRICT(ipc, frame, -1, gt_err, NULL); - } else { - STACK_UNWIND_STRICT(ipc, frame, 0, 0, tdict); - } - if (tdict) { - dict_unref(tdict); - } - break; + if (gt_err) { + STACK_UNWIND_STRICT (ipc, frame, -1, gt_err, NULL); + } else { + STACK_UNWIND_STRICT (ipc, frame, 0, 0, tdict); + } + if (tdict) { + dict_unref (tdict); + } + break; case FDL_IPC_JBR_SERVER_ROLLBACK: - /* - * In case of a rollback from jbr-server, dump * - * the term and index number in the journal, * - * which will later be used to rollback the fop * - */ - stub = fop_ipc_stub(frame, fdl_ipc_continue, op, xdata); - fdl_len_ipc(stub); - stub->serialize = fdl_serialize_ipc; - fdl_enqueue(this, stub); - - break; + /* + * In case of a rollback from jbr-server, dump * + * the term and index number in the journal, * + * which will later be used to rollback the fop * + */ + stub = fop_ipc_stub (frame, fdl_ipc_continue, + op, xdata); + fdl_len_ipc (stub); + stub->serialize = fdl_serialize_ipc; + fdl_enqueue (this, stub); + + break; default: - STACK_WIND_TAIL(frame, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ipc, op, xdata); - } + STACK_WIND_TAIL (frame, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ipc, + op, xdata); + } - return 0; + return 0; } int -fdl_init(xlator_t *this) +fdl_init (xlator_t *this) { - fdl_private_t *priv = NULL; - - priv = GF_CALLOC(1, sizeof(*priv), gf_fdl_mt_fdl_private_t); - if (!priv) { - gf_log(this->name, GF_LOG_ERROR, "failed to allocate fdl_private"); - goto err; - } - - INIT_LIST_HEAD(&priv->reqs); - if (pthread_mutex_init(&priv->req_lock, NULL) != 0) { - gf_log(this->name, GF_LOG_ERROR, "failed to initialize req_lock"); - goto err; - } - if (pthread_cond_init(&priv->req_cond, NULL) != 0) { - gf_log(this->name, GF_LOG_ERROR, "failed to initialize req_cond"); - goto err; - } - - GF_OPTION_INIT("log-path", priv->log_dir, path, err); - - this->private = priv; - /* - * The rest of the fop table is automatically generated, so this is a - * bit cleaner than messing with the generation to add a hand-written - * exception. - */ - - if (gf_thread_create(&priv->worker, NULL, fdl_worker, this, "fdlwrker") != - 0) { - gf_log(this->name, GF_LOG_ERROR, "failed to start fdl_worker"); - goto err; - } - - return 0; + fdl_private_t *priv = NULL; + + priv = GF_CALLOC (1, sizeof (*priv), gf_fdl_mt_fdl_private_t); + if (!priv) { + gf_log (this->name, GF_LOG_ERROR, + "failed to allocate fdl_private"); + goto err; + } + + INIT_LIST_HEAD (&priv->reqs); + if (pthread_mutex_init (&priv->req_lock, NULL) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to initialize req_lock"); + goto err; + } + if (pthread_cond_init (&priv->req_cond, NULL) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to initialize req_cond"); + goto err; + } + + GF_OPTION_INIT ("log-path", priv->log_dir, path, err); + + this->private = priv; + /* + * The rest of the fop table is automatically generated, so this is a + * bit cleaner than messing with the generation to add a hand-written + * exception. + */ + + if (gf_thread_create (&priv->worker, NULL, fdl_worker, this, + "fdlwrker") != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to start fdl_worker"); + goto err; + } + + return 0; err: - if (priv) { - GF_FREE(priv); - } - return -1; + if (priv) { + GF_FREE(priv); + } + return -1; } void -fdl_fini(xlator_t *this) +fdl_fini (xlator_t *this) { - fdl_private_t *priv = this->private; - - if (priv) { - priv->should_stop = _gf_true; - pthread_cond_signal(&priv->req_cond); - pthread_join(priv->worker, NULL); - GF_FREE(priv); - } + fdl_private_t *priv = this->private; + + if (priv) { + priv->should_stop = _gf_true; + pthread_cond_signal (&priv->req_cond); + pthread_join (priv->worker, NULL); + GF_FREE(priv); + } } int -fdl_reconfigure(xlator_t *this, dict_t *options) +fdl_reconfigure (xlator_t *this, dict_t *options) { - fdl_private_t *priv = this->private; + fdl_private_t *priv = this->private; - GF_OPTION_RECONF("log_dir", priv->log_dir, options, path, out); - /* TBD: react if it changed */ + GF_OPTION_RECONF ("log_dir", priv->log_dir, options, path, out); + /* TBD: react if it changed */ out: - return 0; + return 0; } int32_t -mem_acct_init(xlator_t *this) +mem_acct_init (xlator_t *this) { - int ret = -1; + int ret = -1; - GF_VALIDATE_OR_GOTO("fdl", this, out); + GF_VALIDATE_OR_GOTO ("fdl", this, out); - ret = xlator_mem_acct_init(this, gf_fdl_mt_end + 1); + ret = xlator_mem_acct_init (this, gf_fdl_mt_end + 1); - if (ret != 0) { - gf_log(this->name, GF_LOG_ERROR, - "Memory accounting init" - "failed"); - return ret; - } + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" + "failed"); + return ret; + } out: - return ret; + return ret; } class_methods_t class_methods = { - .init = fdl_init, - .fini = fdl_fini, - .reconfigure = fdl_reconfigure, - .notify = default_notify, + .init = fdl_init, + .fini = fdl_fini, + .reconfigure = fdl_reconfigure, + .notify = default_notify, }; struct volume_options options[] = { - {.key = {"log-path"}, - .type = GF_OPTION_TYPE_PATH, - .default_value = DEFAULT_LOG_FILE_DIRECTORY, - .description = "Directory for FDL files."}, - {.key = {NULL}}, + { .key = {"log-path"}, + .type = GF_OPTION_TYPE_PATH, + .default_value = DEFAULT_LOG_FILE_DIRECTORY, + .description = "Directory for FDL files." + }, + { .key = {NULL} }, }; struct xlator_cbks cbks = { - .release = default_release, - .releasedir = default_releasedir, - .forget = default_forget, + .release = default_release, + .releasedir = default_releasedir, + .forget = default_forget, }; diff --git a/xlators/experimental/fdl/src/recon-tmpl.c b/xlators/experimental/fdl/src/recon-tmpl.c index 4760eaad2e2..228860401ae 100644 --- a/xlators/experimental/fdl/src/recon-tmpl.c +++ b/xlators/experimental/fdl/src/recon-tmpl.c @@ -15,283 +15,290 @@ #define GFAPI_SUCCESS 0 inode_t * -recon_get_inode(glfs_t *fs, uuid_t gfid) +recon_get_inode (glfs_t *fs, uuid_t gfid) { - inode_t *inode; - loc_t loc = { - NULL, - }; - struct iatt iatt; - int ret; - inode_t *newinode; - - inode = inode_find(fs->active_subvol->itable, gfid); - if (inode) { - printf("=== FOUND %s IN TABLE\n", uuid_utoa(gfid)); - return inode; - } - - loc.inode = inode_new(fs->active_subvol->itable); - if (!loc.inode) { - return NULL; - } - gf_uuid_copy(loc.inode->gfid, gfid); - gf_uuid_copy(loc.gfid, gfid); - - printf("=== DOING LOOKUP FOR %s\n", uuid_utoa(gfid)); - - ret = syncop_lookup(fs->active_subvol, &loc, &iatt, NULL, NULL, NULL); - if (ret != GFAPI_SUCCESS) { - fprintf(stderr, "syncop_lookup failed (%d)\n", ret); - return NULL; - } - - newinode = inode_link(loc.inode, NULL, NULL, &iatt); - if (newinode) { - inode_lookup(newinode); - } - - return newinode; -} + inode_t *inode; + loc_t loc = {NULL,}; + struct iatt iatt; + int ret; + inode_t *newinode; + + inode = inode_find (fs->active_subvol->itable, gfid); + if (inode) { + printf ("=== FOUND %s IN TABLE\n", uuid_utoa(gfid)); + return inode; + } -#pragma fragment DICT -dict_t *@ARGNAME @; + loc.inode = inode_new (fs->active_subvol->itable); + if (!loc.inode) { + return NULL; + } + gf_uuid_copy (loc.inode->gfid, gfid); + gf_uuid_copy (loc.gfid, gfid); -@ARGNAME @ = dict_new(); -if (!@ARGNAME @) { - goto *err_label; -} -err_label = &&cleanup_ @ARGNAME @; + printf ("=== DOING LOOKUP FOR %s\n", uuid_utoa(gfid)); -{ - int key_len, data_len; - char *key_ptr; - int garbage; - for (;;) { - key_len = *((int *)new_meta); - new_meta += sizeof(int); - if (!key_len) { - break; + ret = syncop_lookup (fs->active_subvol, &loc, &iatt, + NULL, NULL, NULL); + if (ret != GFAPI_SUCCESS) { + fprintf (stderr, "syncop_lookup failed (%d)\n", ret); + return NULL; } - key_ptr = new_meta; - new_meta += key_len; - data_len = *((int *)new_meta); - new_meta += sizeof(int); - garbage = dict_set_static_bin(@ARGNAME @, key_ptr, new_meta, data_len); - /* TBD: check error from dict_set_static_bin */ - (void)garbage; - new_meta += data_len; - } + + newinode = inode_link (loc.inode, NULL, NULL, &iatt); + if (newinode) { + inode_lookup (newinode); + } + + return newinode; } +#pragma fragment DICT + dict_t *@ARGNAME@; + + @ARGNAME@ = dict_new(); + if (!@ARGNAME@) { + goto *err_label; + } + err_label = &&cleanup_@ARGNAME@; + + { + int key_len, data_len; + char *key_ptr; + int garbage; + for (;;) { + key_len = *((int *)new_meta); + new_meta += sizeof(int); + if (!key_len) { + break; + } + key_ptr = new_meta; + new_meta += key_len; + data_len = *((int *)new_meta); + new_meta += sizeof(int); + garbage = dict_set_static_bin (@ARGNAME@, key_ptr, + new_meta, data_len); + /* TBD: check error from dict_set_static_bin */ + (void)garbage; + new_meta += data_len; + } + } + #pragma fragment DICT_CLEANUP -cleanup_ @ARGNAME @ : dict_unref(@ARGNAME @); +cleanup_@ARGNAME@: + dict_unref (@ARGNAME@); #pragma fragment DOUBLE -@ARGTYPE @ @ARGNAME @ = *((@ARGTYPE @ *)new_meta); -new_meta += sizeof(uint64_t); + @ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta); + new_meta += sizeof(uint64_t); #pragma fragment FD -inode_t *@ARGNAME @_ino; -fd_t *@ARGNAME @; + inode_t *@ARGNAME@_ino; + fd_t *@ARGNAME@; -@ARGNAME @_ino = recon_get_inode(fs, *((uuid_t *)new_meta)); -new_meta += 16; -if (!@ARGNAME @_ino) { - goto *err_label; -} -err_label = &&cleanup_ @ARGNAME @_ino; + @ARGNAME@_ino = recon_get_inode (fs, *((uuid_t *)new_meta)); + new_meta += 16; + if (!@ARGNAME@_ino) { + goto *err_label; + } + err_label = &&cleanup_@ARGNAME@_ino; -@ARGNAME @ = fd_anonymous(@ARGNAME @_ino); -if (!@ARGNAME @) { - goto *err_label; -} -err_label = &&cleanup_ @ARGNAME @; + @ARGNAME@ = fd_anonymous (@ARGNAME@_ino); + if (!@ARGNAME@) { + goto *err_label; + } + err_label = &&cleanup_@ARGNAME@; #pragma fragment FD_CLEANUP -cleanup_ @ARGNAME @ : fd_unref(@ARGNAME @); -cleanup_ @ARGNAME @_ino : inode_unref(@ARGNAME @_ino); +cleanup_@ARGNAME@: + fd_unref (@ARGNAME@); +cleanup_@ARGNAME@_ino: + inode_unref (@ARGNAME@_ino); #pragma fragment NEW_FD -/* - * This pseudo-type is only used for create, and in that case we know - * we'll be using loc.inode, so it's not worth generalizing to take an - * extra argument. - */ -fd_t *@ARGNAME @ = fd_anonymous(loc.inode); - -if (!fd) { - goto *err_label; -} -err_label = &&cleanup_ @ARGNAME @; -new_meta += 16; + /* + * This pseudo-type is only used for create, and in that case we know + * we'll be using loc.inode, so it's not worth generalizing to take an + * extra argument. + */ + fd_t *@ARGNAME@ = fd_anonymous (loc.inode); + + if (!fd) { + goto *err_label; + } + err_label = &&cleanup_@ARGNAME@; + new_meta += 16; #pragma fragment NEW_FD_CLEANUP -cleanup_ @ARGNAME @ : fd_unref(@ARGNAME @); +cleanup_@ARGNAME@: + fd_unref (@ARGNAME@); #pragma fragment INTEGER -@ARGTYPE @ @ARGNAME @ = *((@ARGTYPE @ *)new_meta); + @ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta); -new_meta += sizeof(@ARGTYPE @); + new_meta += sizeof(@ARGTYPE@); #pragma fragment LOC -loc_t @ARGNAME @ = { - NULL, -}; + loc_t @ARGNAME@ = { NULL, }; -@ARGNAME @.inode = recon_get_inode(fs, *((uuid_t *)new_meta)); -if (!@ARGNAME @.inode) { - goto *err_label; -} -err_label = &&cleanup_ @ARGNAME @; -gf_uuid_copy(@ARGNAME @.gfid, @ARGNAME @.inode->gfid); -new_meta += 16; -new_meta += 16; /* skip over pargfid */ -if (*(new_meta++)) { - @ARGNAME @.name = new_meta; - new_meta += strlen(new_meta) + 1; -} + @ARGNAME@.inode = recon_get_inode (fs, *((uuid_t *)new_meta)); + if (!@ARGNAME@.inode) { + goto *err_label; + } + err_label = &&cleanup_@ARGNAME@; + gf_uuid_copy (@ARGNAME@.gfid, @ARGNAME@.inode->gfid); + new_meta += 16; + new_meta += 16; /* skip over pargfid */ + if (*(new_meta++)) { + @ARGNAME@.name = new_meta; + new_meta += strlen(new_meta) + 1; + } #pragma fragment LOC_CLEANUP -cleanup_ @ARGNAME @ : loc_wipe(&@ARGNAME @); +cleanup_@ARGNAME@: + loc_wipe (&@ARGNAME@); #pragma fragment PARENT_LOC -loc_t @ARGNAME @ = { - NULL, -}; - -new_meta += 16; /* skip over gfid */ -@ARGNAME @.parent = recon_get_inode(fs, *((uuid_t *)new_meta)); -if (!@ARGNAME @.parent) { - goto *err_label; -} -err_label = &&cleanup_ @ARGNAME @; -gf_uuid_copy(@ARGNAME @.pargfid, @ARGNAME @.parent->gfid); -new_meta += 16; -if (!*(new_meta++)) { - goto *err_label; -} -@ARGNAME @.name = new_meta; -new_meta += strlen(new_meta) + 1; + loc_t @ARGNAME@ = { NULL, }; -@ARGNAME @.inode = inode_new(fs->active_subvol->itable); -if (!@ARGNAME @.inode) { - goto *err_label; -} + new_meta += 16; /* skip over gfid */ + @ARGNAME@.parent = recon_get_inode (fs, *((uuid_t *)new_meta)); + if (!@ARGNAME@.parent) { + goto *err_label; + } + err_label = &&cleanup_@ARGNAME@; + gf_uuid_copy (@ARGNAME@.pargfid, @ARGNAME@.parent->gfid); + new_meta += 16; + if (!*(new_meta++)) { + goto *err_label; + } + @ARGNAME@.name = new_meta; + new_meta += strlen(new_meta) + 1; + + @ARGNAME@.inode = inode_new (fs->active_subvol->itable); + if (!@ARGNAME@.inode) { + goto *err_label; + } #pragma fragment PARENT_LOC_CLEANUP -cleanup_ @ARGNAME @ : loc_wipe(&@ARGNAME @); +cleanup_@ARGNAME@: + loc_wipe (&@ARGNAME@); #pragma fragment STRING -char *@ARGNAME @; -if (*(new_meta++)) { - @ARGNAME @ = new_meta; - new_meta += (strlen(new_meta) + 1); -} else { - goto *err_label; -} + char *@ARGNAME@; + if (*(new_meta++)) { + @ARGNAME@ = new_meta; + new_meta += (strlen(new_meta) + 1); + } + else { + goto *err_label; + } #pragma fragment VECTOR -struct iovec @ARGNAME @; + struct iovec @ARGNAME@; -@ARGNAME @.iov_len = *((size_t *)new_meta); -new_meta += sizeof(@ARGNAME @.iov_len); -@ARGNAME @.iov_base = new_data; -new_data += @ARGNAME @.iov_len; + @ARGNAME@.iov_len = *((size_t *)new_meta); + new_meta += sizeof(@ARGNAME@.iov_len); + @ARGNAME@.iov_base = new_data; + new_data += @ARGNAME@.iov_len; #pragma fragment IATT -struct iatt @ARGNAME @; -{ - @ARGNAME @.ia_prot = *((ia_prot_t *)new_meta); - new_meta += sizeof(ia_prot_t); - uint32_t *myints = (uint32_t *)new_meta; - @ARGNAME @.ia_uid = myints[0]; - @ARGNAME @.ia_gid = myints[1]; - @ARGNAME @.ia_atime = myints[2]; - @ARGNAME @.ia_atime_nsec = myints[3]; - @ARGNAME @.ia_mtime = myints[4]; - @ARGNAME @.ia_mtime_nsec = myints[5]; - new_meta += sizeof(*myints) * 6; -} + struct iatt @ARGNAME@; + { + @ARGNAME@.ia_prot = *((ia_prot_t *)new_meta); + new_meta += sizeof(ia_prot_t); + uint32_t *myints = (uint32_t *)new_meta; + @ARGNAME@.ia_uid = myints[0]; + @ARGNAME@.ia_gid = myints[1]; + @ARGNAME@.ia_atime = myints[2]; + @ARGNAME@.ia_atime_nsec = myints[3]; + @ARGNAME@.ia_mtime = myints[4]; + @ARGNAME@.ia_mtime_nsec = myints[5]; + new_meta += sizeof(*myints) * 6; + } #pragma fragment IOBREF -struct iobref *@ARGNAME @; + struct iobref *@ARGNAME@; -@ARGNAME @ = iobref_new(); -if (!@ARGNAME @) { - goto *err_label; -} -err_label = &&cleanup_ @ARGNAME @; + @ARGNAME@ = iobref_new(); + if (!@ARGNAME@) { + goto *err_label; + } + err_label = &&cleanup_@ARGNAME@; #pragma fragment IOBREF_CLEANUP -cleanup_ @ARGNAME @ : iobref_unref(@ARGNAME @); +cleanup_@ARGNAME@: + iobref_unref (@ARGNAME@); #pragma fragment LINK -/* TBD: check error */ -inode_t *new_inode = inode_link(@INODE_ARG @, NULL, NULL, @IATT_ARG @); -if (new_inode) { - inode_lookup(new_inode); -} + /* TBD: check error */ + inode_t *new_inode = inode_link (@INODE_ARG@, NULL, NULL, @IATT_ARG@); + if (new_inode) { + inode_lookup (new_inode); + } #pragma fragment FOP -int fdl_replay_ @NAME @(glfs_t *fs, char **old_meta, char **old_data) +int +fdl_replay_@NAME@ (glfs_t *fs, char **old_meta, char **old_data) { - char *new_meta = *old_meta; - char *new_data = *old_data; - int ret; - int status = 0xbad; - void *err_label = &&done; - - @FUNCTION_BODY @ - - ret = syncop_ @NAME @(fs->active_subvol, @SYNCOP_ARGS @, NULL); - if (ret != @SUCCESS_VALUE @) { - fprintf(stderr, "syncop_@NAME@ returned %d", ret); - goto *err_label; - } + char *new_meta = *old_meta; + char *new_data = *old_data; + int ret; + int status = 0xbad; + void *err_label = &&done; + +@FUNCTION_BODY@ + + ret = syncop_@NAME@ (fs->active_subvol, @SYNCOP_ARGS@, NULL); + if (ret != @SUCCESS_VALUE@) { + fprintf (stderr, "syncop_@NAME@ returned %d", ret); + goto *err_label; + } - @LINKS @ +@LINKS@ status = 0; - @CLEANUPS @ +@CLEANUPS@ - done : *old_meta = new_meta; - *old_data = new_data; - return status; +done: + *old_meta = new_meta; + *old_data = new_data; + return status; } #pragma fragment CASE -case GF_FOP_ @UPNAME @: - printf("=== GF_FOP_@UPNAME@\n"); - if (fdl_replay_ @NAME @(fs, &new_meta, &new_data) != 0) { - goto done; - } - recognized = 1; - break; + case GF_FOP_@UPNAME@: + printf ("=== GF_FOP_@UPNAME@\n"); + if (fdl_replay_@NAME@ (fs, &new_meta, &new_data) != 0) { + goto done; + } + recognized = 1; + break; #pragma fragment EPILOG - int - recon_execute(glfs_t *fs, char **old_meta, char **old_data) - { - char *new_meta = *old_meta; - char *new_data = *old_data; - int recognized = 0; - event_header_t *eh; +int +recon_execute (glfs_t *fs, char **old_meta, char **old_data) +{ + char *new_meta = *old_meta; + char *new_data = *old_data; + int recognized = 0; + event_header_t *eh; eh = (event_header_t *)new_meta; - new_meta += sizeof(*eh); + new_meta += sizeof (*eh); /* TBD: check event_type instead of assuming NEW_REQUEST */ switch (eh->fop_type) { - @SWITCH_BODY @ +@SWITCH_BODY@ - default : printf("unknown fop %u\n", eh->fop_type); + default: + printf ("unknown fop %u\n", eh->fop_type); } - done: +done: *old_meta = new_meta; *old_data = new_data; return recognized; - } +} diff --git a/xlators/experimental/jbr-client/src/fop-template.c b/xlators/experimental/jbr-client/src/fop-template.c index d2f2212c480..7719f511f01 100644 --- a/xlators/experimental/jbr-client/src/fop-template.c +++ b/xlators/experimental/jbr-client/src/fop-template.c @@ -1,104 +1,113 @@ /* template-name fop */ -int32_t jbrc_ @NAME @(call_frame_t *frame, xlator_t *this, @LONG_ARGS @) +int32_t +jbrc_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) { - jbrc_local_t *local = NULL; - xlator_t *target_xl = ACTIVE_CHILD(this); + jbrc_local_t *local = NULL; + xlator_t *target_xl = ACTIVE_CHILD(this); - local = mem_get(this->local_pool); - if (!local) { - goto err; - } + local = mem_get(this->local_pool); + if (!local) { + goto err; + } - local->stub = fop_ @NAME - @_stub(frame, jbrc_ @NAME @_continue, @SHORT_ARGS @); - if (!local->stub) { - goto err; - } - local->curr_xl = target_xl; - local->scars = 0; + local->stub = fop_@NAME@_stub (frame, jbrc_@NAME@_continue, + @SHORT_ARGS@); + if (!local->stub) { + goto err; + } + local->curr_xl = target_xl; + local->scars = 0; - frame->local = local; - STACK_WIND_COOKIE(frame, jbrc_ @NAME @_cbk, target_xl, target_xl, - target_xl->fops->@NAME @, @SHORT_ARGS @); - return 0; + frame->local = local; + STACK_WIND_COOKIE (frame, jbrc_@NAME@_cbk, target_xl, + target_xl, target_xl->fops->@NAME@, + @SHORT_ARGS@); + return 0; err: - if (local) { - mem_put(local); - } - STACK_UNWIND_STRICT(@NAME @, frame, -1, ENOMEM, @ERROR_ARGS @); - return 0; + if (local) { + mem_put(local); + } + STACK_UNWIND_STRICT (@NAME@, frame, -1, ENOMEM, + @ERROR_ARGS@); + return 0; } /* template-name cbk */ -int32_t jbrc_ @NAME @_cbk(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, @LONG_ARGS @) +int32_t +jbrc_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + @LONG_ARGS@) { - jbrc_local_t *local = frame->local; - xlator_t *last_xl = cookie; - xlator_t *next_xl; - jbrc_private_t *priv = this->private; - struct timespec spec; + jbrc_local_t *local = frame->local; + xlator_t *last_xl = cookie; + xlator_t *next_xl; + jbrc_private_t *priv = this->private; + struct timespec spec; - if (op_ret != (-1)) { - if (local->scars) { - gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_RETRY_MSG, - HILITE("retried %p OK"), frame->local); + if (op_ret != (-1)) { + if (local->scars) { + gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_RETRY_MSG, + HILITE("retried %p OK"), frame->local); + } + priv->active = last_xl; + goto unwind; + } + if ((op_errno != EREMOTE) && (op_errno != ENOTCONN)) { + goto unwind; } - priv->active = last_xl; - goto unwind; - } - if ((op_errno != EREMOTE) && (op_errno != ENOTCONN)) { - goto unwind; - } - /* TBD: get leader ID from xdata? */ - next_xl = next_xlator(this, last_xl); - /* - * We can't just give up after we've tried all bricks, because it's - * quite likely that a new leader election just hasn't finished yet. - * We also shouldn't retry endlessly, and especially not at a high - * rate, but that's good enough while we work on other things. - * - * TBD: implement slow/finite retry via a worker thread - */ - if (!next_xl || (local->scars >= SCAR_LIMIT)) { - gf_msg(this->name, GF_LOG_DEBUG, 0, J_MSG_RETRY_MSG, - HILITE("ran out of retries for %p"), frame->local); - goto unwind; - } + /* TBD: get leader ID from xdata? */ + next_xl = next_xlator(this, last_xl); + /* + * We can't just give up after we've tried all bricks, because it's + * quite likely that a new leader election just hasn't finished yet. + * We also shouldn't retry endlessly, and especially not at a high + * rate, but that's good enough while we work on other things. + * + * TBD: implement slow/finite retry via a worker thread + */ + if (!next_xl || (local->scars >= SCAR_LIMIT)) { + gf_msg (this->name, GF_LOG_DEBUG, 0, J_MSG_RETRY_MSG, + HILITE("ran out of retries for %p"), frame->local); + goto unwind; + } - local->curr_xl = next_xl; - local->scars += 1; - spec.tv_sec = 1; - spec.tv_nsec = 0; - /* - * WARNING - * - * Just calling gf_timer_call_after like this leaves open the - * possibility that writes will get reordered, if a first write is - * rescheduled and then a second comes along to find an updated - * priv->active before the first actually executes. We might need to - * implement a stricter (and more complicated) queuing mechanism to - * ensure absolute consistency in this case. - */ - if (gf_timer_call_after(this->ctx, spec, jbrc_retry_cb, local)) { - return 0; - } + local->curr_xl = next_xl; + local->scars += 1; + spec.tv_sec = 1; + spec.tv_nsec = 0; + /* + * WARNING + * + * Just calling gf_timer_call_after like this leaves open the + * possibility that writes will get reordered, if a first write is + * rescheduled and then a second comes along to find an updated + * priv->active before the first actually executes. We might need to + * implement a stricter (and more complicated) queuing mechanism to + * ensure absolute consistency in this case. + */ + if (gf_timer_call_after(this->ctx, spec, jbrc_retry_cb, local)) { + return 0; + } unwind: - call_stub_destroy(local->stub); - STACK_UNWIND_STRICT(@NAME @, frame, op_ret, op_errno, @SHORT_ARGS @); - return 0; + call_stub_destroy(local->stub); + STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, + @SHORT_ARGS@); + return 0; } /* template-name cont-func */ -int32_t jbrc_ @NAME - @_continue(call_frame_t *frame, xlator_t *this, @LONG_ARGS @) +int32_t +jbrc_@NAME@_continue (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) { - jbrc_local_t *local = frame->local; + jbrc_local_t *local = frame->local; - STACK_WIND_COOKIE(frame, jbrc_ @NAME @_cbk, local->curr_xl, local->curr_xl, - local->curr_xl->fops->@NAME @, @SHORT_ARGS @); - return 0; + STACK_WIND_COOKIE (frame, jbrc_@NAME@_cbk, local->curr_xl, + local->curr_xl, local->curr_xl->fops->@NAME@, + @SHORT_ARGS@); + return 0; } diff --git a/xlators/experimental/jbr-server/src/all-templates.c b/xlators/experimental/jbr-server/src/all-templates.c index a3ae74fea0f..530c4187571 100644 --- a/xlators/experimental/jbr-server/src/all-templates.c +++ b/xlators/experimental/jbr-server/src/all-templates.c @@ -3,40 +3,46 @@ * will be ignored until we reach the first template-name comment. */ + /* template-name read-fop */ -int32_t jbr_ @NAME @(call_frame_t *frame, xlator_t *this, @LONG_ARGS @) +int32_t +jbr_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) { - jbr_private_t *priv = NULL; - gf_boolean_t in_recon = _gf_false; - int32_t op_errno = 0; - int32_t recon_term, recon_index; - - GF_VALIDATE_OR_GOTO("jbr", this, err); - priv = this->private; - GF_VALIDATE_OR_GOTO(this->name, priv, err); - GF_VALIDATE_OR_GOTO(this->name, frame, err); - - op_errno = EREMOTE; - - /* allow reads during reconciliation * - * TBD: allow "dirty" reads on non-leaders * - */ - if (xdata && (dict_get_int32(xdata, RECON_TERM_XATTR, &recon_term) == 0) && - (dict_get_int32(xdata, RECON_INDEX_XATTR, &recon_index) == 0)) { - in_recon = _gf_true; - } - - if ((!priv->leader) && (in_recon == _gf_false)) { - goto err; - } - - STACK_WIND(frame, default_ @NAME @_cbk, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->@NAME @, @SHORT_ARGS @); - return 0; + jbr_private_t *priv = NULL; + gf_boolean_t in_recon = _gf_false; + int32_t op_errno = 0; + int32_t recon_term, recon_index; + + GF_VALIDATE_OR_GOTO ("jbr", this, err); + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, err); + GF_VALIDATE_OR_GOTO (this->name, frame, err); + + op_errno = EREMOTE; + + /* allow reads during reconciliation * + * TBD: allow "dirty" reads on non-leaders * + */ + if (xdata && + (dict_get_int32(xdata, RECON_TERM_XATTR, &recon_term) == 0) && + (dict_get_int32(xdata, RECON_INDEX_XATTR, &recon_index) == 0)) { + in_recon = _gf_true; + } + + if ((!priv->leader) && (in_recon == _gf_false)) { + goto err; + } + + STACK_WIND (frame, default_@NAME@_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, + @SHORT_ARGS@); + return 0; err: - STACK_UNWIND_STRICT(@NAME @, frame, -1, op_errno, @ERROR_ARGS @); - return 0; + STACK_UNWIND_STRICT (@NAME@, frame, -1, op_errno, + @ERROR_ARGS@); + return 0; } /* template-name read-perform_local_op */ @@ -58,445 +64,479 @@ err: /* No "complete" function needed for @NAME@ */ /* template-name write-fop */ -int32_t jbr_ @NAME @(call_frame_t *frame, xlator_t *this, @LONG_ARGS @) +int32_t +jbr_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) { - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; - int32_t ret = -1; - int op_errno = ENOMEM; + jbr_local_t *local = NULL; + jbr_private_t *priv = NULL; + int32_t ret = -1; + int op_errno = ENOMEM; - GF_VALIDATE_OR_GOTO("jbr", this, err); - priv = this->private; - GF_VALIDATE_OR_GOTO(this->name, priv, err); - GF_VALIDATE_OR_GOTO(this->name, frame, err); + GF_VALIDATE_OR_GOTO ("jbr", this, err); + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, err); + GF_VALIDATE_OR_GOTO (this->name, frame, err); #if defined(JBR_CG_NEED_FD) - ret = jbr_leader_checks_and_init(frame, this, &op_errno, xdata, fd); + ret = jbr_leader_checks_and_init (frame, this, &op_errno, xdata, fd); #else - ret = jbr_leader_checks_and_init(frame, this, &op_errno, xdata, NULL); + ret = jbr_leader_checks_and_init (frame, this, &op_errno, xdata, NULL); #endif - if (ret) - goto err; - - local = frame->local; - - /* - * If we let it through despite not being the leader, then we just want - * to pass it on down without all of the additional xattrs, queuing, and - * so on. However, jbr_*_complete does depend on the initialization - * immediately above this. - */ - if (!priv->leader) { - STACK_WIND(frame, jbr_ @NAME @_complete, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->@NAME @, @SHORT_ARGS @); - return 0; - } - - ret = jbr_initialize_xdata_set_attrs(this, &xdata); - if (ret) - goto err; - - local->xdata = dict_ref(xdata); - local->stub = fop_ @NAME - @_stub(frame, jbr_ @NAME @_continue, @SHORT_ARGS @); - if (!local->stub) { - goto err; - } - - /* - * Can be used to just call_dispatch or be customised per fop to * - * perform ops specific to that particular fop. * - */ - ret = jbr_ @NAME @_perform_local_op(frame, this, &op_errno, @SHORT_ARGS @); - if (ret) - goto err; - - return ret; -err: - if (local) { - if (local->stub) { - call_stub_destroy(local->stub); + if (ret) + goto err; + + local = frame->local; + + /* + * If we let it through despite not being the leader, then we just want + * to pass it on down without all of the additional xattrs, queuing, and + * so on. However, jbr_*_complete does depend on the initialization + * immediately above this. + */ + if (!priv->leader) { + STACK_WIND (frame, jbr_@NAME@_complete, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, + @SHORT_ARGS@); + return 0; } - if (local->qstub) { - call_stub_destroy(local->qstub); + + ret = jbr_initialize_xdata_set_attrs (this, &xdata); + if (ret) + goto err; + + local->xdata = dict_ref(xdata); + local->stub = fop_@NAME@_stub (frame, jbr_@NAME@_continue, + @SHORT_ARGS@); + if (!local->stub) { + goto err; } - if (local->fd) { - fd_unref(local->fd); + + /* + * Can be used to just call_dispatch or be customised per fop to * + * perform ops specific to that particular fop. * + */ + ret = jbr_@NAME@_perform_local_op (frame, this, &op_errno, + @SHORT_ARGS@); + if (ret) + goto err; + + return ret; +err: + if (local) { + if (local->stub) { + call_stub_destroy(local->stub); + } + if (local->qstub) { + call_stub_destroy(local->qstub); + } + if (local->fd) { + fd_unref(local->fd); + } + mem_put(local); } - mem_put(local); - } - STACK_UNWIND_STRICT(@NAME @, frame, -1, op_errno, @ERROR_ARGS @); - return 0; + STACK_UNWIND_STRICT (@NAME@, frame, -1, op_errno, + @ERROR_ARGS@); + return 0; } /* template-name write-perform_local_op */ -int32_t jbr_ @NAME @_perform_local_op(call_frame_t *frame, xlator_t *this, - int *op_errno, @LONG_ARGS @) +int32_t +jbr_@NAME@_perform_local_op (call_frame_t *frame, xlator_t *this, int *op_errno, + @LONG_ARGS@) { - int32_t ret = -1; + int32_t ret = -1; - GF_VALIDATE_OR_GOTO("jbr", this, out); - GF_VALIDATE_OR_GOTO(this->name, frame, out); - GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + GF_VALIDATE_OR_GOTO ("jbr", this, out); + GF_VALIDATE_OR_GOTO (this->name, frame, out); + GF_VALIDATE_OR_GOTO (this->name, op_errno, out); - ret = jbr_ @NAME @_call_dispatch(frame, this, op_errno, @SHORT_ARGS @); + ret = jbr_@NAME@_call_dispatch (frame, this, op_errno, + @SHORT_ARGS@); out: - return ret; + return ret; } /* template-name write-call_dispatch */ -int32_t jbr_ @NAME @_call_dispatch(call_frame_t *frame, xlator_t *this, - int *op_errno, @LONG_ARGS @) +int32_t +jbr_@NAME@_call_dispatch (call_frame_t *frame, xlator_t *this, int *op_errno, + @LONG_ARGS@) { - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO("jbr", this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO(this->name, priv, out); - GF_VALIDATE_OR_GOTO(this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO(this->name, local, out); - GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + jbr_local_t *local = NULL; + jbr_private_t *priv = NULL; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO ("jbr", this, out); + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + GF_VALIDATE_OR_GOTO (this->name, frame, out); + local = frame->local; + GF_VALIDATE_OR_GOTO (this->name, local, out); + GF_VALIDATE_OR_GOTO (this->name, op_errno, out); #if defined(JBR_CG_QUEUE) - jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode); - if (!ictx) { - *op_errno = EIO; - goto out; - } - - LOCK(&ictx->lock); - if (ictx->active) { - gf_msg_debug(this->name, 0, "queuing request due to conflict"); - /* - * TBD: enqueue only for real conflict - * - * Currently we just act like all writes are in - * conflict with one another. What we should really do - * is check the active/pending queues and defer only if - * there's a conflict there. - * - * It's important to check the pending queue because we - * might have an active request X which conflicts with - * a pending request Y, and this request Z might - * conflict with Y but not X. If we checked only the - * active queue then Z could jump ahead of Y, which - * would be incorrect. - */ - local->qstub = fop_ @NAME - @_stub(frame, jbr_ @NAME @_dispatch, @SHORT_ARGS @); - if (!local->qstub) { - UNLOCK(&ictx->lock); - goto out; + jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode); + if (!ictx) { + *op_errno = EIO; + goto out; } - list_add_tail(&local->qlinks, &ictx->pqueue); - ++(ictx->pending); + + LOCK(&ictx->lock); + if (ictx->active) { + gf_msg_debug (this->name, 0, + "queuing request due to conflict"); + /* + * TBD: enqueue only for real conflict + * + * Currently we just act like all writes are in + * conflict with one another. What we should really do + * is check the active/pending queues and defer only if + * there's a conflict there. + * + * It's important to check the pending queue because we + * might have an active request X which conflicts with + * a pending request Y, and this request Z might + * conflict with Y but not X. If we checked only the + * active queue then Z could jump ahead of Y, which + * would be incorrect. + */ + local->qstub = fop_@NAME@_stub (frame, + jbr_@NAME@_dispatch, + @SHORT_ARGS@); + if (!local->qstub) { + UNLOCK(&ictx->lock); + goto out; + } + list_add_tail(&local->qlinks, &ictx->pqueue); + ++(ictx->pending); + UNLOCK(&ictx->lock); + ret = 0; + goto out; + } else { + list_add_tail(&local->qlinks, &ictx->aqueue); + ++(ictx->active); + } UNLOCK(&ictx->lock); - ret = 0; - goto out; - } else { - list_add_tail(&local->qlinks, &ictx->aqueue); - ++(ictx->active); - } - UNLOCK(&ictx->lock); #endif - ret = jbr_ @NAME @_dispatch(frame, this, @SHORT_ARGS @); + ret = jbr_@NAME@_dispatch (frame, this, @SHORT_ARGS@); out: - return ret; + return ret; } /* template-name write-dispatch */ -int32_t jbr_ @NAME @_dispatch(call_frame_t *frame, xlator_t *this, @LONG_ARGS @) +int32_t +jbr_@NAME@_dispatch (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) { - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; - int32_t ret = -1; - xlator_list_t *trav; - - GF_VALIDATE_OR_GOTO("jbr", this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO(this->name, priv, out); - GF_VALIDATE_OR_GOTO(this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO(this->name, local, out); - - /* - * TBD: unblock pending request(s) if we fail after this point but - * before we get to jbr_@NAME@_complete (where that code currently - * resides). - */ - - local->call_count = priv->n_children - 1; - for (trav = this->children->next; trav; trav = trav->next) { - STACK_WIND(frame, jbr_ @NAME @_fan_in, trav->xlator, - trav->xlator->fops->@NAME @, @SHORT_ARGS @); - } - - /* TBD: variable Issue count */ - ret = 0; + jbr_local_t *local = NULL; + jbr_private_t *priv = NULL; + int32_t ret = -1; + xlator_list_t *trav; + + GF_VALIDATE_OR_GOTO ("jbr", this, out); + priv = this->private; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + GF_VALIDATE_OR_GOTO (this->name, frame, out); + local = frame->local; + GF_VALIDATE_OR_GOTO (this->name, local, out); + + /* + * TBD: unblock pending request(s) if we fail after this point but + * before we get to jbr_@NAME@_complete (where that code currently + * resides). + */ + + local->call_count = priv->n_children - 1; + for (trav = this->children->next; trav; trav = trav->next) { + STACK_WIND (frame, jbr_@NAME@_fan_in, + trav->xlator, trav->xlator->fops->@NAME@, + @SHORT_ARGS@); + } + + /* TBD: variable Issue count */ + ret = 0; out: - return ret; + return ret; } /* template-name write-fan-in */ -int32_t jbr_ @NAME @_fan_in(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, @LONG_ARGS @) +int32_t +jbr_@NAME@_fan_in (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + @LONG_ARGS@) { - jbr_local_t *local = NULL; - int32_t ret = -1; - uint8_t call_count; - - GF_VALIDATE_OR_GOTO("jbr", this, out); - GF_VALIDATE_OR_GOTO(this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO(this->name, local, out); - - gf_msg_trace(this->name, 0, "op_ret = %d, op_errno = %d\n", op_ret, - op_errno); - - LOCK(&frame->lock); - call_count = --(local->call_count); - if (op_ret != -1) { - /* Increment the number of successful acks * - * received for the operation. * - */ - (local->successful_acks)++; - local->successful_op_ret = op_ret; - } - gf_msg_debug(this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n", - op_ret, op_errno, local->successful_acks); - UNLOCK(&frame->lock); - - /* TBD: variable Completion count */ - if (call_count == 0) { - call_resume(local->stub); - } - - ret = 0; + jbr_local_t *local = NULL; + int32_t ret = -1; + uint8_t call_count; + + GF_VALIDATE_OR_GOTO ("jbr", this, out); + GF_VALIDATE_OR_GOTO (this->name, frame, out); + local = frame->local; + GF_VALIDATE_OR_GOTO (this->name, local, out); + + gf_msg_trace (this->name, 0, "op_ret = %d, op_errno = %d\n", + op_ret, op_errno); + + LOCK(&frame->lock); + call_count = --(local->call_count); + if (op_ret != -1) { + /* Increment the number of successful acks * + * received for the operation. * + */ + (local->successful_acks)++; + local->successful_op_ret = op_ret; + } + gf_msg_debug (this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n", + op_ret, op_errno, local->successful_acks); + UNLOCK(&frame->lock); + + /* TBD: variable Completion count */ + if (call_count == 0) { + call_resume(local->stub); + } + + ret = 0; out: - return ret; + return ret; } /* template-name write-continue */ -int32_t jbr_ @NAME @_continue(call_frame_t *frame, xlator_t *this, @LONG_ARGS @) +int32_t +jbr_@NAME@_continue (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) { - int32_t ret = -1; - gf_boolean_t result = _gf_false; - jbr_local_t *local = NULL; - jbr_local_t *new_local = NULL; - jbr_private_t *priv = NULL; - int32_t op_errno = 0; - - GF_VALIDATE_OR_GOTO("jbr", this, out); - GF_VALIDATE_OR_GOTO(this->name, frame, out); - priv = this->private; - local = frame->local; - GF_VALIDATE_OR_GOTO(this->name, priv, out); - GF_VALIDATE_OR_GOTO(this->name, local, out); - - /* Perform quorum check to see if the leader needs * - * to perform the operation. If the operation will not * - * meet quorum irrespective of the leader's result * - * there is no point in the leader performing the fop * - */ - result = fop_quorum_check(this, (double)priv->n_children, - (double)local->successful_acks + 1); - if (result == _gf_false) { - gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET, - "Didn't receive enough acks " - "to meet quorum. Failing the operation without trying " - "it on the leader."); + int32_t ret = -1; + gf_boolean_t result = _gf_false; + jbr_local_t *local = NULL; + jbr_local_t *new_local = NULL; + jbr_private_t *priv = NULL; + int32_t op_errno = 0; + + GF_VALIDATE_OR_GOTO ("jbr", this, out); + GF_VALIDATE_OR_GOTO (this->name, frame, out); + priv = this->private; + local = frame->local; + GF_VALIDATE_OR_GOTO (this->name, priv, out); + GF_VALIDATE_OR_GOTO (this->name, local, out); + + /* Perform quorum check to see if the leader needs * + * to perform the operation. If the operation will not * + * meet quorum irrespective of the leader's result * + * there is no point in the leader performing the fop * + */ + result = fop_quorum_check (this, (double)priv->n_children, + (double)local->successful_acks + 1); + if (result == _gf_false) { + gf_msg (this->name, GF_LOG_ERROR, EROFS, + J_MSG_QUORUM_NOT_MET, "Didn't receive enough acks " + "to meet quorum. Failing the operation without trying " + "it on the leader."); #if defined(JBR_CG_QUEUE) - /* - * In case of a fop failure, before unwinding need to * - * remove it from queue * - */ - ret = jbr_remove_from_queue(frame, this); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_GENERIC, - "Failed to remove from queue."); - } + /* + * In case of a fop failure, before unwinding need to * + * remove it from queue * + */ + ret = jbr_remove_from_queue (frame, this); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + J_MSG_GENERIC, "Failed to remove from queue."); + } #endif - /* - * In this case, the quorum is not met on the followers * - * So the operation will not be performed on the leader * - * and a rollback will be sent via GF_FOP_IPC to all the * - * followers, where this particular fop's term and index * - * numbers will be journaled, and later used to rollback * - */ - call_frame_t *new_frame; - - new_frame = copy_frame(frame); - - if (new_frame) { - new_local = mem_get0(this->local_pool); - if (new_local) { - INIT_LIST_HEAD(&new_local->qlinks); - ret = dict_set_int32(local->xdata, "rollback-fop", - GF_FOP_ @UPNAME @); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR, - "failed to set rollback-fop"); + /* + * In this case, the quorum is not met on the followers * + * So the operation will not be performed on the leader * + * and a rollback will be sent via GF_FOP_IPC to all the * + * followers, where this particular fop's term and index * + * numbers will be journaled, and later used to rollback * + */ + call_frame_t *new_frame; + + new_frame = copy_frame (frame); + + if (new_frame) { + new_local = mem_get0(this->local_pool); + if (new_local) { + INIT_LIST_HEAD(&new_local->qlinks); + ret = dict_set_int32 (local->xdata, + "rollback-fop", + GF_FOP_@UPNAME@); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + J_MSG_DICT_FLR, + "failed to set rollback-fop"); + } else { + new_local->xdata = dict_ref(local->xdata); + new_frame->local = new_local; + jbr_ipc_call_dispatch (new_frame, + this, &op_errno, + FDL_IPC_JBR_SERVER_ROLLBACK, + new_local->xdata); + } + } else { + gf_log (this->name, GF_LOG_WARNING, + "Could not create local for new_frame"); + } } else { - new_local->xdata = dict_ref(local->xdata); - new_frame->local = new_local; - jbr_ipc_call_dispatch(new_frame, this, &op_errno, - FDL_IPC_JBR_SERVER_ROLLBACK, - new_local->xdata); + gf_log (this->name, GF_LOG_WARNING, + "Could not send rollback ipc"); } - } else { - gf_log(this->name, GF_LOG_WARNING, - "Could not create local for new_frame"); - } + + STACK_UNWIND_STRICT (@NAME@, frame, -1, EROFS, + @ERROR_ARGS@); } else { - gf_log(this->name, GF_LOG_WARNING, "Could not send rollback ipc"); + STACK_WIND (frame, jbr_@NAME@_complete, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, + @SHORT_ARGS@); } - STACK_UNWIND_STRICT(@NAME @, frame, -1, EROFS, @ERROR_ARGS @); - } else { - STACK_WIND(frame, jbr_ @NAME @_complete, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->@NAME @, @SHORT_ARGS @); - } - out: - return 0; + return 0; } /* template-name write-complete */ -int32_t jbr_ @NAME @_complete(call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, @LONG_ARGS @) +int32_t +jbr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + @LONG_ARGS@) { - int32_t ret = -1; - gf_boolean_t result = _gf_false; - jbr_private_t *priv = NULL; - jbr_local_t *local = NULL; - jbr_local_t *new_local = NULL; - - GF_VALIDATE_OR_GOTO("jbr", this, err); - GF_VALIDATE_OR_GOTO(this->name, frame, err); - priv = this->private; - local = frame->local; - GF_VALIDATE_OR_GOTO(this->name, priv, err); - GF_VALIDATE_OR_GOTO(this->name, local, err); - - /* If the fop failed on the leader, then reduce one successful ack - * before calculating the fop quorum - */ - LOCK(&frame->lock); - if (op_ret == -1) - (local->successful_acks)--; - UNLOCK(&frame->lock); + int32_t ret = -1; + gf_boolean_t result = _gf_false; + jbr_private_t *priv = NULL; + jbr_local_t *local = NULL; + jbr_local_t *new_local = NULL; + + GF_VALIDATE_OR_GOTO ("jbr", this, err); + GF_VALIDATE_OR_GOTO (this->name, frame, err); + priv = this->private; + local = frame->local; + GF_VALIDATE_OR_GOTO (this->name, priv, err); + GF_VALIDATE_OR_GOTO (this->name, local, err); + + /* If the fop failed on the leader, then reduce one successful ack + * before calculating the fop quorum + */ + LOCK(&frame->lock); + if (op_ret == -1) + (local->successful_acks)--; + UNLOCK(&frame->lock); #if defined(JBR_CG_QUEUE) - ret = jbr_remove_from_queue(frame, this); - if (ret) - goto err; + ret = jbr_remove_from_queue (frame, this); + if (ret) + goto err; #endif #if defined(JBR_CG_FSYNC) - jbr_mark_fd_dirty(this, local); + jbr_mark_fd_dirty(this, local); #endif #if defined(JBR_CG_NEED_FD) - fd_unref(local->fd); + fd_unref(local->fd); #endif - /* After the leader completes the fop, a quorum check is * - * performed, taking into account the outcome of the fop * - * on the leader. Irrespective of the fop being successful * - * or failing on the leader, the result of the quorum will * - * determine if the overall fop is successful or not. For * - * example, a fop might have succeeded on every node except * - * the leader, in which case as quorum is being met, the fop * - * will be treated as a successful fop, even though it failed * - * on the leader. On follower nodes, no quorum check should * - * be done, and the result is returned to the leader as is. * - */ - if (priv->leader) { - result = fop_quorum_check(this, (double)priv->n_children, - (double)local->successful_acks + 1); - if (result == _gf_false) { - op_ret = -1; - op_errno = EROFS; - gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET, - "Quorum is not met. " - "The operation has failed."); - /* - * In this case, the quorum is not met after the * - * operation is performed on the leader. Hence a * - * rollback will be sent via GF_FOP_IPC to the leader * - * where this particular fop's term and index numbers * - * will be journaled, and later used to rollback. * - * The same will be done on all the followers * - */ - call_frame_t *new_frame; - - new_frame = copy_frame(frame); - if (new_frame) { - new_local = mem_get0(this->local_pool); - if (new_local) { - INIT_LIST_HEAD(&new_local->qlinks); - gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR, - "op = %d", new_frame->op); - ret = dict_set_int32(local->xdata, "rollback-fop", - GF_FOP_ @UPNAME @); - if (ret) { - gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR, - "failed to set " - "rollback-fop"); - } else { - new_local->xdata = dict_ref(local->xdata); - new_frame->local = new_local; + /* After the leader completes the fop, a quorum check is * + * performed, taking into account the outcome of the fop * + * on the leader. Irrespective of the fop being successful * + * or failing on the leader, the result of the quorum will * + * determine if the overall fop is successful or not. For * + * example, a fop might have succeeded on every node except * + * the leader, in which case as quorum is being met, the fop * + * will be treated as a successful fop, even though it failed * + * on the leader. On follower nodes, no quorum check should * + * be done, and the result is returned to the leader as is. * + */ + if (priv->leader) { + result = fop_quorum_check (this, (double)priv->n_children, + (double)local->successful_acks + 1); + if (result == _gf_false) { + op_ret = -1; + op_errno = EROFS; + gf_msg (this->name, GF_LOG_ERROR, EROFS, + J_MSG_QUORUM_NOT_MET, "Quorum is not met. " + "The operation has failed."); /* - * Calling STACK_WIND instead * - * of jbr_ipc as it will not * - * unwind to the previous * - * translators like it will * - * in case of jbr_ipc. * + * In this case, the quorum is not met after the * + * operation is performed on the leader. Hence a * + * rollback will be sent via GF_FOP_IPC to the leader * + * where this particular fop's term and index numbers * + * will be journaled, and later used to rollback. * + * The same will be done on all the followers * */ - STACK_WIND( - new_frame, jbr_ipc_complete, FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ipc, - FDL_IPC_JBR_SERVER_ROLLBACK, new_local->xdata); - } + call_frame_t *new_frame; + + new_frame = copy_frame (frame); + if (new_frame) { + new_local = mem_get0(this->local_pool); + if (new_local) { + INIT_LIST_HEAD(&new_local->qlinks); + gf_msg (this->name, GF_LOG_ERROR, 0, + J_MSG_DICT_FLR, "op = %d", + new_frame->op); + ret = dict_set_int32 (local->xdata, + "rollback-fop", + GF_FOP_@UPNAME@); + if (ret) { + gf_msg (this->name, + GF_LOG_ERROR, 0, + J_MSG_DICT_FLR, + "failed to set " + "rollback-fop"); + } else { + new_local->xdata = dict_ref (local->xdata); + new_frame->local = new_local; + /* + * Calling STACK_WIND instead * + * of jbr_ipc as it will not * + * unwind to the previous * + * translators like it will * + * in case of jbr_ipc. * + */ + STACK_WIND (new_frame, + jbr_ipc_complete, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ipc, + FDL_IPC_JBR_SERVER_ROLLBACK, + new_local->xdata); + } + } else { + gf_log (this->name, GF_LOG_WARNING, + "Could not create local " + "for new_frame"); + } + } else { + gf_log (this->name, GF_LOG_WARNING, + "Could not send rollback ipc"); + } } else { - gf_log(this->name, GF_LOG_WARNING, - "Could not create local " - "for new_frame"); - } - } else { - gf_log(this->name, GF_LOG_WARNING, - "Could not send rollback ipc"); - } - } else { #if defined(JBR_CG_NEED_FD) - op_ret = local->successful_op_ret; + op_ret = local->successful_op_ret; #else - op_ret = 0; + op_ret = 0; #endif - op_errno = 0; - gf_msg_debug(this->name, 0, - "Quorum has met. The operation has succeeded."); + op_errno = 0; + gf_msg_debug (this->name, 0, + "Quorum has met. The operation has succeeded."); + } } - } - /* - * Unrefing the reference taken in jbr_@NAME@ () * - */ - dict_unref(local->xdata); + /* + * Unrefing the reference taken in jbr_@NAME@ () * + */ + dict_unref (local->xdata); + + STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, + @SHORT_ARGS@); - STACK_UNWIND_STRICT(@NAME @, frame, op_ret, op_errno, @SHORT_ARGS @); - return 0; + return 0; err: - STACK_UNWIND_STRICT(@NAME @, frame, -1, 0, @SHORT_ARGS @); + STACK_UNWIND_STRICT (@NAME@, frame, -1, 0, + @SHORT_ARGS@); - return 0; + return 0; } |