diff options
author | Gluster Ant <bugzilla-bot@gluster.org> | 2018-09-12 17:52:45 +0530 |
---|---|---|
committer | Nigel Babu <nigelb@redhat.com> | 2018-09-12 17:52:45 +0530 |
commit | e16868dede6455cab644805af6fe1ac312775e13 (patch) | |
tree | 15aebdb4fff2d87cf8a72f836816b3aa634da58d /xlators/experimental | |
parent | 45a71c0548b6fd2c757aa2e7b7671a1411948894 (diff) |
Land part 2 of clang-format changes
Change-Id: Ia84cc24c8924e6d22d02ac15f611c10e26db99b4
Signed-off-by: Nigel Babu <nigelb@redhat.com>
Diffstat (limited to 'xlators/experimental')
-rw-r--r-- | xlators/experimental/dht2/dht2-client/src/dht2-client-main.c | 35 | ||||
-rw-r--r-- | xlators/experimental/dht2/dht2-server/src/dht2-server-main.c | 35 | ||||
-rw-r--r-- | xlators/experimental/fdl/src/dump-tmpl.c | 223 | ||||
-rw-r--r-- | xlators/experimental/fdl/src/fdl-tmpl.c | 788 | ||||
-rw-r--r-- | xlators/experimental/fdl/src/logdump.c | 77 | ||||
-rw-r--r-- | xlators/experimental/fdl/src/recon-tmpl.c | 423 | ||||
-rw-r--r-- | xlators/experimental/fdl/src/recon.c | 120 | ||||
-rw-r--r-- | xlators/experimental/jbr-client/src/fop-template.c | 171 | ||||
-rw-r--r-- | xlators/experimental/jbr-client/src/jbrc.c | 373 | ||||
-rw-r--r-- | xlators/experimental/jbr-server/src/all-templates.c | 824 | ||||
-rw-r--r-- | xlators/experimental/jbr-server/src/jbr.c | 2739 | ||||
-rw-r--r-- | xlators/experimental/posix2/ds/src/posix2-ds-main.c | 33 | ||||
-rw-r--r-- | xlators/experimental/posix2/mds/src/posix2-mds-main.c | 33 |
13 files changed, 2843 insertions, 3031 deletions
diff --git a/xlators/experimental/dht2/dht2-client/src/dht2-client-main.c b/xlators/experimental/dht2/dht2-client/src/dht2-client-main.c index bd1d446e2b5..556385724a4 100644 --- a/xlators/experimental/dht2/dht2-client/src/dht2-client-main.c +++ b/xlators/experimental/dht2/dht2-client/src/dht2-client-main.c @@ -20,34 +20,33 @@ #include "statedump.h" int32_t -dht2_client_init (xlator_t *this) +dht2_client_init(xlator_t *this) { - if (!this->children) { - gf_log (this->name, GF_LOG_ERROR, - "Missing children in volume graph, this (%s) is" - " not a leaf translator", this->name); - return -1; - } - - return 0; + if (!this->children) { + gf_log(this->name, GF_LOG_ERROR, + "Missing children in volume graph, this (%s) is" + " not a leaf translator", + this->name); + return -1; + } + + return 0; } void -dht2_client_fini (xlator_t *this) +dht2_client_fini(xlator_t *this) { - return; + return; } class_methods_t class_methods = { - .init = dht2_client_init, - .fini = dht2_client_fini, + .init = dht2_client_init, + .fini = dht2_client_fini, }; -struct xlator_fops fops = { -}; +struct xlator_fops fops = {}; -struct xlator_cbks cbks = { -}; +struct xlator_cbks cbks = {}; /* struct xlator_dumpops dumpops = { @@ -55,5 +54,5 @@ struct xlator_dumpops dumpops = { */ struct volume_options options[] = { - { .key = {NULL} }, + {.key = {NULL}}, }; diff --git a/xlators/experimental/dht2/dht2-server/src/dht2-server-main.c b/xlators/experimental/dht2/dht2-server/src/dht2-server-main.c index 1f232cc3430..f051a44e99f 100644 --- a/xlators/experimental/dht2/dht2-server/src/dht2-server-main.c +++ b/xlators/experimental/dht2/dht2-server/src/dht2-server-main.c @@ -20,34 +20,33 @@ #include "statedump.h" int32_t -dht2_server_init (xlator_t *this) +dht2_server_init(xlator_t *this) { - if (!this->children) { - gf_log (this->name, GF_LOG_ERROR, - "Missing children in volume graph, this (%s) is" - " not a leaf translator", this->name); - return -1; - } - - return 0; + if (!this->children) { + gf_log(this->name, GF_LOG_ERROR, + "Missing children in volume graph, this (%s) is" + " not a leaf translator", + this->name); + return -1; + } + + return 0; } void -dht2_server_fini (xlator_t *this) +dht2_server_fini(xlator_t *this) { - return; + return; } class_methods_t class_methods = { - .init = dht2_server_init, - .fini = dht2_server_fini, + .init = dht2_server_init, + .fini = dht2_server_fini, }; -struct xlator_fops fops = { -}; +struct xlator_fops fops = {}; -struct xlator_cbks cbks = { -}; +struct xlator_cbks cbks = {}; /* struct xlator_dumpops dumpops = { @@ -55,5 +54,5 @@ struct xlator_dumpops dumpops = { */ struct volume_options options[] = { - { .key = {NULL} }, + {.key = {NULL}}, }; diff --git a/xlators/experimental/fdl/src/dump-tmpl.c b/xlators/experimental/fdl/src/dump-tmpl.c index 32b0fef6af3..d19f6589e66 100644 --- a/xlators/experimental/fdl/src/dump-tmpl.c +++ b/xlators/experimental/fdl/src/dump-tmpl.c @@ -14,150 +14,140 @@ * Returns 0 if the string is ASCII printable * * and -1 if it's not ASCII printable * */ -int str_isprint (char *s) +int +str_isprint(char *s) { - int ret = -1; + int ret = -1; - if (!s) - goto out; + if (!s) + goto out; - while (s[0] != '\0') { - if (!isprint(s[0])) - goto out; - else - s++; - } + while (s[0] != '\0') { + if (!isprint(s[0])) + goto out; + else + s++; + } - ret = 0; + ret = 0; out: - return ret; + return ret; } #pragma fragment DICT - { - int key_len, data_len; - char *key_ptr; - char *key_val; - printf ("@ARGNAME@ = dict {\n"); - for (;;) { - key_len = *((int *)new_meta); - new_meta += sizeof(int); - if (!key_len) { - break; - } - key_ptr = new_meta; - new_meta += key_len; - data_len = *((int *)new_meta); - key_val = new_meta + sizeof(int); - new_meta += sizeof(int) + data_len; - if (str_isprint(key_val)) - printf (" %s = <%d bytes>\n", - key_ptr, data_len); - else - printf (" %s = %s <%d bytes>\n", - key_ptr, key_val, data_len); - } - printf ("}\n"); +{ + int key_len, data_len; + char *key_ptr; + char *key_val; + printf("@ARGNAME@ = dict {\n"); + for (;;) { + key_len = *((int *)new_meta); + new_meta += sizeof(int); + if (!key_len) { + break; } + key_ptr = new_meta; + new_meta += key_len; + data_len = *((int *)new_meta); + key_val = new_meta + sizeof(int); + new_meta += sizeof(int) + data_len; + if (str_isprint(key_val)) + printf(" %s = <%d bytes>\n", key_ptr, data_len); + else + printf(" %s = %s <%d bytes>\n", key_ptr, key_val, data_len); + } + printf("}\n"); +} #pragma fragment DOUBLE - printf ("@ARGNAME@ = @FORMAT@\n", *((uint64_t *)new_meta), - *((uint64_t *)new_meta)); - new_meta += sizeof(uint64_t); +printf("@ARGNAME@ = @FORMAT@\n", *((uint64_t *)new_meta), + *((uint64_t *)new_meta)); +new_meta += sizeof(uint64_t); #pragma fragment GFID - printf ("@ARGNAME@ = <gfid %s>\n", uuid_utoa(*((uuid_t *)new_meta))); - new_meta += 16; +printf("@ARGNAME@ = <gfid %s>\n", uuid_utoa(*((uuid_t *)new_meta))); +new_meta += 16; #pragma fragment INTEGER - printf ("@ARGNAME@ = @FORMAT@\n", *((uint32_t *)new_meta), - *((uint32_t *)new_meta)); - new_meta += sizeof(uint32_t); +printf("@ARGNAME@ = @FORMAT@\n", *((uint32_t *)new_meta), + *((uint32_t *)new_meta)); +new_meta += sizeof(uint32_t); #pragma fragment LOC - printf ("@ARGNAME@ = loc {\n"); - printf (" gfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); - new_meta += 16; - printf (" pargfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); - new_meta += 16; - if (*(new_meta++)) { - printf (" name = %s\n", new_meta); - new_meta += (strlen(new_meta) + 1); - } - printf ("}\n"); +printf("@ARGNAME@ = loc {\n"); +printf(" gfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); +new_meta += 16; +printf(" pargfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); +new_meta += 16; +if (*(new_meta++)) { + printf(" name = %s\n", new_meta); + new_meta += (strlen(new_meta) + 1); +} +printf("}\n"); #pragma fragment STRING - if (*(new_meta++)) { - printf ("@ARGNAME@ = %s\n", new_meta); - new_meta += (strlen(new_meta) + 1); - } +if (*(new_meta++)) { + printf("@ARGNAME@ = %s\n", new_meta); + new_meta += (strlen(new_meta) + 1); +} #pragma fragment VECTOR - { - size_t len = *((size_t *)new_meta); - new_meta += sizeof(len); - printf ("@ARGNAME@ = <%zu bytes>\n", len); - new_data += len; - } +{ + size_t len = *((size_t *)new_meta); + new_meta += sizeof(len); + printf("@ARGNAME@ = <%zu bytes>\n", len); + new_data += len; +} #pragma fragment IATT - { - ia_prot_t *myprot = ((ia_prot_t *)new_meta); - printf ("@ARGNAME@ = iatt {\n"); - printf (" ia_prot = %c%c%c", - myprot->suid ? 'S' : '-', - myprot->sgid ? 'S' : '-', - myprot->sticky ? 'T' : '-'); - printf ("%c%c%c", - myprot->owner.read ? 'r' : '-', - myprot->owner.write ? 'w' : '-', - myprot->owner.exec ? 'x' : '-'); - printf ("%c%c%c", - myprot->group.read ? 'r' : '-', - myprot->group.write ? 'w' : '-', - myprot->group.exec ? 'x' : '-'); - printf ("%c%c%c\n", - myprot->other.read ? 'r' : '-', - myprot->other.write ? 'w' : '-', - myprot->other.exec ? 'x' : '-'); - new_meta += sizeof(ia_prot_t); - uint32_t *myints = (uint32_t *)new_meta; - printf (" ia_uid = %u\n", myints[0]); - printf (" ia_gid = %u\n", myints[1]); - printf (" ia_atime = %u.%09u\n", myints[2], myints[3]); - printf (" ia_mtime = %u.%09u\n", myints[4], myints[5]); - new_meta += sizeof(*myints) * 6; - } +{ + ia_prot_t *myprot = ((ia_prot_t *)new_meta); + printf("@ARGNAME@ = iatt {\n"); + printf(" ia_prot = %c%c%c", myprot->suid ? 'S' : '-', + myprot->sgid ? 'S' : '-', myprot->sticky ? 'T' : '-'); + printf("%c%c%c", myprot->owner.read ? 'r' : '-', + myprot->owner.write ? 'w' : '-', myprot->owner.exec ? 'x' : '-'); + printf("%c%c%c", myprot->group.read ? 'r' : '-', + myprot->group.write ? 'w' : '-', myprot->group.exec ? 'x' : '-'); + printf("%c%c%c\n", myprot->other.read ? 'r' : '-', + myprot->other.write ? 'w' : '-', myprot->other.exec ? 'x' : '-'); + new_meta += sizeof(ia_prot_t); + uint32_t *myints = (uint32_t *)new_meta; + printf(" ia_uid = %u\n", myints[0]); + printf(" ia_gid = %u\n", myints[1]); + printf(" ia_atime = %u.%09u\n", myints[2], myints[3]); + printf(" ia_mtime = %u.%09u\n", myints[4], myints[5]); + new_meta += sizeof(*myints) * 6; +} #pragma fragment FOP -void -fdl_dump_@NAME@ (char **old_meta, char **old_data) +void fdl_dump_ @NAME @(char **old_meta, char **old_data) { - char *new_meta = *old_meta; - char *new_data = *old_data; + char *new_meta = *old_meta; + char *new_data = *old_data; - /* TBD: word size/endianness */ -@FUNCTION_BODY@ + /* TBD: word size/endianness */ + @FUNCTION_BODY @ *old_meta = new_meta; - *old_data = new_data; + *old_data = new_data; } #pragma fragment CASE - case GF_FOP_@UPNAME@: - printf ("=== GF_FOP_@UPNAME@\n"); - fdl_dump_@NAME@ (&new_meta, &new_data); - break; +case GF_FOP_ @UPNAME @: + printf("=== GF_FOP_@UPNAME@\n"); + fdl_dump_ @NAME @(&new_meta, &new_data); + break; #pragma fragment EPILOG -int -fdl_dump (char **old_meta, char **old_data) -{ - char *new_meta = *old_meta; - char *new_data = *old_data; - static glfs_t *fs = NULL; - int recognized = 1; - event_header_t *eh; + int + fdl_dump(char **old_meta, char **old_data) + { + char *new_meta = *old_meta; + char *new_data = *old_data; + static glfs_t *fs = NULL; + int recognized = 1; + event_header_t *eh; /* * We don't really call anything else in GFAPI, but this is the most @@ -165,23 +155,22 @@ fdl_dump (char **old_meta, char **old_data) * or glusterfsd initialize (e.g. setting up THIS). */ if (!fs) { - fs = glfs_new ("dummy"); + fs = glfs_new("dummy"); } eh = (event_header_t *)new_meta; - new_meta += sizeof (*eh); + new_meta += sizeof(*eh); /* TBD: check event_type instead of assuming NEW_REQUEST */ switch (eh->fop_type) { -@SWITCH_BODY@ + @SWITCH_BODY @ - default: - printf ("unknown fop %u\n", eh->fop_type); - recognized = 0; + default : printf("unknown fop %u\n", eh->fop_type); + recognized = 0; } *old_meta = new_meta; *old_data = new_data; return recognized; -} + } diff --git a/xlators/experimental/fdl/src/fdl-tmpl.c b/xlators/experimental/fdl/src/fdl-tmpl.c index 145dad7964a..7388b83e0bc 100644 --- a/xlators/experimental/fdl/src/fdl-tmpl.c +++ b/xlators/experimental/fdl/src/fdl-tmpl.c @@ -24,513 +24,489 @@ #include "fdl.h" /* TBD: make tunable */ -#define META_FILE_SIZE (1 << 20) -#define DATA_FILE_SIZE (1 << 24) +#define META_FILE_SIZE (1 << 20) +#define DATA_FILE_SIZE (1 << 24) -enum gf_fdl { - gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1, - gf_fdl_mt_end -}; +enum gf_fdl { gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1, gf_fdl_mt_end }; typedef struct { - char *type; - off_t size; - char *path; - int fd; - void * ptr; - off_t max_offset; + char *type; + off_t size; + char *path; + int fd; + void *ptr; + off_t max_offset; } log_obj_t; typedef struct { - struct list_head reqs; - pthread_mutex_t req_lock; - pthread_cond_t req_cond; - char *log_dir; - pthread_t worker; - gf_boolean_t should_stop; - gf_boolean_t change_term; - log_obj_t meta_log; - log_obj_t data_log; - int term; - int first_term; + struct list_head reqs; + pthread_mutex_t req_lock; + pthread_cond_t req_cond; + char *log_dir; + pthread_t worker; + gf_boolean_t should_stop; + gf_boolean_t change_term; + log_obj_t meta_log; + log_obj_t data_log; + int term; + int first_term; } fdl_private_t; int32_t -fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata); +fdl_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata); void -fdl_enqueue (xlator_t *this, call_stub_t *stub) +fdl_enqueue(xlator_t *this, call_stub_t *stub) { - fdl_private_t *priv = this->private; + fdl_private_t *priv = this->private; - pthread_mutex_lock (&priv->req_lock); - list_add_tail (&stub->list, &priv->reqs); - pthread_mutex_unlock (&priv->req_lock); + pthread_mutex_lock(&priv->req_lock); + list_add_tail(&stub->list, &priv->reqs); + pthread_mutex_unlock(&priv->req_lock); - pthread_cond_signal (&priv->req_cond); + pthread_cond_signal(&priv->req_cond); } #pragma generate char * -fdl_open_term_log (xlator_t *this, log_obj_t *obj, int term) +fdl_open_term_log(xlator_t *this, log_obj_t *obj, int term) { - fdl_private_t *priv = this->private; - int ret; - char * ptr = NULL; - - /* - * Use .jnl instead of .log so that we don't get test info (mistakenly) - * appended to our journal files. - */ - if (this->ctx->cmd_args.log_ident) { - ret = gf_asprintf (&obj->path, "%s/%s-%s-%d.jnl", - priv->log_dir, this->ctx->cmd_args.log_ident, - obj->type, term); - } - else { - ret = gf_asprintf (&obj->path, "%s/fubar-%s-%d.jnl", - priv->log_dir, obj->type, term); - } - if ((ret <= 0) || !obj->path) { - gf_log (this->name, GF_LOG_ERROR, - "failed to construct log-file path"); - goto err; - } - - gf_log (this->name, GF_LOG_INFO, "opening %s (size %ld)", - obj->path, obj->size); - - obj->fd = open (obj->path, O_RDWR|O_CREAT|O_TRUNC, 0666); - if (obj->fd < 0) { - gf_log (this->name, GF_LOG_ERROR, - "failed to open log file (%s)", strerror(errno)); - goto err; - } + fdl_private_t *priv = this->private; + int ret; + char *ptr = NULL; + + /* + * Use .jnl instead of .log so that we don't get test info (mistakenly) + * appended to our journal files. + */ + if (this->ctx->cmd_args.log_ident) { + ret = gf_asprintf(&obj->path, "%s/%s-%s-%d.jnl", priv->log_dir, + this->ctx->cmd_args.log_ident, obj->type, term); + } else { + ret = gf_asprintf(&obj->path, "%s/fubar-%s-%d.jnl", priv->log_dir, + obj->type, term); + } + if ((ret <= 0) || !obj->path) { + gf_log(this->name, GF_LOG_ERROR, "failed to construct log-file path"); + goto err; + } + + gf_log(this->name, GF_LOG_INFO, "opening %s (size %ld)", obj->path, + obj->size); + + obj->fd = open(obj->path, O_RDWR | O_CREAT | O_TRUNC, 0666); + if (obj->fd < 0) { + gf_log(this->name, GF_LOG_ERROR, "failed to open log file (%s)", + strerror(errno)); + goto err; + } #if !defined(GF_BSD_HOST_OS) - /* - * NetBSD can just go die in a fire. Even though it claims to support - * fallocate/posix_fallocate they don't actually *do* anything so the - * file size remains zero. Then mmap succeeds anyway, but any access - * to the mmap'ed region will segfault. It would be acceptable for - * fallocate to do what it says, for mmap to fail, or for access to - * extend the file. NetBSD managed to hit the trifecta of Getting - * Everything Wrong, and debugging in that environment to get this far - * has already been painful enough (systems I worked on in 1990 were - * better that way). We'll fall through to the lseek/write method, and - * performance will be worse, and TOO BAD. - */ - if (sys_fallocate(obj->fd,0,0,obj->size) < 0) + /* + * NetBSD can just go die in a fire. Even though it claims to support + * fallocate/posix_fallocate they don't actually *do* anything so the + * file size remains zero. Then mmap succeeds anyway, but any access + * to the mmap'ed region will segfault. It would be acceptable for + * fallocate to do what it says, for mmap to fail, or for access to + * extend the file. NetBSD managed to hit the trifecta of Getting + * Everything Wrong, and debugging in that environment to get this far + * has already been painful enough (systems I worked on in 1990 were + * better that way). We'll fall through to the lseek/write method, and + * performance will be worse, and TOO BAD. + */ + if (sys_fallocate(obj->fd, 0, 0, obj->size) < 0) #endif - { - gf_log (this->name, GF_LOG_WARNING, - "failed to fallocate space for log file"); - /* Have to do this the ugly page-faulty way. */ - (void) sys_lseek (obj->fd, obj->size-1, SEEK_SET); - (void) sys_write (obj->fd, "", 1); - } - - ptr = mmap (NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0); - if (ptr == MAP_FAILED) { - gf_log (this->name, GF_LOG_ERROR, "failed to mmap log (%s)", - strerror(errno)); - goto err; - } - - obj->ptr = ptr; - obj->max_offset = 0; - return ptr; + { + gf_log(this->name, GF_LOG_WARNING, + "failed to fallocate space for log file"); + /* Have to do this the ugly page-faulty way. */ + (void)sys_lseek(obj->fd, obj->size - 1, SEEK_SET); + (void)sys_write(obj->fd, "", 1); + } + + ptr = mmap(NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0); + if (ptr == MAP_FAILED) { + gf_log(this->name, GF_LOG_ERROR, "failed to mmap log (%s)", + strerror(errno)); + goto err; + } + + obj->ptr = ptr; + obj->max_offset = 0; + return ptr; err: - if (obj->fd >= 0) { - sys_close (obj->fd); - obj->fd = (-1); - } - if (obj->path) { - GF_FREE (obj->path); - obj->path = NULL; - } - return ptr; + if (obj->fd >= 0) { + sys_close(obj->fd); + obj->fd = (-1); + } + if (obj->path) { + GF_FREE(obj->path); + obj->path = NULL; + } + return ptr; } void -fdl_close_term_log (xlator_t *this, log_obj_t *obj) +fdl_close_term_log(xlator_t *this, log_obj_t *obj) { - fdl_private_t *priv = this->private; - - if (obj->ptr) { - (void) munmap (obj->ptr, obj->size); - obj->ptr = NULL; - } - - if (obj->fd >= 0) { - gf_log (this->name, GF_LOG_INFO, - "truncating term %d %s journal to %ld", - priv->term, obj->type, obj->max_offset); - if (sys_ftruncate(obj->fd,obj->max_offset) < 0) { - gf_log (this->name, GF_LOG_WARNING, - "failed to truncate journal (%s)", - strerror(errno)); - } - sys_close (obj->fd); - obj->fd = (-1); - } - - if (obj->path) { - GF_FREE (obj->path); - obj->path = NULL; + fdl_private_t *priv = this->private; + + if (obj->ptr) { + (void)munmap(obj->ptr, obj->size); + obj->ptr = NULL; + } + + if (obj->fd >= 0) { + gf_log(this->name, GF_LOG_INFO, "truncating term %d %s journal to %ld", + priv->term, obj->type, obj->max_offset); + if (sys_ftruncate(obj->fd, obj->max_offset) < 0) { + gf_log(this->name, GF_LOG_WARNING, + "failed to truncate journal (%s)", strerror(errno)); } + sys_close(obj->fd); + obj->fd = (-1); + } + + if (obj->path) { + GF_FREE(obj->path); + obj->path = NULL; + } } gf_boolean_t -fdl_change_term (xlator_t *this, char **meta_ptr, char **data_ptr) +fdl_change_term(xlator_t *this, char **meta_ptr, char **data_ptr) { - fdl_private_t *priv = this->private; + fdl_private_t *priv = this->private; - fdl_close_term_log (this, &priv->meta_log); - fdl_close_term_log (this, &priv->data_log); + fdl_close_term_log(this, &priv->meta_log); + fdl_close_term_log(this, &priv->data_log); - ++(priv->term); + ++(priv->term); - *meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term); - if (!*meta_ptr) { - return _gf_false; - } + *meta_ptr = fdl_open_term_log(this, &priv->meta_log, priv->term); + if (!*meta_ptr) { + return _gf_false; + } - *data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term); - if (!*data_ptr) { - return _gf_false; - } + *data_ptr = fdl_open_term_log(this, &priv->data_log, priv->term); + if (!*data_ptr) { + return _gf_false; + } - return _gf_true; + return _gf_true; } void * -fdl_worker (void *arg) +fdl_worker(void *arg) { - xlator_t *this = arg; - fdl_private_t *priv = this->private; - call_stub_t *stub; - char * meta_ptr = NULL; - off_t *meta_offset = &priv->meta_log.max_offset; - char * data_ptr = NULL; - off_t *data_offset = &priv->data_log.max_offset; - unsigned long base_as_ul; - void * msync_ptr; - size_t msync_len; - gf_boolean_t recycle; - void *err_label = &&err_unlocked; - - priv->meta_log.type = "meta"; - priv->meta_log.size = META_FILE_SIZE; - priv->meta_log.path = NULL; - priv->meta_log.fd = (-1); - priv->meta_log.ptr = NULL; - - priv->data_log.type = "data"; - priv->data_log.size = DATA_FILE_SIZE; - priv->data_log.path = NULL; - priv->data_log.fd = (-1); - priv->data_log.ptr = NULL; - - /* TBD: initial term should come from persistent storage (e.g. etcd) */ - priv->first_term = ++(priv->term); - meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term); - if (!meta_ptr) { + xlator_t *this = arg; + fdl_private_t *priv = this->private; + call_stub_t *stub; + char *meta_ptr = NULL; + off_t *meta_offset = &priv->meta_log.max_offset; + char *data_ptr = NULL; + off_t *data_offset = &priv->data_log.max_offset; + unsigned long base_as_ul; + void *msync_ptr; + size_t msync_len; + gf_boolean_t recycle; + void *err_label = &&err_unlocked; + + priv->meta_log.type = "meta"; + priv->meta_log.size = META_FILE_SIZE; + priv->meta_log.path = NULL; + priv->meta_log.fd = (-1); + priv->meta_log.ptr = NULL; + + priv->data_log.type = "data"; + priv->data_log.size = DATA_FILE_SIZE; + priv->data_log.path = NULL; + priv->data_log.fd = (-1); + priv->data_log.ptr = NULL; + + /* TBD: initial term should come from persistent storage (e.g. etcd) */ + priv->first_term = ++(priv->term); + meta_ptr = fdl_open_term_log(this, &priv->meta_log, priv->term); + if (!meta_ptr) { + goto *err_label; + } + data_ptr = fdl_open_term_log(this, &priv->data_log, priv->term); + if (!data_ptr) { + fdl_close_term_log(this, &priv->meta_log); + goto *err_label; + } + + for (;;) { + pthread_mutex_lock(&priv->req_lock); + err_label = &&err_locked; + while (list_empty(&priv->reqs)) { + pthread_cond_wait(&priv->req_cond, &priv->req_lock); + if (priv->should_stop) { goto *err_label; + } + if (priv->change_term) { + if (!fdl_change_term(this, &meta_ptr, &data_ptr)) { + goto *err_label; + } + priv->change_term = _gf_false; + continue; + } } - data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term); - if (!data_ptr) { - fdl_close_term_log (this, &priv->meta_log); - goto *err_label; + stub = list_entry(priv->reqs.next, call_stub_t, list); + list_del_init(&stub->list); + pthread_mutex_unlock(&priv->req_lock); + err_label = &&err_unlocked; + /* + * TBD: batch requests + * + * What we should do here is gather up *all* of the requests + * that have accumulated since we were last at this point, + * blast them all out in one big writev, and then dispatch them + * all before coming back for more. That maximizes throughput, + * at some cost to latency (due to queuing effects at the log + * stage). Note that we're likely to be above io-threads, so + * the dispatch itself will be parallelized (at further cost to + * latency). For now, we just do the simplest thing and handle + * one request all the way through before fetching the next. + * + * So, why mmap/msync instead of writev/fdatasync? Because it's + * faster. Much faster. So much faster that I half-suspect + * cheating, but it's more convenient for now than having to + * ensure that everything's page-aligned for O_DIRECT (the only + * alternative that still might avoid ridiculous levels of + * local-FS overhead). + * + * TBD: check that msync really does get our data to disk. + */ + gf_log(this->name, GF_LOG_DEBUG, "logging %u+%u bytes for op %d", + stub->jnl_meta_len, stub->jnl_data_len, stub->fop); + recycle = _gf_false; + if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) { + recycle = _gf_true; } - - for (;;) { - pthread_mutex_lock (&priv->req_lock); - err_label = &&err_locked; - while (list_empty(&priv->reqs)) { - pthread_cond_wait (&priv->req_cond, &priv->req_lock); - if (priv->should_stop) { - goto *err_label; - } - if (priv->change_term) { - if (!fdl_change_term(this, &meta_ptr, - &data_ptr)) { - goto *err_label; - } - priv->change_term = _gf_false; - continue; - } - } - stub = list_entry (priv->reqs.next, call_stub_t, list); - list_del_init (&stub->list); - pthread_mutex_unlock (&priv->req_lock); - err_label = &&err_unlocked; - /* - * TBD: batch requests - * - * What we should do here is gather up *all* of the requests - * that have accumulated since we were last at this point, - * blast them all out in one big writev, and then dispatch them - * all before coming back for more. That maximizes throughput, - * at some cost to latency (due to queuing effects at the log - * stage). Note that we're likely to be above io-threads, so - * the dispatch itself will be parallelized (at further cost to - * latency). For now, we just do the simplest thing and handle - * one request all the way through before fetching the next. - * - * So, why mmap/msync instead of writev/fdatasync? Because it's - * faster. Much faster. So much faster that I half-suspect - * cheating, but it's more convenient for now than having to - * ensure that everything's page-aligned for O_DIRECT (the only - * alternative that still might avoid ridiculous levels of - * local-FS overhead). - * - * TBD: check that msync really does get our data to disk. - */ - gf_log (this->name, GF_LOG_DEBUG, - "logging %u+%u bytes for op %d", - stub->jnl_meta_len, stub->jnl_data_len, stub->fop); - recycle = _gf_false; - if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) { - recycle = _gf_true; - } - if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) { - recycle = _gf_true; - } - if (recycle && !fdl_change_term(this,&meta_ptr,&data_ptr)) { - goto *err_label; - } - meta_ptr = priv->meta_log.ptr; - data_ptr = priv->data_log.ptr; - gf_log (this->name, GF_LOG_DEBUG, "serializing to %p/%p", - meta_ptr + *meta_offset, data_ptr + *data_offset); - stub->serialize (stub, meta_ptr + *meta_offset, - data_ptr + *data_offset); - if (stub->jnl_meta_len > 0) { - base_as_ul = (unsigned long) (meta_ptr + *meta_offset); - msync_ptr = (void *) (base_as_ul & ~0x0fff); - msync_len = (size_t) (base_as_ul & 0x0fff); - if (msync (msync_ptr, msync_len+stub->jnl_meta_len, - MS_SYNC) < 0) { - gf_log (this->name, GF_LOG_WARNING, - "failed to log request meta (%s)", - strerror(errno)); - } - *meta_offset += stub->jnl_meta_len; - } - if (stub->jnl_data_len > 0) { - base_as_ul = (unsigned long) (data_ptr + *data_offset); - msync_ptr = (void *) (base_as_ul & ~0x0fff); - msync_len = (size_t) (base_as_ul & 0x0fff); - if (msync (msync_ptr, msync_len+stub->jnl_data_len, - MS_SYNC) < 0) { - gf_log (this->name, GF_LOG_WARNING, - "failed to log request data (%s)", - strerror(errno)); - } - *data_offset += stub->jnl_data_len; - } - call_resume (stub); + if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) { + recycle = _gf_true; + } + if (recycle && !fdl_change_term(this, &meta_ptr, &data_ptr)) { + goto *err_label; + } + meta_ptr = priv->meta_log.ptr; + data_ptr = priv->data_log.ptr; + gf_log(this->name, GF_LOG_DEBUG, "serializing to %p/%p", + meta_ptr + *meta_offset, data_ptr + *data_offset); + stub->serialize(stub, meta_ptr + *meta_offset, data_ptr + *data_offset); + if (stub->jnl_meta_len > 0) { + base_as_ul = (unsigned long)(meta_ptr + *meta_offset); + msync_ptr = (void *)(base_as_ul & ~0x0fff); + msync_len = (size_t)(base_as_ul & 0x0fff); + if (msync(msync_ptr, msync_len + stub->jnl_meta_len, MS_SYNC) < 0) { + gf_log(this->name, GF_LOG_WARNING, + "failed to log request meta (%s)", strerror(errno)); + } + *meta_offset += stub->jnl_meta_len; } + if (stub->jnl_data_len > 0) { + base_as_ul = (unsigned long)(data_ptr + *data_offset); + msync_ptr = (void *)(base_as_ul & ~0x0fff); + msync_len = (size_t)(base_as_ul & 0x0fff); + if (msync(msync_ptr, msync_len + stub->jnl_data_len, MS_SYNC) < 0) { + gf_log(this->name, GF_LOG_WARNING, + "failed to log request data (%s)", strerror(errno)); + } + *data_offset += stub->jnl_data_len; + } + call_resume(stub); + } err_locked: - pthread_mutex_unlock (&priv->req_lock); + pthread_mutex_unlock(&priv->req_lock); err_unlocked: - fdl_close_term_log (this, &priv->meta_log); - fdl_close_term_log (this, &priv->data_log); - return NULL; + fdl_close_term_log(this, &priv->meta_log); + fdl_close_term_log(this, &priv->data_log); + return NULL; } int32_t -fdl_ipc_continue (call_frame_t *frame, xlator_t *this, - int32_t op, dict_t *xdata) +fdl_ipc_continue(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) { - /* - * Nothing to be done here. Just Unwind. * - */ - STACK_UNWIND_STRICT (ipc, frame, 0, 0, xdata); + /* + * Nothing to be done here. Just Unwind. * + */ + STACK_UNWIND_STRICT(ipc, frame, 0, 0, xdata); - return 0; + return 0; } int32_t -fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +fdl_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) { - call_stub_t *stub; - fdl_private_t *priv = this->private; - dict_t *tdict; - int32_t gt_err = EIO; - - switch (op) { + call_stub_t *stub; + fdl_private_t *priv = this->private; + dict_t *tdict; + int32_t gt_err = EIO; + switch (op) { case FDL_IPC_CHANGE_TERM: - gf_log (this->name, GF_LOG_INFO, "got CHANGE_TERM op"); - priv->change_term = _gf_true; - pthread_cond_signal (&priv->req_cond); - STACK_UNWIND_STRICT (ipc, frame, 0, 0, NULL); - break; + gf_log(this->name, GF_LOG_INFO, "got CHANGE_TERM op"); + priv->change_term = _gf_true; + pthread_cond_signal(&priv->req_cond); + STACK_UNWIND_STRICT(ipc, frame, 0, 0, NULL); + break; case FDL_IPC_GET_TERMS: - gf_log (this->name, GF_LOG_INFO, "got GET_TERMS op"); - tdict = dict_new (); - if (!tdict) { - gt_err = ENOMEM; - goto gt_done; - } - if (dict_set_int32(tdict,"first",priv->first_term) != 0) { - goto gt_done; - } - if (dict_set_int32(tdict,"last",priv->term) != 0) { - goto gt_done; - } - gt_err = 0; + gf_log(this->name, GF_LOG_INFO, "got GET_TERMS op"); + tdict = dict_new(); + if (!tdict) { + gt_err = ENOMEM; + goto gt_done; + } + if (dict_set_int32(tdict, "first", priv->first_term) != 0) { + goto gt_done; + } + if (dict_set_int32(tdict, "last", priv->term) != 0) { + goto gt_done; + } + gt_err = 0; gt_done: - if (gt_err) { - STACK_UNWIND_STRICT (ipc, frame, -1, gt_err, NULL); - } else { - STACK_UNWIND_STRICT (ipc, frame, 0, 0, tdict); - } - if (tdict) { - dict_unref (tdict); - } - break; + if (gt_err) { + STACK_UNWIND_STRICT(ipc, frame, -1, gt_err, NULL); + } else { + STACK_UNWIND_STRICT(ipc, frame, 0, 0, tdict); + } + if (tdict) { + dict_unref(tdict); + } + break; case FDL_IPC_JBR_SERVER_ROLLBACK: - /* - * In case of a rollback from jbr-server, dump * - * the term and index number in the journal, * - * which will later be used to rollback the fop * - */ - stub = fop_ipc_stub (frame, fdl_ipc_continue, - op, xdata); - fdl_len_ipc (stub); - stub->serialize = fdl_serialize_ipc; - fdl_enqueue (this, stub); - - break; + /* + * In case of a rollback from jbr-server, dump * + * the term and index number in the journal, * + * which will later be used to rollback the fop * + */ + stub = fop_ipc_stub(frame, fdl_ipc_continue, op, xdata); + fdl_len_ipc(stub); + stub->serialize = fdl_serialize_ipc; + fdl_enqueue(this, stub); + + break; default: - STACK_WIND_TAIL (frame, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ipc, - op, xdata); - } + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ipc, op, xdata); + } - return 0; + return 0; } int -fdl_init (xlator_t *this) +fdl_init(xlator_t *this) { - fdl_private_t *priv = NULL; - - priv = GF_CALLOC (1, sizeof (*priv), gf_fdl_mt_fdl_private_t); - if (!priv) { - gf_log (this->name, GF_LOG_ERROR, - "failed to allocate fdl_private"); - goto err; - } - - INIT_LIST_HEAD (&priv->reqs); - if (pthread_mutex_init (&priv->req_lock, NULL) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "failed to initialize req_lock"); - goto err; - } - if (pthread_cond_init (&priv->req_cond, NULL) != 0) { - gf_log (this->name, GF_LOG_ERROR, - "failed to initialize req_cond"); - goto err; - } - - GF_OPTION_INIT ("log-path", priv->log_dir, path, err); - - this->private = priv; - /* - * The rest of the fop table is automatically generated, so this is a - * bit cleaner than messing with the generation to add a hand-written - * exception. - */ - - if (gf_thread_create (&priv->worker, NULL, fdl_worker, this, - "fdlwrker") != 0) { - gf_log (this->name, GF_LOG_ERROR, - "failed to start fdl_worker"); - goto err; - } - - return 0; + fdl_private_t *priv = NULL; + + priv = GF_CALLOC(1, sizeof(*priv), gf_fdl_mt_fdl_private_t); + if (!priv) { + gf_log(this->name, GF_LOG_ERROR, "failed to allocate fdl_private"); + goto err; + } + + INIT_LIST_HEAD(&priv->reqs); + if (pthread_mutex_init(&priv->req_lock, NULL) != 0) { + gf_log(this->name, GF_LOG_ERROR, "failed to initialize req_lock"); + goto err; + } + if (pthread_cond_init(&priv->req_cond, NULL) != 0) { + gf_log(this->name, GF_LOG_ERROR, "failed to initialize req_cond"); + goto err; + } + + GF_OPTION_INIT("log-path", priv->log_dir, path, err); + + this->private = priv; + /* + * The rest of the fop table is automatically generated, so this is a + * bit cleaner than messing with the generation to add a hand-written + * exception. + */ + + if (gf_thread_create(&priv->worker, NULL, fdl_worker, this, "fdlwrker") != + 0) { + gf_log(this->name, GF_LOG_ERROR, "failed to start fdl_worker"); + goto err; + } + + return 0; err: - if (priv) { - GF_FREE(priv); - } - return -1; + if (priv) { + GF_FREE(priv); + } + return -1; } void -fdl_fini (xlator_t *this) +fdl_fini(xlator_t *this) { - fdl_private_t *priv = this->private; - - if (priv) { - priv->should_stop = _gf_true; - pthread_cond_signal (&priv->req_cond); - pthread_join (priv->worker, NULL); - GF_FREE(priv); - } + fdl_private_t *priv = this->private; + + if (priv) { + priv->should_stop = _gf_true; + pthread_cond_signal(&priv->req_cond); + pthread_join(priv->worker, NULL); + GF_FREE(priv); + } } int -fdl_reconfigure (xlator_t *this, dict_t *options) +fdl_reconfigure(xlator_t *this, dict_t *options) { - fdl_private_t *priv = this->private; + fdl_private_t *priv = this->private; - GF_OPTION_RECONF ("log_dir", priv->log_dir, options, path, out); - /* TBD: react if it changed */ + GF_OPTION_RECONF("log_dir", priv->log_dir, options, path, out); + /* TBD: react if it changed */ out: - return 0; + return 0; } int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int ret = -1; + int ret = -1; - GF_VALIDATE_OR_GOTO ("fdl", this, out); + GF_VALIDATE_OR_GOTO("fdl", this, out); - ret = xlator_mem_acct_init (this, gf_fdl_mt_end + 1); + ret = xlator_mem_acct_init(this, gf_fdl_mt_end + 1); - if (ret != 0) { - gf_log (this->name, GF_LOG_ERROR, "Memory accounting init" - "failed"); - return ret; - } -out: + if (ret != 0) { + gf_log(this->name, GF_LOG_ERROR, + "Memory accounting init" + "failed"); return ret; + } +out: + return ret; } class_methods_t class_methods = { - .init = fdl_init, - .fini = fdl_fini, - .reconfigure = fdl_reconfigure, - .notify = default_notify, + .init = fdl_init, + .fini = fdl_fini, + .reconfigure = fdl_reconfigure, + .notify = default_notify, }; struct volume_options options[] = { - { .key = {"log-path"}, - .type = GF_OPTION_TYPE_PATH, - .default_value = DEFAULT_LOG_FILE_DIRECTORY, - .description = "Directory for FDL files." - }, - { .key = {NULL} }, + {.key = {"log-path"}, + .type = GF_OPTION_TYPE_PATH, + .default_value = DEFAULT_LOG_FILE_DIRECTORY, + .description = "Directory for FDL files."}, + {.key = {NULL}}, }; struct xlator_cbks cbks = { - .release = default_release, - .releasedir = default_releasedir, - .forget = default_forget, + .release = default_release, + .releasedir = default_releasedir, + .forget = default_forget, }; diff --git a/xlators/experimental/fdl/src/logdump.c b/xlators/experimental/fdl/src/logdump.c index 7c979c32a04..6fbc5218d47 100644 --- a/xlators/experimental/fdl/src/logdump.c +++ b/xlators/experimental/fdl/src/logdump.c @@ -4,47 +4,48 @@ #include <unistd.h> #include <sys/mman.h> -extern int fdl_dump (char **, char **); +extern int +fdl_dump(char **, char **); int -main (int argc, char **argv) +main(int argc, char **argv) { - int meta_fd = (-1); - char *meta_buf = NULL; - int data_fd = (-1); - char *data_buf = NULL; - - meta_fd = open (argv[1], O_RDONLY); - if (meta_fd < 0) { - perror ("open"); - return EXIT_FAILURE; + int meta_fd = (-1); + char *meta_buf = NULL; + int data_fd = (-1); + char *data_buf = NULL; + + meta_fd = open(argv[1], O_RDONLY); + if (meta_fd < 0) { + perror("open"); + return EXIT_FAILURE; + } + + /* TBD: get proper length */ + meta_buf = mmap(NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0); + if (meta_buf == MAP_FAILED) { + perror("mmap"); + return EXIT_FAILURE; + } + + data_fd = open(argv[2], O_RDONLY); + if (data_fd < 0) { + perror("open"); + return EXIT_FAILURE; + } + + /* TBD: get proper length */ + data_buf = mmap(NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0); + if (data_buf == MAP_FAILED) { + perror("mmap"); + return EXIT_FAILURE; + } + + for (;;) { + if (!fdl_dump(&meta_buf, &data_buf)) { + break; } + } - /* TBD: get proper length */ - meta_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0); - if (meta_buf == MAP_FAILED) { - perror ("mmap"); - return EXIT_FAILURE; - } - - data_fd = open (argv[2], O_RDONLY); - if (data_fd < 0) { - perror ("open"); - return EXIT_FAILURE; - } - - /* TBD: get proper length */ - data_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0); - if (data_buf == MAP_FAILED) { - perror ("mmap"); - return EXIT_FAILURE; - } - - for (;;) { - if (!fdl_dump(&meta_buf,&data_buf)) { - break; - } - } - - return EXIT_SUCCESS; + return EXIT_SUCCESS; } diff --git a/xlators/experimental/fdl/src/recon-tmpl.c b/xlators/experimental/fdl/src/recon-tmpl.c index 228860401ae..4760eaad2e2 100644 --- a/xlators/experimental/fdl/src/recon-tmpl.c +++ b/xlators/experimental/fdl/src/recon-tmpl.c @@ -15,290 +15,283 @@ #define GFAPI_SUCCESS 0 inode_t * -recon_get_inode (glfs_t *fs, uuid_t gfid) +recon_get_inode(glfs_t *fs, uuid_t gfid) { - inode_t *inode; - loc_t loc = {NULL,}; - struct iatt iatt; - int ret; - inode_t *newinode; - - inode = inode_find (fs->active_subvol->itable, gfid); - if (inode) { - printf ("=== FOUND %s IN TABLE\n", uuid_utoa(gfid)); - return inode; - } - - loc.inode = inode_new (fs->active_subvol->itable); - if (!loc.inode) { - return NULL; - } - gf_uuid_copy (loc.inode->gfid, gfid); - gf_uuid_copy (loc.gfid, gfid); - - printf ("=== DOING LOOKUP FOR %s\n", uuid_utoa(gfid)); - - ret = syncop_lookup (fs->active_subvol, &loc, &iatt, - NULL, NULL, NULL); - if (ret != GFAPI_SUCCESS) { - fprintf (stderr, "syncop_lookup failed (%d)\n", ret); - return NULL; - } - - newinode = inode_link (loc.inode, NULL, NULL, &iatt); - if (newinode) { - inode_lookup (newinode); - } - - return newinode; + inode_t *inode; + loc_t loc = { + NULL, + }; + struct iatt iatt; + int ret; + inode_t *newinode; + + inode = inode_find(fs->active_subvol->itable, gfid); + if (inode) { + printf("=== FOUND %s IN TABLE\n", uuid_utoa(gfid)); + return inode; + } + + loc.inode = inode_new(fs->active_subvol->itable); + if (!loc.inode) { + return NULL; + } + gf_uuid_copy(loc.inode->gfid, gfid); + gf_uuid_copy(loc.gfid, gfid); + + printf("=== DOING LOOKUP FOR %s\n", uuid_utoa(gfid)); + + ret = syncop_lookup(fs->active_subvol, &loc, &iatt, NULL, NULL, NULL); + if (ret != GFAPI_SUCCESS) { + fprintf(stderr, "syncop_lookup failed (%d)\n", ret); + return NULL; + } + + newinode = inode_link(loc.inode, NULL, NULL, &iatt); + if (newinode) { + inode_lookup(newinode); + } + + return newinode; } #pragma fragment DICT - dict_t *@ARGNAME@; +dict_t *@ARGNAME @; - @ARGNAME@ = dict_new(); - if (!@ARGNAME@) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - - { - int key_len, data_len; - char *key_ptr; - int garbage; - for (;;) { - key_len = *((int *)new_meta); - new_meta += sizeof(int); - if (!key_len) { - break; - } - key_ptr = new_meta; - new_meta += key_len; - data_len = *((int *)new_meta); - new_meta += sizeof(int); - garbage = dict_set_static_bin (@ARGNAME@, key_ptr, - new_meta, data_len); - /* TBD: check error from dict_set_static_bin */ - (void)garbage; - new_meta += data_len; - } +@ARGNAME @ = dict_new(); +if (!@ARGNAME @) { + goto *err_label; +} +err_label = &&cleanup_ @ARGNAME @; + +{ + int key_len, data_len; + char *key_ptr; + int garbage; + for (;;) { + key_len = *((int *)new_meta); + new_meta += sizeof(int); + if (!key_len) { + break; } + key_ptr = new_meta; + new_meta += key_len; + data_len = *((int *)new_meta); + new_meta += sizeof(int); + garbage = dict_set_static_bin(@ARGNAME @, key_ptr, new_meta, data_len); + /* TBD: check error from dict_set_static_bin */ + (void)garbage; + new_meta += data_len; + } +} #pragma fragment DICT_CLEANUP -cleanup_@ARGNAME@: - dict_unref (@ARGNAME@); +cleanup_ @ARGNAME @ : dict_unref(@ARGNAME @); #pragma fragment DOUBLE - @ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta); - new_meta += sizeof(uint64_t); +@ARGTYPE @ @ARGNAME @ = *((@ARGTYPE @ *)new_meta); +new_meta += sizeof(uint64_t); #pragma fragment FD - inode_t *@ARGNAME@_ino; - fd_t *@ARGNAME@; +inode_t *@ARGNAME @_ino; +fd_t *@ARGNAME @; - @ARGNAME@_ino = recon_get_inode (fs, *((uuid_t *)new_meta)); - new_meta += 16; - if (!@ARGNAME@_ino) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@_ino; +@ARGNAME @_ino = recon_get_inode(fs, *((uuid_t *)new_meta)); +new_meta += 16; +if (!@ARGNAME @_ino) { + goto *err_label; +} +err_label = &&cleanup_ @ARGNAME @_ino; - @ARGNAME@ = fd_anonymous (@ARGNAME@_ino); - if (!@ARGNAME@) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; +@ARGNAME @ = fd_anonymous(@ARGNAME @_ino); +if (!@ARGNAME @) { + goto *err_label; +} +err_label = &&cleanup_ @ARGNAME @; #pragma fragment FD_CLEANUP -cleanup_@ARGNAME@: - fd_unref (@ARGNAME@); -cleanup_@ARGNAME@_ino: - inode_unref (@ARGNAME@_ino); +cleanup_ @ARGNAME @ : fd_unref(@ARGNAME @); +cleanup_ @ARGNAME @_ino : inode_unref(@ARGNAME @_ino); #pragma fragment NEW_FD - /* - * This pseudo-type is only used for create, and in that case we know - * we'll be using loc.inode, so it's not worth generalizing to take an - * extra argument. - */ - fd_t *@ARGNAME@ = fd_anonymous (loc.inode); - - if (!fd) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - new_meta += 16; +/* + * This pseudo-type is only used for create, and in that case we know + * we'll be using loc.inode, so it's not worth generalizing to take an + * extra argument. + */ +fd_t *@ARGNAME @ = fd_anonymous(loc.inode); + +if (!fd) { + goto *err_label; +} +err_label = &&cleanup_ @ARGNAME @; +new_meta += 16; #pragma fragment NEW_FD_CLEANUP -cleanup_@ARGNAME@: - fd_unref (@ARGNAME@); +cleanup_ @ARGNAME @ : fd_unref(@ARGNAME @); #pragma fragment INTEGER - @ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta); +@ARGTYPE @ @ARGNAME @ = *((@ARGTYPE @ *)new_meta); - new_meta += sizeof(@ARGTYPE@); +new_meta += sizeof(@ARGTYPE @); #pragma fragment LOC - loc_t @ARGNAME@ = { NULL, }; +loc_t @ARGNAME @ = { + NULL, +}; - @ARGNAME@.inode = recon_get_inode (fs, *((uuid_t *)new_meta)); - if (!@ARGNAME@.inode) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - gf_uuid_copy (@ARGNAME@.gfid, @ARGNAME@.inode->gfid); - new_meta += 16; - new_meta += 16; /* skip over pargfid */ - if (*(new_meta++)) { - @ARGNAME@.name = new_meta; - new_meta += strlen(new_meta) + 1; - } +@ARGNAME @.inode = recon_get_inode(fs, *((uuid_t *)new_meta)); +if (!@ARGNAME @.inode) { + goto *err_label; +} +err_label = &&cleanup_ @ARGNAME @; +gf_uuid_copy(@ARGNAME @.gfid, @ARGNAME @.inode->gfid); +new_meta += 16; +new_meta += 16; /* skip over pargfid */ +if (*(new_meta++)) { + @ARGNAME @.name = new_meta; + new_meta += strlen(new_meta) + 1; +} #pragma fragment LOC_CLEANUP -cleanup_@ARGNAME@: - loc_wipe (&@ARGNAME@); +cleanup_ @ARGNAME @ : loc_wipe(&@ARGNAME @); #pragma fragment PARENT_LOC - loc_t @ARGNAME@ = { NULL, }; - - new_meta += 16; /* skip over gfid */ - @ARGNAME@.parent = recon_get_inode (fs, *((uuid_t *)new_meta)); - if (!@ARGNAME@.parent) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; - gf_uuid_copy (@ARGNAME@.pargfid, @ARGNAME@.parent->gfid); - new_meta += 16; - if (!*(new_meta++)) { - goto *err_label; - } - @ARGNAME@.name = new_meta; - new_meta += strlen(new_meta) + 1; +loc_t @ARGNAME @ = { + NULL, +}; + +new_meta += 16; /* skip over gfid */ +@ARGNAME @.parent = recon_get_inode(fs, *((uuid_t *)new_meta)); +if (!@ARGNAME @.parent) { + goto *err_label; +} +err_label = &&cleanup_ @ARGNAME @; +gf_uuid_copy(@ARGNAME @.pargfid, @ARGNAME @.parent->gfid); +new_meta += 16; +if (!*(new_meta++)) { + goto *err_label; +} +@ARGNAME @.name = new_meta; +new_meta += strlen(new_meta) + 1; - @ARGNAME@.inode = inode_new (fs->active_subvol->itable); - if (!@ARGNAME@.inode) { - goto *err_label; - } +@ARGNAME @.inode = inode_new(fs->active_subvol->itable); +if (!@ARGNAME @.inode) { + goto *err_label; +} #pragma fragment PARENT_LOC_CLEANUP -cleanup_@ARGNAME@: - loc_wipe (&@ARGNAME@); +cleanup_ @ARGNAME @ : loc_wipe(&@ARGNAME @); #pragma fragment STRING - char *@ARGNAME@; - if (*(new_meta++)) { - @ARGNAME@ = new_meta; - new_meta += (strlen(new_meta) + 1); - } - else { - goto *err_label; - } +char *@ARGNAME @; +if (*(new_meta++)) { + @ARGNAME @ = new_meta; + new_meta += (strlen(new_meta) + 1); +} else { + goto *err_label; +} #pragma fragment VECTOR - struct iovec @ARGNAME@; +struct iovec @ARGNAME @; - @ARGNAME@.iov_len = *((size_t *)new_meta); - new_meta += sizeof(@ARGNAME@.iov_len); - @ARGNAME@.iov_base = new_data; - new_data += @ARGNAME@.iov_len; +@ARGNAME @.iov_len = *((size_t *)new_meta); +new_meta += sizeof(@ARGNAME @.iov_len); +@ARGNAME @.iov_base = new_data; +new_data += @ARGNAME @.iov_len; #pragma fragment IATT - struct iatt @ARGNAME@; - { - @ARGNAME@.ia_prot = *((ia_prot_t *)new_meta); - new_meta += sizeof(ia_prot_t); - uint32_t *myints = (uint32_t *)new_meta; - @ARGNAME@.ia_uid = myints[0]; - @ARGNAME@.ia_gid = myints[1]; - @ARGNAME@.ia_atime = myints[2]; - @ARGNAME@.ia_atime_nsec = myints[3]; - @ARGNAME@.ia_mtime = myints[4]; - @ARGNAME@.ia_mtime_nsec = myints[5]; - new_meta += sizeof(*myints) * 6; - } +struct iatt @ARGNAME @; +{ + @ARGNAME @.ia_prot = *((ia_prot_t *)new_meta); + new_meta += sizeof(ia_prot_t); + uint32_t *myints = (uint32_t *)new_meta; + @ARGNAME @.ia_uid = myints[0]; + @ARGNAME @.ia_gid = myints[1]; + @ARGNAME @.ia_atime = myints[2]; + @ARGNAME @.ia_atime_nsec = myints[3]; + @ARGNAME @.ia_mtime = myints[4]; + @ARGNAME @.ia_mtime_nsec = myints[5]; + new_meta += sizeof(*myints) * 6; +} #pragma fragment IOBREF - struct iobref *@ARGNAME@; +struct iobref *@ARGNAME @; - @ARGNAME@ = iobref_new(); - if (!@ARGNAME@) { - goto *err_label; - } - err_label = &&cleanup_@ARGNAME@; +@ARGNAME @ = iobref_new(); +if (!@ARGNAME @) { + goto *err_label; +} +err_label = &&cleanup_ @ARGNAME @; #pragma fragment IOBREF_CLEANUP -cleanup_@ARGNAME@: - iobref_unref (@ARGNAME@); +cleanup_ @ARGNAME @ : iobref_unref(@ARGNAME @); #pragma fragment LINK - /* TBD: check error */ - inode_t *new_inode = inode_link (@INODE_ARG@, NULL, NULL, @IATT_ARG@); - if (new_inode) { - inode_lookup (new_inode); - } +/* TBD: check error */ +inode_t *new_inode = inode_link(@INODE_ARG @, NULL, NULL, @IATT_ARG @); +if (new_inode) { + inode_lookup(new_inode); +} #pragma fragment FOP -int -fdl_replay_@NAME@ (glfs_t *fs, char **old_meta, char **old_data) +int fdl_replay_ @NAME @(glfs_t *fs, char **old_meta, char **old_data) { - char *new_meta = *old_meta; - char *new_data = *old_data; - int ret; - int status = 0xbad; - void *err_label = &&done; - -@FUNCTION_BODY@ - - ret = syncop_@NAME@ (fs->active_subvol, @SYNCOP_ARGS@, NULL); - if (ret != @SUCCESS_VALUE@) { - fprintf (stderr, "syncop_@NAME@ returned %d", ret); - goto *err_label; - } + char *new_meta = *old_meta; + char *new_data = *old_data; + int ret; + int status = 0xbad; + void *err_label = &&done; -@LINKS@ + @FUNCTION_BODY @ + + ret = syncop_ @NAME @(fs->active_subvol, @SYNCOP_ARGS @, NULL); + if (ret != @SUCCESS_VALUE @) { + fprintf(stderr, "syncop_@NAME@ returned %d", ret); + goto *err_label; + } + + @LINKS @ status = 0; -@CLEANUPS@ + @CLEANUPS @ -done: - *old_meta = new_meta; - *old_data = new_data; - return status; + done : *old_meta = new_meta; + *old_data = new_data; + return status; } #pragma fragment CASE - case GF_FOP_@UPNAME@: - printf ("=== GF_FOP_@UPNAME@\n"); - if (fdl_replay_@NAME@ (fs, &new_meta, &new_data) != 0) { - goto done; - } - recognized = 1; - break; +case GF_FOP_ @UPNAME @: + printf("=== GF_FOP_@UPNAME@\n"); + if (fdl_replay_ @NAME @(fs, &new_meta, &new_data) != 0) { + goto done; + } + recognized = 1; + break; #pragma fragment EPILOG -int -recon_execute (glfs_t *fs, char **old_meta, char **old_data) -{ - char *new_meta = *old_meta; - char *new_data = *old_data; - int recognized = 0; - event_header_t *eh; + int + recon_execute(glfs_t *fs, char **old_meta, char **old_data) + { + char *new_meta = *old_meta; + char *new_data = *old_data; + int recognized = 0; + event_header_t *eh; eh = (event_header_t *)new_meta; - new_meta += sizeof (*eh); + new_meta += sizeof(*eh); /* TBD: check event_type instead of assuming NEW_REQUEST */ switch (eh->fop_type) { -@SWITCH_BODY@ + @SWITCH_BODY @ - default: - printf ("unknown fop %u\n", eh->fop_type); + default : printf("unknown fop %u\n", eh->fop_type); } -done: + done: *old_meta = new_meta; *old_data = new_data; return recognized; -} + } diff --git a/xlators/experimental/fdl/src/recon.c b/xlators/experimental/fdl/src/recon.c index 14168a011e0..ec1bf37dad9 100644 --- a/xlators/experimental/fdl/src/recon.c +++ b/xlators/experimental/fdl/src/recon.c @@ -11,79 +11,79 @@ #define GFAPI_SUCCESS 0 -extern int recon_execute (glfs_t *, char **, char **); +extern int +recon_execute(glfs_t *, char **, char **); int -main (int argc, char **argv) +main(int argc, char **argv) { - glfs_t *fs; - int ret; - int meta_fd = (-1); - char *meta_buf = NULL; - int data_fd = (-1); - char *data_buf = NULL; + glfs_t *fs; + int ret; + int meta_fd = (-1); + char *meta_buf = NULL; + int data_fd = (-1); + char *data_buf = NULL; - fs = glfs_new ("whocares"); - if (!fs) { - fprintf (stderr, "glfs_new failed\n"); - return EXIT_FAILURE; - } + fs = glfs_new("whocares"); + if (!fs) { + fprintf(stderr, "glfs_new failed\n"); + return EXIT_FAILURE; + } - if (getenv("RECON_DEBUG")) { - ret = glfs_set_logging (fs, "/dev/stderr", 7); - } - else { - ret = glfs_set_logging (fs, "/dev/null", 0); - } + if (getenv("RECON_DEBUG")) { + ret = glfs_set_logging(fs, "/dev/stderr", 7); + } else { + ret = glfs_set_logging(fs, "/dev/null", 0); + } - if (ret != GFAPI_SUCCESS) { - fprintf (stderr, "glfs_set_logging failed (%d)\n", errno); - return EXIT_FAILURE; - } + if (ret != GFAPI_SUCCESS) { + fprintf(stderr, "glfs_set_logging failed (%d)\n", errno); + return EXIT_FAILURE; + } - ret = glfs_set_volfile (fs, argv[1]); - if (ret != GFAPI_SUCCESS) { - fprintf (stderr, "glfs_set_volfile failed (%d)\n", errno); - return EXIT_FAILURE; - } + ret = glfs_set_volfile(fs, argv[1]); + if (ret != GFAPI_SUCCESS) { + fprintf(stderr, "glfs_set_volfile failed (%d)\n", errno); + return EXIT_FAILURE; + } - ret = glfs_init (fs); - if (ret != GFAPI_SUCCESS) { - fprintf (stderr, "glfs_init failed (%d)\n", errno); - return EXIT_FAILURE; - } + ret = glfs_init(fs); + if (ret != GFAPI_SUCCESS) { + fprintf(stderr, "glfs_init failed (%d)\n", errno); + return EXIT_FAILURE; + } - meta_fd = open (argv[2], O_RDONLY); - if (meta_fd < 0) { - perror ("open"); - return EXIT_FAILURE; - } + meta_fd = open(argv[2], O_RDONLY); + if (meta_fd < 0) { + perror("open"); + return EXIT_FAILURE; + } - /* TBD: get proper length */ - meta_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0); - if (meta_buf == MAP_FAILED) { - perror ("mmap"); - return EXIT_FAILURE; - } + /* TBD: get proper length */ + meta_buf = mmap(NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0); + if (meta_buf == MAP_FAILED) { + perror("mmap"); + return EXIT_FAILURE; + } - data_fd = open (argv[3], O_RDONLY); - if (data_fd < 0) { - perror ("open"); - return EXIT_FAILURE; - } + data_fd = open(argv[3], O_RDONLY); + if (data_fd < 0) { + perror("open"); + return EXIT_FAILURE; + } - /* TBD: get proper length */ - data_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0); - if (data_buf == MAP_FAILED) { - perror ("mmap"); - return EXIT_FAILURE; - } + /* TBD: get proper length */ + data_buf = mmap(NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0); + if (data_buf == MAP_FAILED) { + perror("mmap"); + return EXIT_FAILURE; + } - for (;;) { - if (!recon_execute(fs,&meta_buf,&data_buf)) { - break; - } + for (;;) { + if (!recon_execute(fs, &meta_buf, &data_buf)) { + break; } + } - return EXIT_SUCCESS; + return EXIT_SUCCESS; } diff --git a/xlators/experimental/jbr-client/src/fop-template.c b/xlators/experimental/jbr-client/src/fop-template.c index 7719f511f01..d2f2212c480 100644 --- a/xlators/experimental/jbr-client/src/fop-template.c +++ b/xlators/experimental/jbr-client/src/fop-template.c @@ -1,113 +1,104 @@ /* template-name fop */ -int32_t -jbrc_@NAME@ (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) +int32_t jbrc_ @NAME @(call_frame_t *frame, xlator_t *this, @LONG_ARGS @) { - jbrc_local_t *local = NULL; - xlator_t *target_xl = ACTIVE_CHILD(this); + jbrc_local_t *local = NULL; + xlator_t *target_xl = ACTIVE_CHILD(this); - local = mem_get(this->local_pool); - if (!local) { - goto err; - } + local = mem_get(this->local_pool); + if (!local) { + goto err; + } - local->stub = fop_@NAME@_stub (frame, jbrc_@NAME@_continue, - @SHORT_ARGS@); - if (!local->stub) { - goto err; - } - local->curr_xl = target_xl; - local->scars = 0; + local->stub = fop_ @NAME + @_stub(frame, jbrc_ @NAME @_continue, @SHORT_ARGS @); + if (!local->stub) { + goto err; + } + local->curr_xl = target_xl; + local->scars = 0; - frame->local = local; - STACK_WIND_COOKIE (frame, jbrc_@NAME@_cbk, target_xl, - target_xl, target_xl->fops->@NAME@, - @SHORT_ARGS@); - return 0; + frame->local = local; + STACK_WIND_COOKIE(frame, jbrc_ @NAME @_cbk, target_xl, target_xl, + target_xl->fops->@NAME @, @SHORT_ARGS @); + return 0; err: - if (local) { - mem_put(local); - } - STACK_UNWIND_STRICT (@NAME@, frame, -1, ENOMEM, - @ERROR_ARGS@); - return 0; + if (local) { + mem_put(local); + } + STACK_UNWIND_STRICT(@NAME @, frame, -1, ENOMEM, @ERROR_ARGS @); + return 0; } /* template-name cbk */ -int32_t -jbrc_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - @LONG_ARGS@) +int32_t jbrc_ @NAME @_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, @LONG_ARGS @) { - jbrc_local_t *local = frame->local; - xlator_t *last_xl = cookie; - xlator_t *next_xl; - jbrc_private_t *priv = this->private; - struct timespec spec; + jbrc_local_t *local = frame->local; + xlator_t *last_xl = cookie; + xlator_t *next_xl; + jbrc_private_t *priv = this->private; + struct timespec spec; - if (op_ret != (-1)) { - if (local->scars) { - gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_RETRY_MSG, - HILITE("retried %p OK"), frame->local); - } - priv->active = last_xl; - goto unwind; - } - if ((op_errno != EREMOTE) && (op_errno != ENOTCONN)) { - goto unwind; + if (op_ret != (-1)) { + if (local->scars) { + gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_RETRY_MSG, + HILITE("retried %p OK"), frame->local); } + priv->active = last_xl; + goto unwind; + } + if ((op_errno != EREMOTE) && (op_errno != ENOTCONN)) { + goto unwind; + } - /* TBD: get leader ID from xdata? */ - next_xl = next_xlator(this, last_xl); - /* - * We can't just give up after we've tried all bricks, because it's - * quite likely that a new leader election just hasn't finished yet. - * We also shouldn't retry endlessly, and especially not at a high - * rate, but that's good enough while we work on other things. - * - * TBD: implement slow/finite retry via a worker thread - */ - if (!next_xl || (local->scars >= SCAR_LIMIT)) { - gf_msg (this->name, GF_LOG_DEBUG, 0, J_MSG_RETRY_MSG, - HILITE("ran out of retries for %p"), frame->local); - goto unwind; - } + /* TBD: get leader ID from xdata? */ + next_xl = next_xlator(this, last_xl); + /* + * We can't just give up after we've tried all bricks, because it's + * quite likely that a new leader election just hasn't finished yet. + * We also shouldn't retry endlessly, and especially not at a high + * rate, but that's good enough while we work on other things. + * + * TBD: implement slow/finite retry via a worker thread + */ + if (!next_xl || (local->scars >= SCAR_LIMIT)) { + gf_msg(this->name, GF_LOG_DEBUG, 0, J_MSG_RETRY_MSG, + HILITE("ran out of retries for %p"), frame->local); + goto unwind; + } - local->curr_xl = next_xl; - local->scars += 1; - spec.tv_sec = 1; - spec.tv_nsec = 0; - /* - * WARNING - * - * Just calling gf_timer_call_after like this leaves open the - * possibility that writes will get reordered, if a first write is - * rescheduled and then a second comes along to find an updated - * priv->active before the first actually executes. We might need to - * implement a stricter (and more complicated) queuing mechanism to - * ensure absolute consistency in this case. - */ - if (gf_timer_call_after(this->ctx, spec, jbrc_retry_cb, local)) { - return 0; - } + local->curr_xl = next_xl; + local->scars += 1; + spec.tv_sec = 1; + spec.tv_nsec = 0; + /* + * WARNING + * + * Just calling gf_timer_call_after like this leaves open the + * possibility that writes will get reordered, if a first write is + * rescheduled and then a second comes along to find an updated + * priv->active before the first actually executes. We might need to + * implement a stricter (and more complicated) queuing mechanism to + * ensure absolute consistency in this case. + */ + if (gf_timer_call_after(this->ctx, spec, jbrc_retry_cb, local)) { + return 0; + } unwind: - call_stub_destroy(local->stub); - STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, - @SHORT_ARGS@); - return 0; + call_stub_destroy(local->stub); + STACK_UNWIND_STRICT(@NAME @, frame, op_ret, op_errno, @SHORT_ARGS @); + return 0; } /* template-name cont-func */ -int32_t -jbrc_@NAME@_continue (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) +int32_t jbrc_ @NAME + @_continue(call_frame_t *frame, xlator_t *this, @LONG_ARGS @) { - jbrc_local_t *local = frame->local; + jbrc_local_t *local = frame->local; - STACK_WIND_COOKIE (frame, jbrc_@NAME@_cbk, local->curr_xl, - local->curr_xl, local->curr_xl->fops->@NAME@, - @SHORT_ARGS@); - return 0; + STACK_WIND_COOKIE(frame, jbrc_ @NAME @_cbk, local->curr_xl, local->curr_xl, + local->curr_xl->fops->@NAME @, @SHORT_ARGS @); + return 0; } diff --git a/xlators/experimental/jbr-client/src/jbrc.c b/xlators/experimental/jbr-client/src/jbrc.c index 9bb9346c5c0..01c3020e117 100644 --- a/xlators/experimental/jbr-client/src/jbrc.c +++ b/xlators/experimental/jbr-client/src/jbrc.c @@ -20,8 +20,8 @@ #include "jbrc.h" #include "statedump.h" -#define SCAR_LIMIT 20 -#define HILITE(x) ("[1;33m"x"[0m") +#define SCAR_LIMIT 20 +#define HILITE(x) ("[1;33m" x "[0m") /* * The fops are actually generated by gen-fops.py; the rest was mostly copied @@ -29,292 +29,283 @@ */ enum gf_dht_mem_types_ { - gf_mt_jbrc_private_t = gf_common_mt_end + 1, - gf_mt_jbrc_end + gf_mt_jbrc_private_t = gf_common_mt_end + 1, + gf_mt_jbrc_end }; -char *JBRC_XATTR = "user.jbr.active"; +char *JBRC_XATTR = "user.jbr.active"; -static inline -xlator_t * -ACTIVE_CHILD (xlator_t *parent) +static inline xlator_t * +ACTIVE_CHILD(xlator_t *parent) { - jbrc_private_t *priv = parent->private; + jbrc_private_t *priv = parent->private; - return priv ? priv->active : FIRST_CHILD(parent); + return priv ? priv->active : FIRST_CHILD(parent); } xlator_t * -next_xlator (xlator_t *this, xlator_t *prev) +next_xlator(xlator_t *this, xlator_t *prev) { - xlator_list_t *trav; + xlator_list_t *trav; - for (trav = this->children; trav; trav = trav->next) { - if (trav->xlator == prev) { - return trav->next ? trav->next->xlator - : this->children->xlator; - } + for (trav = this->children; trav; trav = trav->next) { + if (trav->xlator == prev) { + return trav->next ? trav->next->xlator : this->children->xlator; } + } - return NULL; + return NULL; } void -jbrc_retry_cb (void *cb_arg) +jbrc_retry_cb(void *cb_arg) { - jbrc_local_t *local = cb_arg; + jbrc_local_t *local = cb_arg; - gf_msg (__func__, GF_LOG_INFO, 0, J_MSG_RETRY_MSG, - HILITE("retrying %p"), local); - call_resume_wind(local->stub); + gf_msg(__func__, GF_LOG_INFO, 0, J_MSG_RETRY_MSG, HILITE("retrying %p"), + local); + call_resume_wind(local->stub); } #pragma generate int32_t -jbrc_forget (xlator_t *this, inode_t *inode) +jbrc_forget(xlator_t *this, inode_t *inode) { - gf_msg_callingfn (this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL, - "xlator does not implement forget_cbk"); - return 0; + gf_msg_callingfn(this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL, + "xlator does not implement forget_cbk"); + return 0; } - int32_t -jbrc_releasedir (xlator_t *this, fd_t *fd) +jbrc_releasedir(xlator_t *this, fd_t *fd) { - gf_msg_callingfn (this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL, - "xlator does not implement releasedir_cbk"); - return 0; + gf_msg_callingfn(this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL, + "xlator does not implement releasedir_cbk"); + return 0; } int32_t -jbrc_release (xlator_t *this, fd_t *fd) +jbrc_release(xlator_t *this, fd_t *fd) { - gf_msg_callingfn (this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL, - "xlator does not implement release_cbk"); - return 0; + gf_msg_callingfn(this->name, GF_LOG_WARNING, 0, J_MSG_INIT_FAIL, + "xlator does not implement release_cbk"); + return 0; } struct xlator_fops fops = { - .lookup = jbrc_lookup, - .stat = jbrc_stat, - .fstat = jbrc_fstat, - .truncate = jbrc_truncate, - .ftruncate = jbrc_ftruncate, - .access = jbrc_access, - .readlink = jbrc_readlink, - .mknod = jbrc_mknod, - .mkdir = jbrc_mkdir, - .unlink = jbrc_unlink, - .rmdir = jbrc_rmdir, - .symlink = jbrc_symlink, - .rename = jbrc_rename, - .link = jbrc_link, - .create = jbrc_create, - .open = jbrc_open, - .readv = jbrc_readv, - .writev = jbrc_writev, - .flush = jbrc_flush, - .fsync = jbrc_fsync, - .opendir = jbrc_opendir, - .readdir = jbrc_readdir, - .readdirp = jbrc_readdirp, - .fsyncdir = jbrc_fsyncdir, - .statfs = jbrc_statfs, - .setxattr = jbrc_setxattr, - .getxattr = jbrc_getxattr, - .fsetxattr = jbrc_fsetxattr, - .fgetxattr = jbrc_fgetxattr, - .removexattr = jbrc_removexattr, - .fremovexattr = jbrc_fremovexattr, - .lk = jbrc_lk, - .inodelk = jbrc_inodelk, - .finodelk = jbrc_finodelk, - .entrylk = jbrc_entrylk, - .fentrylk = jbrc_fentrylk, - .rchecksum = jbrc_rchecksum, - .xattrop = jbrc_xattrop, - .fxattrop = jbrc_fxattrop, - .setattr = jbrc_setattr, - .fsetattr = jbrc_fsetattr, - .fallocate = jbrc_fallocate, - .discard = jbrc_discard, -}; - -struct xlator_cbks cbks = { + .lookup = jbrc_lookup, + .stat = jbrc_stat, + .fstat = jbrc_fstat, + .truncate = jbrc_truncate, + .ftruncate = jbrc_ftruncate, + .access = jbrc_access, + .readlink = jbrc_readlink, + .mknod = jbrc_mknod, + .mkdir = jbrc_mkdir, + .unlink = jbrc_unlink, + .rmdir = jbrc_rmdir, + .symlink = jbrc_symlink, + .rename = jbrc_rename, + .link = jbrc_link, + .create = jbrc_create, + .open = jbrc_open, + .readv = jbrc_readv, + .writev = jbrc_writev, + .flush = jbrc_flush, + .fsync = jbrc_fsync, + .opendir = jbrc_opendir, + .readdir = jbrc_readdir, + .readdirp = jbrc_readdirp, + .fsyncdir = jbrc_fsyncdir, + .statfs = jbrc_statfs, + .setxattr = jbrc_setxattr, + .getxattr = jbrc_getxattr, + .fsetxattr = jbrc_fsetxattr, + .fgetxattr = jbrc_fgetxattr, + .removexattr = jbrc_removexattr, + .fremovexattr = jbrc_fremovexattr, + .lk = jbrc_lk, + .inodelk = jbrc_inodelk, + .finodelk = jbrc_finodelk, + .entrylk = jbrc_entrylk, + .fentrylk = jbrc_fentrylk, + .rchecksum = jbrc_rchecksum, + .xattrop = jbrc_xattrop, + .fxattrop = jbrc_fxattrop, + .setattr = jbrc_setattr, + .fsetattr = jbrc_fsetattr, + .fallocate = jbrc_fallocate, + .discard = jbrc_discard, }; +struct xlator_cbks cbks = {}; int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int ret = -1; + int ret = -1; - GF_VALIDATE_OR_GOTO ("jbrc", this, out); + GF_VALIDATE_OR_GOTO("jbrc", this, out); - ret = xlator_mem_acct_init (this, gf_mt_jbrc_end + 1); + ret = xlator_mem_acct_init(this, gf_mt_jbrc_end + 1); - if (ret != 0) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR, - "Memory accounting init failed"); - return ret; - } -out: + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR, + "Memory accounting init failed"); return ret; + } +out: + return ret; } - int32_t -jbrc_init (xlator_t *this) +jbrc_init(xlator_t *this) { - jbrc_private_t *priv = NULL; - xlator_list_t *trav = NULL; - - this->local_pool = mem_pool_new (jbrc_local_t, 128); - if (!this->local_pool) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR, - "failed to create jbrc_local_t pool"); - goto err; - } + jbrc_private_t *priv = NULL; + xlator_list_t *trav = NULL; - priv = GF_CALLOC (1, sizeof (*priv), gf_mt_jbrc_private_t); - if (!priv) { - goto err; - } + this->local_pool = mem_pool_new(jbrc_local_t, 128); + if (!this->local_pool) { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR, + "failed to create jbrc_local_t pool"); + goto err; + } - for (trav = this->children; trav; trav = trav->next) { - ++(priv->n_children); - } + priv = GF_CALLOC(1, sizeof(*priv), gf_mt_jbrc_private_t); + if (!priv) { + goto err; + } - priv->active = FIRST_CHILD(this); - this->private = priv; - return 0; + for (trav = this->children; trav; trav = trav->next) { + ++(priv->n_children); + } + + priv->active = FIRST_CHILD(this); + this->private = priv; + return 0; err: - if (priv) { - GF_FREE(priv); - } - return -1; + if (priv) { + GF_FREE(priv); + } + return -1; } void -jbrc_fini (xlator_t *this) +jbrc_fini(xlator_t *this) { - GF_FREE(this->private); + GF_FREE(this->private); } int -jbrc_get_child_index (xlator_t *this, xlator_t *kid) +jbrc_get_child_index(xlator_t *this, xlator_t *kid) { - xlator_list_t *trav; - int retval = -1; - - for (trav = this->children; trav; trav = trav->next) { - ++retval; - if (trav->xlator == kid) { - return retval; - } + xlator_list_t *trav; + int retval = -1; + + for (trav = this->children; trav; trav = trav->next) { + ++retval; + if (trav->xlator == kid) { + return retval; } + } - return -1; + return -1; } uint8_t -jbrc_count_up_kids (jbrc_private_t *priv) +jbrc_count_up_kids(jbrc_private_t *priv) { - uint8_t retval = 0; - uint8_t i; + uint8_t retval = 0; + uint8_t i; - for (i = 0; i < priv->n_children; ++i) { - if (priv->kid_state & (1 << i)) { - ++retval; - } + for (i = 0; i < priv->n_children; ++i) { + if (priv->kid_state & (1 << i)) { + ++retval; } + } - return retval; + return retval; } int32_t -jbrc_notify (xlator_t *this, int32_t event, void *data, ...) +jbrc_notify(xlator_t *this, int32_t event, void *data, ...) { - int32_t ret = 0; - int32_t index = 0; - jbrc_private_t *priv = NULL; + int32_t ret = 0; + int32_t index = 0; + jbrc_private_t *priv = NULL; - GF_VALIDATE_OR_GOTO (THIS->name, this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); + GF_VALIDATE_OR_GOTO(THIS->name, this, out); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); - switch (event) { + switch (event) { case GF_EVENT_CHILD_UP: - index = jbrc_get_child_index(this, data); - if (index >= 0) { - priv->kid_state |= (1 << index); - priv->up_children = jbrc_count_up_kids(priv); - gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, - "got CHILD_UP for %s, now %u kids", - ((xlator_t *)data)->name, - priv->up_children); - } - ret = default_notify (this, event, data); - break; + index = jbrc_get_child_index(this, data); + if (index >= 0) { + priv->kid_state |= (1 << index); + priv->up_children = jbrc_count_up_kids(priv); + gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, + "got CHILD_UP for %s, now %u kids", + ((xlator_t *)data)->name, priv->up_children); + } + ret = default_notify(this, event, data); + break; case GF_EVENT_CHILD_DOWN: - index = jbrc_get_child_index(this, data); - if (index >= 0) { - priv->kid_state &= ~(1 << index); - priv->up_children = jbrc_count_up_kids(priv); - gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, - "got CHILD_DOWN for %s, now %u kids", - ((xlator_t *)data)->name, - priv->up_children); - } - break; + index = jbrc_get_child_index(this, data); + if (index >= 0) { + priv->kid_state &= ~(1 << index); + priv->up_children = jbrc_count_up_kids(priv); + gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, + "got CHILD_DOWN for %s, now %u kids", + ((xlator_t *)data)->name, priv->up_children); + } + break; default: - ret = default_notify (this, event, data); - } + ret = default_notify(this, event, data); + } out: - return ret; + return ret; } int -jbrc_priv_dump (xlator_t *this) +jbrc_priv_dump(xlator_t *this) { - jbrc_private_t *priv = NULL; - char key_prefix[GF_DUMP_MAX_BUF_LEN]; - xlator_list_t *trav = NULL; - int32_t i = -1; + jbrc_private_t *priv = NULL; + char key_prefix[GF_DUMP_MAX_BUF_LEN]; + xlator_list_t *trav = NULL; + int32_t i = -1; - GF_VALIDATE_OR_GOTO (THIS->name, this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); + GF_VALIDATE_OR_GOTO(THIS->name, this, out); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); - snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", - this->type, this->name); - gf_proc_dump_add_section(key_prefix); + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name); + gf_proc_dump_add_section(key_prefix); - gf_proc_dump_write("up_children", "%u", priv->up_children); + gf_proc_dump_write("up_children", "%u", priv->up_children); - for (trav = this->children, i = 0; trav; trav = trav->next, i++) { - snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "child_%d", i); - gf_proc_dump_write(key_prefix, "%s", trav->xlator->name); - } + for (trav = this->children, i = 0; trav; trav = trav->next, i++) { + snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "child_%d", i); + gf_proc_dump_write(key_prefix, "%s", trav->xlator->name); + } out: - return 0; + return 0; } struct xlator_dumpops dumpops = { - .priv = jbrc_priv_dump, + .priv = jbrc_priv_dump, }; class_methods_t class_methods = { - .init = jbrc_init, - .fini = jbrc_fini, - .notify = jbrc_notify, + .init = jbrc_init, + .fini = jbrc_fini, + .notify = jbrc_notify, }; struct volume_options options[] = { - { .key = {NULL} }, + {.key = {NULL}}, }; diff --git a/xlators/experimental/jbr-server/src/all-templates.c b/xlators/experimental/jbr-server/src/all-templates.c index 530c4187571..a3ae74fea0f 100644 --- a/xlators/experimental/jbr-server/src/all-templates.c +++ b/xlators/experimental/jbr-server/src/all-templates.c @@ -3,46 +3,40 @@ * will be ignored until we reach the first template-name comment. */ - /* template-name read-fop */ -int32_t -jbr_@NAME@ (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) +int32_t jbr_ @NAME @(call_frame_t *frame, xlator_t *this, @LONG_ARGS @) { - jbr_private_t *priv = NULL; - gf_boolean_t in_recon = _gf_false; - int32_t op_errno = 0; - int32_t recon_term, recon_index; - - GF_VALIDATE_OR_GOTO ("jbr", this, err); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, err); - GF_VALIDATE_OR_GOTO (this->name, frame, err); - - op_errno = EREMOTE; - - /* allow reads during reconciliation * - * TBD: allow "dirty" reads on non-leaders * - */ - if (xdata && - (dict_get_int32(xdata, RECON_TERM_XATTR, &recon_term) == 0) && - (dict_get_int32(xdata, RECON_INDEX_XATTR, &recon_index) == 0)) { - in_recon = _gf_true; - } - - if ((!priv->leader) && (in_recon == _gf_false)) { - goto err; - } - - STACK_WIND (frame, default_@NAME@_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, - @SHORT_ARGS@); - return 0; + jbr_private_t *priv = NULL; + gf_boolean_t in_recon = _gf_false; + int32_t op_errno = 0; + int32_t recon_term, recon_index; + + GF_VALIDATE_OR_GOTO("jbr", this, err); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, err); + GF_VALIDATE_OR_GOTO(this->name, frame, err); + + op_errno = EREMOTE; + + /* allow reads during reconciliation * + * TBD: allow "dirty" reads on non-leaders * + */ + if (xdata && (dict_get_int32(xdata, RECON_TERM_XATTR, &recon_term) == 0) && + (dict_get_int32(xdata, RECON_INDEX_XATTR, &recon_index) == 0)) { + in_recon = _gf_true; + } + + if ((!priv->leader) && (in_recon == _gf_false)) { + goto err; + } + + STACK_WIND(frame, default_ @NAME @_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME @, @SHORT_ARGS @); + return 0; err: - STACK_UNWIND_STRICT (@NAME@, frame, -1, op_errno, - @ERROR_ARGS@); - return 0; + STACK_UNWIND_STRICT(@NAME @, frame, -1, op_errno, @ERROR_ARGS @); + return 0; } /* template-name read-perform_local_op */ @@ -64,479 +58,445 @@ err: /* No "complete" function needed for @NAME@ */ /* template-name write-fop */ -int32_t -jbr_@NAME@ (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) +int32_t jbr_ @NAME @(call_frame_t *frame, xlator_t *this, @LONG_ARGS @) { - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; - int32_t ret = -1; - int op_errno = ENOMEM; + jbr_local_t *local = NULL; + jbr_private_t *priv = NULL; + int32_t ret = -1; + int op_errno = ENOMEM; - GF_VALIDATE_OR_GOTO ("jbr", this, err); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, err); - GF_VALIDATE_OR_GOTO (this->name, frame, err); + GF_VALIDATE_OR_GOTO("jbr", this, err); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, err); + GF_VALIDATE_OR_GOTO(this->name, frame, err); #if defined(JBR_CG_NEED_FD) - ret = jbr_leader_checks_and_init (frame, this, &op_errno, xdata, fd); + ret = jbr_leader_checks_and_init(frame, this, &op_errno, xdata, fd); #else - ret = jbr_leader_checks_and_init (frame, this, &op_errno, xdata, NULL); + ret = jbr_leader_checks_and_init(frame, this, &op_errno, xdata, NULL); #endif - if (ret) - goto err; - - local = frame->local; - - /* - * If we let it through despite not being the leader, then we just want - * to pass it on down without all of the additional xattrs, queuing, and - * so on. However, jbr_*_complete does depend on the initialization - * immediately above this. - */ - if (!priv->leader) { - STACK_WIND (frame, jbr_@NAME@_complete, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, - @SHORT_ARGS@); - return 0; + if (ret) + goto err; + + local = frame->local; + + /* + * If we let it through despite not being the leader, then we just want + * to pass it on down without all of the additional xattrs, queuing, and + * so on. However, jbr_*_complete does depend on the initialization + * immediately above this. + */ + if (!priv->leader) { + STACK_WIND(frame, jbr_ @NAME @_complete, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME @, @SHORT_ARGS @); + return 0; + } + + ret = jbr_initialize_xdata_set_attrs(this, &xdata); + if (ret) + goto err; + + local->xdata = dict_ref(xdata); + local->stub = fop_ @NAME + @_stub(frame, jbr_ @NAME @_continue, @SHORT_ARGS @); + if (!local->stub) { + goto err; + } + + /* + * Can be used to just call_dispatch or be customised per fop to * + * perform ops specific to that particular fop. * + */ + ret = jbr_ @NAME @_perform_local_op(frame, this, &op_errno, @SHORT_ARGS @); + if (ret) + goto err; + + return ret; +err: + if (local) { + if (local->stub) { + call_stub_destroy(local->stub); } - - ret = jbr_initialize_xdata_set_attrs (this, &xdata); - if (ret) - goto err; - - local->xdata = dict_ref(xdata); - local->stub = fop_@NAME@_stub (frame, jbr_@NAME@_continue, - @SHORT_ARGS@); - if (!local->stub) { - goto err; + if (local->qstub) { + call_stub_destroy(local->qstub); } - - /* - * Can be used to just call_dispatch or be customised per fop to * - * perform ops specific to that particular fop. * - */ - ret = jbr_@NAME@_perform_local_op (frame, this, &op_errno, - @SHORT_ARGS@); - if (ret) - goto err; - - return ret; -err: - if (local) { - if (local->stub) { - call_stub_destroy(local->stub); - } - if (local->qstub) { - call_stub_destroy(local->qstub); - } - if (local->fd) { - fd_unref(local->fd); - } - mem_put(local); + if (local->fd) { + fd_unref(local->fd); } - STACK_UNWIND_STRICT (@NAME@, frame, -1, op_errno, - @ERROR_ARGS@); - return 0; + mem_put(local); + } + STACK_UNWIND_STRICT(@NAME @, frame, -1, op_errno, @ERROR_ARGS @); + return 0; } /* template-name write-perform_local_op */ -int32_t -jbr_@NAME@_perform_local_op (call_frame_t *frame, xlator_t *this, int *op_errno, - @LONG_ARGS@) +int32_t jbr_ @NAME @_perform_local_op(call_frame_t *frame, xlator_t *this, + int *op_errno, @LONG_ARGS @) { - int32_t ret = -1; + int32_t ret = -1; - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, op_errno, out); + GF_VALIDATE_OR_GOTO("jbr", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); - ret = jbr_@NAME@_call_dispatch (frame, this, op_errno, - @SHORT_ARGS@); + ret = jbr_ @NAME @_call_dispatch(frame, this, op_errno, @SHORT_ARGS @); out: - return ret; + return ret; } /* template-name write-call_dispatch */ -int32_t -jbr_@NAME@_call_dispatch (call_frame_t *frame, xlator_t *this, int *op_errno, - @LONG_ARGS@) +int32_t jbr_ @NAME @_call_dispatch(call_frame_t *frame, xlator_t *this, + int *op_errno, @LONG_ARGS @) { - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; - int32_t ret = -1; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, out); - GF_VALIDATE_OR_GOTO (this->name, op_errno, out); + jbr_local_t *local = NULL; + jbr_private_t *priv = NULL; + int32_t ret = -1; + + GF_VALIDATE_OR_GOTO("jbr", this, out); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + local = frame->local; + GF_VALIDATE_OR_GOTO(this->name, local, out); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); #if defined(JBR_CG_QUEUE) - jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode); - if (!ictx) { - *op_errno = EIO; - goto out; + jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode); + if (!ictx) { + *op_errno = EIO; + goto out; + } + + LOCK(&ictx->lock); + if (ictx->active) { + gf_msg_debug(this->name, 0, "queuing request due to conflict"); + /* + * TBD: enqueue only for real conflict + * + * Currently we just act like all writes are in + * conflict with one another. What we should really do + * is check the active/pending queues and defer only if + * there's a conflict there. + * + * It's important to check the pending queue because we + * might have an active request X which conflicts with + * a pending request Y, and this request Z might + * conflict with Y but not X. If we checked only the + * active queue then Z could jump ahead of Y, which + * would be incorrect. + */ + local->qstub = fop_ @NAME + @_stub(frame, jbr_ @NAME @_dispatch, @SHORT_ARGS @); + if (!local->qstub) { + UNLOCK(&ictx->lock); + goto out; } - - LOCK(&ictx->lock); - if (ictx->active) { - gf_msg_debug (this->name, 0, - "queuing request due to conflict"); - /* - * TBD: enqueue only for real conflict - * - * Currently we just act like all writes are in - * conflict with one another. What we should really do - * is check the active/pending queues and defer only if - * there's a conflict there. - * - * It's important to check the pending queue because we - * might have an active request X which conflicts with - * a pending request Y, and this request Z might - * conflict with Y but not X. If we checked only the - * active queue then Z could jump ahead of Y, which - * would be incorrect. - */ - local->qstub = fop_@NAME@_stub (frame, - jbr_@NAME@_dispatch, - @SHORT_ARGS@); - if (!local->qstub) { - UNLOCK(&ictx->lock); - goto out; - } - list_add_tail(&local->qlinks, &ictx->pqueue); - ++(ictx->pending); - UNLOCK(&ictx->lock); - ret = 0; - goto out; - } else { - list_add_tail(&local->qlinks, &ictx->aqueue); - ++(ictx->active); - } + list_add_tail(&local->qlinks, &ictx->pqueue); + ++(ictx->pending); UNLOCK(&ictx->lock); + ret = 0; + goto out; + } else { + list_add_tail(&local->qlinks, &ictx->aqueue); + ++(ictx->active); + } + UNLOCK(&ictx->lock); #endif - ret = jbr_@NAME@_dispatch (frame, this, @SHORT_ARGS@); + ret = jbr_ @NAME @_dispatch(frame, this, @SHORT_ARGS @); out: - return ret; + return ret; } /* template-name write-dispatch */ -int32_t -jbr_@NAME@_dispatch (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) +int32_t jbr_ @NAME @_dispatch(call_frame_t *frame, xlator_t *this, @LONG_ARGS @) { - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; - int32_t ret = -1; - xlator_list_t *trav; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, out); - - /* - * TBD: unblock pending request(s) if we fail after this point but - * before we get to jbr_@NAME@_complete (where that code currently - * resides). - */ - - local->call_count = priv->n_children - 1; - for (trav = this->children->next; trav; trav = trav->next) { - STACK_WIND (frame, jbr_@NAME@_fan_in, - trav->xlator, trav->xlator->fops->@NAME@, - @SHORT_ARGS@); - } - - /* TBD: variable Issue count */ - ret = 0; + jbr_local_t *local = NULL; + jbr_private_t *priv = NULL; + int32_t ret = -1; + xlator_list_t *trav; + + GF_VALIDATE_OR_GOTO("jbr", this, out); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + local = frame->local; + GF_VALIDATE_OR_GOTO(this->name, local, out); + + /* + * TBD: unblock pending request(s) if we fail after this point but + * before we get to jbr_@NAME@_complete (where that code currently + * resides). + */ + + local->call_count = priv->n_children - 1; + for (trav = this->children->next; trav; trav = trav->next) { + STACK_WIND(frame, jbr_ @NAME @_fan_in, trav->xlator, + trav->xlator->fops->@NAME @, @SHORT_ARGS @); + } + + /* TBD: variable Issue count */ + ret = 0; out: - return ret; + return ret; } /* template-name write-fan-in */ -int32_t -jbr_@NAME@_fan_in (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - @LONG_ARGS@) +int32_t jbr_ @NAME @_fan_in(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, @LONG_ARGS @) { - jbr_local_t *local = NULL; - int32_t ret = -1; - uint8_t call_count; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, out); - - gf_msg_trace (this->name, 0, "op_ret = %d, op_errno = %d\n", - op_ret, op_errno); - - LOCK(&frame->lock); - call_count = --(local->call_count); - if (op_ret != -1) { - /* Increment the number of successful acks * - * received for the operation. * - */ - (local->successful_acks)++; - local->successful_op_ret = op_ret; - } - gf_msg_debug (this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n", - op_ret, op_errno, local->successful_acks); - UNLOCK(&frame->lock); - - /* TBD: variable Completion count */ - if (call_count == 0) { - call_resume(local->stub); - } - - ret = 0; + jbr_local_t *local = NULL; + int32_t ret = -1; + uint8_t call_count; + + GF_VALIDATE_OR_GOTO("jbr", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + local = frame->local; + GF_VALIDATE_OR_GOTO(this->name, local, out); + + gf_msg_trace(this->name, 0, "op_ret = %d, op_errno = %d\n", op_ret, + op_errno); + + LOCK(&frame->lock); + call_count = --(local->call_count); + if (op_ret != -1) { + /* Increment the number of successful acks * + * received for the operation. * + */ + (local->successful_acks)++; + local->successful_op_ret = op_ret; + } + gf_msg_debug(this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n", + op_ret, op_errno, local->successful_acks); + UNLOCK(&frame->lock); + + /* TBD: variable Completion count */ + if (call_count == 0) { + call_resume(local->stub); + } + + ret = 0; out: - return ret; + return ret; } /* template-name write-continue */ -int32_t -jbr_@NAME@_continue (call_frame_t *frame, xlator_t *this, - @LONG_ARGS@) +int32_t jbr_ @NAME @_continue(call_frame_t *frame, xlator_t *this, @LONG_ARGS @) { - int32_t ret = -1; - gf_boolean_t result = _gf_false; - jbr_local_t *local = NULL; - jbr_local_t *new_local = NULL; - jbr_private_t *priv = NULL; - int32_t op_errno = 0; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - priv = this->private; - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - GF_VALIDATE_OR_GOTO (this->name, local, out); - - /* Perform quorum check to see if the leader needs * - * to perform the operation. If the operation will not * - * meet quorum irrespective of the leader's result * - * there is no point in the leader performing the fop * - */ - result = fop_quorum_check (this, (double)priv->n_children, - (double)local->successful_acks + 1); - if (result == _gf_false) { - gf_msg (this->name, GF_LOG_ERROR, EROFS, - J_MSG_QUORUM_NOT_MET, "Didn't receive enough acks " - "to meet quorum. Failing the operation without trying " - "it on the leader."); + int32_t ret = -1; + gf_boolean_t result = _gf_false; + jbr_local_t *local = NULL; + jbr_local_t *new_local = NULL; + jbr_private_t *priv = NULL; + int32_t op_errno = 0; + + GF_VALIDATE_OR_GOTO("jbr", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + priv = this->private; + local = frame->local; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + GF_VALIDATE_OR_GOTO(this->name, local, out); + + /* Perform quorum check to see if the leader needs * + * to perform the operation. If the operation will not * + * meet quorum irrespective of the leader's result * + * there is no point in the leader performing the fop * + */ + result = fop_quorum_check(this, (double)priv->n_children, + (double)local->successful_acks + 1); + if (result == _gf_false) { + gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET, + "Didn't receive enough acks " + "to meet quorum. Failing the operation without trying " + "it on the leader."); #if defined(JBR_CG_QUEUE) - /* - * In case of a fop failure, before unwinding need to * - * remove it from queue * - */ - ret = jbr_remove_from_queue (frame, this); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_GENERIC, "Failed to remove from queue."); - } + /* + * In case of a fop failure, before unwinding need to * + * remove it from queue * + */ + ret = jbr_remove_from_queue(frame, this); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_GENERIC, + "Failed to remove from queue."); + } #endif - /* - * In this case, the quorum is not met on the followers * - * So the operation will not be performed on the leader * - * and a rollback will be sent via GF_FOP_IPC to all the * - * followers, where this particular fop's term and index * - * numbers will be journaled, and later used to rollback * - */ - call_frame_t *new_frame; - - new_frame = copy_frame (frame); - - if (new_frame) { - new_local = mem_get0(this->local_pool); - if (new_local) { - INIT_LIST_HEAD(&new_local->qlinks); - ret = dict_set_int32 (local->xdata, - "rollback-fop", - GF_FOP_@UPNAME@); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, - "failed to set rollback-fop"); - } else { - new_local->xdata = dict_ref(local->xdata); - new_frame->local = new_local; - jbr_ipc_call_dispatch (new_frame, - this, &op_errno, - FDL_IPC_JBR_SERVER_ROLLBACK, - new_local->xdata); - } - } else { - gf_log (this->name, GF_LOG_WARNING, - "Could not create local for new_frame"); - } + /* + * In this case, the quorum is not met on the followers * + * So the operation will not be performed on the leader * + * and a rollback will be sent via GF_FOP_IPC to all the * + * followers, where this particular fop's term and index * + * numbers will be journaled, and later used to rollback * + */ + call_frame_t *new_frame; + + new_frame = copy_frame(frame); + + if (new_frame) { + new_local = mem_get0(this->local_pool); + if (new_local) { + INIT_LIST_HEAD(&new_local->qlinks); + ret = dict_set_int32(local->xdata, "rollback-fop", + GF_FOP_ @UPNAME @); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR, + "failed to set rollback-fop"); } else { - gf_log (this->name, GF_LOG_WARNING, - "Could not send rollback ipc"); + new_local->xdata = dict_ref(local->xdata); + new_frame->local = new_local; + jbr_ipc_call_dispatch(new_frame, this, &op_errno, + FDL_IPC_JBR_SERVER_ROLLBACK, + new_local->xdata); } - - STACK_UNWIND_STRICT (@NAME@, frame, -1, EROFS, - @ERROR_ARGS@); + } else { + gf_log(this->name, GF_LOG_WARNING, + "Could not create local for new_frame"); + } } else { - STACK_WIND (frame, jbr_@NAME@_complete, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, - @SHORT_ARGS@); + gf_log(this->name, GF_LOG_WARNING, "Could not send rollback ipc"); } + STACK_UNWIND_STRICT(@NAME @, frame, -1, EROFS, @ERROR_ARGS @); + } else { + STACK_WIND(frame, jbr_ @NAME @_complete, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->@NAME @, @SHORT_ARGS @); + } + out: - return 0; + return 0; } /* template-name write-complete */ -int32_t -jbr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - @LONG_ARGS@) +int32_t jbr_ @NAME @_complete(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, @LONG_ARGS @) { - int32_t ret = -1; - gf_boolean_t result = _gf_false; - jbr_private_t *priv = NULL; - jbr_local_t *local = NULL; - jbr_local_t *new_local = NULL; - - GF_VALIDATE_OR_GOTO ("jbr", this, err); - GF_VALIDATE_OR_GOTO (this->name, frame, err); - priv = this->private; - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, priv, err); - GF_VALIDATE_OR_GOTO (this->name, local, err); - - /* If the fop failed on the leader, then reduce one successful ack - * before calculating the fop quorum - */ - LOCK(&frame->lock); - if (op_ret == -1) - (local->successful_acks)--; - UNLOCK(&frame->lock); + int32_t ret = -1; + gf_boolean_t result = _gf_false; + jbr_private_t *priv = NULL; + jbr_local_t *local = NULL; + jbr_local_t *new_local = NULL; + + GF_VALIDATE_OR_GOTO("jbr", this, err); + GF_VALIDATE_OR_GOTO(this->name, frame, err); + priv = this->private; + local = frame->local; + GF_VALIDATE_OR_GOTO(this->name, priv, err); + GF_VALIDATE_OR_GOTO(this->name, local, err); + + /* If the fop failed on the leader, then reduce one successful ack + * before calculating the fop quorum + */ + LOCK(&frame->lock); + if (op_ret == -1) + (local->successful_acks)--; + UNLOCK(&frame->lock); #if defined(JBR_CG_QUEUE) - ret = jbr_remove_from_queue (frame, this); - if (ret) - goto err; + ret = jbr_remove_from_queue(frame, this); + if (ret) + goto err; #endif #if defined(JBR_CG_FSYNC) - jbr_mark_fd_dirty(this, local); + jbr_mark_fd_dirty(this, local); #endif #if defined(JBR_CG_NEED_FD) - fd_unref(local->fd); + fd_unref(local->fd); #endif - /* After the leader completes the fop, a quorum check is * - * performed, taking into account the outcome of the fop * - * on the leader. Irrespective of the fop being successful * - * or failing on the leader, the result of the quorum will * - * determine if the overall fop is successful or not. For * - * example, a fop might have succeeded on every node except * - * the leader, in which case as quorum is being met, the fop * - * will be treated as a successful fop, even though it failed * - * on the leader. On follower nodes, no quorum check should * - * be done, and the result is returned to the leader as is. * - */ - if (priv->leader) { - result = fop_quorum_check (this, (double)priv->n_children, - (double)local->successful_acks + 1); - if (result == _gf_false) { - op_ret = -1; - op_errno = EROFS; - gf_msg (this->name, GF_LOG_ERROR, EROFS, - J_MSG_QUORUM_NOT_MET, "Quorum is not met. " - "The operation has failed."); + /* After the leader completes the fop, a quorum check is * + * performed, taking into account the outcome of the fop * + * on the leader. Irrespective of the fop being successful * + * or failing on the leader, the result of the quorum will * + * determine if the overall fop is successful or not. For * + * example, a fop might have succeeded on every node except * + * the leader, in which case as quorum is being met, the fop * + * will be treated as a successful fop, even though it failed * + * on the leader. On follower nodes, no quorum check should * + * be done, and the result is returned to the leader as is. * + */ + if (priv->leader) { + result = fop_quorum_check(this, (double)priv->n_children, + (double)local->successful_acks + 1); + if (result == _gf_false) { + op_ret = -1; + op_errno = EROFS; + gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET, + "Quorum is not met. " + "The operation has failed."); + /* + * In this case, the quorum is not met after the * + * operation is performed on the leader. Hence a * + * rollback will be sent via GF_FOP_IPC to the leader * + * where this particular fop's term and index numbers * + * will be journaled, and later used to rollback. * + * The same will be done on all the followers * + */ + call_frame_t *new_frame; + + new_frame = copy_frame(frame); + if (new_frame) { + new_local = mem_get0(this->local_pool); + if (new_local) { + INIT_LIST_HEAD(&new_local->qlinks); + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR, + "op = %d", new_frame->op); + ret = dict_set_int32(local->xdata, "rollback-fop", + GF_FOP_ @UPNAME @); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR, + "failed to set " + "rollback-fop"); + } else { + new_local->xdata = dict_ref(local->xdata); + new_frame->local = new_local; /* - * In this case, the quorum is not met after the * - * operation is performed on the leader. Hence a * - * rollback will be sent via GF_FOP_IPC to the leader * - * where this particular fop's term and index numbers * - * will be journaled, and later used to rollback. * - * The same will be done on all the followers * + * Calling STACK_WIND instead * + * of jbr_ipc as it will not * + * unwind to the previous * + * translators like it will * + * in case of jbr_ipc. * */ - call_frame_t *new_frame; - - new_frame = copy_frame (frame); - if (new_frame) { - new_local = mem_get0(this->local_pool); - if (new_local) { - INIT_LIST_HEAD(&new_local->qlinks); - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, "op = %d", - new_frame->op); - ret = dict_set_int32 (local->xdata, - "rollback-fop", - GF_FOP_@UPNAME@); - if (ret) { - gf_msg (this->name, - GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, - "failed to set " - "rollback-fop"); - } else { - new_local->xdata = dict_ref (local->xdata); - new_frame->local = new_local; - /* - * Calling STACK_WIND instead * - * of jbr_ipc as it will not * - * unwind to the previous * - * translators like it will * - * in case of jbr_ipc. * - */ - STACK_WIND (new_frame, - jbr_ipc_complete, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ipc, - FDL_IPC_JBR_SERVER_ROLLBACK, - new_local->xdata); - } - } else { - gf_log (this->name, GF_LOG_WARNING, - "Could not create local " - "for new_frame"); - } - } else { - gf_log (this->name, GF_LOG_WARNING, - "Could not send rollback ipc"); - } + STACK_WIND( + new_frame, jbr_ipc_complete, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ipc, + FDL_IPC_JBR_SERVER_ROLLBACK, new_local->xdata); + } } else { + gf_log(this->name, GF_LOG_WARNING, + "Could not create local " + "for new_frame"); + } + } else { + gf_log(this->name, GF_LOG_WARNING, + "Could not send rollback ipc"); + } + } else { #if defined(JBR_CG_NEED_FD) - op_ret = local->successful_op_ret; + op_ret = local->successful_op_ret; #else - op_ret = 0; + op_ret = 0; #endif - op_errno = 0; - gf_msg_debug (this->name, 0, - "Quorum has met. The operation has succeeded."); - } + op_errno = 0; + gf_msg_debug(this->name, 0, + "Quorum has met. The operation has succeeded."); } + } - /* - * Unrefing the reference taken in jbr_@NAME@ () * - */ - dict_unref (local->xdata); - - STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, - @SHORT_ARGS@); + /* + * Unrefing the reference taken in jbr_@NAME@ () * + */ + dict_unref(local->xdata); + STACK_UNWIND_STRICT(@NAME @, frame, op_ret, op_errno, @SHORT_ARGS @); - return 0; + return 0; err: - STACK_UNWIND_STRICT (@NAME@, frame, -1, 0, - @SHORT_ARGS@); + STACK_UNWIND_STRICT(@NAME @, frame, -1, 0, @SHORT_ARGS @); - return 0; + return 0; } diff --git a/xlators/experimental/jbr-server/src/jbr.c b/xlators/experimental/jbr-server/src/jbr.c index cd03dfe9b8a..49d7eb467f7 100644 --- a/xlators/experimental/jbr-server/src/jbr.c +++ b/xlators/experimental/jbr-server/src/jbr.c @@ -29,14 +29,14 @@ #include "jbr-internal.h" #include "jbr-messages.h" -#define JBR_FLUSH_INTERVAL 5 +#define JBR_FLUSH_INTERVAL 5 enum { - /* echo "cluster/jbr-server" | md5sum | cut -c 1-8 */ - JBR_SERVER_IPC_BASE = 0x0e2d66a5, - JBR_SERVER_TERM_RANGE, - JBR_SERVER_OPEN_TERM, - JBR_SERVER_NEXT_ENTRY + /* echo "cluster/jbr-server" | md5sum | cut -c 1-8 */ + JBR_SERVER_IPC_BASE = 0x0e2d66a5, + JBR_SERVER_TERM_RANGE, + JBR_SERVER_OPEN_TERM, + JBR_SERVER_NEXT_ENTRY }; /* @@ -44,551 +44,525 @@ enum { * jbr_lk_perform_local_op call it, before code is generated. * */ int32_t -jbr_lk_call_dispatch (call_frame_t *frame, xlator_t *this, int *op_errno, - fd_t *fd, int32_t cmd, struct gf_flock *lock, - dict_t *xdata); +jbr_lk_call_dispatch(call_frame_t *frame, xlator_t *this, int *op_errno, + fd_t *fd, int32_t cmd, struct gf_flock *lock, + dict_t *xdata); int32_t -jbr_lk_dispatch (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t cmd, struct gf_flock *lock, - dict_t *xdata); +jbr_lk_dispatch(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + struct gf_flock *lock, dict_t *xdata); int32_t -jbr_ipc_call_dispatch (call_frame_t *frame, xlator_t *this, int *op_errno, - int32_t op, dict_t *xdata); +jbr_ipc_call_dispatch(call_frame_t *frame, xlator_t *this, int *op_errno, + int32_t op, dict_t *xdata); int32_t -jbr_ipc_complete (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - dict_t *xdata); +jbr_ipc_complete(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata); /* Used to check the quorum of acks received after the fop * confirming the status of the fop on all the brick processes * for this particular subvolume */ gf_boolean_t -fop_quorum_check (xlator_t *this, double n_children, - double current_state) +fop_quorum_check(xlator_t *this, double n_children, double current_state) { - jbr_private_t *priv = NULL; - gf_boolean_t result = _gf_false; - double required = 0; - double current = 0; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - - required = n_children * priv->quorum_pct; - - /* - * Before performing the fop on the leader, we need to check, - * if there is any merit in performing the fop on the leader. - * In a case, where even a successful write on the leader, will - * not meet quorum, there is no point in trying the fop on the - * leader. - * When this function is called after the leader has tried - * performing the fop, this check will calculate quorum taking into - * account the status of the fop on the leader. If the leader's - * op_ret was -1, the complete function would account that by - * decrementing successful_acks by 1 - */ - - current = current_state * 100.0; - - if (current < required) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_QUORUM_NOT_MET, - "Quorum not met. quorum_pct = %f " - "Current State = %f, Required State = %f", - priv->quorum_pct, current, - required); - } else - result = _gf_true; + jbr_private_t *priv = NULL; + gf_boolean_t result = _gf_false; + double required = 0; + double current = 0; + + GF_VALIDATE_OR_GOTO("jbr", this, out); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + + required = n_children * priv->quorum_pct; + + /* + * Before performing the fop on the leader, we need to check, + * if there is any merit in performing the fop on the leader. + * In a case, where even a successful write on the leader, will + * not meet quorum, there is no point in trying the fop on the + * leader. + * When this function is called after the leader has tried + * performing the fop, this check will calculate quorum taking into + * account the status of the fop on the leader. If the leader's + * op_ret was -1, the complete function would account that by + * decrementing successful_acks by 1 + */ + + current = current_state * 100.0; + + if (current < required) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_QUORUM_NOT_MET, + "Quorum not met. quorum_pct = %f " + "Current State = %f, Required State = %f", + priv->quorum_pct, current, required); + } else + result = _gf_true; out: - return result; + return result; } jbr_inode_ctx_t * -jbr_get_inode_ctx (xlator_t *this, inode_t *inode) +jbr_get_inode_ctx(xlator_t *this, inode_t *inode) { - uint64_t ctx_int = 0LL; - jbr_inode_ctx_t *ctx_ptr; - - if (__inode_ctx_get(inode, this, &ctx_int) == 0) { - ctx_ptr = (jbr_inode_ctx_t *)(long)ctx_int; - } else { - ctx_ptr = GF_CALLOC (1, sizeof(*ctx_ptr), - gf_mt_jbr_inode_ctx_t); - if (ctx_ptr) { - ctx_int = (uint64_t)(long)ctx_ptr; - if (__inode_ctx_set(inode, this, &ctx_int) == 0) { - LOCK_INIT(&ctx_ptr->lock); - INIT_LIST_HEAD(&ctx_ptr->aqueue); - INIT_LIST_HEAD(&ctx_ptr->pqueue); - } else { - GF_FREE(ctx_ptr); - ctx_ptr = NULL; - } - } - - } - - return ctx_ptr; + uint64_t ctx_int = 0LL; + jbr_inode_ctx_t *ctx_ptr; + + if (__inode_ctx_get(inode, this, &ctx_int) == 0) { + ctx_ptr = (jbr_inode_ctx_t *)(long)ctx_int; + } else { + ctx_ptr = GF_CALLOC(1, sizeof(*ctx_ptr), gf_mt_jbr_inode_ctx_t); + if (ctx_ptr) { + ctx_int = (uint64_t)(long)ctx_ptr; + if (__inode_ctx_set(inode, this, &ctx_int) == 0) { + LOCK_INIT(&ctx_ptr->lock); + INIT_LIST_HEAD(&ctx_ptr->aqueue); + INIT_LIST_HEAD(&ctx_ptr->pqueue); + } else { + GF_FREE(ctx_ptr); + ctx_ptr = NULL; + } + } + } + + return ctx_ptr; } jbr_fd_ctx_t * -jbr_get_fd_ctx (xlator_t *this, fd_t *fd) +jbr_get_fd_ctx(xlator_t *this, fd_t *fd) { - uint64_t ctx_int = 0LL; - jbr_fd_ctx_t *ctx_ptr; - - if (__fd_ctx_get(fd, this, &ctx_int) == 0) { - ctx_ptr = (jbr_fd_ctx_t *)(long)ctx_int; - } else { - ctx_ptr = GF_CALLOC (1, sizeof(*ctx_ptr), gf_mt_jbr_fd_ctx_t); - if (ctx_ptr) { - if (__fd_ctx_set(fd, this, (uint64_t)ctx_ptr) == 0) { - INIT_LIST_HEAD(&ctx_ptr->dirty_list); - INIT_LIST_HEAD(&ctx_ptr->fd_list); - } else { - GF_FREE(ctx_ptr); - ctx_ptr = NULL; - } - } - - } - - return ctx_ptr; + uint64_t ctx_int = 0LL; + jbr_fd_ctx_t *ctx_ptr; + + if (__fd_ctx_get(fd, this, &ctx_int) == 0) { + ctx_ptr = (jbr_fd_ctx_t *)(long)ctx_int; + } else { + ctx_ptr = GF_CALLOC(1, sizeof(*ctx_ptr), gf_mt_jbr_fd_ctx_t); + if (ctx_ptr) { + if (__fd_ctx_set(fd, this, (uint64_t)ctx_ptr) == 0) { + INIT_LIST_HEAD(&ctx_ptr->dirty_list); + INIT_LIST_HEAD(&ctx_ptr->fd_list); + } else { + GF_FREE(ctx_ptr); + ctx_ptr = NULL; + } + } + } + + return ctx_ptr; } void -jbr_mark_fd_dirty (xlator_t *this, jbr_local_t *local) +jbr_mark_fd_dirty(xlator_t *this, jbr_local_t *local) { - fd_t *fd = local->fd; - jbr_fd_ctx_t *ctx_ptr; - jbr_dirty_list_t *dirty; - jbr_private_t *priv = this->private; - - /* - * TBD: don't do any of this for O_SYNC/O_DIRECT writes. - * Unfortunately, that optimization requires that we distinguish - * between writev and other "write" calls, saving the original flags - * and checking them in the callback. Too much work for too little - * gain right now. - */ - - LOCK(&fd->lock); - ctx_ptr = jbr_get_fd_ctx(this, fd); - dirty = GF_CALLOC(1, sizeof(*dirty), gf_mt_jbr_dirty_t); - if (ctx_ptr && dirty) { - gf_msg_trace (this->name, 0, - "marking fd %p as dirty (%p)", fd, dirty); - /* TBD: fill dirty->id from what changelog gave us */ - list_add_tail(&dirty->links, &ctx_ptr->dirty_list); - if (list_empty(&ctx_ptr->fd_list)) { - /* Add a ref so _release doesn't get called. */ - ctx_ptr->fd = fd_ref(fd); - LOCK(&priv->dirty_lock); - list_add_tail (&ctx_ptr->fd_list, - &priv->dirty_fds); - UNLOCK(&priv->dirty_lock); - } - } else { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - J_MSG_MEM_ERR, "could not mark %p dirty", fd); - if (ctx_ptr) { - GF_FREE(ctx_ptr); - } - if (dirty) { - GF_FREE(dirty); - } - } - UNLOCK(&fd->lock); + fd_t *fd = local->fd; + jbr_fd_ctx_t *ctx_ptr; + jbr_dirty_list_t *dirty; + jbr_private_t *priv = this->private; + + /* + * TBD: don't do any of this for O_SYNC/O_DIRECT writes. + * Unfortunately, that optimization requires that we distinguish + * between writev and other "write" calls, saving the original flags + * and checking them in the callback. Too much work for too little + * gain right now. + */ + + LOCK(&fd->lock); + ctx_ptr = jbr_get_fd_ctx(this, fd); + dirty = GF_CALLOC(1, sizeof(*dirty), gf_mt_jbr_dirty_t); + if (ctx_ptr && dirty) { + gf_msg_trace(this->name, 0, "marking fd %p as dirty (%p)", fd, dirty); + /* TBD: fill dirty->id from what changelog gave us */ + list_add_tail(&dirty->links, &ctx_ptr->dirty_list); + if (list_empty(&ctx_ptr->fd_list)) { + /* Add a ref so _release doesn't get called. */ + ctx_ptr->fd = fd_ref(fd); + LOCK(&priv->dirty_lock); + list_add_tail(&ctx_ptr->fd_list, &priv->dirty_fds); + UNLOCK(&priv->dirty_lock); + } + } else { + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR, + "could not mark %p dirty", fd); + if (ctx_ptr) { + GF_FREE(ctx_ptr); + } + if (dirty) { + GF_FREE(dirty); + } + } + UNLOCK(&fd->lock); } -#define JBR_TERM_XATTR "trusted.jbr.term" -#define JBR_INDEX_XATTR "trusted.jbr.index" -#define JBR_REP_COUNT_XATTR "trusted.jbr.rep-count" -#define RECON_TERM_XATTR "trusted.jbr.recon-term" -#define RECON_INDEX_XATTR "trusted.jbr.recon-index" +#define JBR_TERM_XATTR "trusted.jbr.term" +#define JBR_INDEX_XATTR "trusted.jbr.index" +#define JBR_REP_COUNT_XATTR "trusted.jbr.rep-count" +#define RECON_TERM_XATTR "trusted.jbr.recon-term" +#define RECON_INDEX_XATTR "trusted.jbr.recon-index" int32_t -jbr_leader_checks_and_init (call_frame_t *frame, xlator_t *this, int *op_errno, - dict_t *xdata, fd_t *fd) +jbr_leader_checks_and_init(call_frame_t *frame, xlator_t *this, int *op_errno, + dict_t *xdata, fd_t *fd) { - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; - int32_t ret = -1; - gf_boolean_t result = _gf_false; - int from_leader = _gf_false; - int from_recon = _gf_false; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - GF_VALIDATE_OR_GOTO (this->name, op_errno, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - - /* - * Our first goal here is to avoid "split brain surprise" for users who - * specify exactly 50% with two- or three-way replication. That means - * either a more-than check against half the total replicas or an - * at-least check against half of our peers (one less). Of the two, - * only an at-least check supports the intuitive use of 100% to mean - * all replicas must be present, because "more than 100%" will never - * succeed regardless of which count we use. This leaves us with a - * slightly non-traditional definition of quorum ("at least X% of peers - * not including ourselves") but one that's useful enough to be worth - * it. - * - * Note that n_children and up_children *do* include the local - * subvolume, so we need to subtract one in each case. - */ - if (priv->leader) { - result = fop_quorum_check (this, (double)(priv->n_children - 1), - (double)(priv->up_children - 1)); - - if (result == _gf_false) { - /* Emulate the AFR client-side-quorum behavior. */ - gf_msg (this->name, GF_LOG_ERROR, EROFS, - J_MSG_QUORUM_NOT_MET, "Sufficient number of " - "subvolumes are not up to meet quorum."); - *op_errno = EROFS; - goto out; - } + jbr_local_t *local = NULL; + jbr_private_t *priv = NULL; + int32_t ret = -1; + gf_boolean_t result = _gf_false; + int from_leader = _gf_false; + int from_recon = _gf_false; + + GF_VALIDATE_OR_GOTO("jbr", this, out); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + + /* + * Our first goal here is to avoid "split brain surprise" for users who + * specify exactly 50% with two- or three-way replication. That means + * either a more-than check against half the total replicas or an + * at-least check against half of our peers (one less). Of the two, + * only an at-least check supports the intuitive use of 100% to mean + * all replicas must be present, because "more than 100%" will never + * succeed regardless of which count we use. This leaves us with a + * slightly non-traditional definition of quorum ("at least X% of peers + * not including ourselves") but one that's useful enough to be worth + * it. + * + * Note that n_children and up_children *do* include the local + * subvolume, so we need to subtract one in each case. + */ + if (priv->leader) { + result = fop_quorum_check(this, (double)(priv->n_children - 1), + (double)(priv->up_children - 1)); + + if (result == _gf_false) { + /* Emulate the AFR client-side-quorum behavior. */ + gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET, + "Sufficient number of " + "subvolumes are not up to meet quorum."); + *op_errno = EROFS; + goto out; + } + } else { + if (xdata) { + from_leader = !!dict_get(xdata, JBR_TERM_XATTR); + from_recon = !!dict_get(xdata, RECON_TERM_XATTR) && + !!dict_get(xdata, RECON_INDEX_XATTR); } else { - if (xdata) { - from_leader = !!dict_get(xdata, JBR_TERM_XATTR); - from_recon = !!dict_get(xdata, RECON_TERM_XATTR) - && !!dict_get(xdata, RECON_INDEX_XATTR); - } else { - from_leader = from_recon = _gf_false; - } - - /* follower/recon path * - * just send it to local node * - */ - if (!from_leader && !from_recon) { - *op_errno = EREMOTE; - goto out; - } + from_leader = from_recon = _gf_false; } - local = mem_get0(this->local_pool); - if (!local) { - goto out; + /* follower/recon path * + * just send it to local node * + */ + if (!from_leader && !from_recon) { + *op_errno = EREMOTE; + goto out; } + } + + local = mem_get0(this->local_pool); + if (!local) { + goto out; + } - if (fd) - local->fd = fd_ref(fd); - else - local->fd = NULL; + if (fd) + local->fd = fd_ref(fd); + else + local->fd = NULL; - INIT_LIST_HEAD(&local->qlinks); - local->successful_acks = 0; - frame->local = local; + INIT_LIST_HEAD(&local->qlinks); + local->successful_acks = 0; + frame->local = local; - ret = 0; + ret = 0; out: - return ret; + return ret; } int32_t -jbr_initialize_xdata_set_attrs (xlator_t *this, dict_t **xdata) +jbr_initialize_xdata_set_attrs(xlator_t *this, dict_t **xdata) { - jbr_private_t *priv = NULL; - int32_t ret = -1; - uint32_t ti = 0; + jbr_private_t *priv = NULL; + int32_t ret = -1; + uint32_t ti = 0; - GF_VALIDATE_OR_GOTO ("jbr", this, out); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - GF_VALIDATE_OR_GOTO (this->name, xdata, out); + GF_VALIDATE_OR_GOTO("jbr", this, out); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + GF_VALIDATE_OR_GOTO(this->name, xdata, out); + if (!*xdata) { + *xdata = dict_new(); if (!*xdata) { - *xdata = dict_new(); - if (!*xdata) { - gf_msg (this->name, GF_LOG_ERROR, ENOMEM, - J_MSG_MEM_ERR, "failed to allocate xdata"); - goto out; - } - } - - if (dict_set_int32(*xdata, JBR_TERM_XATTR, priv->current_term) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, "failed to set jbr-term"); - goto out; - } - - LOCK(&priv->index_lock); - ti = ++(priv->index); - UNLOCK(&priv->index_lock); - if (dict_set_int32(*xdata, JBR_INDEX_XATTR, ti) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, "failed to set index"); - goto out; - } - - ret = 0; + gf_msg(this->name, GF_LOG_ERROR, ENOMEM, J_MSG_MEM_ERR, + "failed to allocate xdata"); + goto out; + } + } + + if (dict_set_int32(*xdata, JBR_TERM_XATTR, priv->current_term) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR, + "failed to set jbr-term"); + goto out; + } + + LOCK(&priv->index_lock); + ti = ++(priv->index); + UNLOCK(&priv->index_lock); + if (dict_set_int32(*xdata, JBR_INDEX_XATTR, ti) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR, + "failed to set index"); + goto out; + } + + ret = 0; out: - return ret; + return ret; } int32_t -jbr_remove_from_queue (call_frame_t *frame, xlator_t *this) +jbr_remove_from_queue(call_frame_t *frame, xlator_t *this) { - int32_t ret = -1; - jbr_inode_ctx_t *ictx = NULL; - jbr_local_t *local = NULL; - jbr_local_t *next = NULL; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, out); - - if (local->qlinks.next != &local->qlinks) { - list_del(&local->qlinks); - ictx = jbr_get_inode_ctx(this, local->fd->inode); - if (ictx) { - LOCK(&ictx->lock); - if (ictx->pending) { - /* - * TBD: dequeue *all* non-conflicting - * reqs - * - * With the stub implementation there - * can only be one request active at a - * time (zero here) so it's not an - * issue. In a real implementation - * there might still be other active - * requests to check against, and - * multiple pending requests that could - * continue. - */ - gf_msg_debug (this->name, 0, - "unblocking next request"); - --(ictx->pending); - next = list_entry (ictx->pqueue.next, - jbr_local_t, qlinks); - list_del(&next->qlinks); - list_add_tail(&next->qlinks, - &ictx->aqueue); - call_resume(next->qstub); - } else { - --(ictx->active); - } - UNLOCK(&ictx->lock); - } + int32_t ret = -1; + jbr_inode_ctx_t *ictx = NULL; + jbr_local_t *local = NULL; + jbr_local_t *next = NULL; + + GF_VALIDATE_OR_GOTO("jbr", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + local = frame->local; + GF_VALIDATE_OR_GOTO(this->name, local, out); + + if (local->qlinks.next != &local->qlinks) { + list_del(&local->qlinks); + ictx = jbr_get_inode_ctx(this, local->fd->inode); + if (ictx) { + LOCK(&ictx->lock); + if (ictx->pending) { + /* + * TBD: dequeue *all* non-conflicting + * reqs + * + * With the stub implementation there + * can only be one request active at a + * time (zero here) so it's not an + * issue. In a real implementation + * there might still be other active + * requests to check against, and + * multiple pending requests that could + * continue. + */ + gf_msg_debug(this->name, 0, "unblocking next request"); + --(ictx->pending); + next = list_entry(ictx->pqueue.next, jbr_local_t, qlinks); + list_del(&next->qlinks); + list_add_tail(&next->qlinks, &ictx->aqueue); + call_resume(next->qstub); + } else { + --(ictx->active); + } + UNLOCK(&ictx->lock); } + } - ret = 0; + ret = 0; out: - return ret; + return ret; } int32_t -jbr_lk_complete (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - struct gf_flock *flock, dict_t *xdata) +jbr_lk_complete(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct gf_flock *flock, + dict_t *xdata) { - int32_t ret = -1; - jbr_private_t *priv = NULL; - jbr_local_t *local = NULL; - gf_boolean_t result = _gf_false; - - GF_VALIDATE_OR_GOTO ("jbr", this, err); - priv = this->private; - GF_VALIDATE_OR_GOTO (this->name, priv, err); - GF_VALIDATE_OR_GOTO (this->name, frame, err); - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, err); - GF_VALIDATE_OR_GOTO (this->name, flock, err); - GF_VALIDATE_OR_GOTO (this->name, xdata, err); + int32_t ret = -1; + jbr_private_t *priv = NULL; + jbr_local_t *local = NULL; + gf_boolean_t result = _gf_false; + + GF_VALIDATE_OR_GOTO("jbr", this, err); + priv = this->private; + GF_VALIDATE_OR_GOTO(this->name, priv, err); + GF_VALIDATE_OR_GOTO(this->name, frame, err); + local = frame->local; + GF_VALIDATE_OR_GOTO(this->name, local, err); + GF_VALIDATE_OR_GOTO(this->name, flock, err); + GF_VALIDATE_OR_GOTO(this->name, xdata, err); + + /* + * Remove from queue for unlock operation only * + * For lock operation, it will be done in fan-in * + */ + if (flock->l_type == F_UNLCK) { + ret = jbr_remove_from_queue(frame, this); + if (ret) + goto err; + } + + /* + * On a follower, unwind with the op_ret and op_errno. On a * + * leader, if the fop is a locking fop, and its a failure, * + * send fail, else call stub which will dispatch the fop to * + * the followers. * + * * + * If the fop is a unlocking fop, check quorum. If quorum * + * is met, then send success. Else Rollback on leader, * + * followed by followers, and then send -ve ack to client. * + */ + if (priv->leader) { + /* Increase the successful acks if it's a success. */ + LOCK(&frame->lock); + if (op_ret != -1) + (local->successful_acks)++; + UNLOCK(&frame->lock); - /* - * Remove from queue for unlock operation only * - * For lock operation, it will be done in fan-in * - */ if (flock->l_type == F_UNLCK) { - ret = jbr_remove_from_queue (frame, this); - if (ret) - goto err; - } - - /* - * On a follower, unwind with the op_ret and op_errno. On a * - * leader, if the fop is a locking fop, and its a failure, * - * send fail, else call stub which will dispatch the fop to * - * the followers. * - * * - * If the fop is a unlocking fop, check quorum. If quorum * - * is met, then send success. Else Rollback on leader, * - * followed by followers, and then send -ve ack to client. * - */ - if (priv->leader) { - - /* Increase the successful acks if it's a success. */ - LOCK(&frame->lock); - if (op_ret != -1) - (local->successful_acks)++; - UNLOCK(&frame->lock); - - if (flock->l_type == F_UNLCK) { - result = fop_quorum_check (this, - (double)priv->n_children, - (double)local->successful_acks); - if (result == _gf_false) { - op_ret = -1; - op_errno = EROFS; - gf_msg (this->name, GF_LOG_ERROR, EROFS, - J_MSG_QUORUM_NOT_MET, - "Quorum is not met. " - "The operation has failed."); - - /* TODO: PERFORM UNLOCK ROLLBACK ON LEADER * - * FOLLOWED BY FOLLOWERS. */ - } else { - op_ret = 0; - op_errno = 0; - } - - fd_unref(local->fd); - STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, - flock, xdata); - } else { - if (op_ret == -1) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_LOCK_FAILURE, - "The lock operation failed on " - "the leader."); - - fd_unref(local->fd); - STACK_UNWIND_STRICT (lk, frame, op_ret, - op_errno, flock, xdata); - } else { - if (!local->stub) { - goto err; - } - - call_resume(local->stub); - } - } + result = fop_quorum_check(this, (double)priv->n_children, + (double)local->successful_acks); + if (result == _gf_false) { + op_ret = -1; + op_errno = EROFS; + gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET, + "Quorum is not met. " + "The operation has failed."); + + /* TODO: PERFORM UNLOCK ROLLBACK ON LEADER * + * FOLLOWED BY FOLLOWERS. */ + } else { + op_ret = 0; + op_errno = 0; + } + + fd_unref(local->fd); + STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, flock, xdata); } else { + if (op_ret == -1) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_LOCK_FAILURE, + "The lock operation failed on " + "the leader."); + fd_unref(local->fd); - STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, - flock, xdata); + STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, flock, xdata); + } else { + if (!local->stub) { + goto err; + } + + call_resume(local->stub); + } } + } else { + fd_unref(local->fd); + STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, flock, xdata); + } - return 0; + return 0; err: - if (local) { - if (local->stub) { - call_stub_destroy(local->stub); - } - if (local->qstub) { - call_stub_destroy(local->qstub); - } - if (local->fd) { - fd_unref(local->fd); - } - mem_put(local); + if (local) { + if (local->stub) { + call_stub_destroy(local->stub); } - STACK_UNWIND_STRICT (lk, frame, -1, op_errno, - flock, xdata); - return 0; + if (local->qstub) { + call_stub_destroy(local->qstub); + } + if (local->fd) { + fd_unref(local->fd); + } + mem_put(local); + } + STACK_UNWIND_STRICT(lk, frame, -1, op_errno, flock, xdata); + return 0; } int32_t -jbr_lk_fan_in (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct gf_flock *flock, - dict_t *xdata) +jbr_lk_fan_in(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct gf_flock *flock, dict_t *xdata) { - uint8_t call_count = -1; - int32_t ret = -1; - gf_boolean_t result = _gf_false; - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - priv = this->private; - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - GF_VALIDATE_OR_GOTO (this->name, local, out); - - gf_msg_trace (this->name, 0, "op_ret = %d, op_errno = %d\n", - op_ret, op_errno); + uint8_t call_count = -1; + int32_t ret = -1; + gf_boolean_t result = _gf_false; + jbr_local_t *local = NULL; + jbr_private_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("jbr", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + priv = this->private; + local = frame->local; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + GF_VALIDATE_OR_GOTO(this->name, local, out); + + gf_msg_trace(this->name, 0, "op_ret = %d, op_errno = %d\n", op_ret, + op_errno); + + LOCK(&frame->lock); + call_count = --(local->call_count); + if (op_ret != -1) { + /* Increment the number of successful acks * + * received for the operation. * + */ + (local->successful_acks)++; + local->successful_op_ret = op_ret; + } + gf_msg_debug(this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n", + op_ret, op_errno, local->successful_acks); + UNLOCK(&frame->lock); + + if (call_count == 0) { + /* + * If the fop is a locking fop, then check quorum. If quorum * + * is met, send successful ack to the client. If quorum is * + * not met, then rollback locking on followers, followed by * + * rollback of locking on leader, and then sending -ve ack * + * to the client. * + * * + * If the fop is a unlocking fop, then call stub. * + */ + if (flock->l_type == F_UNLCK) { + call_resume(local->stub); + } else { + /* + * Remove from queue for locking fops, for unlocking * + * fops, it is taken care of in jbr_lk_complete * + */ + ret = jbr_remove_from_queue(frame, this); + if (ret) + goto out; - LOCK(&frame->lock); - call_count = --(local->call_count); - if (op_ret != -1) { - /* Increment the number of successful acks * - * received for the operation. * + fd_unref(local->fd); + + result = fop_quorum_check(this, (double)priv->n_children, + (double)local->successful_acks); + if (result == _gf_false) { + gf_msg(this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET, + "Didn't receive enough acks to meet " + "quorum. Failing the locking " + "operation and initiating rollback on " + "followers and the leader " + "respectively."); + + /* TODO: PERFORM ROLLBACK OF LOCKING ON + * FOLLOWERS, FOLLOWED BY ROLLBACK ON + * LEADER. */ - (local->successful_acks)++; - local->successful_op_ret = op_ret; - } - gf_msg_debug (this->name, 0, "succ_acks = %d, op_ret = %d, op_errno = %d\n", - op_ret, op_errno, local->successful_acks); - UNLOCK(&frame->lock); - if (call_count == 0) { - /* - * If the fop is a locking fop, then check quorum. If quorum * - * is met, send successful ack to the client. If quorum is * - * not met, then rollback locking on followers, followed by * - * rollback of locking on leader, and then sending -ve ack * - * to the client. * - * * - * If the fop is a unlocking fop, then call stub. * - */ - if (flock->l_type == F_UNLCK) { - call_resume(local->stub); - } else { - /* - * Remove from queue for locking fops, for unlocking * - * fops, it is taken care of in jbr_lk_complete * - */ - ret = jbr_remove_from_queue (frame, this); - if (ret) - goto out; - - fd_unref(local->fd); - - result = fop_quorum_check (this, - (double)priv->n_children, - (double)local->successful_acks); - if (result == _gf_false) { - gf_msg (this->name, GF_LOG_ERROR, EROFS, - J_MSG_QUORUM_NOT_MET, - "Didn't receive enough acks to meet " - "quorum. Failing the locking " - "operation and initiating rollback on " - "followers and the leader " - "respectively."); - - /* TODO: PERFORM ROLLBACK OF LOCKING ON - * FOLLOWERS, FOLLOWED BY ROLLBACK ON - * LEADER. - */ - - STACK_UNWIND_STRICT (lk, frame, -1, EROFS, - flock, xdata); - } else { - STACK_UNWIND_STRICT (lk, frame, 0, 0, - flock, xdata); - } - } + STACK_UNWIND_STRICT(lk, frame, -1, EROFS, flock, xdata); + } else { + STACK_UNWIND_STRICT(lk, frame, 0, 0, flock, xdata); + } } + } - ret = 0; + ret = 0; out: - return ret; + return ret; } /* @@ -596,154 +570,146 @@ out: * function so as to support queues. */ int32_t -jbr_perform_lk_on_leader (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t cmd, struct gf_flock *flock, - dict_t *xdata) +jbr_perform_lk_on_leader(call_frame_t *frame, xlator_t *this, fd_t *fd, + int32_t cmd, struct gf_flock *flock, dict_t *xdata) { - int32_t ret = -1; + int32_t ret = -1; - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - GF_VALIDATE_OR_GOTO (this->name, flock, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); + GF_VALIDATE_OR_GOTO("jbr", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + GF_VALIDATE_OR_GOTO(this->name, flock, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); - STACK_WIND (frame, jbr_lk_complete, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk, - fd, cmd, flock, xdata); + STACK_WIND(frame, jbr_lk_complete, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lk, fd, cmd, flock, xdata); - ret = 0; + ret = 0; out: - return ret; + return ret; } int32_t -jbr_lk_perform_local_op (call_frame_t *frame, xlator_t *this, int *op_errno, - fd_t *fd, int32_t cmd, struct gf_flock *flock, - dict_t *xdata) +jbr_lk_perform_local_op(call_frame_t *frame, xlator_t *this, int *op_errno, + fd_t *fd, int32_t cmd, struct gf_flock *flock, + dict_t *xdata) { - int32_t ret = -1; - jbr_local_t *local = NULL; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, op_errno, out); - GF_VALIDATE_OR_GOTO (this->name, flock, out); - - /* - * Check if the fop is a locking fop or unlocking fop, and - * handle it accordingly. If it is a locking fop, take the - * lock on leader first, and then send it to the followers. - * If it is a unlocking fop, unlock the followers first, - * and then on meeting quorum perform the unlock on the leader. - */ - if (flock->l_type == F_UNLCK) { - ret = jbr_lk_call_dispatch (frame, this, op_errno, - fd, cmd, flock, xdata); - if (ret) - goto out; - } else { - jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode); - - if (!ictx) { - *op_errno = EIO; - goto out; - } - - LOCK(&ictx->lock); - if (ictx->active) { - gf_msg_debug (this->name, 0, - "queuing request due to conflict"); - - local->qstub = fop_lk_stub (frame, - jbr_perform_lk_on_leader, - fd, cmd, flock, xdata); - if (!local->qstub) { - UNLOCK(&ictx->lock); - goto out; - } - list_add_tail(&local->qlinks, &ictx->pqueue); - ++(ictx->pending); - UNLOCK(&ictx->lock); - ret = 0; - goto out; - } else { - list_add_tail(&local->qlinks, &ictx->aqueue); - ++(ictx->active); - } + int32_t ret = -1; + jbr_local_t *local = NULL; + + GF_VALIDATE_OR_GOTO("jbr", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + local = frame->local; + GF_VALIDATE_OR_GOTO(this->name, local, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, op_errno, out); + GF_VALIDATE_OR_GOTO(this->name, flock, out); + + /* + * Check if the fop is a locking fop or unlocking fop, and + * handle it accordingly. If it is a locking fop, take the + * lock on leader first, and then send it to the followers. + * If it is a unlocking fop, unlock the followers first, + * and then on meeting quorum perform the unlock on the leader. + */ + if (flock->l_type == F_UNLCK) { + ret = jbr_lk_call_dispatch(frame, this, op_errno, fd, cmd, flock, + xdata); + if (ret) + goto out; + } else { + jbr_inode_ctx_t *ictx = jbr_get_inode_ctx(this, fd->inode); + + if (!ictx) { + *op_errno = EIO; + goto out; + } + + LOCK(&ictx->lock); + if (ictx->active) { + gf_msg_debug(this->name, 0, "queuing request due to conflict"); + + local->qstub = fop_lk_stub(frame, jbr_perform_lk_on_leader, fd, cmd, + flock, xdata); + if (!local->qstub) { UNLOCK(&ictx->lock); - ret = jbr_perform_lk_on_leader (frame, this, fd, cmd, - flock, xdata); - if (ret == -1) - goto out; + goto out; + } + list_add_tail(&local->qlinks, &ictx->pqueue); + ++(ictx->pending); + UNLOCK(&ictx->lock); + ret = 0; + goto out; + } else { + list_add_tail(&local->qlinks, &ictx->aqueue); + ++(ictx->active); } + UNLOCK(&ictx->lock); + ret = jbr_perform_lk_on_leader(frame, this, fd, cmd, flock, xdata); + if (ret == -1) + goto out; + } - ret = 0; + ret = 0; out: - return ret; + return ret; } int32_t -jbr_lk_continue (call_frame_t *frame, xlator_t *this, - fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata) +jbr_lk_continue(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, + struct gf_flock *flock, dict_t *xdata) { - int32_t ret = -1; - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; - - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - priv = this->private; - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, priv, out); - GF_VALIDATE_OR_GOTO (this->name, local, out); - GF_VALIDATE_OR_GOTO (this->name, flock, out); - GF_VALIDATE_OR_GOTO (this->name, fd, out); - GF_VALIDATE_OR_GOTO (this->name, xdata, out); - + int32_t ret = -1; + jbr_local_t *local = NULL; + jbr_private_t *priv = NULL; + + GF_VALIDATE_OR_GOTO("jbr", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + priv = this->private; + local = frame->local; + GF_VALIDATE_OR_GOTO(this->name, priv, out); + GF_VALIDATE_OR_GOTO(this->name, local, out); + GF_VALIDATE_OR_GOTO(this->name, flock, out); + GF_VALIDATE_OR_GOTO(this->name, fd, out); + GF_VALIDATE_OR_GOTO(this->name, xdata, out); + + /* + * If it's a locking fop, then call dispatch to followers * + * If it's a unlock fop, then perform the unlock operation * + */ + if (flock->l_type == F_UNLCK) { + STACK_WIND(frame, jbr_lk_complete, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->lk, fd, cmd, flock, xdata); + } else { /* - * If it's a locking fop, then call dispatch to followers * - * If it's a unlock fop, then perform the unlock operation * + * Directly call jbr_lk_dispatch instead of appending * + * in queue, which is done at jbr_lk_perform_local_op * + * for locking fops * */ - if (flock->l_type == F_UNLCK) { - STACK_WIND (frame, jbr_lk_complete, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk, - fd, cmd, flock, xdata); - } else { - /* - * Directly call jbr_lk_dispatch instead of appending * - * in queue, which is done at jbr_lk_perform_local_op * - * for locking fops * - */ - ret = jbr_lk_dispatch (frame, this, fd, cmd, - flock, xdata); - if (ret) { - STACK_UNWIND_STRICT (lk, frame, -1, 0, - flock, xdata); - goto out; - } + ret = jbr_lk_dispatch(frame, this, fd, cmd, flock, xdata); + if (ret) { + STACK_UNWIND_STRICT(lk, frame, -1, 0, flock, xdata); + goto out; } + } - ret = 0; + ret = 0; out: - return ret; + return ret; } uint8_t -jbr_count_up_kids (jbr_private_t *priv) +jbr_count_up_kids(jbr_private_t *priv) { - uint8_t retval = 0; - uint8_t i; + uint8_t retval = 0; + uint8_t i; - for (i = 0; i < priv->n_children; ++i) { - if (priv->kid_state & (1 << i)) { - ++retval; - } + for (i = 0; i < priv->n_children; ++i) { + if (priv->kid_state & (1 << i)) { + ++retval; } + } - return retval; + return retval; } /* @@ -758,706 +724,673 @@ jbr_count_up_kids (jbr_private_t *priv) */ int32_t -jbr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +jbr_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, + int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf, + dict_t *xdata) { - jbr_local_t *local = frame->local; - gf_boolean_t unwind; - - LOCK(&frame->lock); - unwind = !--(local->call_count); - UNLOCK(&frame->lock); - - if (unwind) { - STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, - postbuf, xdata); - } - return 0; + jbr_local_t *local = frame->local; + gf_boolean_t unwind; + + LOCK(&frame->lock); + unwind = !--(local->call_count); + UNLOCK(&frame->lock); + + if (unwind) { + STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, + xdata); + } + return 0; } int32_t -jbr_fsync_local_cbk (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, struct iatt *prebuf, - struct iatt *postbuf, dict_t *xdata) +jbr_fsync_local_cbk(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, struct iatt *prebuf, + struct iatt *postbuf, dict_t *xdata) { - jbr_dirty_list_t *dirty; - jbr_dirty_list_t *dtmp; - jbr_local_t *local = frame->local; - - list_for_each_entry_safe (dirty, dtmp, &local->qlinks, links) { - gf_msg_trace (this->name, 0, - "sending post-op on %p (%p)", local->fd, dirty); - GF_FREE(dirty); - } - - return jbr_fsync_cbk (frame, cookie, this, op_ret, op_errno, - prebuf, postbuf, xdata); + jbr_dirty_list_t *dirty; + jbr_dirty_list_t *dtmp; + jbr_local_t *local = frame->local; + + list_for_each_entry_safe(dirty, dtmp, &local->qlinks, links) + { + gf_msg_trace(this->name, 0, "sending post-op on %p (%p)", local->fd, + dirty); + GF_FREE(dirty); + } + + return jbr_fsync_cbk(frame, cookie, this, op_ret, op_errno, prebuf, postbuf, + xdata); } int32_t -jbr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, - dict_t *xdata) +jbr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, + dict_t *xdata) { - jbr_private_t *priv = this->private; - jbr_local_t *local; - uint64_t ctx_int = 0LL; - jbr_fd_ctx_t *ctx_ptr; - xlator_list_t *trav; - - local = mem_get0(this->local_pool); - if (!local) { - STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, - NULL, NULL, xdata); - return 0; - } - INIT_LIST_HEAD(&local->qlinks); - frame->local = local; - - /* Move the dirty list from the fd to the fsync request. */ - LOCK(&fd->lock); - if (__fd_ctx_get(fd, this, &ctx_int) == 0) { - ctx_ptr = (jbr_fd_ctx_t *)(long)ctx_int; - list_splice_init (&ctx_ptr->dirty_list, - &local->qlinks); - } - UNLOCK(&fd->lock); - - /* Issue the local call. */ - local->call_count = priv->leader ? priv->n_children : 1; - STACK_WIND (frame, jbr_fsync_local_cbk, - FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync, - fd, flags, xdata); - - /* Issue remote calls if we're the leader. */ - if (priv->leader) { - for (trav = this->children->next; trav; trav = trav->next) { - STACK_WIND (frame, jbr_fsync_cbk, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->fsync, - fd, flags, xdata); - } - } - + jbr_private_t *priv = this->private; + jbr_local_t *local; + uint64_t ctx_int = 0LL; + jbr_fd_ctx_t *ctx_ptr; + xlator_list_t *trav; + + local = mem_get0(this->local_pool); + if (!local) { + STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, NULL, NULL, xdata); return 0; + } + INIT_LIST_HEAD(&local->qlinks); + frame->local = local; + + /* Move the dirty list from the fd to the fsync request. */ + LOCK(&fd->lock); + if (__fd_ctx_get(fd, this, &ctx_int) == 0) { + ctx_ptr = (jbr_fd_ctx_t *)(long)ctx_int; + list_splice_init(&ctx_ptr->dirty_list, &local->qlinks); + } + UNLOCK(&fd->lock); + + /* Issue the local call. */ + local->call_count = priv->leader ? priv->n_children : 1; + STACK_WIND(frame, jbr_fsync_local_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, fd, flags, xdata); + + /* Issue remote calls if we're the leader. */ + if (priv->leader) { + for (trav = this->children->next; trav; trav = trav->next) { + STACK_WIND(frame, jbr_fsync_cbk, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->fsync, fd, flags, xdata); + } + } + + return 0; } int32_t -jbr_getxattr_special (call_frame_t *frame, xlator_t *this, loc_t *loc, - const char *name, dict_t *xdata) +jbr_getxattr_special(call_frame_t *frame, xlator_t *this, loc_t *loc, + const char *name, dict_t *xdata) { - dict_t *result; - jbr_private_t *priv = this->private; + dict_t *result; + jbr_private_t *priv = this->private; - if (!priv->leader) { - STACK_UNWIND_STRICT (getxattr, frame, -1, EREMOTE, NULL, NULL); - return 0; - } + if (!priv->leader) { + STACK_UNWIND_STRICT(getxattr, frame, -1, EREMOTE, NULL, NULL); + return 0; + } - if (!name || (strcmp(name, JBR_REP_COUNT_XATTR) != 0)) { - STACK_WIND_TAIL (frame, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->getxattr, - loc, name, xdata); - return 0; - } + if (!name || (strcmp(name, JBR_REP_COUNT_XATTR) != 0)) { + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->getxattr, loc, name, xdata); + return 0; + } - result = dict_new(); - if (!result) { - goto dn_failed; - } + result = dict_new(); + if (!result) { + goto dn_failed; + } - priv->up_children = jbr_count_up_kids(this->private); - if (dict_set_uint32(result, JBR_REP_COUNT_XATTR, - priv->up_children) != 0) { - goto dsu_failed; - } + priv->up_children = jbr_count_up_kids(this->private); + if (dict_set_uint32(result, JBR_REP_COUNT_XATTR, priv->up_children) != 0) { + goto dsu_failed; + } - STACK_UNWIND_STRICT (getxattr, frame, 0, 0, result, NULL); - dict_unref(result); - return 0; + STACK_UNWIND_STRICT(getxattr, frame, 0, 0, result, NULL); + dict_unref(result); + return 0; dsu_failed: - dict_unref(result); + dict_unref(result); dn_failed: - STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL); - return 0; + STACK_UNWIND_STRICT(getxattr, frame, -1, ENOMEM, NULL, NULL); + return 0; } void -jbr_flush_fd (xlator_t *this, jbr_fd_ctx_t *fd_ctx) +jbr_flush_fd(xlator_t *this, jbr_fd_ctx_t *fd_ctx) { - jbr_dirty_list_t *dirty; - jbr_dirty_list_t *dtmp; + jbr_dirty_list_t *dirty; + jbr_dirty_list_t *dtmp; - list_for_each_entry_safe (dirty, dtmp, &fd_ctx->dirty_list, links) { - gf_msg_trace (this->name, 0, - "sending post-op on %p (%p)", fd_ctx->fd, dirty); - GF_FREE(dirty); - } + list_for_each_entry_safe(dirty, dtmp, &fd_ctx->dirty_list, links) + { + gf_msg_trace(this->name, 0, "sending post-op on %p (%p)", fd_ctx->fd, + dirty); + GF_FREE(dirty); + } - INIT_LIST_HEAD(&fd_ctx->dirty_list); + INIT_LIST_HEAD(&fd_ctx->dirty_list); } void * -jbr_flush_thread (void *ctx) +jbr_flush_thread(void *ctx) { - xlator_t *this = ctx; - jbr_private_t *priv = this->private; - struct list_head dirty_fds; - jbr_fd_ctx_t *fd_ctx; - jbr_fd_ctx_t *fd_tmp; - int ret; - - for (;;) { - /* - * We have to be very careful to avoid lock inversions here, so - * we can't just hold priv->dirty_lock while we take and - * release locks for each fd. Instead, we only hold dirty_lock - * at the beginning of each iteration, as we (effectively) make - * a copy of the current list head and then clear the original. - * This leads to four scenarios for adding the first entry to - * an fd and potentially putting it on the global list. - * - * (1) While we're asleep. No lock contention, it just gets - * added and will be processed on the next iteration. - * - * (2) After we've made a local copy, but before we've started - * processing that fd. The new entry will be added to the - * fd (under its lock), and we'll process it on the current - * iteration. - * - * (3) While we're processing the fd. They'll block on the fd - * lock, then see that the list is empty and put it on the - * global list. We'll process it here on the next - * iteration. - * - * (4) While we're working, but after we've processed that fd. - * Same as (1) as far as that fd is concerned. - */ - INIT_LIST_HEAD(&dirty_fds); - LOCK(&priv->dirty_lock); - list_splice_init(&priv->dirty_fds, &dirty_fds); - UNLOCK(&priv->dirty_lock); - - list_for_each_entry_safe (fd_ctx, fd_tmp, &dirty_fds, fd_list) { - ret = syncop_fsync(FIRST_CHILD(this), fd_ctx->fd, 0, - NULL, NULL, NULL, NULL); - if (ret) { - gf_msg (this->name, GF_LOG_WARNING, 0, - J_MSG_SYS_CALL_FAILURE, - "failed to fsync %p (%d)", - fd_ctx->fd, -ret); - } - - LOCK(&fd_ctx->fd->lock); - jbr_flush_fd(this, fd_ctx); - list_del_init(&fd_ctx->fd_list); - UNLOCK(&fd_ctx->fd->lock); - fd_unref(fd_ctx->fd); - } - - sleep(JBR_FLUSH_INTERVAL); - } - - return NULL; + xlator_t *this = ctx; + jbr_private_t *priv = this->private; + struct list_head dirty_fds; + jbr_fd_ctx_t *fd_ctx; + jbr_fd_ctx_t *fd_tmp; + int ret; + + for (;;) { + /* + * We have to be very careful to avoid lock inversions here, so + * we can't just hold priv->dirty_lock while we take and + * release locks for each fd. Instead, we only hold dirty_lock + * at the beginning of each iteration, as we (effectively) make + * a copy of the current list head and then clear the original. + * This leads to four scenarios for adding the first entry to + * an fd and potentially putting it on the global list. + * + * (1) While we're asleep. No lock contention, it just gets + * added and will be processed on the next iteration. + * + * (2) After we've made a local copy, but before we've started + * processing that fd. The new entry will be added to the + * fd (under its lock), and we'll process it on the current + * iteration. + * + * (3) While we're processing the fd. They'll block on the fd + * lock, then see that the list is empty and put it on the + * global list. We'll process it here on the next + * iteration. + * + * (4) While we're working, but after we've processed that fd. + * Same as (1) as far as that fd is concerned. + */ + INIT_LIST_HEAD(&dirty_fds); + LOCK(&priv->dirty_lock); + list_splice_init(&priv->dirty_fds, &dirty_fds); + UNLOCK(&priv->dirty_lock); + + list_for_each_entry_safe(fd_ctx, fd_tmp, &dirty_fds, fd_list) + { + ret = syncop_fsync(FIRST_CHILD(this), fd_ctx->fd, 0, NULL, NULL, + NULL, NULL); + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, J_MSG_SYS_CALL_FAILURE, + "failed to fsync %p (%d)", fd_ctx->fd, -ret); + } + + LOCK(&fd_ctx->fd->lock); + jbr_flush_fd(this, fd_ctx); + list_del_init(&fd_ctx->fd_list); + UNLOCK(&fd_ctx->fd->lock); + fd_unref(fd_ctx->fd); + } + + sleep(JBR_FLUSH_INTERVAL); + } + + return NULL; } - int32_t -jbr_get_changelog_dir (xlator_t *this, char **cl_dir_p) +jbr_get_changelog_dir(xlator_t *this, char **cl_dir_p) { - xlator_t *cl_xl; - - /* Find our changelog translator. */ - cl_xl = this; - while (cl_xl) { - if (strcmp(cl_xl->type, "features/changelog") == 0) { - break; - } - cl_xl = cl_xl->children->xlator; - } - if (!cl_xl) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_INIT_FAIL, - "failed to find changelog translator"); - return ENOENT; - } - - /* Find the actual changelog directory. */ - if (dict_get_str(cl_xl->options, "changelog-dir", cl_dir_p) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_INIT_FAIL, - "failed to find changelog-dir for %s", cl_xl->name); - return ENODATA; - } - - return 0; + xlator_t *cl_xl; + + /* Find our changelog translator. */ + cl_xl = this; + while (cl_xl) { + if (strcmp(cl_xl->type, "features/changelog") == 0) { + break; + } + cl_xl = cl_xl->children->xlator; + } + if (!cl_xl) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_INIT_FAIL, + "failed to find changelog translator"); + return ENOENT; + } + + /* Find the actual changelog directory. */ + if (dict_get_str(cl_xl->options, "changelog-dir", cl_dir_p) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_INIT_FAIL, + "failed to find changelog-dir for %s", cl_xl->name); + return ENODATA; + } + + return 0; } - void -jbr_get_terms (call_frame_t *frame, xlator_t *this) +jbr_get_terms(call_frame_t *frame, xlator_t *this) { - int32_t op_errno = 0; - char *cl_dir = NULL; - int32_t term_first = -1; - int32_t term_contig = -1; - int32_t term_last = -1; - int term_num = 0; - char *probe_str = NULL; - dict_t *my_xdata = NULL; - DIR *fp = NULL; - struct dirent *entry = NULL; - struct dirent scratch[2] = {{0,},}; - - op_errno = jbr_get_changelog_dir(this, &cl_dir); - if (op_errno) { - goto err; /* Error was already logged. */ - } - op_errno = ENODATA; /* Most common error after this. */ - - fp = sys_opendir (cl_dir); - if (!fp) { + int32_t op_errno = 0; + char *cl_dir = NULL; + int32_t term_first = -1; + int32_t term_contig = -1; + int32_t term_last = -1; + int term_num = 0; + char *probe_str = NULL; + dict_t *my_xdata = NULL; + DIR *fp = NULL; + struct dirent *entry = NULL; + struct dirent scratch[2] = { + { + 0, + }, + }; + + op_errno = jbr_get_changelog_dir(this, &cl_dir); + if (op_errno) { + goto err; /* Error was already logged. */ + } + op_errno = ENODATA; /* Most common error after this. */ + + fp = sys_opendir(cl_dir); + if (!fp) { + op_errno = errno; + goto err; + } + + /* Find first and last terms. */ + for (;;) { + errno = 0; + entry = sys_readdir(fp, scratch); + if (!entry || errno != 0) { + if (errno != 0) { op_errno = errno; goto err; - } - - /* Find first and last terms. */ - for (;;) { - errno = 0; - entry = sys_readdir (fp, scratch); - if (!entry || errno != 0) { - if (errno != 0) { - op_errno = errno; - goto err; - } - break; - } - - if (fnmatch("TERM.*", entry->d_name, FNM_PATHNAME) != 0) { - continue; - } - /* +5 points to the character after the period */ - term_num = atoi(entry->d_name+5); - gf_msg (this->name, GF_LOG_INFO, 0, - J_MSG_GENERIC, - "%s => %d", entry->d_name, term_num); - if (term_num < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_INVALID, - "invalid term file name %s", entry->d_name); - op_errno = EINVAL; - goto err; - } - if ((term_first < 0) || (term_first > term_num)) { - term_first = term_num; - } - if ((term_last < 0) || (term_last < term_num)) { - term_last = term_num; - } - } - if ((term_first < 0) || (term_last < 0)) { - /* TBD: are we *sure* there should always be at least one? */ - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_NO_DATA, "no terms found"); - op_errno = EINVAL; - goto err; - } - - (void) sys_closedir (fp); - fp = NULL; - - /* - * Find term_contig, which is the earliest term for which there are - * no gaps between it and term_last. - */ - for (term_contig = term_last; term_contig > 0; --term_contig) { - if (gf_asprintf(&probe_str, "%s/TERM.%d", - cl_dir, term_contig-1) <= 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_MEM_ERR, - "failed to format term %d", term_contig-1); - goto err; - } - if (sys_access(probe_str, F_OK) != 0) { - GF_FREE(probe_str); - probe_str = NULL; - break; - } - GF_FREE(probe_str); - probe_str = NULL; - } - - gf_msg (this->name, GF_LOG_INFO, 0, - J_MSG_GENERIC, - "found terms %d-%d (%d)", - term_first, term_last, term_contig); - - /* Return what we've found */ - my_xdata = dict_new(); - if (!my_xdata) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_MEM_ERR, - "failed to allocate reply dictionary"); - goto err; - } - if (dict_set_int32(my_xdata, "term-first", term_first) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, - "failed to set term-first"); - goto err; - } - if (dict_set_int32(my_xdata, "term-contig", term_contig) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, - "failed to set term-contig"); - goto err; - } - if (dict_set_int32(my_xdata, "term-last", term_last) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, - "failed to set term-last"); - goto err; - } - - /* Finally! */ - STACK_UNWIND_STRICT (ipc, frame, 0, 0, my_xdata); - dict_unref(my_xdata); - return; + } + break; + } + + if (fnmatch("TERM.*", entry->d_name, FNM_PATHNAME) != 0) { + continue; + } + /* +5 points to the character after the period */ + term_num = atoi(entry->d_name + 5); + gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, "%s => %d", + entry->d_name, term_num); + if (term_num < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_INVALID, + "invalid term file name %s", entry->d_name); + op_errno = EINVAL; + goto err; + } + if ((term_first < 0) || (term_first > term_num)) { + term_first = term_num; + } + if ((term_last < 0) || (term_last < term_num)) { + term_last = term_num; + } + } + if ((term_first < 0) || (term_last < 0)) { + /* TBD: are we *sure* there should always be at least one? */ + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA, "no terms found"); + op_errno = EINVAL; + goto err; + } + + (void)sys_closedir(fp); + fp = NULL; + + /* + * Find term_contig, which is the earliest term for which there are + * no gaps between it and term_last. + */ + for (term_contig = term_last; term_contig > 0; --term_contig) { + if (gf_asprintf(&probe_str, "%s/TERM.%d", cl_dir, term_contig - 1) <= + 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR, + "failed to format term %d", term_contig - 1); + goto err; + } + if (sys_access(probe_str, F_OK) != 0) { + GF_FREE(probe_str); + probe_str = NULL; + break; + } + GF_FREE(probe_str); + probe_str = NULL; + } + + gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, "found terms %d-%d (%d)", + term_first, term_last, term_contig); + + /* Return what we've found */ + my_xdata = dict_new(); + if (!my_xdata) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR, + "failed to allocate reply dictionary"); + goto err; + } + if (dict_set_int32(my_xdata, "term-first", term_first) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR, + "failed to set term-first"); + goto err; + } + if (dict_set_int32(my_xdata, "term-contig", term_contig) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR, + "failed to set term-contig"); + goto err; + } + if (dict_set_int32(my_xdata, "term-last", term_last) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR, + "failed to set term-last"); + goto err; + } + + /* Finally! */ + STACK_UNWIND_STRICT(ipc, frame, 0, 0, my_xdata); + dict_unref(my_xdata); + return; err: - if (fp) { - (void) sys_closedir (fp); - } - if (my_xdata) { - dict_unref(my_xdata); - } + if (fp) { + (void)sys_closedir(fp); + } + if (my_xdata) { + dict_unref(my_xdata); + } - if (probe_str) - GF_FREE (probe_str); + if (probe_str) + GF_FREE(probe_str); - STACK_UNWIND_STRICT (ipc, frame, -1, op_errno, NULL); + STACK_UNWIND_STRICT(ipc, frame, -1, op_errno, NULL); } - long -get_entry_count (xlator_t *this, int fd) +get_entry_count(xlator_t *this, int fd) { - struct stat buf; - long min; /* last entry not known to be empty */ - long max; /* first entry known to be empty */ - long curr; - char entry[CHANGELOG_ENTRY_SIZE]; - - if (sys_fstat (fd, &buf) < 0) { - return -1; - } + struct stat buf; + long min; /* last entry not known to be empty */ + long max; /* first entry known to be empty */ + long curr; + char entry[CHANGELOG_ENTRY_SIZE]; - min = 0; - max = buf.st_size / CHANGELOG_ENTRY_SIZE; + if (sys_fstat(fd, &buf) < 0) { + return -1; + } - while ((min+1) < max) { - curr = (min + max) / 2; - if (sys_lseek(fd, curr*CHANGELOG_ENTRY_SIZE, SEEK_SET) < 0) { - return -1; - } - if (sys_read(fd, entry, sizeof(entry)) != sizeof(entry)) { - return -1; - } - if ((entry[0] == '_') && (entry[1] == 'P')) { - min = curr; - } else { - max = curr; - } - } + min = 0; + max = buf.st_size / CHANGELOG_ENTRY_SIZE; - if (sys_lseek(fd, 0, SEEK_SET) < 0) { - gf_msg (this->name, GF_LOG_WARNING, 0, - J_MSG_SYS_CALL_FAILURE, - "failed to reset offset"); + while ((min + 1) < max) { + curr = (min + max) / 2; + if (sys_lseek(fd, curr * CHANGELOG_ENTRY_SIZE, SEEK_SET) < 0) { + return -1; } - return max; -} - - -void -jbr_open_term (call_frame_t *frame, xlator_t *this, dict_t *xdata) -{ - int32_t op_errno; - char *cl_dir; - char *term; - char *path = NULL; - jbr_private_t *priv = this->private; - - op_errno = jbr_get_changelog_dir(this, &cl_dir); - if (op_errno) { - goto err; + if (sys_read(fd, entry, sizeof(entry)) != sizeof(entry)) { + return -1; } - - if (dict_get_str(xdata, "term", &term) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_NO_DATA, "missing term"); - op_errno = ENODATA; - goto err; - } - - if (gf_asprintf(&path, "%s/TERM.%s", cl_dir, term) < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_MEM_ERR, "failed to construct path"); - op_errno = ENOMEM; - goto err; + if ((entry[0] == '_') && (entry[1] == 'P')) { + min = curr; + } else { + max = curr; } + } - if (priv->term_fd >= 0) { - sys_close (priv->term_fd); - } - priv->term_fd = open(path, O_RDONLY); - if (priv->term_fd < 0) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_SYS_CALL_FAILURE, - "failed to open term file"); - goto err; - } + if (sys_lseek(fd, 0, SEEK_SET) < 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, J_MSG_SYS_CALL_FAILURE, + "failed to reset offset"); + } + return max; +} - priv->term_total = get_entry_count(this, priv->term_fd); - if (priv->term_total < 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_NO_DATA, "failed to get entry count"); - sys_close (priv->term_fd); - priv->term_fd = -1; - op_errno = EIO; - goto err; - } - priv->term_read = 0; +void +jbr_open_term(call_frame_t *frame, xlator_t *this, dict_t *xdata) +{ + int32_t op_errno; + char *cl_dir; + char *term; + char *path = NULL; + jbr_private_t *priv = this->private; + + op_errno = jbr_get_changelog_dir(this, &cl_dir); + if (op_errno) { + goto err; + } + + if (dict_get_str(xdata, "term", &term) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA, "missing term"); + op_errno = ENODATA; + goto err; + } + + if (gf_asprintf(&path, "%s/TERM.%s", cl_dir, term) < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR, + "failed to construct path"); + op_errno = ENOMEM; + goto err; + } + + if (priv->term_fd >= 0) { + sys_close(priv->term_fd); + } + priv->term_fd = open(path, O_RDONLY); + if (priv->term_fd < 0) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_SYS_CALL_FAILURE, + "failed to open term file"); + goto err; + } + + priv->term_total = get_entry_count(this, priv->term_fd); + if (priv->term_total < 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA, + "failed to get entry count"); + sys_close(priv->term_fd); + priv->term_fd = -1; + op_errno = EIO; + goto err; + } + priv->term_read = 0; - /* Success! */ - STACK_UNWIND_STRICT (ipc, frame, 0, 0, NULL); - GF_FREE (path); - return; + /* Success! */ + STACK_UNWIND_STRICT(ipc, frame, 0, 0, NULL); + GF_FREE(path); + return; err: - STACK_UNWIND_STRICT (ipc, frame, -1, op_errno, NULL); - GF_FREE (path); + STACK_UNWIND_STRICT(ipc, frame, -1, op_errno, NULL); + GF_FREE(path); } - void -jbr_next_entry (call_frame_t *frame, xlator_t *this) +jbr_next_entry(call_frame_t *frame, xlator_t *this) { - int32_t op_errno = ENOMEM; - jbr_private_t *priv = this->private; - ssize_t nbytes; - dict_t *my_xdata; - - if (priv->term_fd < 0) { - op_errno = EBADFD; - goto err; - } - - if (priv->term_read >= priv->term_total) { - op_errno = ENODATA; - goto err; - } - - nbytes = sys_read (priv->term_fd, priv->term_buf, CHANGELOG_ENTRY_SIZE); - if (nbytes < CHANGELOG_ENTRY_SIZE) { - if (nbytes < 0) { - op_errno = errno; - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_SYS_CALL_FAILURE, - "error reading next entry: %s", - strerror(errno)); - } else { - op_errno = EIO; - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_SYS_CALL_FAILURE, - "got %zd/%d bytes for next entry", - nbytes, CHANGELOG_ENTRY_SIZE); - } - goto err; - } - ++(priv->term_read); - - my_xdata = dict_new(); - if (!my_xdata) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_MEM_ERR, "failed to allocate reply xdata"); - goto err; - } - - if (dict_set_static_bin(my_xdata, "data", - priv->term_buf, CHANGELOG_ENTRY_SIZE) != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_DICT_FLR, "failed to assign reply xdata"); - goto err; - } - - STACK_UNWIND_STRICT (ipc, frame, 0, 0, my_xdata); - dict_unref(my_xdata); - return; + int32_t op_errno = ENOMEM; + jbr_private_t *priv = this->private; + ssize_t nbytes; + dict_t *my_xdata; + + if (priv->term_fd < 0) { + op_errno = EBADFD; + goto err; + } + + if (priv->term_read >= priv->term_total) { + op_errno = ENODATA; + goto err; + } + + nbytes = sys_read(priv->term_fd, priv->term_buf, CHANGELOG_ENTRY_SIZE); + if (nbytes < CHANGELOG_ENTRY_SIZE) { + if (nbytes < 0) { + op_errno = errno; + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_SYS_CALL_FAILURE, + "error reading next entry: %s", strerror(errno)); + } else { + op_errno = EIO; + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_SYS_CALL_FAILURE, + "got %zd/%d bytes for next entry", nbytes, + CHANGELOG_ENTRY_SIZE); + } + goto err; + } + ++(priv->term_read); + + my_xdata = dict_new(); + if (!my_xdata) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR, + "failed to allocate reply xdata"); + goto err; + } + + if (dict_set_static_bin(my_xdata, "data", priv->term_buf, + CHANGELOG_ENTRY_SIZE) != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_DICT_FLR, + "failed to assign reply xdata"); + goto err; + } + + STACK_UNWIND_STRICT(ipc, frame, 0, 0, my_xdata); + dict_unref(my_xdata); + return; err: - STACK_UNWIND_STRICT (ipc, frame, -1, op_errno, NULL); + STACK_UNWIND_STRICT(ipc, frame, -1, op_errno, NULL); } int32_t -jbr_ipc_fan_in (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, dict_t *xdata) +jbr_ipc_fan_in(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - jbr_local_t *local = NULL; - int32_t ret = -1; - uint8_t call_count; + jbr_local_t *local = NULL; + int32_t ret = -1; + uint8_t call_count; - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, out); + GF_VALIDATE_OR_GOTO("jbr", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + local = frame->local; + GF_VALIDATE_OR_GOTO(this->name, local, out); - gf_msg_trace (this->name, 0, "op_ret = %d, op_errno = %d\n", - op_ret, op_errno); + gf_msg_trace(this->name, 0, "op_ret = %d, op_errno = %d\n", op_ret, + op_errno); - LOCK(&frame->lock); - call_count = --(local->call_count); - UNLOCK(&frame->lock); + LOCK(&frame->lock); + call_count = --(local->call_count); + UNLOCK(&frame->lock); - if (call_count == 0) { + if (call_count == 0) { #if defined(JBR_CG_QUEUE) - ret = jbr_remove_from_queue (frame, this); - if (ret) { - gf_msg (this->name, GF_LOG_ERROR, 0, - J_MSG_GENERIC, "Failed to remove from queue."); - } -#endif - /* - * Unrefing the reference taken in continue() or complete() * - */ - dict_unref (local->xdata); - STACK_DESTROY (frame->root); + ret = jbr_remove_from_queue(frame, this); + if (ret) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_GENERIC, + "Failed to remove from queue."); } +#endif + /* + * Unrefing the reference taken in continue() or complete() * + */ + dict_unref(local->xdata); + STACK_DESTROY(frame->root); + } - ret = 0; + ret = 0; out: - return ret; + return ret; } int32_t -jbr_ipc_complete (call_frame_t *frame, void *cookie, xlator_t *this, - int32_t op_ret, int32_t op_errno, - dict_t *xdata) +jbr_ipc_complete(call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) { - jbr_local_t *local = NULL; + jbr_local_t *local = NULL; - GF_VALIDATE_OR_GOTO ("jbr", this, out); - GF_VALIDATE_OR_GOTO (this->name, frame, out); - local = frame->local; - GF_VALIDATE_OR_GOTO (this->name, local, out); + GF_VALIDATE_OR_GOTO("jbr", this, out); + GF_VALIDATE_OR_GOTO(this->name, frame, out); + local = frame->local; + GF_VALIDATE_OR_GOTO(this->name, local, out); - jbr_ipc_call_dispatch (frame, - this, &op_errno, - FDL_IPC_JBR_SERVER_ROLLBACK, - local->xdata); + jbr_ipc_call_dispatch(frame, this, &op_errno, FDL_IPC_JBR_SERVER_ROLLBACK, + local->xdata); out: - return 0; + return 0; } int32_t -jbr_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +jbr_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) { - switch (op) { + switch (op) { case JBR_SERVER_TERM_RANGE: - jbr_get_terms(frame, this); - break; + jbr_get_terms(frame, this); + break; case JBR_SERVER_OPEN_TERM: - jbr_open_term(frame, this, xdata); - break; + jbr_open_term(frame, this, xdata); + break; case JBR_SERVER_NEXT_ENTRY: - jbr_next_entry(frame, this); - break; + jbr_next_entry(frame, this); + break; case FDL_IPC_JBR_SERVER_ROLLBACK: - /* - * Just send the fop down to fdl. Need not * - * dispatch it to other bricks in the sub- * - * volume, as it will be done where the op * - * has failed. * - */ + /* + * Just send the fop down to fdl. Need not * + * dispatch it to other bricks in the sub- * + * volume, as it will be done where the op * + * has failed. * + */ default: - STACK_WIND_TAIL (frame, - FIRST_CHILD(this), - FIRST_CHILD(this)->fops->ipc, - op, xdata); - } + STACK_WIND_TAIL(frame, FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ipc, op, xdata); + } - return 0; + return 0; } #pragma generate int32_t -jbr_forget (xlator_t *this, inode_t *inode) +jbr_forget(xlator_t *this, inode_t *inode) { - uint64_t ctx = 0LL; + uint64_t ctx = 0LL; - if ((inode_ctx_del(inode, this, &ctx) == 0) && ctx) { - GF_FREE((void *)(long)ctx); - } + if ((inode_ctx_del(inode, this, &ctx) == 0) && ctx) { + GF_FREE((void *)(long)ctx); + } - return 0; + return 0; } int32_t -jbr_release (xlator_t *this, fd_t *fd) +jbr_release(xlator_t *this, fd_t *fd) { - uint64_t ctx = 0LL; + uint64_t ctx = 0LL; - if ((fd_ctx_del(fd, this, &ctx) == 0) && ctx) { - GF_FREE((void *)(long)ctx); - } + if ((fd_ctx_del(fd, this, &ctx) == 0) && ctx) { + GF_FREE((void *)(long)ctx); + } - return 0; + return 0; } struct xlator_cbks cbks = { - .forget = jbr_forget, - .release = jbr_release, + .forget = jbr_forget, + .release = jbr_release, }; int -jbr_reconfigure (xlator_t *this, dict_t *options) +jbr_reconfigure(xlator_t *this, dict_t *options) { - jbr_private_t *priv = this->private; + jbr_private_t *priv = this->private; - GF_OPTION_RECONF ("leader", - priv->config_leader, options, bool, err); - GF_OPTION_RECONF ("quorum-percent", - priv->quorum_pct, options, percent, err); - gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, - "reconfigure called, config_leader = %d, quorum_pct = %.1f\n", - priv->leader, priv->quorum_pct); + GF_OPTION_RECONF("leader", priv->config_leader, options, bool, err); + GF_OPTION_RECONF("quorum-percent", priv->quorum_pct, options, percent, err); + gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, + "reconfigure called, config_leader = %d, quorum_pct = %.1f\n", + priv->leader, priv->quorum_pct); - priv->leader = priv->config_leader; + priv->leader = priv->config_leader; - return 0; + return 0; err: - return -1; + return -1; } int -jbr_get_child_index (xlator_t *this, xlator_t *kid) +jbr_get_child_index(xlator_t *this, xlator_t *kid) { - xlator_list_t *trav; - int retval = -1; + xlator_list_t *trav; + int retval = -1; - for (trav = this->children; trav; trav = trav->next) { - ++retval; - if (trav->xlator == kid) { - return retval; - } + for (trav = this->children; trav; trav = trav->next) { + ++retval; + if (trav->xlator == kid) { + return retval; } + } - return -1; + return -1; } /* @@ -1469,289 +1402,275 @@ jbr_get_child_index (xlator_t *this, xlator_t *kid) * then generate counts on demand. */ int -jbr_notify (xlator_t *this, int event, void *data, ...) +jbr_notify(xlator_t *this, int event, void *data, ...) { - jbr_private_t *priv = this->private; - int index = -1; - int ret = -1; - gf_boolean_t result = _gf_false; - gf_boolean_t relevant = _gf_false; + jbr_private_t *priv = this->private; + int index = -1; + int ret = -1; + gf_boolean_t result = _gf_false; + gf_boolean_t relevant = _gf_false; - switch (event) { + switch (event) { case GF_EVENT_CHILD_UP: - index = jbr_get_child_index(this, data); - if (index >= 0) { - /* Check if the child was previously down - * and it's not a false CHILD_UP - */ - if (!(priv->kid_state & (1 << index))) { - relevant = _gf_true; - } - - priv->kid_state |= (1 << index); - priv->up_children = jbr_count_up_kids(priv); - gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, - "got CHILD_UP for %s, now %u kids", - ((xlator_t *)data)->name, - priv->up_children); - if (!priv->config_leader && (priv->up_children > 1)) { - priv->leader = _gf_false; - } - - /* If it's not relevant, or we have already * - * sent CHILD_UP just break */ - if (!relevant || priv->child_up) - break; - - /* If it's not a leader, just send the notify up */ - if (!priv->leader) { - ret = default_notify(this, event, data); - if (!ret) - priv->child_up = _gf_true; - break; - } - - result = fop_quorum_check (this, - (double)(priv->n_children - 1), - (double)(priv->up_children - 1)); - if (result == _gf_false) { - gf_msg (this->name, GF_LOG_INFO, 0, - J_MSG_GENERIC, "Not enough children " - "are up to meet quorum. Waiting to " - "send CHILD_UP from leader"); - } else { - gf_msg (this->name, GF_LOG_INFO, 0, - J_MSG_GENERIC, "Enough children are up " - "to meet quorum. Sending CHILD_UP " - "from leader"); - ret = default_notify(this, event, data); - if (!ret) - priv->child_up = _gf_true; - } + index = jbr_get_child_index(this, data); + if (index >= 0) { + /* Check if the child was previously down + * and it's not a false CHILD_UP + */ + if (!(priv->kid_state & (1 << index))) { + relevant = _gf_true; + } + + priv->kid_state |= (1 << index); + priv->up_children = jbr_count_up_kids(priv); + gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, + "got CHILD_UP for %s, now %u kids", + ((xlator_t *)data)->name, priv->up_children); + if (!priv->config_leader && (priv->up_children > 1)) { + priv->leader = _gf_false; + } + + /* If it's not relevant, or we have already * + * sent CHILD_UP just break */ + if (!relevant || priv->child_up) + break; + + /* If it's not a leader, just send the notify up */ + if (!priv->leader) { + ret = default_notify(this, event, data); + if (!ret) + priv->child_up = _gf_true; + break; } - break; + + result = fop_quorum_check(this, (double)(priv->n_children - 1), + (double)(priv->up_children - 1)); + if (result == _gf_false) { + gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, + "Not enough children " + "are up to meet quorum. Waiting to " + "send CHILD_UP from leader"); + } else { + gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, + "Enough children are up " + "to meet quorum. Sending CHILD_UP " + "from leader"); + ret = default_notify(this, event, data); + if (!ret) + priv->child_up = _gf_true; + } + } + break; case GF_EVENT_CHILD_DOWN: - index = jbr_get_child_index(this, data); - if (index >= 0) { - /* Check if the child was previously up - * and it's not a false CHILD_DOWN - */ - if (priv->kid_state & (1 << index)) { - relevant = _gf_true; - } - priv->kid_state &= ~(1 << index); - priv->up_children = jbr_count_up_kids(priv); - gf_msg (this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, - "got CHILD_DOWN for %s, now %u kids", - ((xlator_t *)data)->name, - priv->up_children); - if (!priv->config_leader && (priv->up_children < 2) - && relevant) { - priv->leader = _gf_true; - } - - /* If it's not relevant, or we have already * - * sent CHILD_DOWN just break */ - if (!relevant || !priv->child_up) - break; - - /* If it's not a leader, just break coz we shouldn't * - * propagate the failure from the failure till it * - * itself goes down * - */ - if (!priv->leader) { - break; - } - - result = fop_quorum_check (this, - (double)(priv->n_children - 1), - (double)(priv->up_children - 1)); - if (result == _gf_false) { - gf_msg (this->name, GF_LOG_INFO, 0, - J_MSG_GENERIC, "Enough children are " - "to down to fail quorum. " - "Sending CHILD_DOWN from leader"); - ret = default_notify(this, event, data); - if (!ret) - priv->child_up = _gf_false; - } else { - gf_msg (this->name, GF_LOG_INFO, 0, - J_MSG_GENERIC, "Not enough children " - "are down to fail quorum. Waiting to " - "send CHILD_DOWN from leader"); - } + index = jbr_get_child_index(this, data); + if (index >= 0) { + /* Check if the child was previously up + * and it's not a false CHILD_DOWN + */ + if (priv->kid_state & (1 << index)) { + relevant = _gf_true; + } + priv->kid_state &= ~(1 << index); + priv->up_children = jbr_count_up_kids(priv); + gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, + "got CHILD_DOWN for %s, now %u kids", + ((xlator_t *)data)->name, priv->up_children); + if (!priv->config_leader && (priv->up_children < 2) && + relevant) { + priv->leader = _gf_true; } - break; + + /* If it's not relevant, or we have already * + * sent CHILD_DOWN just break */ + if (!relevant || !priv->child_up) + break; + + /* If it's not a leader, just break coz we shouldn't * + * propagate the failure from the failure till it * + * itself goes down * + */ + if (!priv->leader) { + break; + } + + result = fop_quorum_check(this, (double)(priv->n_children - 1), + (double)(priv->up_children - 1)); + if (result == _gf_false) { + gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, + "Enough children are " + "to down to fail quorum. " + "Sending CHILD_DOWN from leader"); + ret = default_notify(this, event, data); + if (!ret) + priv->child_up = _gf_false; + } else { + gf_msg(this->name, GF_LOG_INFO, 0, J_MSG_GENERIC, + "Not enough children " + "are down to fail quorum. Waiting to " + "send CHILD_DOWN from leader"); + } + } + break; default: - ret = default_notify(this, event, data); - } + ret = default_notify(this, event, data); + } - return ret; + return ret; } - int32_t -mem_acct_init (xlator_t *this) +mem_acct_init(xlator_t *this) { - int ret = -1; + int ret = -1; - GF_VALIDATE_OR_GOTO ("jbr", this, out); + GF_VALIDATE_OR_GOTO("jbr", this, out); - ret = xlator_mem_acct_init (this, gf_mt_jbr_end + 1); + ret = xlator_mem_acct_init(this, gf_mt_jbr_end + 1); - if (ret != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR, - "Memory accounting init" "failed"); - return ret; - } -out: + if (ret != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR, + "Memory accounting init" + "failed"); return ret; + } +out: + return ret; } - void -jbr_deallocate_priv (jbr_private_t *priv) +jbr_deallocate_priv(jbr_private_t *priv) { - if (!priv) { - return; - } + if (!priv) { + return; + } - GF_FREE(priv); + GF_FREE(priv); } - int32_t -jbr_init (xlator_t *this) +jbr_init(xlator_t *this) { - xlator_list_t *remote; - xlator_list_t *local; - jbr_private_t *priv = NULL; - xlator_list_t *trav; - pthread_t kid; - extern xlator_t global_xlator; - glusterfs_ctx_t *oldctx = global_xlator.ctx; - - /* - * Any fop that gets special treatment has to be patched in here, - * because the compiled-in table is produced by the code generator and - * only contains generated functions. Note that we have to go through - * this->fops because of some dynamic-linking strangeness; modifying - * the static table doesn't work. - */ - this->fops->getxattr = jbr_getxattr_special; - this->fops->fsync = jbr_fsync; - - local = this->children; - if (!local) { - gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA, - "no local subvolume"); - goto err; - } - - remote = local->next; - if (!remote) { - gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA, - "no remote subvolumes"); - goto err; - } - - this->local_pool = mem_pool_new (jbr_local_t, 128); - if (!this->local_pool) { - gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR, - "failed to create jbr_local_t pool"); - goto err; - } - - priv = GF_CALLOC (1, sizeof(*priv), gf_mt_jbr_private_t); - if (!priv) { - gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR, - "could not allocate priv"); - goto err; - } - - for (trav = this->children; trav; trav = trav->next) { - ++(priv->n_children); - } - - LOCK_INIT(&priv->dirty_lock); - LOCK_INIT(&priv->index_lock); - INIT_LIST_HEAD(&priv->dirty_fds); - priv->term_fd = -1; - - this->private = priv; - - GF_OPTION_INIT ("leader", priv->config_leader, bool, err); - GF_OPTION_INIT ("quorum-percent", priv->quorum_pct, percent, err); - - priv->leader = priv->config_leader; - priv->child_up = _gf_false; - - if (gf_thread_create (&kid, NULL, jbr_flush_thread, this, - "jbrflush") != 0) { - gf_msg (this->name, GF_LOG_ERROR, 0, J_MSG_SYS_CALL_FAILURE, - "could not start flush thread"); - /* TBD: treat this as a fatal error? */ - } - - /* - * Calling glfs_new changes old->ctx, even if THIS still points - * to global_xlator. That causes problems later in the main - * thread, when gf_log_dump_graph tries to use the FILE after - * we've mucked with it and gets a segfault in __fprintf_chk. - * We can avoid all that by undoing the damage before we - * continue. - */ - global_xlator.ctx = oldctx; - - return 0; + xlator_list_t *remote; + xlator_list_t *local; + jbr_private_t *priv = NULL; + xlator_list_t *trav; + pthread_t kid; + extern xlator_t global_xlator; + glusterfs_ctx_t *oldctx = global_xlator.ctx; + + /* + * Any fop that gets special treatment has to be patched in here, + * because the compiled-in table is produced by the code generator and + * only contains generated functions. Note that we have to go through + * this->fops because of some dynamic-linking strangeness; modifying + * the static table doesn't work. + */ + this->fops->getxattr = jbr_getxattr_special; + this->fops->fsync = jbr_fsync; + + local = this->children; + if (!local) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA, + "no local subvolume"); + goto err; + } + + remote = local->next; + if (!remote) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_NO_DATA, + "no remote subvolumes"); + goto err; + } + + this->local_pool = mem_pool_new(jbr_local_t, 128); + if (!this->local_pool) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR, + "failed to create jbr_local_t pool"); + goto err; + } + + priv = GF_CALLOC(1, sizeof(*priv), gf_mt_jbr_private_t); + if (!priv) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_MEM_ERR, + "could not allocate priv"); + goto err; + } + + for (trav = this->children; trav; trav = trav->next) { + ++(priv->n_children); + } + + LOCK_INIT(&priv->dirty_lock); + LOCK_INIT(&priv->index_lock); + INIT_LIST_HEAD(&priv->dirty_fds); + priv->term_fd = -1; + + this->private = priv; + + GF_OPTION_INIT("leader", priv->config_leader, bool, err); + GF_OPTION_INIT("quorum-percent", priv->quorum_pct, percent, err); + + priv->leader = priv->config_leader; + priv->child_up = _gf_false; + + if (gf_thread_create(&kid, NULL, jbr_flush_thread, this, "jbrflush") != 0) { + gf_msg(this->name, GF_LOG_ERROR, 0, J_MSG_SYS_CALL_FAILURE, + "could not start flush thread"); + /* TBD: treat this as a fatal error? */ + } + + /* + * Calling glfs_new changes old->ctx, even if THIS still points + * to global_xlator. That causes problems later in the main + * thread, when gf_log_dump_graph tries to use the FILE after + * we've mucked with it and gets a segfault in __fprintf_chk. + * We can avoid all that by undoing the damage before we + * continue. + */ + global_xlator.ctx = oldctx; + + return 0; err: - jbr_deallocate_priv(priv); - return -1; + jbr_deallocate_priv(priv); + return -1; } - void -jbr_fini (xlator_t *this) +jbr_fini(xlator_t *this) { - jbr_deallocate_priv(this->private); + jbr_deallocate_priv(this->private); } class_methods_t class_methods = { - .init = jbr_init, - .fini = jbr_fini, - .reconfigure = jbr_reconfigure, - .notify = jbr_notify, + .init = jbr_init, + .fini = jbr_fini, + .reconfigure = jbr_reconfigure, + .notify = jbr_notify, }; struct volume_options options[] = { - { .key = {"leader"}, - .type = GF_OPTION_TYPE_BOOL, - .default_value = "false", - .description = "Start in the leader role. This is only for " - "bootstrapping the code, and should go away when we " - "have real leader election." - }, - { .key = {"vol-name"}, - .type = GF_OPTION_TYPE_STR, - .description = "volume name" - }, - { .key = {"my-name"}, - .type = GF_OPTION_TYPE_STR, - .description = "brick name in form of host:/path" - }, - { .key = {"etcd-servers"}, - .type = GF_OPTION_TYPE_STR, - .description = "list of comma separated etc servers" - }, - { .key = {"subvol-uuid"}, - .type = GF_OPTION_TYPE_STR, - .description = "UUID for this JBR (sub)volume" - }, - { .key = {"quorum-percent"}, - .type = GF_OPTION_TYPE_PERCENT, - .default_value = "50.0", - .description = "percentage of rep_count-1 that must be up" - }, - { .key = {NULL} }, + {.key = {"leader"}, + .type = GF_OPTION_TYPE_BOOL, + .default_value = "false", + .description = "Start in the leader role. This is only for " + "bootstrapping the code, and should go away when we " + "have real leader election."}, + {.key = {"vol-name"}, + .type = GF_OPTION_TYPE_STR, + .description = "volume name"}, + {.key = {"my-name"}, + .type = GF_OPTION_TYPE_STR, + .description = "brick name in form of host:/path"}, + {.key = {"etcd-servers"}, + .type = GF_OPTION_TYPE_STR, + .description = "list of comma separated etc servers"}, + {.key = {"subvol-uuid"}, + .type = GF_OPTION_TYPE_STR, + .description = "UUID for this JBR (sub)volume"}, + {.key = {"quorum-percent"}, + .type = GF_OPTION_TYPE_PERCENT, + .default_value = "50.0", + .description = "percentage of rep_count-1 that must be up"}, + {.key = {NULL}}, }; diff --git a/xlators/experimental/posix2/ds/src/posix2-ds-main.c b/xlators/experimental/posix2/ds/src/posix2-ds-main.c index 675c4d7c9da..4e399a98ed4 100644 --- a/xlators/experimental/posix2/ds/src/posix2-ds-main.c +++ b/xlators/experimental/posix2/ds/src/posix2-ds-main.c @@ -20,34 +20,31 @@ #include "statedump.h" int32_t -posix2_ds_init (xlator_t *this) +posix2_ds_init(xlator_t *this) { - if (this->children) { - gf_log (this->name, GF_LOG_ERROR, - "This (%s) is a leaf xlator, but found children", - this->name); - return -1; - } - - return 0; + if (this->children) { + gf_log(this->name, GF_LOG_ERROR, + "This (%s) is a leaf xlator, but found children", this->name); + return -1; + } + + return 0; } void -posix2_ds_fini (xlator_t *this) +posix2_ds_fini(xlator_t *this) { - return; + return; } class_methods_t class_methods = { - .init = posix2_ds_init, - .fini = posix2_ds_fini, + .init = posix2_ds_init, + .fini = posix2_ds_fini, }; -struct xlator_fops fops = { -}; +struct xlator_fops fops = {}; -struct xlator_cbks cbks = { -}; +struct xlator_cbks cbks = {}; /* struct xlator_dumpops dumpops = { @@ -55,5 +52,5 @@ struct xlator_dumpops dumpops = { */ struct volume_options options[] = { - { .key = {NULL} }, + {.key = {NULL}}, }; diff --git a/xlators/experimental/posix2/mds/src/posix2-mds-main.c b/xlators/experimental/posix2/mds/src/posix2-mds-main.c index 71ff4e0089c..58da05f2091 100644 --- a/xlators/experimental/posix2/mds/src/posix2-mds-main.c +++ b/xlators/experimental/posix2/mds/src/posix2-mds-main.c @@ -20,34 +20,31 @@ #include "statedump.h" int32_t -posix2_mds_init (xlator_t *this) +posix2_mds_init(xlator_t *this) { - if (this->children) { - gf_log (this->name, GF_LOG_ERROR, - "This (%s) is a leaf xlator, but found children", - this->name); - return -1; - } - - return 0; + if (this->children) { + gf_log(this->name, GF_LOG_ERROR, + "This (%s) is a leaf xlator, but found children", this->name); + return -1; + } + + return 0; } void -posix2_mds_fini (xlator_t *this) +posix2_mds_fini(xlator_t *this) { - return; + return; } class_methods_t class_methods = { - .init = posix2_mds_init, - .fini = posix2_mds_fini, + .init = posix2_mds_init, + .fini = posix2_mds_fini, }; -struct xlator_fops fops = { -}; +struct xlator_fops fops = {}; -struct xlator_cbks cbks = { -}; +struct xlator_cbks cbks = {}; /* struct xlator_dumpops dumpops = { @@ -55,5 +52,5 @@ struct xlator_dumpops dumpops = { */ struct volume_options options[] = { - { .key = {NULL} }, + {.key = {NULL}}, }; |