diff options
Diffstat (limited to 'xlators')
-rw-r--r-- | xlators/experimental/fdl/src/Makefile.am | 2 | ||||
-rw-r--r-- | xlators/experimental/fdl/src/dump-tmpl.c | 35 | ||||
-rw-r--r-- | xlators/experimental/fdl/src/fdl-tmpl.c | 33 | ||||
-rw-r--r-- | xlators/experimental/fdl/src/fdl.h | 30 | ||||
-rwxr-xr-x | xlators/experimental/fdl/src/gen_fdl.py | 35 | ||||
-rw-r--r-- | xlators/experimental/fdl/src/jnl-types.h | 14 | ||||
-rw-r--r-- | xlators/experimental/fdl/src/recon-tmpl.c | 2 | ||||
-rw-r--r-- | xlators/experimental/jbr-server/src/Makefile.am | 2 | ||||
-rw-r--r-- | xlators/experimental/jbr-server/src/all-templates.c | 124 | ||||
-rwxr-xr-x | xlators/experimental/jbr-server/src/gen-fops.py | 2 | ||||
-rw-r--r-- | xlators/experimental/jbr-server/src/jbr-internal.h | 1 | ||||
-rw-r--r-- | xlators/experimental/jbr-server/src/jbr.c | 81 |
12 files changed, 327 insertions, 34 deletions
diff --git a/xlators/experimental/fdl/src/Makefile.am b/xlators/experimental/fdl/src/Makefile.am index 9ec9d4f06ff..3f1eccc0ed6 100644 --- a/xlators/experimental/fdl/src/Makefile.am +++ b/xlators/experimental/fdl/src/Makefile.am @@ -1,7 +1,7 @@ xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental xlator_LTLIBRARIES = fdl.la -noinst_HEADERS = jnl-types.h +noinst_HEADERS = fdl.h nodist_fdl_la_SOURCES = fdl.c fdl_la_LDFLAGS = -module -avoid-version diff --git a/xlators/experimental/fdl/src/dump-tmpl.c b/xlators/experimental/fdl/src/dump-tmpl.c index cac1071a9c1..32b0fef6af3 100644 --- a/xlators/experimental/fdl/src/dump-tmpl.c +++ b/xlators/experimental/fdl/src/dump-tmpl.c @@ -2,17 +2,42 @@ #ifndef _CONFIG_H #define _CONFIG_H #include "config.h" +#include <ctype.h> #endif #include "glfs.h" #include "iatt.h" #include "xlator.h" -#include "jnl-types.h" +#include "fdl.h" + +/* + * Returns 0 if the string is ASCII printable * + * and -1 if it's not ASCII printable * + */ +int str_isprint (char *s) +{ + int ret = -1; + + if (!s) + goto out; + + while (s[0] != '\0') { + if (!isprint(s[0])) + goto out; + else + s++; + } + + ret = 0; +out: + return ret; +} #pragma fragment DICT { int key_len, data_len; char *key_ptr; + char *key_val; printf ("@ARGNAME@ = dict {\n"); for (;;) { key_len = *((int *)new_meta); @@ -23,8 +48,14 @@ key_ptr = new_meta; new_meta += key_len; data_len = *((int *)new_meta); + key_val = new_meta + sizeof(int); new_meta += sizeof(int) + data_len; - printf (" %s = <%d bytes>\n", key_ptr, data_len); + if (str_isprint(key_val)) + printf (" %s = <%d bytes>\n", + key_ptr, data_len); + else + printf (" %s = %s <%d bytes>\n", + key_ptr, key_val, data_len); } printf ("}\n"); } diff --git a/xlators/experimental/fdl/src/fdl-tmpl.c b/xlators/experimental/fdl/src/fdl-tmpl.c index fdcfafbac31..a92f6676ce1 100644 --- a/xlators/experimental/fdl/src/fdl-tmpl.c +++ b/xlators/experimental/fdl/src/fdl-tmpl.c @@ -21,7 +21,7 @@ #include "defaults.h" #include "syscall.h" #include "xlator.h" -#include "jnl-types.h" +#include "fdl.h" /* TBD: make tunable */ #define META_FILE_SIZE (1 << 20) @@ -55,6 +55,9 @@ typedef struct { int first_term; } fdl_private_t; +int32_t +fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata); + void fdl_enqueue (xlator_t *this, call_stub_t *stub) { @@ -341,8 +344,21 @@ err_unlocked: } int32_t +fdl_ipc_continue (call_frame_t *frame, xlator_t *this, + int32_t op, dict_t *xdata) +{ + /* + * Nothing to be done here. Just Unwind. * + */ + STACK_UNWIND_STRICT (ipc, frame, 0, 0, xdata); + + return 0; +} + +int32_t fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) { + call_stub_t *stub; fdl_private_t *priv = this->private; dict_t *tdict; int32_t gt_err = EIO; @@ -381,6 +397,20 @@ fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) } break; + case FDL_IPC_JBR_SERVER_ROLLBACK: + /* + * In case of a rollback from jbr-server, dump * + * the term and index number in the journal, * + * which will later be used to rollback the fop * + */ + stub = fop_ipc_stub (frame, fdl_ipc_continue, + op, xdata); + fdl_len_ipc (stub); + stub->serialize = fdl_serialize_ipc; + fdl_enqueue (this, stub); + + break; + default: STACK_WIND_TAIL (frame, FIRST_CHILD(this), @@ -423,7 +453,6 @@ fdl_init (xlator_t *this) * bit cleaner than messing with the generation to add a hand-written * exception. */ - this->fops->ipc = fdl_ipc; if (pthread_create(&priv->worker,NULL,fdl_worker,this) != 0) { gf_log (this->name, GF_LOG_ERROR, diff --git a/xlators/experimental/fdl/src/fdl.h b/xlators/experimental/fdl/src/fdl.h new file mode 100644 index 00000000000..32e38c93f2d --- /dev/null +++ b/xlators/experimental/fdl/src/fdl.h @@ -0,0 +1,30 @@ +/* + Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. + + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _FDL_H_ +#define _FDL_H_ + +#define NEW_REQUEST (uint8_t)'N' + +typedef struct { + uint8_t event_type; /* e.g. NEW_REQUEST */ + uint8_t fop_type; /* e.g. GF_FOP_SETATTR */ + uint16_t request_id; + uint32_t ext_length; +} event_header_t; + +enum { + FDL_IPC_BASE = 0xfeedbee5, /* ... and they make honey */ + FDL_IPC_CHANGE_TERM, + FDL_IPC_GET_TERMS, + FDL_IPC_JBR_SERVER_ROLLBACK +}; + +#endif /* _FDL_H_ */ diff --git a/xlators/experimental/fdl/src/gen_fdl.py b/xlators/experimental/fdl/src/gen_fdl.py index 7f6b1aaaeaa..d59f12a4841 100755 --- a/xlators/experimental/fdl/src/gen_fdl.py +++ b/xlators/experimental/fdl/src/gen_fdl.py @@ -300,19 +300,44 @@ def get_special_subs (args): ser_code += ser_tmpl.replace("@SRC@",src) return len_code, ser_code +# Mention those fops in the selective_generate table, for which +# only a few common functions will be generated, and mention those +# functions. Rest of the functions can be customized +selective_generate = { + "ipc": "len,serialize", + } + def gen_fdl (): entrypoints = [] for name, value in ops.iteritems(): if "journal" not in [ x[0] for x in value ]: continue + + # generate all functions for all the fops + # except for the ones in selective_generate for which + # generate only the functions mentioned in the + # selective_generate table + gen_funcs = "len,serialize,callback,continue,fop" + if name in selective_generate: + gen_funcs = selective_generate[name].split(",") + len_code, ser_code = get_special_subs(value) fop_subs[name]["@LEN_CODE@"] = len_code[:-1] fop_subs[name]["@SER_CODE@"] = ser_code[:-1] - print generate(LEN_TEMPLATE,name,fop_subs) - print generate(SER_TEMPLATE,name,fop_subs) - print generate(CBK_TEMPLATE,name,cbk_subs) - print generate(CONTINUE_TEMPLATE,name,fop_subs) - print generate(FOP_TEMPLATE,name,fop_subs) + if 'len' in gen_funcs: + print generate(LEN_TEMPLATE,name,fop_subs) + if 'serialize' in gen_funcs: + print generate(SER_TEMPLATE,name,fop_subs) + if name == 'writev': + print "#define DESTAGE_ASYNC" + if 'callback' in gen_funcs: + print generate(CBK_TEMPLATE,name,cbk_subs) + if 'continue' in gen_funcs: + print generate(CONTINUE_TEMPLATE,name,fop_subs) + if 'fop' in gen_funcs: + print generate(FOP_TEMPLATE,name,fop_subs) + if name == 'writev': + print "#undef DESTAGE_ASYNC" entrypoints.append(name) print "struct xlator_fops fops = {" for ep in entrypoints: diff --git a/xlators/experimental/fdl/src/jnl-types.h b/xlators/experimental/fdl/src/jnl-types.h deleted file mode 100644 index 8cb39d01a25..00000000000 --- a/xlators/experimental/fdl/src/jnl-types.h +++ /dev/null @@ -1,14 +0,0 @@ -#define NEW_REQUEST (uint8_t)'N' - -typedef struct { - uint8_t event_type; /* e.g. NEW_REQUEST */ - uint8_t fop_type; /* e.g. GF_FOP_SETATTR */ - uint16_t request_id; - uint32_t ext_length; -} event_header_t; - -enum { - FDL_IPC_BASE = 0xfeedbee5, /* ... and they make honey */ - FDL_IPC_CHANGE_TERM, - FDL_IPC_GET_TERMS, -}; diff --git a/xlators/experimental/fdl/src/recon-tmpl.c b/xlators/experimental/fdl/src/recon-tmpl.c index 523bda39418..ab5edb1a378 100644 --- a/xlators/experimental/fdl/src/recon-tmpl.c +++ b/xlators/experimental/fdl/src/recon-tmpl.c @@ -11,7 +11,7 @@ #include "xlator.h" #include "glfs-internal.h" -#include "jnl-types.h" +#include "fdl.h" #define GFAPI_SUCCESS 0 diff --git a/xlators/experimental/jbr-server/src/Makefile.am b/xlators/experimental/jbr-server/src/Makefile.am index 5d6209d709a..5dc0273b2f7 100644 --- a/xlators/experimental/jbr-server/src/Makefile.am +++ b/xlators/experimental/jbr-server/src/Makefile.am @@ -10,11 +10,13 @@ jbr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \ noinst_HEADERS = jbr-internal.h \ $(top_srcdir)/xlators/lib/src/libxlator.h \ + $(top_srcdir)/xlators/experimental/fdl/src/fdl.h \ $(top_srcdir)/glusterfsd/src/glusterfsd.h AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \ -I$(top_srcdir)/xlators/lib/src -I$(top_srcdir)/rpc/rpc-lib/src \ + -I$(top_srcdir)/xlators/experimental/fdl/src/ \ -DSBIN_DIR=\"$(sbindir)\" -I$(top_srcdir)/api/src \ -DJBR_SCRIPT_PREFIX=\"$(jbrdir)\" \ -I$(top_srcdir)/xlators/experimental/jbr-client/src/ diff --git a/xlators/experimental/jbr-server/src/all-templates.c b/xlators/experimental/jbr-server/src/all-templates.c index 0fb96ac0436..9720442e63f 100644 --- a/xlators/experimental/jbr-server/src/all-templates.c +++ b/xlators/experimental/jbr-server/src/all-templates.c @@ -105,6 +105,7 @@ jbr_@NAME@ (call_frame_t *frame, xlator_t *this, if (ret) goto err; + local->xdata = dict_ref(xdata); local->stub = fop_@NAME@_stub (frame, jbr_@NAME@_continue, @SHORT_ARGS@); if (!local->stub) { @@ -248,7 +249,6 @@ jbr_@NAME@_dispatch (call_frame_t *frame, xlator_t *this, */ local->call_count = priv->n_children - 1; - local->successful_acks = 0; for (trav = this->children->next; trav; trav = trav->next) { STACK_WIND (frame, jbr_@NAME@_fan_in, trav->xlator, trav->xlator->fops->@NAME@, @@ -307,9 +307,12 @@ int32_t jbr_@NAME@_continue (call_frame_t *frame, xlator_t *this, @LONG_ARGS@) { - gf_boolean_t result = _gf_false; - jbr_local_t *local = NULL; - jbr_private_t *priv = NULL; + int32_t ret = -1; + gf_boolean_t result = _gf_false; + jbr_local_t *local = NULL; + jbr_local_t *new_local = NULL; + jbr_private_t *priv = NULL; + int32_t op_errno = 0; GF_VALIDATE_OR_GOTO ("jbr", this, out); GF_VALIDATE_OR_GOTO (this->name, frame, out); @@ -330,6 +333,58 @@ jbr_@NAME@_continue (call_frame_t *frame, xlator_t *this, J_MSG_QUORUM_NOT_MET, "Didn't receive enough acks " "to meet quorum. Failing the operation without trying " "it on the leader."); + +#if defined(JBR_CG_QUEUE) + /* + * In case of a fop failure, before unwinding need to * + * remove it from queue * + */ + ret = jbr_remove_from_queue (frame, this); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + J_MSG_GENERIC, "Failed to remove from queue."); + } +#endif + + /* + * In this case, the quorum is not met on the followers * + * So the operation will not be performed on the leader * + * and a rollback will be sent via GF_FOP_IPC to all the * + * followers, where this particular fop's term and index * + * numbers will be journaled, and later used to rollback * + */ + call_frame_t *new_frame; + + new_frame = copy_frame (frame); + + if (new_frame) { + new_local = mem_get0(this->local_pool); + if (new_local) { + INIT_LIST_HEAD(&new_local->qlinks); + ret = dict_set_int32 (local->xdata, + "rollback-fop", + GF_FOP_@UPNAME@); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + J_MSG_DICT_FLR, + "failed to set rollback-fop"); + } else { + new_local->xdata = dict_ref(local->xdata); + new_frame->local = new_local; + jbr_ipc_call_dispatch (new_frame, + this, &op_errno, + FDL_IPC_JBR_SERVER_ROLLBACK, + new_local->xdata); + } + } else { + gf_log (this->name, GF_LOG_WARNING, + "Could not create local for new_frame"); + } + } else { + gf_log (this->name, GF_LOG_WARNING, + "Could not send rollback ipc"); + } + STACK_UNWIND_STRICT (@NAME@, frame, -1, EROFS, @ERROR_ARGS@); } else { @@ -348,12 +403,11 @@ jbr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret, int32_t op_errno, @LONG_ARGS@) { -#if defined(JBR_CG_QUEUE) int32_t ret = -1; -#endif gf_boolean_t result = _gf_false; jbr_private_t *priv = NULL; jbr_local_t *local = NULL; + jbr_local_t *new_local = NULL; GF_VALIDATE_OR_GOTO ("jbr", this, err); GF_VALIDATE_OR_GOTO (this->name, frame, err); @@ -404,6 +458,59 @@ jbr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this, gf_msg (this->name, GF_LOG_ERROR, EROFS, J_MSG_QUORUM_NOT_MET, "Quorum is not met. " "The operation has failed."); + /* + * In this case, the quorum is not met after the * + * operation is performed on the leader. Hence a * + * rollback will be sent via GF_FOP_IPC to the leader * + * where this particular fop's term and index numbers * + * will be journaled, and later used to rollback. * + * The same will be done on all the followers * + */ + call_frame_t *new_frame; + + new_frame = copy_frame (frame); + if (new_frame) { + new_local = mem_get0(this->local_pool); + if (new_local) { + INIT_LIST_HEAD(&new_local->qlinks); + gf_msg (this->name, GF_LOG_ERROR, 0, + J_MSG_DICT_FLR, "op = %d", + new_frame->op); + ret = dict_set_int32 (local->xdata, + "rollback-fop", + GF_FOP_@UPNAME@); + if (ret) { + gf_msg (this->name, + GF_LOG_ERROR, 0, + J_MSG_DICT_FLR, + "failed to set " + "rollback-fop"); + } else { + new_local->xdata = dict_ref (local->xdata); + new_frame->local = new_local; + /* + * Calling STACK_WIND instead * + * of jbr_ipc as it will not * + * unwind to the previous * + * translators like it will * + * in case of jbr_ipc. * + */ + STACK_WIND (new_frame, + jbr_ipc_complete, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ipc, + FDL_IPC_JBR_SERVER_ROLLBACK, + new_local->xdata); + } + } else { + gf_log (this->name, GF_LOG_WARNING, + "Could not create local " + "for new_frame"); + } + } else { + gf_log (this->name, GF_LOG_WARNING, + "Could not send rollback ipc"); + } } else { #if defined(JBR_CG_NEED_FD) op_ret = local->successful_op_ret; @@ -416,6 +523,11 @@ jbr_@NAME@_complete (call_frame_t *frame, void *cookie, xlator_t *this, } } + /* + * Unrefing the reference taken in jbr_@NAME@ () * + */ + dict_unref (local->xdata); + STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, @SHORT_ARGS@); diff --git a/xlators/experimental/jbr-server/src/gen-fops.py b/xlators/experimental/jbr-server/src/gen-fops.py index 8a2b47c5345..c4a5556a8fa 100755 --- a/xlators/experimental/jbr-server/src/gen-fops.py +++ b/xlators/experimental/jbr-server/src/gen-fops.py @@ -101,6 +101,7 @@ fop_table = { "unlink": "write", "writev": "write,fsync,queue", "xattrop": "write", + "ipc": "write", } # Mention those fops in the selective_generate table, for which @@ -108,6 +109,7 @@ fop_table = { # functions. Rest of the functions can be customized selective_generate = { "lk": "fop,dispatch,call_dispatch", + "ipc": "dispatch,call_dispatch", } # Stolen from gen_fdl.py diff --git a/xlators/experimental/jbr-server/src/jbr-internal.h b/xlators/experimental/jbr-server/src/jbr-internal.h index ab1dfc16de2..46a29910d1f 100644 --- a/xlators/experimental/jbr-server/src/jbr-internal.h +++ b/xlators/experimental/jbr-server/src/jbr-internal.h @@ -86,6 +86,7 @@ typedef struct { uint32_t successful_op_ret; fd_t *fd; struct list_head qlinks; + dict_t *xdata; } jbr_local_t; /* diff --git a/xlators/experimental/jbr-server/src/jbr.c b/xlators/experimental/jbr-server/src/jbr.c index 926b5b3c742..c3f0344df00 100644 --- a/xlators/experimental/jbr-server/src/jbr.c +++ b/xlators/experimental/jbr-server/src/jbr.c @@ -24,6 +24,7 @@ #include "syncop.h" #include "syscall.h" #include "compat-errno.h" +#include "fdl.h" #include "jbr-internal.h" #include "jbr-messages.h" @@ -52,6 +53,15 @@ jbr_lk_dispatch (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata); +int32_t +jbr_ipc_call_dispatch (call_frame_t *frame, xlator_t *this, int *op_errno, + int32_t op, dict_t *xdata); + +int32_t +jbr_ipc_complete (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + dict_t *xdata); + /* Used to check the quorum of acks received after the fop * confirming the status of the fop on all the brick processes * for this particular subvolume @@ -277,6 +287,7 @@ jbr_leader_checks_and_init (call_frame_t *frame, xlator_t *this, int *op_errno, local->fd = NULL; INIT_LIST_HEAD(&local->qlinks); + local->successful_acks = 0; frame->local = local; ret = 0; @@ -718,8 +729,6 @@ out: return ret; } -#pragma generate - uint8_t jbr_count_up_kids (jbr_private_t *priv) { @@ -1285,6 +1294,65 @@ err: STACK_UNWIND_STRICT (ipc, frame, -1, op_errno, NULL); } +int32_t +jbr_ipc_fan_in (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, dict_t *xdata) +{ + jbr_local_t *local = NULL; + int32_t ret = -1; + uint8_t call_count; + + GF_VALIDATE_OR_GOTO ("jbr", this, out); + GF_VALIDATE_OR_GOTO (this->name, frame, out); + local = frame->local; + GF_VALIDATE_OR_GOTO (this->name, local, out); + + gf_msg_trace (this->name, 0, "op_ret = %d, op_errno = %d\n", + op_ret, op_errno); + + LOCK(&frame->lock); + call_count = --(local->call_count); + UNLOCK(&frame->lock); + + if (call_count == 0) { +#if defined(JBR_CG_QUEUE) + ret = jbr_remove_from_queue (frame, this); + if (ret) { + gf_msg (this->name, GF_LOG_ERROR, 0, + J_MSG_GENERIC, "Failed to remove from queue."); + } +#endif + /* + * Unrefing the reference taken in continue() or complete() * + */ + dict_unref (local->xdata); + STACK_DESTROY (frame->root); + } + + ret = 0; +out: + return ret; +} + +int32_t +jbr_ipc_complete (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + dict_t *xdata) +{ + jbr_local_t *local = NULL; + + GF_VALIDATE_OR_GOTO ("jbr", this, out); + GF_VALIDATE_OR_GOTO (this->name, frame, out); + local = frame->local; + GF_VALIDATE_OR_GOTO (this->name, local, out); + + jbr_ipc_call_dispatch (frame, + this, &op_errno, + FDL_IPC_JBR_SERVER_ROLLBACK, + local->xdata); +out: + return 0; +} int32_t jbr_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) @@ -1299,6 +1367,13 @@ jbr_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) case JBR_SERVER_NEXT_ENTRY: jbr_next_entry(frame, this); break; + case FDL_IPC_JBR_SERVER_ROLLBACK: + /* + * Just send the fop down to fdl. Need not * + * dispatch it to other bricks in the sub- * + * volume, as it will be done where the op * + * has failed. * + */ default: STACK_WIND_TAIL (frame, FIRST_CHILD(this), @@ -1309,6 +1384,7 @@ jbr_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) return 0; } +#pragma generate int32_t jbr_forget (xlator_t *this, inode_t *inode) @@ -1556,7 +1632,6 @@ jbr_init (xlator_t *this) */ this->fops->getxattr = jbr_getxattr_special; this->fops->fsync = jbr_fsync; - this->fops->ipc = jbr_ipc; local = this->children; if (!local) { |