diff options
30 files changed, 2269 insertions, 110 deletions
diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases index 40b6ed21192..7181dd2f6e8 100644 --- a/api/src/gfapi.aliases +++ b/api/src/gfapi.aliases @@ -140,3 +140,4 @@ _priv_glfs_resolve _glfs_resolve$GFAPI_PRIVATE_3.7.0  _priv_glfs_process_upcall_event _glfs_process_upcall_event$GFAPI_PRIVATE_3.7.0  _pub_glfs_h_lookupat _glfs_h_lookupat$GFAPI_3.7.4 +_pub_glfs_ipc_xd _glfs_ipc_xd$GFAPI_4.0.0 diff --git a/api/src/gfapi.map b/api/src/gfapi.map index d42ae2b97af..b35984a088c 100644 --- a/api/src/gfapi.map +++ b/api/src/gfapi.map @@ -167,3 +167,8 @@ GFAPI_3.7.4 {  	global:  		glfs_h_lookupat;  } GFAPI_PRIVATE_3.7.0; + +GFAPI_4.0.0 { +	global: +		glfs_ipc_xd; +} GFAPI_3.7.4; diff --git a/api/src/glfs.c b/api/src/glfs.c index b151936a6e8..037b579225f 100644 --- a/api/src/glfs.c +++ b/api/src/glfs.c @@ -1233,7 +1233,7 @@ invalid_fs:  GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_get_volfile, 3.6.0);  int -pub_glfs_ipc (struct glfs *fs, int opcode) +pub_glfs_ipc_xd (struct glfs *fs, int opcode, dict_t *xd_in, dict_t **xd_out)  {  	xlator_t        *subvol = NULL;          int             ret = -1; @@ -1248,7 +1248,7 @@ pub_glfs_ipc (struct glfs *fs, int opcode)  		goto out;  	} -	ret = syncop_ipc (subvol, opcode, NULL, NULL); +	ret = syncop_ipc (subvol, opcode, xd_in, xd_out);          DECODE_SYNCOP_ERR (ret);  out: @@ -1259,4 +1259,12 @@ invalid_fs:          return ret;  } +GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ipc_xd, 4.0.0); + +int +pub_glfs_ipc (struct glfs *fs, int opcode) +{ +        return pub_glfs_ipc_xd (fs, opcode, NULL, NULL); +} +  GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ipc, 3.7.0); diff --git a/configure.ac b/configure.ac index 5d2fe342b74..29e36648aac 100644 --- a/configure.ac +++ b/configure.ac @@ -117,6 +117,8 @@ AC_CONFIG_FILES([Makefile                  xlators/features/Makefile                  xlators/features/arbiter/Makefile                  xlators/features/arbiter/src/Makefile +                xlators/experimental/fdl/Makefile +                
xlators/experimental/fdl/src/Makefile                  xlators/features/changelog/Makefile                  xlators/features/changelog/src/Makefile                  xlators/features/changelog/lib/Makefile diff --git a/glusterfs.spec.in b/glusterfs.spec.in index 53a65ae4fed..9f04bc37e10 100644 --- a/glusterfs.spec.in +++ b/glusterfs.spec.in @@ -952,6 +952,7 @@ fi  %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/barrier.so  %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/cdc.so  %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/changelog.so +%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/experimental/fdl.so  %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/gfid-access.so  %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/read-only.so  %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/shard.so @@ -1217,6 +1218,8 @@ fi  /usr/lib/firewalld/services/glusterfs.xml  %endif +%{_sbindir}/gf_logdump +%{_sbindir}/gf_recon  %changelog  * Sat Jan 16 2016 Niels de Vos <ndevos@redhat.com> diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c index bbaca1e7277..c980e7bc640 100644 --- a/glusterfsd/src/glusterfsd.c +++ b/glusterfsd/src/glusterfsd.c @@ -1199,6 +1199,26 @@ parse_opts (int key, char *arg, struct argp_state *state)          return 0;  } +gf_boolean_t +should_call_fini (glusterfs_ctx_t *ctx, xlator_t *trav) +{ +        /* There's nothing to call, so the other checks don't matter. */ +        if (!trav->fini) { +                return _gf_false; +        } + +        /* This preserves previous behavior in glusterd. */ +        if (ctx->process_mode == GF_GLUSTERD_PROCESS) { +                return _gf_true; +        } + +        /* This is the only one known to be safe in glusterfsd. 
*/ +        if (!strcmp(trav->type,"experimental/fdl")) { +                return _gf_true; +        } + +        return _gf_false; +}  void  cleanup_and_exit (int signum) @@ -1271,20 +1291,17 @@ cleanup_and_exit (int signum)          /*call fini for glusterd xlator */          /* TODO : Invoke fini for rest of the xlators */ -        if (ctx->process_mode == GF_GLUSTERD_PROCESS) { - -                trav = NULL; -                if (ctx->active) -                        trav = ctx->active->top; -                while (trav) { -                        if (trav->fini) { -                                THIS = trav; -                                trav->fini (trav); -                        } -                        trav = trav->next; +        trav = NULL; +        if (ctx->active) +                trav = ctx->active->top; +        while (trav) { +                if (should_call_fini(ctx,trav)) { +                        THIS = trav; +                        trav->fini (trav);                  } - +                trav = trav->next;          } +          exit(0);  } diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am index 46e2e021134..c6d93c925ac 100644 --- a/libglusterfs/src/Makefile.am +++ b/libglusterfs/src/Makefile.am @@ -83,7 +83,7 @@ y.tab.h: graph.y  defaults.c: defaults-tmpl.c generator.py gen-defaults.py  	$(PYTHON) $(srcdir)/gen-defaults.py $(srcdir)/defaults-tmpl.c > $@ -CLEANFILES = graph.lex.c y.tab.c y.tab.h defaults.c +CLEANFILES = $(nodist_libglusterfs_la_SOURCES)  if UNITTEST  CLEANFILES += *.gcda *.gcno *_xunit.xml diff --git a/libglusterfs/src/call-stub.h b/libglusterfs/src/call-stub.h index 01621368ee9..82a49c1d7b9 100644 --- a/libglusterfs/src/call-stub.h +++ b/libglusterfs/src/call-stub.h @@ -17,12 +17,15 @@  #include "stack.h"  #include "list.h" -typedef struct { +typedef struct _call_stub {  	struct list_head list;  	char wind;  	call_frame_t *frame;  	glusterfs_fop_t fop;          struct mem_pool *stub_mem_pool; /* 
pointer to stub mempool in ctx_t */ +        uint32_t jnl_meta_len; +        uint32_t jnl_data_len; +        void (*serialize) (struct _call_stub *, char *, char *);  	union {  		fop_lookup_t lookup; diff --git a/libglusterfs/src/generator.py b/libglusterfs/src/generator.py index 5e8f6c29cd4..8be68337baa 100644..100755 --- a/libglusterfs/src/generator.py +++ b/libglusterfs/src/generator.py @@ -2,6 +2,65 @@  import string +# ops format: 'fop-arg' name type stub-field [nosync] +#             'cbk-arg' name type +#             'extra'   name type arg-str +#             'journal' fop-type +#             'link'    inode iatt +# +# 'role' indicates the significance of this line to the code generator (sort of +# our own type). +# +# For fop-arg, we first need to know the name and the type of the arg so that +# we can generate SHORT_ARGS (for function calls) and LONG_ARGS (for +# declarations).  For code that uses stubs, we also need to know the name of +# the stub field, which might be different than the argument itself.  Lastly, +# for code that uses syncops, we need to know whether whoever wrote the syncop +# for this fop "forgot" to include this argument.  (Editorial: this kind of +# creeping inconsistency is why we should have used code generation for stubs +# and syncops as well as defaults all along.)  To address this need, we use the +# optional 'nosync' field for arguments (e.g. mkdir.umask) that we should skip +# in generated syncop code. +# +# 'cbk-arg' is like fop-arg but simpler and used for generating callbacks +# instead of fop functions. +# +# 'extra' is also like fop-arg, but it's another hack for syncops.  This time +# the problem is that some of what would normally be *callback* arguments are +# instead created in the caller and passed to the syncop.  We handle that by +# adding an entry at the appropriate place in the fop-arg list, with the name +# and type to generate a declaration and an argument string to generate the +# actual syncop call. 
+# +# The mere presence of a 'journal' item is sufficient for most of the journal +# code to recognize that it should do something.  However, reconciliation also +# needs to decide how to build the arguments it needs to call down +# to the syncop layer, based on what's in the journal.  To do that, we divide +# ops into three types and store those types in the ops table.  In general, +# these three types work as follows. +# +#    For an fd-op, the GFID in the journal is used (in the loc.gfid field) to +#    look up an inode, then an anonymous fd is found/created for that inode. +# +#    For an inode-op, the GFID in the journal is used the same way, but no fd +#    is needed. +# +#    For an entry-op, the *parent* GFID and name from the journal are used to +#    look up an inode (via loc.pargfid and loc.name respectively). +# +# The only places this seems to fall down are link and create.  In link, +# which is generally an entry-op, the source is looked up as though it's an +# inode-op.  In create, we have an fd argument but it's really a return +# argument so we get a fresh inode instead of looking one up.  Those two cases +# need to be handled as special cases in the reconciliation code. +# +# 'link' is (hopefully) the last of the journal/syncop hacks.  Much like +# 'extra', some values that are returned as callback arguments in the normal +# case are handled differently for syncops.  For syncops that create objects +# (e.g. mkdir) we need to link those objects into our inode table.  The 'inode' +# and 'iatt' fields here give us the information we need to construct the +# proper inode_link call(s). 
+  ops = {}  ops['fgetxattr'] = ( @@ -13,19 +72,21 @@ ops['fgetxattr'] = (  )  ops['fsetxattr'] = ( -	('fop-arg',	'fd',			'fd_t *'), -	('fop-arg',	'dict',			'dict_t *'), -	('fop-arg',	'flags',		'int32_t'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'fd',			'fd_t *',			'fd'), +	('fop-arg',	'dict',			'dict_t *',			'xattr'), +	('fop-arg',	'flags',		'int32_t',			'flags'), +	('fop-arg',	'xdata',		'dict_t *',			'xdata'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'fd-op'),  )  ops['setxattr'] = ( -	('fop-arg',	'loc',			'loc_t *'), -	('fop-arg',	'dict',			'dict_t *'), -	('fop-arg',	'flags',		'int32_t'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'loc',			'loc_t *',			'loc'), +	('fop-arg',	'dict',			'dict_t *',			'xattr'), +	('fop-arg',	'flags',		'int32_t',			'flags'), +	('fop-arg',	'xdata',		'dict_t *',			'xdata'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'inode-op'),  )  ops['statfs'] = ( @@ -73,16 +134,17 @@ ops['flush'] = (  )  ops['writev'] = ( -	('fop-arg',	'fd',			'fd_t *'), -	('fop-arg',	'vector',		'struct iovec *'), +	('fop-arg',	'fd',			'fd_t *',			'fd'), +	('fop-arg',	'vector',		'struct iovec *',	'vector'),  	('fop-arg',	'count',		'int32_t'), -	('fop-arg',	'off',			'off_t'), -	('fop-arg',	'flags',		'uint32_t'), +	('fop-arg',	'off',			'off_t',			'offset'), +	('fop-arg',	'flags',		'uint32_t',			'flags'),  	('fop-arg',	'iobref',		'struct iobref *'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'xdata',		'dict_t *',			'xdata'),  	('cbk-arg',	'prebuf',		'struct iatt *'),  	('cbk-arg',	'postbuf',		'struct iatt *'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'fd-op'),  )  ops['readv'] = ( @@ -108,96 +170,111 @@ ops['open'] = (  )  ops['create'] = ( -	('fop-arg',	'loc',			'loc_t *'), -	('fop-arg',	'flags',		'int32_t'), -	('fop-arg',	'mode',			'mode_t'), -	('fop-arg',	'umask',		'mode_t'), -	('fop-arg',	'fd',			'fd_t *'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'loc',			'loc_t *',			'loc'), +	('fop-arg',	'flags',		
'int32_t',			'flags'), +	('fop-arg',	'mode',			'mode_t',			'mode'), +	('fop-arg',	'umask',		'mode_t',			'umask',	'nosync'), +	('fop-arg',	'fd',			'fd_t *',			'fd'), +	('extra',	'iatt',			'struct iatt',		'&iatt'), +	('fop-arg',	'xdata',		'dict_t *',			'xdata'),  	('cbk-arg',	'fd',			'fd_t *'),  	('cbk-arg',	'inode',		'inode_t *'),  	('cbk-arg',	'buf',			'struct iatt *'),  	('cbk-arg',	'preparent',	'struct iatt *'),  	('cbk-arg',	'postparent',	'struct iatt *'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'entry-op'), +	('link',	'loc.inode',	'&iatt'),  )  ops['link'] = ( -	('fop-arg',	'oldloc',		'loc_t *'), -	('fop-arg',	'newloc',		'loc_t *'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'oldloc',		'loc_t *',			'loc'), +	('fop-arg',	'newloc',		'loc_t *',			'loc2'), +	('extra',	'iatt',			'struct iatt',		'&iatt'), +	('fop-arg',	'xdata',		'dict_t *',			'xdata'),  	('cbk-arg',	'inode',		'inode_t *'),  	('cbk-arg',	'buf',			'struct iatt *'),  	('cbk-arg',	'preparent',	'struct iatt *'),  	('cbk-arg',	'postparent',	'struct iatt *'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'entry-op'),  )  ops['rename'] = ( -	('fop-arg',	'oldloc',		'loc_t *'), -	('fop-arg',	'newloc',		'loc_t *'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'oldloc',		'loc_t *',			'loc'), +	('fop-arg',	'newloc',		'loc_t *',			'loc2'), +	('fop-arg',	'xdata',		'dict_t *',			'xdata'),  	('cbk-arg',	'buf',			'struct iatt *'),  	('cbk-arg',	'preoldparent',	'struct iatt *'),  	('cbk-arg',	'postoldparent','struct iatt *'),  	('cbk-arg',	'prenewparent',	'struct iatt *'),  	('cbk-arg',	'postnewparent','struct iatt *'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'entry-op'),  )  ops['symlink'] = ( -	('fop-arg',	'linkpath',		'const char *'), -	('fop-arg',	'loc',			'loc_t *'), -	('fop-arg',	'umask',		'mode_t'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'linkpath',		'const char *',		'linkname'), +	('fop-arg',	'loc',			'loc_t *',			'loc'), +	('fop-arg',	'umask',		'mode_t',			
'mode',		'nosync'), +	('extra',	'iatt',			'struct iatt',		'&iatt'), +	('fop-arg',	'xdata',		'dict_t *',			'xdata'),  	('cbk-arg',	'inode',		'inode_t *'),  	('cbk-arg',	'buf',			'struct iatt *'),  	('cbk-arg',	'preparent',	'struct iatt *'),  	('cbk-arg',	'postparent',	'struct iatt *'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'entry-op'),  )  ops['rmdir'] = ( -	('fop-arg',	'loc',			'loc_t *'), -	('fop-arg',	'flags',		'int32_t'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'loc',			'loc_t *',			'loc'), +	('fop-arg',	'flags',		'int32_t',			'flags'), +	('fop-arg',	'xdata',		'dict_t *',			'xdata'),  	('cbk-arg',	'preparent',	'struct iatt *'),  	('cbk-arg',	'postparent',	'struct iatt *'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'entry-op'),  )  ops['unlink'] = ( -	('fop-arg',	'loc',			'loc_t *'), -	('fop-arg',	'flags',		'int32_t'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'loc',			'loc_t *',			'loc'), +	('fop-arg',	'flags',		'int32_t',			'flags',	'nosync'), +	('fop-arg',	'xdata',		'dict_t *',			'xdata'),  	('cbk-arg',	'preparent',	'struct iatt *'),  	('cbk-arg',	'postparent',	'struct iatt *'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'entry-op'),  )  ops['mkdir'] = ( -	('fop-arg',	'loc',			'loc_t *'), -	('fop-arg',	'mode',			'mode_t'), -	('fop-arg',	'umask',		'mode_t'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'loc',			'loc_t *',			'loc'), +	('fop-arg',	'mode',			'mode_t',			'mode'), +	('fop-arg',	'umask',		'mode_t',			'umask',	'nosync'), +	('extra',	'iatt',			'struct iatt',		'&iatt'), +	('fop-arg',	'xdata',		'dict_t *',			'xdata'),  	('cbk-arg',	'inode',		'inode_t *'),  	('cbk-arg',	'buf',			'struct iatt *'),  	('cbk-arg',	'preparent',	'struct iatt *'),  	('cbk-arg',	'postparent',	'struct iatt *'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'entry-op'), +	('link',	'loc.inode',	'&iatt'),  )  ops['mknod'] = ( -	('fop-arg',	'loc',			'loc_t *'), -	('fop-arg',	'mode',			'mode_t'), -	('fop-arg',	'rdev',			
'dev_t'), -	('fop-arg',	'umask',		'mode_t'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'loc',			'loc_t *',			'loc'), +	('fop-arg',	'mode',			'mode_t',			'mode'), +	('fop-arg',	'rdev',			'dev_t',			'rdev'), +	('fop-arg',	'umask',		'mode_t',			'umask',	'nosync'), +	('extra',	'iatt',			'struct iatt',		'&iatt'), +	('fop-arg',	'xdata',		'dict_t *',			'xdata'),  	('cbk-arg',	'inode',		'inode_t *'),  	('cbk-arg',	'buf',			'struct iatt *'),  	('cbk-arg',	'preparent',	'struct iatt *'),  	('cbk-arg',	'postparent',	'struct iatt *'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'entry-op'),  )  ops['readlink'] = ( @@ -217,12 +294,13 @@ ops['access'] = (  )  ops['ftruncate'] = ( -	('fop-arg',	'fd',			'fd_t *'), -	('fop-arg',	'offset',		'off_t'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'fd',			'fd_t *',				'fd'), +	('fop-arg',	'offset',		'off_t',				'offset'), +	('fop-arg',	'xdata',		'dict_t *',				'xdata'),  	('cbk-arg',	'prebuf',		'struct iatt *'),  	('cbk-arg',	'postbuf',		'struct iatt *'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'fd-op'),  )  ops['getxattr'] = ( @@ -234,35 +312,39 @@ ops['getxattr'] = (  )  ops['xattrop'] = ( -	('fop-arg',	'loc',			'loc_t *'), -	('fop-arg',	'flags',		'gf_xattrop_flags_t'), -	('fop-arg',	'dict',			'dict_t *'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'loc',			'loc_t *',				'loc'), +	('fop-arg',	'flags',		'gf_xattrop_flags_t',	'optype'), +	('fop-arg',	'dict',			'dict_t *',				'xattr'), +	('fop-arg',	'xdata',		'dict_t *',				'xdata'),  	('cbk-arg',	'dict',			'dict_t *'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'inode-op'),  )  ops['fxattrop'] = ( -	('fop-arg',	'fd',			'fd_t *'), -	('fop-arg',	'flags',		'gf_xattrop_flags_t'), -	('fop-arg',	'dict',			'dict_t *'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'fd',			'fd_t *',				'fd'), +	('fop-arg',	'flags',		'gf_xattrop_flags_t',	'optype'), +	('fop-arg',	'dict',			'dict_t *',				'xattr'), +	('fop-arg',	'xdata',		'dict_t *',				
'xdata'),  	('cbk-arg',	'dict',			'dict_t *'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'fd-op'),  )  ops['removexattr'] = ( -	('fop-arg',	'loc',			'loc_t *'), -	('fop-arg',	'name',			'const char *'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'loc',			'loc_t *',			'loc'), +	('fop-arg',	'name',			'const char *',		'name'), +	('fop-arg',	'xdata',		'dict_t *',			'xdata'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'inode-op'),  )  ops['fremovexattr'] = ( -	('fop-arg',	'fd',			'fd_t *'), -	('fop-arg',	'name',			'const char *'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'fd',			'fd_t *',			'fd'), +	('fop-arg',	'name',			'const char *',		'name'), +	('fop-arg',	'xdata',		'dict_t *',			'xdata'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'fd-op'),  )  ops['lk'] = ( @@ -341,22 +423,26 @@ ops['readdirp'] = (  )  ops['setattr'] = ( -	('fop-arg',	'loc',			'loc_t *'), -	('fop-arg',	'stbuf',		'struct iatt *'), -	('fop-arg',	'valid',		'int32_t'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'loc',			'loc_t *',			'loc'), +	('fop-arg',	'stbuf',		'struct iatt *',	'stat'), +	('fop-arg',	'valid',		'int32_t',			'valid'), +	('extra',	'preop',		'struct iatt',		'&preop'), +	('extra',	'postop',		'struct iatt',		'&postop'), +	('fop-arg',	'xdata',		'dict_t *',			'xdata'),  	('cbk-arg',	'statpre',		'struct iatt *'),  	('cbk-arg',	'statpost',		'struct iatt *'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'inode-op'),  )  ops['truncate'] = ( -	('fop-arg',	'loc',			'loc_t *'), -	('fop-arg',	'offset',		'off_t'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'loc',			'loc_t *',			'loc'), +	('fop-arg',	'offset',		'off_t',			'offset'), +	('fop-arg',	'xdata',		'dict_t *',			'xdata'),  	('cbk-arg',	'prebuf',		'struct iatt *'),  	('cbk-arg',	'postbuf',		'struct iatt *'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'inode-op'),  )  ops['stat'] = ( @@ -378,45 +464,51 @@ ops['lookup'] = (  )  ops['fsetattr'] = ( -	('fop-arg',	'fd',			
'fd_t *'), -	('fop-arg',	'stbuf',		'struct iatt *'), -	('fop-arg',	'valid',		'int32_t'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'fd',			'fd_t *',			'fd'), +	('fop-arg',	'stbuf',		'struct iatt *',	'stat'), +	('fop-arg',	'valid',		'int32_t',			'valid'), +	('extra',	'preop',		'struct iatt',		'&preop'), +	('extra',	'postop',		'struct iatt',		'&postop'), +	('fop-arg',	'xdata',		'dict_t *',			'xdata'),  	('cbk-arg',	'statpre',		'struct iatt *'),  	('cbk-arg',	'statpost',		'struct iatt *'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'fd-op'),  )  ops['fallocate'] = ( -	('fop-arg',	'fd',			'fd_t *'), -	('fop-arg',	'keep_size',	'int32_t'), -	('fop-arg',	'offset',		'off_t'), -	('fop-arg',	'len',			'size_t'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'fd',			'fd_t *',			'fd'), +	('fop-arg',	'keep_size',	'int32_t',			'mode'), +	('fop-arg',	'offset',		'off_t',			'offset'), +	('fop-arg',	'len',			'size_t',			'size'), +	('fop-arg',	'xdata',		'dict_t *',			'xdata'),  	('cbk-arg',	'pre',			'struct iatt *'),  	('cbk-arg',	'post',			'struct iatt *'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'fd-op'),  )  ops['discard'] = ( -	('fop-arg',	'fd',			'fd_t *'), -	('fop-arg',	'offset',		'off_t'), -	('fop-arg',	'len',			'size_t'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'fd',			'fd_t *',			'fd'), +	('fop-arg',	'offset',		'off_t',			'offset'), +	('fop-arg',	'len',			'size_t',			'size'), +	('fop-arg',	'xdata',		'dict_t *',			'xdata'),  	('cbk-arg',	'pre',			'struct iatt *'),  	('cbk-arg',	'post',			'struct iatt *'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'fd-op'),  )  ops['zerofill'] = ( -	('fop-arg',	'fd',			'fd_t *'), -	('fop-arg',	'offset',		'off_t'), +	('fop-arg',	'fd',			'fd_t *',			'fd'), +	('fop-arg',	'offset',		'off_t',			'offset'),  	# As e.g. fallocate/discard (above) "len" should really be a size_t. 
-	('fop-arg',	'len',			'off_t'), -	('fop-arg',	'xdata',		'dict_t *'), +	('fop-arg',	'len',			'off_t',			'size'), +	('fop-arg',	'xdata',		'dict_t *',			'xdata'),  	('cbk-arg',	'pre',			'struct iatt *'),  	('cbk-arg',	'post',			'struct iatt *'),  	('cbk-arg',	'xdata',		'dict_t *'), +	('journal',	'fd-op'),  )  ops['ipc'] = ( @@ -460,6 +552,11 @@ def get_subs (names, types):  def generate (tmpl, name, subs):  	text = tmpl.replace("@NAME@",name) +	if name == "writev": +		# More spurious inconsistency. +		text = text.replace("@UPNAME@","WRITE") +	else: +		text = text.replace("@UPNAME@",name.upper())  	for old, new in subs[name].iteritems():  		text = text.replace(old,new)  	# TBD: reindent/reformat the result for maximum readability. diff --git a/libglusterfs/src/iobuf.c b/libglusterfs/src/iobuf.c index a4d36691cd0..d1eb0acaf5e 100644 --- a/libglusterfs/src/iobuf.c +++ b/libglusterfs/src/iobuf.c @@ -1014,7 +1014,7 @@ int  iobref_merge (struct iobref *to, struct iobref *from)  {          int           i = 0; -        int           ret = -1; +        int           ret = 0;          struct iobuf *iobuf = NULL;          GF_VALIDATE_OR_GOTO ("iobuf", to, out); diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c index eb0c1cf983a..d412b4d656d 100644 --- a/libglusterfs/src/syscall.c +++ b/libglusterfs/src/syscall.c @@ -588,7 +588,7 @@ sys_fallocate(int fd, int mode, off_t offset, off_t len)          return posix_fallocate(fd, offset, len);  #endif -#if defined(F_ALLOCATECONFIG) && defined(GF_DARWIN_HOST_OS) +#if defined(F_ALLOCATECONTIG) && defined(GF_DARWIN_HOST_OS)          /* C conversion from C++ implementation for OSX by Mozilla Foundation */          if (mode) {                  /* keep size not supported */ diff --git a/tests/features/fdl-overflow.t b/tests/features/fdl-overflow.t new file mode 100644 index 00000000000..d7633a7ca7d --- /dev/null +++ b/tests/features/fdl-overflow.t @@ -0,0 +1,74 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. 
$(dirname $0)/../volume.rc + +log_base=$($CLI --print-logdir) +log_id=${B0}/${V0}-0 +log_id=${log_id:1}     # Remove initial slash +log_id=${log_id//\//-} # Replace remaining slashes with dashes + +_check_sizes () { +	local n=0 +	local sz +	local total_sz=0 + +	# We don't care about the sizes of the meta files.  That would be +	# embedding too much of the implementation into the test. +	n=$(ls ${log_base}/${log_id}-meta-*.jnl | wc -l) +	[ $n = 2 ] || return 1 + +	# We *do* care about the sizes of the data files, which should exactly +	# reflect the amount of data written via dd. +	n=0 +	while read sz name; do +                G_LOG "found journal ${name} size ${sz}MB" +		n=$((n+1)) +		total_sz=$((total_sz+sz)) +	done < <(du -sm ${log_base}/${log_id}-data-*.jnl) +	[ $n = 2 ] || return 1 +	# On our CentOS and NetBSD regression-test systems, but not on my Fedora +	# development system, each file ends up being slightly larger than its +	# data size because of metadata, and 'du' rounds that up to a full extra +	# megabyte.  We'll allow either result, because what we're really +	# looking for is a complete failure to roll over from one file to +	# another at the appropriate size. +	[ $total_sz = 20 -o $total_sz = $((n+20)) ] || return 1 + +	return 0 +} + +check_sizes () { +	set -x +	_check_sizes +	ret=$? +	set +x +	return $ret   # propagate the status of _check_sizes +} + +if [ x"$OSTYPE" = x"NetBSD" ]; then +        CREAT_OFLAG="creat," +else +        CREAT_OFLAG="" +fi + +TEST rm -f ${log_base}/${log_id}-*.jnl +TEST glusterd +TEST pidof glusterd + +# Get a simple volume set up and mounted with FDL active. +TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0 +TEST $CLI volume set $V0 changelog.changelog off +TEST $CLI volume set $V0 features.fdl on +TEST $CLI volume start $V0 +TEST $GFS -s $H0 --volfile-id $V0 $M0 + +# Generate some I/O and unmount/stop so we can see log sizes. 
+TEST dd if=/dev/zero of=$M0/twentyMB bs=1048576 count=20 \ +     oflag=${CREAT_OFLAG}sync +TEST umount $M0 +TEST $CLI volume stop $V0 + +TEST _check_sizes + +cleanup diff --git a/tests/features/fdl.t b/tests/features/fdl.t new file mode 100644 index 00000000000..34d6d78228a --- /dev/null +++ b/tests/features/fdl.t @@ -0,0 +1,52 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +log_base=$($CLI --print-logdir) +log_id=${B0}/${V0}-0 +log_id=${log_id:1}     # Remove initial slash +log_id=${log_id//\//-} # Replace remaining slashes with dashes +FDL_META_FILE=${log_base}/${log_id}-meta-1.jnl +FDL_DATA_FILE=${log_base}/${log_id}-data-1.jnl + +check_logfile() { +	[ $(gf_logdump $FDL_META_FILE $FDL_DATA_FILE | grep $1 | wc -l) -ge $2 ] +} + +if [ x"$OSTYPE" = x"NetBSD" ]; then +        CREAT_OFLAG="creat," +else +        CREAT_OFLAG="" +fi + +TEST rm -f $FDL_META_FILE $FDL_DATA_FILE +TEST glusterd +TEST pidof glusterd + +# Get a simple volume set up and mounted with FDL active. +TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0 +TEST $CLI volume set $V0 changelog.changelog off +TEST $CLI volume set $V0 features.fdl on +TEST $CLI volume start $V0 +TEST $GFS -s $H0 --volfile-id $V0 $M0 + +# Generate some I/O and unmount. +TEST mkdir -p $M0/abc/def +TEST dd if=/dev/zero of=$M0/abc/def/ghi bs=128 count=2 \ +     oflag=${CREAT_OFLAG}sync +TEST chmod 314 $M0/abc/def/ghi +TEST rm -rf $M0/abc +TEST umount $M0 + +# Check that gf_logdump works, and shows the ops we just issued.  There will be +# more SETATTR ops than the one corresponding to our chmod, because some are +# issued internally.  We have to guess a bit about where the log will be. 
+TEST check_logfile GF_FOP_MKDIR 2 +TEST check_logfile GF_FOP_CREATE 1 +TEST check_logfile GF_FOP_WRITE 2 +TEST check_logfile GF_FOP_SETATTR 1 +TEST check_logfile GF_FOP_UNLINK 1 +TEST check_logfile GF_FOP_RMDIR 2 + +cleanup diff --git a/tests/features/recon.t b/tests/features/recon.t new file mode 100644 index 00000000000..7dda2a680e8 --- /dev/null +++ b/tests/features/recon.t @@ -0,0 +1,62 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +log_base=$($CLI --print-logdir) +log_id=${B0}/${V0}-0 +log_id=${log_id:1}     # Remove initial slash +log_id=${log_id//\//-} # Replace remaining slashes with dashes +FDL_META_FILE=${log_base}/${log_id}-meta-1.jnl +FDL_DATA_FILE=${log_base}/${log_id}-data-1.jnl + +tmpdir=$(mktemp -d -t ${0##*/}.XXXXXX) +trap "rm -rf $tmpdir" EXIT + +write_file () { +	echo "peekaboo" > $1 +} + +TEST rm -f $FDL_META_FILE $FDL_DATA_FILE +TEST glusterd +TEST pidof glusterd + +# Get a simple volume set up and mounted with FDL active. +TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0 +TEST $CLI volume set $V0 features.fdl on +TEST $CLI volume start $V0 +TEST $GFS -s $H0 --volfile-id $V0 $M0 + +# Generate some I/O and then copy off the journal files for later. +TEST mkdir -p $M0/abc/def +TEST write_file $M0/abc/def/ghi +#EST chmod 314 $M0/abc/def/ghi +cp ${FDL_META_FILE} ${FDL_DATA_FILE} ${tmpdir} + +# Get back to an empty state and unmount. +TEST rm -rf $M0/abc +TEST umount $M0 + +# Make sure we really are in an empty state.  Otherwise the tests below could +# pass just because we never cleaned up in the first place. +TEST [ ! -d ${B0}/${V0}-0/abc ] + +# Create a stub volfile. 
+vol_file=${GLUSTERD_WORKDIR}/vols/${V0}/${V0}.${H0}.${log_id}.vol +vol_id_line=$(grep volume-id ${vol_file}) +cat > ${tmpdir}/recon.vol << EOF +volume recon-posix +    type storage/posix +    option directory ${B0}/${V0}-0 +${vol_id_line} +end-volume +EOF + +TEST gf_recon ${tmpdir}/recon.vol ${tmpdir}/$(basename ${FDL_META_FILE}) \ +				  ${tmpdir}/$(basename ${FDL_DATA_FILE}) + +TEST [ -d ${B0}/${V0}-0/abc/def ] +EXPECT "peekaboo" cat ${B0}/${V0}-0/abc/def/ghi +# TBD: test permissions, xattrs + +cleanup diff --git a/tests/include.rc b/tests/include.rc index 139bc03ac8c..21a69465797 100644 --- a/tests/include.rc +++ b/tests/include.rc @@ -136,7 +136,7 @@ function G_LOG()          return       fi       local g_log_string; -     g_log_string="++++++++++ G_LOG:$0: TEST: $1 $@ ++++++++++" +     g_log_string="++++++++++ G_LOG:$0: TEST: $@ ++++++++++"       g_log_string="`date -u +["%F %T.%6N"]`:$g_log_string"       local g_log_filename       for  g_log_filename in `find $g_log_logdir/ -type f -name \*.log`; @@ -541,10 +541,10 @@ function cleanup()          fi >&2          # tar logs at the start and end of every test -        if [ -n $LOGDIR ] +        if [ -n "$LOGDIR" -a -z "$STOP_WASTING_SPACE" ]          then                  tarname=$(basename $0 .t) -                tar -rvf ${LOGDIR}/${tarname}.tar ${LOGDIR}/* \ +                tar -rf ${LOGDIR}/${tarname}.tar ${LOGDIR}/* \                          --exclude="*.tar" \                          && \                  find $LOGDIR/* -maxdepth 0 -name '*.tar' -prune \ diff --git a/xlators/experimental/Makefile.am b/xlators/experimental/Makefile.am index 06f04a193c8..a31512203f6 100644 --- a/xlators/experimental/Makefile.am +++ b/xlators/experimental/Makefile.am @@ -1,3 +1,3 @@ -SUBDIRS = nsr-client nsr-server +SUBDIRS = nsr-client nsr-server fdl  CLEANFILES = diff --git a/xlators/experimental/fdl/Makefile.am b/xlators/experimental/fdl/Makefile.am new file mode 100644 index 00000000000..a985f42a877 --- /dev/null +++ 
b/xlators/experimental/fdl/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/experimental/fdl/src/Makefile.am b/xlators/experimental/fdl/src/Makefile.am new file mode 100644 index 00000000000..a05fc797b0a --- /dev/null +++ b/xlators/experimental/fdl/src/Makefile.am @@ -0,0 +1,42 @@ +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental +xlator_LTLIBRARIES = fdl.la + +noinst_HEADERS = jnl-types.h + +nodist_fdl_la_SOURCES = fdl.c +fdl_la_LDFLAGS = -module -avoid-version +fdl_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +sbin_PROGRAMS = gf_logdump gf_recon +gf_logdump_SOURCES = logdump.c +nodist_gf_logdump_SOURCES = libfdl.c +gf_logdump_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\ +                   $(top_builddir)/api/src/libgfapi.la + +# Eventually recon(ciliation) code will move elsewhere, but for now it's +# easier to have it next to the similar logdump code. +gf_recon_SOURCES = recon.c +nodist_gf_recon_SOURCES = librecon.c +gf_recon_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\ +                   $(top_builddir)/api/src/libgfapi.la + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ +	      -I$(top_srcdir)/api/src -fPIC \ +	      -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \ +	      -DDATADIR=\"$(localstatedir)\" + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +noinst_PYTHON = gen_fdl.py gen_dumper.py gen_recon.py +EXTRA_DIST = fdl-tmpl.c dump-tmpl.c recon-tmpl.c + +CLEANFILES = $(nodist_fdl_la_SOURCES) $(nodist_gf_logdump_SOURCES) $(nodist_gf_recon_SOURCES) + +fdl.c: fdl-tmpl.c gen_fdl.py +	$(PYTHON) $(srcdir)/gen_fdl.py $(srcdir)/fdl-tmpl.c > $@ + +libfdl.c: dump-tmpl.c gen_dumper.py +	$(PYTHON) $(srcdir)/gen_dumper.py $(srcdir)/dump-tmpl.c > $@ + +librecon.c: recon-tmpl.c gen_recon.py +	$(PYTHON) $(srcdir)/gen_recon.py $(srcdir)/recon-tmpl.c > $@ diff --git a/xlators/experimental/fdl/src/dump-tmpl.c b/xlators/experimental/fdl/src/dump-tmpl.c new file mode 100644 index 
00000000000..cac1071a9c1 --- /dev/null +++ b/xlators/experimental/fdl/src/dump-tmpl.c @@ -0,0 +1,156 @@ +#pragma fragment PROLOG +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glfs.h" +#include "iatt.h" +#include "xlator.h" +#include "jnl-types.h" + +#pragma fragment DICT +        { +                int key_len, data_len; +                char *key_ptr; +                printf ("@ARGNAME@ = dict {\n"); +                for (;;) { +                        key_len = *((int *)new_meta); +                        new_meta += sizeof(int); +                        if (!key_len) { +                                break; +                        } +                        key_ptr = new_meta; +                        new_meta += key_len; +                        data_len = *((int *)new_meta); +                        new_meta += sizeof(int) + data_len; +                        printf (" %s = <%d bytes>\n", key_ptr, data_len); +                } +                printf ("}\n"); +        } + +#pragma fragment DOUBLE +        printf ("@ARGNAME@ = @FORMAT@\n", *((uint64_t *)new_meta), +                *((uint64_t *)new_meta)); +        new_meta += sizeof(uint64_t); + +#pragma fragment GFID +        printf ("@ARGNAME@ = <gfid %s>\n", uuid_utoa(*((uuid_t *)new_meta))); +        new_meta += 16; + +#pragma fragment INTEGER +        printf ("@ARGNAME@ = @FORMAT@\n", *((uint32_t *)new_meta), +                *((uint32_t *)new_meta)); +        new_meta += sizeof(uint32_t); + +#pragma fragment LOC +        printf ("@ARGNAME@ = loc {\n"); +        printf ("  gfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); +        new_meta += 16; +        printf ("  pargfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); +        new_meta += 16; +        if (*(new_meta++)) { +                printf ("  name = %s\n", new_meta); +                new_meta += (strlen(new_meta) + 1); +        } +        printf ("}\n"); + +#pragma fragment STRING +        if (*(new_meta++)) { +    
            printf ("@ARGNAME@ = %s\n", new_meta); +                new_meta += (strlen(new_meta) + 1); +        } + +#pragma fragment VECTOR +        { +                size_t len = *((size_t *)new_meta); +                new_meta += sizeof(len); +                printf ("@ARGNAME@ = <%zu bytes>\n", len); +                new_data += len; +        } + +#pragma fragment IATT +        { +                ia_prot_t *myprot = ((ia_prot_t *)new_meta); +                printf ("@ARGNAME@ = iatt {\n"); +                printf ("  ia_prot = %c%c%c", +                        myprot->suid ? 'S' : '-', +                        myprot->sgid ? 'S' : '-', +                        myprot->sticky ? 'T' : '-'); +                printf ("%c%c%c", +                        myprot->owner.read ? 'r' : '-', +                        myprot->owner.write ? 'w' : '-', +                        myprot->owner.exec ? 'x' : '-'); +                printf ("%c%c%c", +                        myprot->group.read ? 'r' : '-', +                        myprot->group.write ? 'w' : '-', +                        myprot->group.exec ? 'x' : '-'); +                printf ("%c%c%c\n", +                        myprot->other.read ? 'r' : '-', +                        myprot->other.write ? 'w' : '-', +                        myprot->other.exec ? 
'x' : '-'); +                new_meta += sizeof(ia_prot_t); +                uint32_t *myints = (uint32_t *)new_meta; +                printf ("  ia_uid = %u\n", myints[0]); +                printf ("  ia_gid = %u\n", myints[1]); +                printf ("  ia_atime = %u.%09u\n", myints[2], myints[3]); +                printf ("  ia_mtime = %u.%09u\n", myints[4], myints[5]); +                new_meta += sizeof(*myints) * 6; +        } + +#pragma fragment FOP +void +fdl_dump_@NAME@ (char **old_meta, char **old_data) +{ +        char    *new_meta	= *old_meta; +        char	*new_data	= *old_data; + +        /* TBD: word size/endianness */ +@FUNCTION_BODY@ + +        *old_meta = new_meta; +        *old_data = new_data; +} + +#pragma fragment CASE +        case GF_FOP_@UPNAME@: +                printf ("=== GF_FOP_@UPNAME@\n"); +                fdl_dump_@NAME@ (&new_meta, &new_data); +                break; + +#pragma fragment EPILOG +int +fdl_dump (char **old_meta, char **old_data) +{ +        char            *new_meta       = *old_meta; +        char            *new_data       = *old_data; +        static glfs_t   *fs             = NULL; +        int             recognized      = 1; +        event_header_t  *eh; + +        /* +         * We don't really call anything else in GFAPI, but this is the most +         * convenient way to satisfy all of the spurious dependencies on how it +         * or glusterfsd initialize (e.g. setting up THIS). 
+         */ +        if (!fs) { +                fs = glfs_new ("dummy"); +        } + +        eh = (event_header_t *)new_meta; +        new_meta += sizeof (*eh); + +        /* TBD: check event_type instead of assuming NEW_REQUEST */ + +        switch (eh->fop_type) { +@SWITCH_BODY@ + +        default: +                printf ("unknown fop %u\n", eh->fop_type); +                recognized = 0; +        } + +        *old_meta = new_meta; +        *old_data = new_data; +        return recognized; +} diff --git a/xlators/experimental/fdl/src/fdl-tmpl.c b/xlators/experimental/fdl/src/fdl-tmpl.c new file mode 100644 index 00000000000..8fcc6a8d6ff --- /dev/null +++ b/xlators/experimental/fdl/src/fdl-tmpl.c @@ -0,0 +1,506 @@ +/* +  Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> +  This file is part of GlusterFS. + +  This file is licensed to you under your choice of the GNU Lesser +  General Public License, version 3 or any later version (LGPLv3 or +  later), or the GNU General Public License, version 2 (GPLv2), in all +  cases as published by the Free Software Foundation. 
+*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <fcntl.h> +#include <unistd.h> +#include <sys/mman.h> +#include "call-stub.h" +#include "iatt.h" +#include "defaults.h" +#include "syscall.h" +#include "xlator.h" +#include "jnl-types.h" + +/* TBD: make tunable */ +#define META_FILE_SIZE  (1 << 20) +#define DATA_FILE_SIZE  (1 << 24) + +enum gf_fdl { +        gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1, +        gf_fdl_mt_end +}; + +typedef struct { +        char            *type; +        off_t           size; +        char            *path; +        int             fd; +        void *          ptr; +        off_t           max_offset; +} log_obj_t; + +typedef struct { +        struct list_head        reqs; +        pthread_mutex_t         req_lock; +        pthread_cond_t          req_cond; +        char                    *log_dir; +        pthread_t               worker; +        gf_boolean_t            should_stop; +        gf_boolean_t            change_term; +        log_obj_t               meta_log; +        log_obj_t               data_log; +        int                     term; +        int                     first_term; +} fdl_private_t; + +void +fdl_enqueue (xlator_t *this, call_stub_t *stub) +{ +        fdl_private_t   *priv   = this->private; + +        pthread_mutex_lock (&priv->req_lock); +        list_add_tail (&stub->list, &priv->reqs); +        pthread_mutex_unlock (&priv->req_lock); + +        pthread_cond_signal (&priv->req_cond); +} + +#pragma generate + +char * +fdl_open_term_log (xlator_t *this, log_obj_t *obj, int term) +{ +        fdl_private_t   *priv   = this->private; +        int             ret; +        char *          ptr     = NULL; + +        /* +         * Use .jnl instead of .log so that we don't get test info (mistakenly) +         * appended to our journal files. 
+         */ +        if (this->ctx->cmd_args.log_ident) { +                ret = gf_asprintf (&obj->path, "%s/%s-%s-%d.jnl", +                                   priv->log_dir, this->ctx->cmd_args.log_ident, +                                   obj->type, term); +        } +        else { +                ret = gf_asprintf (&obj->path, "%s/fubar-%s-%d.jnl", +                                   priv->log_dir, obj->type, term); +        } +        if ((ret <= 0) || !obj->path) { +                gf_log (this->name, GF_LOG_ERROR, +                        "failed to construct log-file path"); +                goto err; +        } + +        gf_log (this->name, GF_LOG_INFO, "opening %s (size %ld)", +                obj->path, obj->size); + +        obj->fd = open (obj->path, O_RDWR|O_CREAT|O_TRUNC, 0666); +        if (obj->fd < 0) { +                gf_log (this->name, GF_LOG_ERROR, +                        "failed to open log file (%s)", strerror(errno)); +                goto err; +        } + +#if !defined(GF_BSD_HOST_OS) +        /* +         * NetBSD can just go die in a fire.  Even though it claims to support +         * fallocate/posix_fallocate they don't actually *do* anything so the +         * file size remains zero.  Then mmap succeeds anyway, but any access +         * to the mmap'ed region will segfault.  It would be acceptable for +         * fallocate to do what it says, for mmap to fail, or for access to +         * extend the file.  NetBSD managed to hit the trifecta of Getting +         * Everything Wrong, and debugging in that environment to get this far +         * has already been painful enough (systems I worked on in 1990 were +         * better that way).  We'll fall through to the lseek/write method, and +         * performance will be worse, and TOO BAD. 
+         */ +        if (sys_fallocate(obj->fd,0,0,obj->size) < 0) +#endif +        { +                gf_log (this->name, GF_LOG_WARNING, +                        "failed to fallocate space for log file"); +                /* Have to do this the ugly page-faulty way. */ +                (void) sys_lseek (obj->fd, obj->size-1, SEEK_SET); +                (void) sys_write (obj->fd, "", 1); +        } + +        ptr = mmap (NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0); +        if (ptr == MAP_FAILED) { +                gf_log (this->name, GF_LOG_ERROR, "failed to mmap log (%s)", +                        strerror(errno)); +                goto err; +        } + +        obj->ptr = ptr; +        obj->max_offset = 0; +        return ptr; + +err: +        if (obj->fd >= 0) { +                sys_close (obj->fd); +                obj->fd = (-1); +        } +        if (obj->path) { +                GF_FREE (obj->path); +                obj->path = NULL; +        } +        /* +         * Failure is always reported as NULL: ptr holds MAP_FAILED (not NULL) +         * when mmap failed, and callers only test the result against NULL. +         */ +        return NULL; +} + +void +fdl_close_term_log (xlator_t *this, log_obj_t *obj) +{ +        fdl_private_t   *priv           = this->private; + +        if (obj->ptr) { +                (void) munmap (obj->ptr, obj->size); +                obj->ptr = NULL; +        } + +        if (obj->fd >= 0) { +                gf_log (this->name, GF_LOG_INFO, +                        "truncating term %d %s journal to %ld", +                        priv->term, obj->type, obj->max_offset); +                if (sys_ftruncate(obj->fd,obj->max_offset) < 0) { +                        gf_log (this->name, GF_LOG_WARNING, +                                "failed to truncate journal (%s)", +                                strerror(errno)); +                } +                sys_close (obj->fd); +                obj->fd = (-1); +        } + +        if (obj->path) { +                GF_FREE (obj->path); +                obj->path = NULL; +        } +} + +gf_boolean_t +fdl_change_term (xlator_t *this, char **meta_ptr, char 
**data_ptr) +{ +        fdl_private_t   *priv           = this->private; + +        fdl_close_term_log (this, &priv->meta_log); +        fdl_close_term_log (this, &priv->data_log); + +        ++(priv->term); + +        *meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term); +        if (!*meta_ptr) { +                return _gf_false; +        } + +        *data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term); +        if (!*data_ptr) { +                return _gf_false; +        } + +        return _gf_true; +} + +void * +fdl_worker (void *arg) +{ +        xlator_t        *this           = arg; +        fdl_private_t   *priv           = this->private; +        call_stub_t     *stub; +        char *          meta_ptr        = NULL; +        off_t           *meta_offset    = &priv->meta_log.max_offset; +        char *          data_ptr        = NULL; +        off_t           *data_offset    = &priv->data_log.max_offset; +        unsigned long   base_as_ul; +        void *          msync_ptr; +        size_t          msync_len; +        gf_boolean_t    recycle; +        void            *err_label      = &&err_unlocked; + +        priv->meta_log.type = "meta"; +        priv->meta_log.size = META_FILE_SIZE; +        priv->meta_log.path = NULL; +        priv->meta_log.fd = (-1); +        priv->meta_log.ptr = NULL; + +        priv->data_log.type = "data"; +        priv->data_log.size = DATA_FILE_SIZE; +        priv->data_log.path = NULL; +        priv->data_log.fd = (-1); +        priv->data_log.ptr = NULL; + +        /* TBD: initial term should come from persistent storage (e.g. 
etcd) */ +        priv->first_term = ++(priv->term); +        meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term); +        if (!meta_ptr) { +                goto *err_label; +        } +        data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term); +        if (!data_ptr) { +                fdl_close_term_log (this, &priv->meta_log); +                goto *err_label; +        } + +        for (;;) { +                pthread_mutex_lock (&priv->req_lock); +                err_label = &&err_locked; +                /* +                 * Test the flags before every wait, not only after one.  A +                 * pthread_cond_signal sent while this thread was busy with a +                 * request wakes nobody, so a flag set at that time would +                 * otherwise go unnoticed until some later signal arrived. +                 */ +                while (list_empty(&priv->reqs)) { +                        if (priv->should_stop) { +                                goto *err_label; +                        } +                        if (priv->change_term) { +                                if (!fdl_change_term(this, &meta_ptr, +                                                           &data_ptr)) { +                                        goto *err_label; +                                } +                                priv->change_term = _gf_false; +                                continue; +                        } +                        pthread_cond_wait (&priv->req_cond, &priv->req_lock); +                } +                stub = list_entry (priv->reqs.next, call_stub_t, list); +                list_del_init (&stub->list); +                pthread_mutex_unlock (&priv->req_lock); +                err_label = &&err_unlocked; +                /* +                 * TBD: batch requests +                 * +                 * What we should do here is gather up *all* of the requests +                 * that have accumulated since we were last at this point, +                 * blast them all out in one big writev, and then dispatch them +                 * all before coming back for more.  That maximizes throughput, +                 * at some cost to latency (due to queuing effects at the log +                 * stage).  
Note that we're likely to be above io-threads, so +                 * the dispatch itself will be parallelized (at further cost to +                 * latency).  For now, we just do the simplest thing and handle +                 * one request all the way through before fetching the next. +                 * +                 * So, why mmap/msync instead of writev/fdatasync?  Because it's +                 * faster.  Much faster.  So much faster that I half-suspect +                 * cheating, but it's more convenient for now than having to +                 * ensure that everything's page-aligned for O_DIRECT (the only +                 * alternative that still might avoid ridiculous levels of +                 * local-FS overhead). +                 * +                 * TBD: check that msync really does get our data to disk. +                 */ +                gf_log (this->name, GF_LOG_DEBUG, +                        "logging %u+%u bytes for op %d", +                        stub->jnl_meta_len, stub->jnl_data_len, stub->fop); +                recycle = _gf_false; +                if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) { +                        recycle = _gf_true; +                } +                if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) { +                        recycle = _gf_true; +                } +                if (recycle && !fdl_change_term(this,&meta_ptr,&data_ptr)) { +                        goto *err_label; +                } +                meta_ptr = priv->meta_log.ptr; +                data_ptr = priv->data_log.ptr; +                gf_log (this->name, GF_LOG_DEBUG, "serializing to %p/%p", +                        meta_ptr + *meta_offset, data_ptr + *data_offset); +                stub->serialize (stub, meta_ptr + *meta_offset, +                                       data_ptr + *data_offset); +                if (stub->jnl_meta_len > 0) { +                        base_as_ul = (unsigned long) 
(meta_ptr + *meta_offset); +                        msync_ptr = (void *) (base_as_ul & ~0x0fff); +                        msync_len = (size_t) (base_as_ul &  0x0fff); +                        if (msync (msync_ptr, msync_len+stub->jnl_meta_len, +                                              MS_SYNC) < 0) { +                                gf_log (this->name, GF_LOG_WARNING, +                                        "failed to log request meta (%s)", +                                        strerror(errno)); +                        } +                        *meta_offset += stub->jnl_meta_len; +                } +                if (stub->jnl_data_len > 0) { +                        base_as_ul = (unsigned long) (data_ptr + *data_offset); +                        msync_ptr = (void *) (base_as_ul & ~0x0fff); +                        msync_len = (size_t) (base_as_ul &  0x0fff); +                        if (msync (msync_ptr, msync_len+stub->jnl_data_len, +                                              MS_SYNC) < 0) { +                                gf_log (this->name, GF_LOG_WARNING, +                                        "failed to log request data (%s)", +                                        strerror(errno)); +                        } +                        *data_offset += stub->jnl_data_len; +                } +                call_resume (stub); +        } + +err_locked: +        pthread_mutex_unlock (&priv->req_lock); +err_unlocked: +        fdl_close_term_log (this, &priv->meta_log); +        fdl_close_term_log (this, &priv->data_log); +        return NULL; +} + +int32_t +fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +{ +        fdl_private_t   *priv   = this->private; +        dict_t          *tdict; +        int32_t         gt_err  = EIO; + +        switch (op) { + +        case FDL_IPC_CHANGE_TERM: +                gf_log (this->name, GF_LOG_INFO, "got CHANGE_TERM op"); +                priv->change_term = _gf_true; +           
     pthread_cond_signal (&priv->req_cond); +                STACK_UNWIND_STRICT (ipc, frame, 0, 0, NULL); +                break; + +        case FDL_IPC_GET_TERMS: +                gf_log (this->name, GF_LOG_INFO, "got GET_TERMS op"); +                tdict = dict_new (); +                if (!tdict) { +                        gt_err = ENOMEM; +                        goto gt_done; +                } +                if (dict_set_int32(tdict,"first",priv->first_term) != 0) { +                        goto gt_done; +                } +                if (dict_set_int32(tdict,"last",priv->term) != 0) { +                        goto gt_done; +                } +                gt_err = 0; +        gt_done: +                if (gt_err) { +                        STACK_UNWIND_STRICT (ipc, frame, -1, gt_err, NULL); +                } else { +                        STACK_UNWIND_STRICT (ipc, frame, 0, 0, tdict); +                } +                if (tdict) { +                        dict_unref (tdict); +                } +                break; + +        default: +                STACK_WIND_TAIL (frame, +                                 FIRST_CHILD(this), +                                 FIRST_CHILD(this)->fops->ipc, +                                 op, xdata); +        } + +        return 0; +} + +int +fdl_init (xlator_t *this) +{ +        fdl_private_t   *priv   = NULL; + +        priv = GF_CALLOC (1, sizeof (*priv), gf_fdl_mt_fdl_private_t); +        if (!priv) { +                gf_log (this->name, GF_LOG_ERROR, +                        "failed to allocate fdl_private"); +                goto err; +        } + +        INIT_LIST_HEAD (&priv->reqs); +        if (pthread_mutex_init (&priv->req_lock, NULL) != 0) { +                gf_log (this->name, GF_LOG_ERROR, +                        "failed to initialize req_lock"); +                goto err; +        } +        if (pthread_cond_init (&priv->req_cond, NULL) != 0) { +                gf_log (this->name, 
GF_LOG_ERROR, +                        "failed to initialize req_cond"); +                goto err; +        } + +        GF_OPTION_INIT ("log-path", priv->log_dir, path, err); + +        /* +         * fdl_worker reads this->private as soon as it starts, so priv must +         * be published before the thread is created. +         */ +        this->private = priv; + +        if (pthread_create(&priv->worker,NULL,fdl_worker,this) != 0) { +                gf_log (this->name, GF_LOG_ERROR, +                        "failed to start fdl_worker"); +                goto err; +        } + +        /* +         * The rest of the fop table is automatically generated, so this is a +         * bit cleaner than messing with the generation to add a hand-written +         * exception. +         */ +        this->fops->ipc = fdl_ipc; + +        return 0; + +err: +        if (priv) { +                /* Don't leave a pointer to freed memory behind. */ +                this->private = NULL; +                GF_FREE(priv); +        } +        return -1; +} + +void +fdl_fini (xlator_t *this) +{ +        fdl_private_t   *priv   = this->private; + +        if (priv) { +                /* +                 * The worker tests should_stop with req_lock held, so set it +                 * under the same lock before waking the worker up. +                 */ +                pthread_mutex_lock (&priv->req_lock); +                priv->should_stop = _gf_true; +                pthread_mutex_unlock (&priv->req_lock); +                pthread_cond_signal (&priv->req_cond); +                pthread_join (priv->worker, NULL); +                GF_FREE(priv); +        } +} + +int +fdl_reconfigure (xlator_t *this, dict_t *options) +{ +        fdl_private_t   *priv   = this->private; + +        /* The key must match the name declared in the options table. */ +        GF_OPTION_RECONF ("log-path", priv->log_dir, options, path, out); +        /* TBD: react if it changed */ + +out: +        return 0; +} + +int32_t +mem_acct_init (xlator_t *this) +{ +        int     ret = -1; + +        GF_VALIDATE_OR_GOTO ("fdl", this, out); + +        ret = xlator_mem_acct_init (this, gf_fdl_mt_end + 1); + +        if (ret != 0) { +                gf_log (this->name, GF_LOG_ERROR, "Memory accounting init " +                        "failed"); +                return ret; +        } +out: +        return ret; +} + +class_methods_t class_methods = { +        .init           = fdl_init, +        .fini           = fdl_fini, +        .reconfigure    = fdl_reconfigure, +        .notify         = default_notify, +}; + +struct volume_options options[] = { +        { 
.key = {"log-path"}, +          .type = GF_OPTION_TYPE_PATH, +          .default_value = DEFAULT_LOG_FILE_DIRECTORY, +          .description = "Directory for FDL files." +        }, +        { .key  = {NULL} }, +}; + +struct xlator_cbks cbks = { +        .release        = default_release, +        .releasedir     = default_releasedir, +        .forget         = default_forget, +}; diff --git a/xlators/experimental/fdl/src/gen_dumper.py b/xlators/experimental/fdl/src/gen_dumper.py new file mode 100755 index 00000000000..42db55d2cb3 --- /dev/null +++ b/xlators/experimental/fdl/src/gen_dumper.py @@ -0,0 +1,116 @@ +#!/usr/bin/python + +import os +import re +import sys + +curdir = os.path.dirname (sys.argv[0]) +gendir = os.path.join (curdir, '../../../../libglusterfs/src') +sys.path.append (gendir) +from generator import ops, fop_subs, cbk_subs, generate + +# See the big header comment at the start of gen_fdl.py to see how the stages +# fit together.  The big difference here is that *all* of the C code is in the +# template file as labelled fragments, instead of as Python strings.  That +# makes it much easier to edit in one place, with proper syntax highlighting +# and indentation. +# +#   Stage 1 uses type-specific fragments to generate FUNCTION_BODY, instead of +#   LEN_*_TEMPLATE and SERLZ_*_TEMPLATE to generate LEN_CODE and SER_CODE. +# +#   Stage 2 uses the FOP and CASE fragments instead of RECON_TEMPLATE and +#   FOP_TEMPLATE.  The expanded FOP code (including FUNCTION_BODY substitution +#   in the middle of each function) is emitted immediately; the expanded CASE +#   code is saved for the next stage. +# +#   Stage 3 uses the PROLOG and EPILOG fragments, with the expanded CASE code +#   in the middle of EPILOG, to generate the whole output file. +# +# Another way of looking at it is to consider how the fragments appear in +# the final output: +# +#   PROLOG +#   FOP (expanded for CREATE) +#       FOP before FUNCTION_BODY +#       LOC, INTEGER, GFID, etc. 
(one per arg, by type) +#       FOP after FUNCTION_BODY +#   FOP (expanded for WRITEV) +#       FOP before FUNCTION_BODY +#       GFID, VECTOR, etc. (one per arg, by type) +#       FOP after FUNCTION_BODY +#   (more FOPs) +#   EPILOG +#       EPILOG before CASE +#       CASE statements (one per fop) +#       EPILOG after CASE + +typemap = { +	'dict_t *':				( "DICT",		""), +	'fd_t *':				( "GFID",		""), +	'dev_t':				( "DOUBLE",		"%ld (0x%lx)"), +	'gf_xattrop_flags_t':	( "INTEGER",	"%d (0x%x)"), +	'int32_t':				( "INTEGER",	"%d (0x%x)"), +	'mode_t':				( "INTEGER",	"%d (0x%x)"), +	'off_t':				( "DOUBLE",		"%ld (0x%lx)"), +	'size_t':				( "DOUBLE",		"%ld (0x%lx)"), +	'uint32_t':				( "INTEGER",	"%d (0x%x)"), +	'loc_t *':				( "LOC",		""), +	'const char *':			( "STRING",		""), +	'struct iovec *':		( "VECTOR",		""), +	'struct iatt *':		( "IATT",		""), +} + +def get_special_subs (args): +	code = "" +	for arg in args: +		if (arg[0] != 'fop-arg') or (len(arg) < 4): +			continue +		recon_type, recon_fmt = typemap[arg[2]] +		code += fragments[recon_type].replace("@ARGNAME@",arg[3])		\ +									 .replace("@FORMAT@",recon_fmt) +	return code + +def gen_functions (): +	code = "" +	for name, value in ops.iteritems(): +		if "journal" not in [ x[0] for x in value ]: +			continue +		fop_subs[name]["@FUNCTION_BODY@"] = get_special_subs(value) +		# Print the FOP fragment with @FUNCTION_BODY@ in the middle. +		code += generate(fragments["FOP"],name,fop_subs) +	return code + +def gen_cases (): +	code = "" +	for name, value in ops.iteritems(): +		if "journal" not in [ x[0] for x in value ]: +			continue +		# Add the CASE fragment for this fop. 
+		code += generate(fragments["CASE"],name,fop_subs) +	return code + +def load_fragments (path="recon-tmpl.c"): +	pragma_re = re.compile('pragma fragment (.*)') +	cur_symbol = None +	cur_value = "" +	result = {} +	for line in open(path,"r").readlines(): +		m = pragma_re.search(line) +		if m: +			if cur_symbol: +				result[cur_symbol] = cur_value +			cur_symbol = m.group(1) +			cur_value = "" +		else: +			cur_value += line +	if cur_symbol: +		result[cur_symbol] = cur_value +	return result + +if __name__ == "__main__": +	fragments = load_fragments(sys.argv[1]) +	print "/* BEGIN GENERATED CODE - DO NOT MODIFY */" +	print fragments["PROLOG"] +	print gen_functions() +	print fragments["EPILOG"].replace("@SWITCH_BODY@",gen_cases()) +	print "/* END GENERATED CODE */" diff --git a/xlators/experimental/fdl/src/gen_fdl.py b/xlators/experimental/fdl/src/gen_fdl.py new file mode 100755 index 00000000000..7f6b1aaaeaa --- /dev/null +++ b/xlators/experimental/fdl/src/gen_fdl.py @@ -0,0 +1,328 @@ +#!/usr/bin/python + +import os +import sys + +curdir = os.path.dirname (sys.argv[0]) +gendir = os.path.join (curdir, '../../../../libglusterfs/src') +sys.path.append (gendir) +from generator import ops, fop_subs, cbk_subs, generate + +# Generation occurs in three stages.  In this case, it actually makes more +# sense to discuss them in the *opposite* order of that in which they +# actually happen. +# +#   Stage 3 is to insert all of the generated code into a file, replacing the +#   "#pragma generate" that's already there.  The file can thus contain all +#   sorts of stuff that's not specific to one fop, either before or after the +#   generated code as appropriate. +# +#   Stage 2 is to generate all of the code *for a particular fop*, using a +#   string-valued template plus a table of substitution values.  Most of these +#   are built in to the generator itself.  However, we also add a couple that +#   are specific to this particular translator - LEN_CODE and SER_CODE.  
These +#   are per-fop functions to get the length or the contents (respectively) of +#   what we'll put in the log.  As with stage 3 allowing per-file boilerplate +#   before and after generated code, this allows per-fop boilerplate before and +#   after generated code. +# +#   Stage 1, therefore, is to create the LEN_CODE and SER_CODE substitutions for +#   each fop, and put them in the same table where e.g. NAME and SHORT_ARGS +#   already are.  We do this by looking at the fop-description table in the +#   generator module, then doing our own template substitution to plug each +#   specific argument name into another string-valued template. +# +# So, what does this leave us with in terms of variables and files? +# +#   For stage 1, we have a series of LEN_*_TEMPLATE and SERLZ_*_TEMPLATE +#   strings, which are used to generate the length and serialization code for +#   each argument type. +# +#   For stage 2, we have a bunch of *_TEMPLATE strings (no LEN_ or SERLZ_ +#   prefix), which are used (along with the output from stage 1) to generate +#   whole functions. +# +#   For stage 3, we have a whole separate file (fdl-tmpl.c) into which we insert +#   the collection of all functions defined in stage 2. + + +LEN_TEMPLATE = """ +void +fdl_len_@NAME@ (call_stub_t *stub) +{ +        uint32_t    meta_len    = sizeof (event_header_t); +		uint32_t	data_len	= 0; + +        /* TBD: global stuff, e.g. 
uid/gid */ +@LEN_CODE@ + +		/* TBD: pad extension length */ +		stub->jnl_meta_len = meta_len; +		stub->jnl_data_len = data_len; +} +""" + +SER_TEMPLATE = """ +void +fdl_serialize_@NAME@ (call_stub_t *stub, char *meta_buf, char *data_buf) +{ +		event_header_t	*eh; +		unsigned long	offset = 0; + +        /* TBD: word size/endianness */ +		eh = (event_header_t *)meta_buf; +		eh->event_type = NEW_REQUEST; +		eh->fop_type = GF_FOP_@UPNAME@; +		eh->request_id = 0;	// TBD +		meta_buf += sizeof (*eh); +@SER_CODE@ +		/* TBD: pad extension length */ +		eh->ext_length = offset; +} +""" + +CBK_TEMPLATE = """ +int32_t +fdl_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this, +                int32_t op_ret, int32_t op_errno, +                @LONG_ARGS@) +{ +        STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, +                             @SHORT_ARGS@); +        return 0; +} +""" + +CONTINUE_TEMPLATE = """ +int32_t +fdl_@NAME@_continue (call_frame_t *frame, xlator_t *this, +                     @LONG_ARGS@) +{ +        STACK_WIND (frame, fdl_@NAME@_cbk, +                    FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, +                    @SHORT_ARGS@); +        return 0; +} + +""" + +FOP_TEMPLATE = """ +int32_t +fdl_@NAME@ (call_frame_t *frame, xlator_t *this, +            @LONG_ARGS@) +{ +        call_stub_t     *stub; + +        stub = fop_@NAME@_stub (frame, default_@NAME@, +                                @SHORT_ARGS@); +		fdl_len_@NAME@ (stub); +        stub->serialize = fdl_serialize_@NAME@; +        fdl_enqueue (this, stub); + +        return 0; +} +""" + +LEN_DICT_TEMPLATE = """ +		if (@SRC@) { +			data_pair_t *memb; +			for (memb = @SRC@->members_list; memb; memb = memb->next) { +				meta_len += sizeof(int); +				meta_len += strlen(memb->key) + 1; +				meta_len += sizeof(int); +				meta_len += memb->value->len; +			} +		} +		meta_len += sizeof(int); +""" + +LEN_GFID_TEMPLATE = """ +        meta_len += 16; +""" + +LEN_INTEGER_TEMPLATE = """ + 
       meta_len += sizeof (@SRC@); +""" + +# 16 for gfid, 16 for pargfid, 1 for flag, 0/1 for terminating NUL +LEN_LOC_TEMPLATE = """ +        if (@SRC@.name) { +                meta_len += (strlen (@SRC@.name) + 34); +        } else { +                meta_len += 33; +        } +""" + +LEN_STRING_TEMPLATE = """ +        if (@SRC@) { +                meta_len += (strlen (@SRC@) + 1); +        } else { +                meta_len += 1; +        } +""" + +LEN_VECTOR_TEMPLATE = """ +        meta_len += sizeof(size_t); +        data_len += iov_length (@VEC@, @CNT@); +""" + +LEN_IATT_TEMPLATE = """ +		meta_len += sizeof(@SRC@.ia_prot); +		meta_len += sizeof(@SRC@.ia_uid); +		meta_len += sizeof(@SRC@.ia_gid); +		meta_len += sizeof(@SRC@.ia_atime); +		meta_len += sizeof(@SRC@.ia_atime_nsec); +		meta_len += sizeof(@SRC@.ia_mtime); +		meta_len += sizeof(@SRC@.ia_mtime_nsec); +""" + +SERLZ_DICT_TEMPLATE = """ +        if (@SRC@) { +			data_pair_t *memb; +			for (memb = @SRC@->members_list; memb; memb = memb->next) { +				*((int *)(meta_buf+offset)) = strlen(memb->key) + 1; +				offset += sizeof(int); +				strcpy (meta_buf+offset, memb->key); +				offset += strlen(memb->key) + 1; +				*((int *)(meta_buf+offset)) = memb->value->len; +				offset += sizeof(int); +				memcpy (meta_buf+offset, memb->value->data, memb->value->len); +				offset += memb->value->len; +			} +        } +		*((int *)(meta_buf+offset)) = 0; +		offset += sizeof(int); +""" + +SERLZ_GFID_TEMPLATE = """ +        memcpy (meta_buf+offset, @SRC@->inode->gfid, 16); +        offset += 16; +""" + +SERLZ_INTEGER_TEMPLATE = """ +        memcpy (meta_buf+offset, &@SRC@, sizeof(@SRC@)); +        offset += sizeof(@SRC@); +""" + +SERLZ_LOC_TEMPLATE = """ +        memcpy (meta_buf+offset, @SRC@.gfid, 16); +        offset += 16; +        memcpy (meta_buf+offset, @SRC@.pargfid, 16); +        offset += 16; +        if (@SRC@.name) { +                *(meta_buf+offset) = 1; +				++offset; +                strcpy 
(meta_buf+offset, @SRC@.name); +                offset += (strlen (@SRC@.name) + 1); +        } else { +                *(meta_buf+offset) = 0; +				++offset; +        } +""" + +SERLZ_STRING_TEMPLATE = """ +        if (@SRC@) { +                *(meta_buf+offset) = 1; +				++offset; +                strcpy (meta_buf+offset, @SRC@); +                offset += strlen(@SRC@); +        } else { +                *(meta_buf+offset) = 0; +				++offset; +        } +""" + +SERLZ_VECTOR_TEMPLATE = """ +        *((size_t *)(meta_buf+offset)) = iov_length (@VEC@, @CNT@); +        offset += sizeof(size_t); +        int32_t i; +        for (i = 0; i < @CNT@; ++i) { +                memcpy (data_buf, @VEC@[i].iov_base, @VEC@[i].iov_len); +                data_buf += @VEC@[i].iov_len; +        } +""" + +# We don't need to save all of the fields - only those affected by chown, +# chgrp, chmod, and utime. +SERLZ_IATT_TEMPLATE = """ +		*((ia_prot_t *)(meta_buf+offset)) = @SRC@.ia_prot; +		offset += sizeof(@SRC@.ia_prot); +		*((uint32_t *)(meta_buf+offset)) = @SRC@.ia_uid; +		offset += sizeof(@SRC@.ia_uid); +		*((uint32_t *)(meta_buf+offset)) = @SRC@.ia_gid; +		offset += sizeof(@SRC@.ia_gid); +		*((uint32_t *)(meta_buf+offset)) = @SRC@.ia_atime; +		offset += sizeof(@SRC@.ia_atime); +		*((uint32_t *)(meta_buf+offset)) = @SRC@.ia_atime_nsec; +		offset += sizeof(@SRC@.ia_atime_nsec); +		*((uint32_t *)(meta_buf+offset)) = @SRC@.ia_mtime; +		offset += sizeof(@SRC@.ia_mtime); +		*((uint32_t *)(meta_buf+offset)) = @SRC@.ia_mtime_nsec; +		offset += sizeof(@SRC@.ia_mtime_nsec); +""" + +typemap = { +	'dict_t *':				( LEN_DICT_TEMPLATE,	SERLZ_DICT_TEMPLATE), +	'fd_t *':				( LEN_GFID_TEMPLATE,	SERLZ_GFID_TEMPLATE), +	'dev_t':				( LEN_INTEGER_TEMPLATE,	SERLZ_INTEGER_TEMPLATE), +	'gf_xattrop_flags_t':	( LEN_INTEGER_TEMPLATE,	SERLZ_INTEGER_TEMPLATE), +	'int32_t':				( LEN_INTEGER_TEMPLATE,	SERLZ_INTEGER_TEMPLATE), +	'mode_t':				( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), +	'off_t':				( 
LEN_INTEGER_TEMPLATE,	SERLZ_INTEGER_TEMPLATE), +	'size_t':				( LEN_INTEGER_TEMPLATE,	SERLZ_INTEGER_TEMPLATE), +	'uint32_t':				( LEN_INTEGER_TEMPLATE,	SERLZ_INTEGER_TEMPLATE), +	'loc_t *':				( LEN_LOC_TEMPLATE,		SERLZ_LOC_TEMPLATE), +	'const char *':			( LEN_STRING_TEMPLATE,	SERLZ_STRING_TEMPLATE), +	'struct iatt *':		( LEN_IATT_TEMPLATE,	SERLZ_IATT_TEMPLATE), +} + +def get_special_subs (args): +	len_code = "" +	ser_code = "" +	for arg in args: +		if (arg[0] != 'fop-arg') or (len(arg) < 4): +			continue +		# Let this throw an exception if we get an unknown field name.  The +		# broken build will remind whoever messed with the stub code that a +		# corresponding update is needed here. +		if arg[3] == "vector": +			# Make it as obvious as possible that this is a special case. +			len_code += LEN_VECTOR_TEMPLATE \ +				.replace("@VEC@","stub->args.vector") \ +				.replace("@CNT@","stub->args.count") +			ser_code += SERLZ_VECTOR_TEMPLATE \ +				.replace("@VEC@","stub->args.vector") \ +				.replace("@CNT@","stub->args.count") +		else: +			len_tmpl, ser_tmpl = typemap[arg[2]] +			src = "stub->args.%s" % arg[3] +			len_code += len_tmpl.replace("@SRC@",src) +			ser_code += ser_tmpl.replace("@SRC@",src) +	return len_code, ser_code + +def gen_fdl (): +	entrypoints = [] +	for name, value in ops.iteritems(): +		if "journal" not in [ x[0] for x in value ]: +			continue +		len_code, ser_code = get_special_subs(value) +		fop_subs[name]["@LEN_CODE@"] = len_code[:-1] +		fop_subs[name]["@SER_CODE@"] = ser_code[:-1] +		print generate(LEN_TEMPLATE,name,fop_subs) +		print generate(SER_TEMPLATE,name,fop_subs) +		print generate(CBK_TEMPLATE,name,cbk_subs) +		print generate(CONTINUE_TEMPLATE,name,fop_subs) +		print generate(FOP_TEMPLATE,name,fop_subs) +		entrypoints.append(name) +	print "struct xlator_fops fops = {" +	for ep in entrypoints: +		print "\t.%s = fdl_%s," % (ep, ep) +	print "};" + +for l in open(sys.argv[1],'r').readlines(): +	if l.find('#pragma generate') != -1: +		print 
"/* BEGIN GENERATED CODE - DO NOT MODIFY */" +		gen_fdl() +		print "/* END GENERATED CODE */" +	else: +		print l[:-1] diff --git a/xlators/experimental/fdl/src/gen_recon.py b/xlators/experimental/fdl/src/gen_recon.py new file mode 100755 index 00000000000..26318f92d88 --- /dev/null +++ b/xlators/experimental/fdl/src/gen_recon.py @@ -0,0 +1,191 @@ +#!/usr/bin/python + +import os +import re +import string +import sys + +curdir = os.path.dirname (sys.argv[0]) +gendir = os.path.join (curdir, '../../../../libglusterfs/src') +sys.path.append (gendir) +from generator import ops, fop_subs, cbk_subs, generate + +# See the big header comment at the start of gen_fdl.py to see how the stages +# fit together.  The big difference here is that *all* of the C code is in the +# template file as labelled fragments, instead of as Python strings.  That +# makes it much easier to edit in one place, with proper syntax highlighting +# and indentation. +# +#   Stage 1 uses type-specific fragments to generate FUNCTION_BODY, instead of +#   LEN_*_TEMPLATE and SERLZ_*_TEMPLATE to generate LEN_CODE and SER_CODE. +# +#   Stage 2 uses the FOP and CASE fragments instead of RECON_TEMPLATE and +#   FOP_TEMPLATE.  The expanded FOP code (including FUNCTION_BODY substitution +#   in the middle of each function) is emitted immediately; the expanded CASE +#   code is saved for the next stage. +# +#   Stage 3 uses the PROLOG and EPILOG fragments, with the expanded CASE code +#   in the middle of EPILOG, to generate the whole output file. +# +# Another way of looking at it is to consider how the fragments appear in +# the final output: +# +#   PROLOG +#   FOP (expanded for CREATE) +#       FOP before FUNCTION_BODY +#       LOC, INTEGER, GFID, etc. (one per arg, by type) +#       FOP after FUNCTION_BODY +#   FOP (expanded for WRITEV) +#       FOP before FUNCTION_BODY +#       GFID, VECTOR, etc. 
(one per arg, by type) +#       FOP after FUNCTION_BODY +#   (more FOPs) +#   EPILOG +#       EPILOG before CASE +#       CASE statements (one per fop) +#       EPILOG after CASE + +typemap = { +	'dict_t *':				"DICT", +	'fd_t *':				"FD", +	'dev_t':				"DOUBLE", +	'gf_xattrop_flags_t':	"INTEGER", +	'int32_t':				"INTEGER", +	'mode_t':				"INTEGER", +	'off_t':				"DOUBLE", +	'size_t':				"DOUBLE", +	'uint32_t':				"INTEGER", +	'loc_t *':				"LOC", +	'const char *':			"STRING", +	'struct iovec *':		"VECTOR", +	'struct iatt *':		"IATT", +	'struct iobref *':		"IOBREF", +} + +def get_special_subs (name, args, fop_type): +	code = "" +	cleanups = "" +	links = "" +	s_args = [] +	for arg in args: +		if arg[0] == 'extra': +			code += "\t%s %s;\n\n" % (arg[2], arg[1]) +			s_args.append(arg[3]) +			continue +		if arg[0] == 'link': +			links += fragments["LINK"].replace("@INODE_ARG@",arg[1])	\ +									  .replace("@IATT_ARG@",arg[2]) +			continue +		if arg[0] != 'fop-arg': +			continue +		if (name, arg[1]) == ('writev', 'count'): +			# Special case: just skip this.  We can't mark it as 'nosync' +			# because of the way the translator and dumper generators look for +			# that after 'stub-name' which we don't define.  Instead of adding a +			# bunch of generic infrastructure for this one case, just pound it +			# here. +			continue +		recon_type = typemap[arg[2]] +		# print "/* %s.%s => %s (%s)*/" % (name, arg[1], recon_type, fop_type) +		if (name == "create") and (arg[1] == "fd"): +			# Special case: fd for create is new, not looked up. +			# print "/* change to NEW_FD */" +			recon_type = "NEW_FD" +		elif (recon_type == "LOC") and (fop_type == "entry-op"): +			# Need to treat this differently for inode vs. entry ops. +			# Special case: link source is treated like inode-op. 
+			if (name != "link") or (arg[1] != "oldloc"): +				# print "/* change to PARENT_LOC */" +				recon_type = "PARENT_LOC" +		code += fragments[recon_type].replace("@ARGNAME@",arg[1])		\ +									 .replace("@ARGTYPE@",arg[2]) +		cleanup_key = recon_type + "_CLEANUP" +		if fragments.has_key(cleanup_key): +			cleanups += fragments[cleanup_key].replace("@ARGNAME@",arg[1]) +		if 'nosync' in arg[4:]: +			code += "\t(void)%s;\n" % arg[1]; +			continue +		if arg[2] in ("loc_t *", "struct iatt *"): +			# These are passed as pointers to the syncop, but they're actual +			# structures in the generated code. +			s_args.append("&"+arg[1]); +		else: +			s_args.append(arg[1]) +	# We have to handle a couple of special cases here, because some n00b +	# defined the syncops with a different argument order than the fops they're +	# based on. +	if name == 'writev': +		# Swap 'flags' and 'iobref'.  Also, we need to add the iov count, which +		# is not stored in or read from the journal.  There are other ways to +		# do that, but this is the only place we need anything similar and we +		# already have to treat it as a special case so this is simplest. +		s_args_str = 'fd, &vector, 1, off, iobref, flags, xdata' +	elif name == 'symlink': +		# Swap 'linkpath' and 'loc'. +		s_args_str = '&loc, linkpath, &iatt, xdata' +	else: +		s_args_str = string.join (s_args, ", ") +	return code, links, s_args_str, cleanups + +# TBD: probably need to generate type-specific cleanup code as well - e.g. +# fd_unref for an fd_t, loc_wipe for a loc_t, and so on.  All of these +# generated CLEANUP fragments will go at the end of the function, with goto +# labels.  Meanwhile, the error-checking part of each type-specific fragment +# (e.g. LOC or FD) will need to update the indirect label that we jump to when +# an error is detected.  This will probably get messy. 
+def gen_functions (): +	code = "" +	for name, value in ops.iteritems(): +		fop_type = [ x[1] for x in value if x[0] == "journal" ] +		if not fop_type: +			continue +		body, links, syncop_args, cleanups = get_special_subs (name, value, +															   fop_type[0]) +		fop_subs[name]["@FUNCTION_BODY@"] = body +		fop_subs[name]["@LINKS@"] = links +		fop_subs[name]["@SYNCOP_ARGS@"] = syncop_args +		fop_subs[name]["@CLEANUPS@"] = cleanups +		if name == "writev": +			# Take advantage of the fact that, *during reconciliation*, the +			# vector is always a single element.  In normal I/O it's not. +			fop_subs[name]["@SUCCESS_VALUE@"] = "vector.iov_len" +		else: +			fop_subs[name]["@SUCCESS_VALUE@"] = "GFAPI_SUCCESS" +		# Print the FOP fragment with @FUNCTION_BODY@ in the middle. +		code += generate(fragments["FOP"],name,fop_subs) +	return code + +def gen_cases (): +	code = "" +	for name, value in ops.iteritems(): +		if "journal" not in [ x[0] for x in value ]: +			continue +		# Add the CASE fragment for this fop. 
+		code += generate(fragments["CASE"],name,fop_subs) +	return code + +def load_fragments (path="recon-tmpl.c"): +	pragma_re = re.compile('pragma fragment (.*)') +	cur_symbol = None +	cur_value = "" +	result = {} +	for line in open(path,"r").readlines(): +		m = pragma_re.search(line) +		if m: +			if cur_symbol: +				result[cur_symbol] = cur_value +			cur_symbol = m.group(1) +			cur_value = "" +		else: +			cur_value += line +	if cur_symbol: +		result[cur_symbol] = cur_value +	return result + +if __name__ == "__main__": +	fragments = load_fragments(sys.argv[1]) +	print "/* BEGIN GENERATED CODE - DO NOT MODIFY */" +	print fragments["PROLOG"] +	print gen_functions() +	print fragments["EPILOG"].replace("@SWITCH_BODY@",gen_cases()) +	print "/* END GENERATED CODE */" diff --git a/xlators/experimental/fdl/src/jnl-types.h b/xlators/experimental/fdl/src/jnl-types.h new file mode 100644 index 00000000000..8cb39d01a25 --- /dev/null +++ b/xlators/experimental/fdl/src/jnl-types.h @@ -0,0 +1,14 @@ +#define NEW_REQUEST     (uint8_t)'N' + +typedef struct { +        uint8_t         event_type;     /* e.g. NEW_REQUEST */ +        uint8_t         fop_type;       /* e.g. GF_FOP_SETATTR */ +        uint16_t        request_id; +        uint32_t        ext_length; +} event_header_t; + +enum { +        FDL_IPC_BASE = 0xfeedbee5,       /* ... 
and they make honey */ +        FDL_IPC_CHANGE_TERM, +        FDL_IPC_GET_TERMS, +}; diff --git a/xlators/experimental/fdl/src/logdump.c b/xlators/experimental/fdl/src/logdump.c new file mode 100644 index 00000000000..7c979c32a04 --- /dev/null +++ b/xlators/experimental/fdl/src/logdump.c @@ -0,0 +1,50 @@ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/mman.h> + +extern int fdl_dump (char **, char **); + +int +main (int argc, char **argv) +{ +        int     meta_fd         = (-1); +        char    *meta_buf       = NULL; +        int     data_fd         = (-1); +        char    *data_buf       = NULL; + +        meta_fd = open (argv[1], O_RDONLY); +        if (meta_fd < 0) { +                perror ("open"); +                return EXIT_FAILURE; +        } + +        /* TBD: get proper length */ +        meta_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0); +        if (meta_buf == MAP_FAILED) { +                perror ("mmap"); +                return EXIT_FAILURE; +        } + +        data_fd = open (argv[2], O_RDONLY); +        if (data_fd < 0) { +                perror ("open"); +                return EXIT_FAILURE; +        } + +        /* TBD: get proper length */ +        data_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0); +        if (data_buf == MAP_FAILED) { +                perror ("mmap"); +                return EXIT_FAILURE; +        } + +        for (;;) { +                if (!fdl_dump(&meta_buf,&data_buf)) { +                        break; +                } +        } + +        return EXIT_SUCCESS; +} diff --git a/xlators/experimental/fdl/src/recon-tmpl.c b/xlators/experimental/fdl/src/recon-tmpl.c new file mode 100644 index 00000000000..523bda39418 --- /dev/null +++ b/xlators/experimental/fdl/src/recon-tmpl.c @@ -0,0 +1,305 @@ +#pragma fragment PROLOG +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glusterfs.h" +#include "fd.h" 
+#include "iatt.h" +#include "syncop.h" +#include "xlator.h" +#include "glfs-internal.h" + +#include "jnl-types.h" + +#define GFAPI_SUCCESS 0 + +inode_t * +recon_get_inode (glfs_t *fs, uuid_t gfid) +{ +        inode_t         *inode; +        loc_t           loc     = {NULL,}; +        struct iatt     iatt; +        int             ret; +        inode_t         *newinode; + +        inode = inode_find (fs->active_subvol->itable, gfid); +        if (inode) { +                printf ("=== FOUND %s IN TABLE\n", uuid_utoa(gfid)); +                return inode; +        } + +        loc.inode = inode_new (fs->active_subvol->itable); +        if (!loc.inode) { +                return NULL; +        } +        gf_uuid_copy (loc.inode->gfid, gfid); +        gf_uuid_copy (loc.gfid, gfid); + +        printf ("=== DOING LOOKUP FOR %s\n", uuid_utoa(gfid)); + +        ret = syncop_lookup (fs->active_subvol, &loc, &iatt, +                             NULL, NULL, NULL); +        if (ret != GFAPI_SUCCESS) { +                fprintf (stderr, "syncop_lookup failed (%d)\n", ret); +                return NULL; +        } + +        newinode = inode_link (loc.inode, NULL, NULL, &iatt); +        if (newinode) { +                inode_lookup (newinode); +        } + +        return newinode; +} + +#pragma fragment DICT +        dict_t  *@ARGNAME@; + +        @ARGNAME@ = dict_new(); +        if (!@ARGNAME@) { +                goto *err_label; +        } +        err_label = &&cleanup_@ARGNAME@; + +        { +                int     key_len, data_len; +                char    *key_ptr; +                int     garbage; +                for (;;) { +                        key_len = *((int *)new_meta); +                        new_meta += sizeof(int); +                        if (!key_len) { +                                break; +                        } +                        key_ptr = new_meta; +                        new_meta += key_len; +                        data_len = *((int 
*)new_meta); +                        new_meta += sizeof(int); +                        garbage = dict_set_static_bin (@ARGNAME@, key_ptr, +                                                       new_meta, data_len); +                        /* TBD: check error from dict_set_static_bin */ +                        (void)garbage; +                        new_meta += data_len; +                } +        } + +#pragma fragment DICT_CLEANUP +cleanup_@ARGNAME@: +        dict_unref (@ARGNAME@); + +#pragma fragment DOUBLE +        @ARGTYPE@       @ARGNAME@       = *((@ARGTYPE@ *)new_meta); +        new_meta += sizeof(uint64_t); + +#pragma fragment FD +        inode_t *@ARGNAME@_ino; +        fd_t    *@ARGNAME@; + +        @ARGNAME@_ino = recon_get_inode (fs, *((uuid_t *)new_meta)); +        new_meta += 16; +        if (!@ARGNAME@_ino) { +                goto *err_label; +        } +        err_label = &&cleanup_@ARGNAME@_ino; + +        @ARGNAME@ = fd_anonymous (@ARGNAME@_ino); +        if (!@ARGNAME@) { +                goto *err_label; +        } +        err_label = &&cleanup_@ARGNAME@; + +#pragma fragment FD_CLEANUP +cleanup_@ARGNAME@: +        fd_unref (@ARGNAME@); +cleanup_@ARGNAME@_ino: +        inode_unref (@ARGNAME@_ino); + +#pragma fragment NEW_FD +        /* +         * This pseudo-type is only used for create, and in that case we know +         * we'll be using loc.inode, so it's not worth generalizing to take an +         * extra argument. 
+         */ +        fd_t    *@ARGNAME@      = fd_anonymous (loc.inode); + +        if (!fd) { +                goto *err_label; +        } +        err_label = &&cleanup_@ARGNAME@; +        new_meta += 16; + +#pragma fragment NEW_FD_CLEANUP +cleanup_@ARGNAME@: +        fd_unref (@ARGNAME@); + +#pragma fragment INTEGER +        @ARGTYPE@       @ARGNAME@       = *((@ARGTYPE@ *)new_meta); + +        new_meta += sizeof(@ARGTYPE@); + +#pragma fragment LOC +        loc_t           @ARGNAME@       = { NULL, }; + +        @ARGNAME@.inode = recon_get_inode (fs, *((uuid_t *)new_meta)); +        if (!@ARGNAME@.inode) { +                goto *err_label; +        } +        err_label = &&cleanup_@ARGNAME@; +        gf_uuid_copy (@ARGNAME@.gfid, @ARGNAME@.inode->gfid); +        new_meta += 16; +        new_meta += 16; /* skip over pargfid */ +        if (*(new_meta++)) { +                @ARGNAME@.name = new_meta; +                new_meta += strlen(new_meta) + 1; +        } + +#pragma fragment LOC_CLEANUP +cleanup_@ARGNAME@: +        loc_wipe (&@ARGNAME@); + +#pragma fragment PARENT_LOC +        loc_t           @ARGNAME@       = { NULL, }; + +        new_meta += 16; /* skip over gfid */ +        @ARGNAME@.parent = recon_get_inode (fs, *((uuid_t *)new_meta)); +        if (!@ARGNAME@.parent) { +                goto *err_label; +        } +        err_label = &&cleanup_@ARGNAME@; +        gf_uuid_copy (@ARGNAME@.pargfid, @ARGNAME@.parent->gfid); +        new_meta += 16; +        if (!*(new_meta++)) { +                goto *err_label; +        } +        @ARGNAME@.name = new_meta; +        new_meta += strlen(new_meta) + 1; + +        @ARGNAME@.inode = inode_new (fs->active_subvol->itable); +        if (!@ARGNAME@.inode) { +                goto *err_label; +        } + +#pragma fragment PARENT_LOC_CLEANUP +cleanup_@ARGNAME@: +        loc_wipe (&@ARGNAME@); + +#pragma fragment STRING +        char    *@ARGNAME@; +        if (*(new_meta++)) { +                @ARGNAME@ = new_meta; + 
               new_meta += (strlen(new_meta) + 1); +        } +        else { +                goto *err_label; +        } + +#pragma fragment VECTOR +        struct iovec    @ARGNAME@; + +        @ARGNAME@.iov_len = *((size_t *)new_meta); +        new_meta += sizeof(@ARGNAME@.iov_len); +        @ARGNAME@.iov_base = new_data; +        new_data += @ARGNAME@.iov_len; + +#pragma fragment IATT +        struct iatt     @ARGNAME@; +        { +                @ARGNAME@.ia_prot = *((ia_prot_t *)new_meta); +                new_meta += sizeof(ia_prot_t); +                uint32_t *myints = (uint32_t *)new_meta; +                @ARGNAME@.ia_uid = myints[0]; +                @ARGNAME@.ia_gid = myints[1]; +                @ARGNAME@.ia_atime = myints[2]; +                @ARGNAME@.ia_atime_nsec = myints[3]; +                @ARGNAME@.ia_mtime = myints[4]; +                @ARGNAME@.ia_mtime_nsec = myints[5]; +                new_meta += sizeof(*myints) * 6; +        } + +#pragma fragment IOBREF +        struct iobref   *@ARGNAME@; + +        @ARGNAME@ = iobref_new(); +        if (!@ARGNAME@) { +                goto *err_label; +        } +        err_label = &&cleanup_@ARGNAME@; + +#pragma fragment IOBREF_CLEANUP +cleanup_@ARGNAME@: +        iobref_unref (@ARGNAME@); + +#pragma fragment LINK +        /* TBD: check error */ +        inode_t *new_inode = inode_link (@INODE_ARG@, NULL, NULL, @IATT_ARG@); +        if (new_inode) { +                inode_lookup (new_inode); +        } + +#pragma fragment FOP +int +fdl_replay_@NAME@ (glfs_t *fs, char **old_meta, char **old_data) +{ +        char    *new_meta	= *old_meta; +        char	*new_data	= *old_data; +        int     ret; +        int     status          = 0xbad; +        void    *err_label      = &&done; + +@FUNCTION_BODY@ + +        ret = syncop_@NAME@ (fs->active_subvol, @SYNCOP_ARGS@, NULL); +        if (ret != @SUCCESS_VALUE@) { +                fprintf (stderr, "syncop_@NAME@ returned %d", ret); +                goto 
*err_label; +        } + +@LINKS@ + +        status = 0; + +@CLEANUPS@ + +done: +        *old_meta = new_meta; +        *old_data = new_data; +        return status; +} + +#pragma fragment CASE +        case GF_FOP_@UPNAME@: +                printf ("=== GF_FOP_@UPNAME@\n"); +                if (fdl_replay_@NAME@ (fs, &new_meta, &new_data) != 0) { +                        goto done; +                } +                recognized = 1; +                break; + +#pragma fragment EPILOG +int +recon_execute (glfs_t *fs, char **old_meta, char **old_data) +{ +        char            *new_meta       = *old_meta; +        char            *new_data       = *old_data; +        int             recognized      = 0; +        event_header_t  *eh; + +        eh = (event_header_t *)new_meta; +        new_meta += sizeof (*eh); + +        /* TBD: check event_type instead of assuming NEW_REQUEST */ + +        switch (eh->fop_type) { +@SWITCH_BODY@ + +        default: +                printf ("unknown fop %u\n", eh->fop_type); +        } + +done: +        *old_meta = new_meta; +        *old_data = new_data; +        return recognized; +} diff --git a/xlators/experimental/fdl/src/recon.c b/xlators/experimental/fdl/src/recon.c new file mode 100644 index 00000000000..14168a011e0 --- /dev/null +++ b/xlators/experimental/fdl/src/recon.c @@ -0,0 +1,89 @@ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/mman.h> + +#include "glusterfs.h" +#include "fd.h" +#include "syncop.h" +#include "glfs-internal.h" + +#define GFAPI_SUCCESS 0 + +extern int recon_execute (glfs_t *, char **, char **); + +int +main (int argc, char **argv) +{ +        glfs_t  *fs; +        int     ret; +        int     meta_fd         = (-1); +        char    *meta_buf       = NULL; +        int     data_fd         = (-1); +        char    *data_buf       = NULL; + +        fs = glfs_new ("whocares"); +        if (!fs) { +                fprintf (stderr, "glfs_new failed\n"); +   
             return EXIT_FAILURE; +        } + +        if (getenv("RECON_DEBUG")) { +                ret = glfs_set_logging (fs, "/dev/stderr", 7); +        } +        else { +                ret = glfs_set_logging (fs, "/dev/null", 0); +        } + +        if (ret != GFAPI_SUCCESS) { +                fprintf (stderr, "glfs_set_logging failed (%d)\n", errno); +                return EXIT_FAILURE; +        } + +        ret = glfs_set_volfile (fs, argv[1]); +        if (ret != GFAPI_SUCCESS) { +                fprintf (stderr, "glfs_set_volfile failed (%d)\n", errno); +                return EXIT_FAILURE; +        } + +        ret = glfs_init (fs); +        if (ret != GFAPI_SUCCESS) { +                fprintf (stderr, "glfs_init failed (%d)\n", errno); +                return EXIT_FAILURE; +        } + +        meta_fd = open (argv[2], O_RDONLY); +        if (meta_fd < 0) { +                perror ("open"); +                return EXIT_FAILURE; +        } + +        /* TBD: get proper length */ +        meta_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0); +        if (meta_buf == MAP_FAILED) { +                perror ("mmap"); +                return EXIT_FAILURE; +        } + +        data_fd = open (argv[3], O_RDONLY); +        if (data_fd < 0) { +                perror ("open"); +                return EXIT_FAILURE; +        } + +        /* TBD: get proper length */ +        data_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0); +        if (data_buf == MAP_FAILED) { +                perror ("mmap"); +                return EXIT_FAILURE; +        } + +        for (;;) { +                if (!recon_execute(fs,&meta_buf,&data_buf)) { +                        break; +                } +        } + +        return EXIT_SUCCESS; +} diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am index 7e5783f4f30..649d9d8e9fa 100644 --- a/xlators/features/Makefile.am +++ b/xlators/features/Makefile.am @@ -1,5 +1,6 @@ -SUBDIRS = 
locks quota read-only mac-compat quiesce marker index barrier arbiter\ -          protect compress changelog changetimerecorder ganesha gfid-access $(GLUPY_SUBDIR) qemu-block \ -          upcall snapview-client snapview-server trash shard bit-rot #path-converter # filter +SUBDIRS = locks quota read-only mac-compat quiesce marker index barrier \ +	  arbiter protect compress changelog changetimerecorder ganesha \ +	  gfid-access $(GLUPY_SUBDIR) qemu-block upcall snapview-client \ +	  snapview-server trash shard bit-rot  CLEANFILES = diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c index 2c52cf72a3f..3df4b3556cf 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volgen.c +++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c @@ -1783,6 +1783,30 @@ out:          return ret;  } +/* Add this before (above) io-threads because it's not thread-safe yet. */ +static int +brick_graph_add_fdl (volgen_graph_t *graph, glusterd_volinfo_t *volinfo, +                     dict_t *set_dict, glusterd_brickinfo_t *brickinfo) +{ + +        xlator_t        *xl = NULL; +        int             ret = -1; + +        if (!graph || !volinfo || !set_dict) +                goto out; + +        if (dict_get_str_boolean (set_dict, "features.fdl", 0)) { +                xl = volgen_graph_add (graph, "experimental/fdl", +                                       volinfo->volname); +                if (!xl) +                        goto out; +        } +        ret = 0; + +out: +        return ret; +} +  static int  brick_graph_add_iot (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,                        dict_t *set_dict, glusterd_brickinfo_t *brickinfo) @@ -2359,6 +2383,7 @@ static volgen_brick_xlator_t server_graph_table[] = {          {brick_graph_add_index, "index"},          {brick_graph_add_barrier, NULL},          {brick_graph_add_marker, "marker"}, +        {brick_graph_add_fdl, "fdl"},          {brick_graph_add_iot, "io-threads"},          
{brick_graph_add_upcall, "upcall"},          {brick_graph_add_pump, NULL}, diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c index 1463ef72c71..c0059d83cfe 100644 --- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c +++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c @@ -2711,6 +2711,15 @@ struct volopt_map_entry glusterd_volopt_map[] = {            .op_version  = GD_OP_VERSION_4_0_0,            .description = "percent of rep_count-1 bricks that must be up"          }, +        /* Full Data Logging */ +        { +          .key         = "features.fdl", +          .voltype     = "features/fdl", +          .option      = "!fdl", +          .op_version  = GD_OP_VERSION_4_0_0, +          .flags       = OPT_FLAG_XLATOR_OPT, +          .type        = NO_DOC, +        },          { .key         = NULL          }  };  | 
