diff options
author | Jeff Darcy <jdarcy@redhat.com> | 2016-02-08 13:30:49 -0500 |
---|---|---|
committer | Jeff Darcy <jdarcy@redhat.com> | 2016-02-13 05:13:07 -0800 |
commit | c458433041aafb48ae6d6e5fcf3e1e737dc3fda3 (patch) | |
tree | 33a03ca0c1f5faf58419de2c4ff4532752ddfb07 | |
parent | da33097c3d6492e3b468b4347e47c70828fb4320 (diff) |
experimental: add fdl (Full Data Logging) translator
NSR needs logging that is different than our existing changelog in
several ways:
* Full data, not just metadata
* Pre-op, not post-op
* High performance
* Supports the concept of time-bounded "terms"
Others (for example EC) might need the same thing. This patch adds such
a translator. It also adds code to dump the resulting journals, and to replay
them using syncops, plus (very rudimentary) tests for all of the above.
Change-Id: I29680a1b4e0a9e7d5a8497fef302c46434b86636
Signed-off-by: Jeff Darcy <jdarcy@redhat.com>
Reviewed-on: http://review.gluster.org/12450
Smoke: Gluster Build System <jenkins@build.gluster.com>
CentOS-regression: Gluster Build System <jenkins@build.gluster.com>
NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
30 files changed, 2269 insertions, 110 deletions
diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases index 40b6ed21192..7181dd2f6e8 100644 --- a/api/src/gfapi.aliases +++ b/api/src/gfapi.aliases @@ -140,3 +140,4 @@ _priv_glfs_resolve _glfs_resolve$GFAPI_PRIVATE_3.7.0 _priv_glfs_process_upcall_event _glfs_process_upcall_event$GFAPI_PRIVATE_3.7.0 _pub_glfs_h_lookupat _glfs_h_lookupat$GFAPI_3.7.4 +_pub_glfs_ipc_xd _glfs_ipc_xd@GFAPI_4.0.4 diff --git a/api/src/gfapi.map b/api/src/gfapi.map index d42ae2b97af..b35984a088c 100644 --- a/api/src/gfapi.map +++ b/api/src/gfapi.map @@ -167,3 +167,8 @@ GFAPI_3.7.4 { global: glfs_h_lookupat; } GFAPI_PRIVATE_3.7.0; + +GFAPI_4.0.0 { + global: + glfs_ipc_xd; +} GFAPI_3.7.4; diff --git a/api/src/glfs.c b/api/src/glfs.c index b151936a6e8..037b579225f 100644 --- a/api/src/glfs.c +++ b/api/src/glfs.c @@ -1233,7 +1233,7 @@ invalid_fs: GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_get_volfile, 3.6.0); int -pub_glfs_ipc (struct glfs *fs, int opcode) +pub_glfs_ipc_xd (struct glfs *fs, int opcode, dict_t *xd_in, dict_t **xd_out) { xlator_t *subvol = NULL; int ret = -1; @@ -1248,7 +1248,7 @@ pub_glfs_ipc (struct glfs *fs, int opcode) goto out; } - ret = syncop_ipc (subvol, opcode, NULL, NULL); + ret = syncop_ipc (subvol, opcode, xd_in, xd_out); DECODE_SYNCOP_ERR (ret); out: @@ -1259,4 +1259,12 @@ invalid_fs: return ret; } +GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ipc_xd, 4.0.0); + +int +pub_glfs_ipc (struct glfs *fs, int opcode) +{ + return pub_glfs_ipc_xd (fs, opcode, NULL, NULL); +} + GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ipc, 3.7.0); diff --git a/configure.ac b/configure.ac index 5d2fe342b74..29e36648aac 100644 --- a/configure.ac +++ b/configure.ac @@ -117,6 +117,8 @@ AC_CONFIG_FILES([Makefile xlators/features/Makefile xlators/features/arbiter/Makefile xlators/features/arbiter/src/Makefile + xlators/experimental/fdl/Makefile + xlators/experimental/fdl/src/Makefile xlators/features/changelog/Makefile xlators/features/changelog/src/Makefile xlators/features/changelog/lib/Makefile diff --git 
a/glusterfs.spec.in b/glusterfs.spec.in index 53a65ae4fed..9f04bc37e10 100644 --- a/glusterfs.spec.in +++ b/glusterfs.spec.in @@ -952,6 +952,7 @@ fi %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/barrier.so %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/cdc.so %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/changelog.so +%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/experimental/fdl.so %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/gfid-access.so %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/read-only.so %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/shard.so @@ -1217,6 +1218,8 @@ fi /usr/lib/firewalld/services/glusterfs.xml %endif +%{_sbindir}/gf_logdump +%{_sbindir}/gf_recon %changelog * Sat Jan 16 2016 Niels de Vos <ndevos@redhat.com> diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c index bbaca1e7277..c980e7bc640 100644 --- a/glusterfsd/src/glusterfsd.c +++ b/glusterfsd/src/glusterfsd.c @@ -1199,6 +1199,26 @@ parse_opts (int key, char *arg, struct argp_state *state) return 0; } +gf_boolean_t +should_call_fini (glusterfs_ctx_t *ctx, xlator_t *trav) +{ + /* There's nothing to call, so the other checks don't matter. */ + if (!trav->fini) { + return _gf_false; + } + + /* This preserves previous behavior in glusterd. */ + if (ctx->process_mode == GF_GLUSTERD_PROCESS) { + return _gf_true; + } + + /* This is the only one known to be safe in glusterfsd. 
*/ + if (!strcmp(trav->type,"experimental/fdl")) { + return _gf_true; + } + + return _gf_false; +} void cleanup_and_exit (int signum) @@ -1271,20 +1291,17 @@ cleanup_and_exit (int signum) /*call fini for glusterd xlator */ /* TODO : Invoke fini for rest of the xlators */ - if (ctx->process_mode == GF_GLUSTERD_PROCESS) { - - trav = NULL; - if (ctx->active) - trav = ctx->active->top; - while (trav) { - if (trav->fini) { - THIS = trav; - trav->fini (trav); - } - trav = trav->next; + trav = NULL; + if (ctx->active) + trav = ctx->active->top; + while (trav) { + if (should_call_fini(ctx,trav)) { + THIS = trav; + trav->fini (trav); } - + trav = trav->next; } + exit(0); } diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am index 46e2e021134..c6d93c925ac 100644 --- a/libglusterfs/src/Makefile.am +++ b/libglusterfs/src/Makefile.am @@ -83,7 +83,7 @@ y.tab.h: graph.y defaults.c: defaults-tmpl.c generator.py gen-defaults.py $(PYTHON) $(srcdir)/gen-defaults.py $(srcdir)/defaults-tmpl.c > $@ -CLEANFILES = graph.lex.c y.tab.c y.tab.h defaults.c +CLEANFILES = $(nodist_libglusterfs_la_SOURCES) if UNITTEST CLEANFILES += *.gcda *.gcno *_xunit.xml diff --git a/libglusterfs/src/call-stub.h b/libglusterfs/src/call-stub.h index 01621368ee9..82a49c1d7b9 100644 --- a/libglusterfs/src/call-stub.h +++ b/libglusterfs/src/call-stub.h @@ -17,12 +17,15 @@ #include "stack.h" #include "list.h" -typedef struct { +typedef struct _call_stub { struct list_head list; char wind; call_frame_t *frame; glusterfs_fop_t fop; struct mem_pool *stub_mem_pool; /* pointer to stub mempool in ctx_t */ + uint32_t jnl_meta_len; + uint32_t jnl_data_len; + void (*serialize) (struct _call_stub *, char *, char *); union { fop_lookup_t lookup; diff --git a/libglusterfs/src/generator.py b/libglusterfs/src/generator.py index 5e8f6c29cd4..8be68337baa 100644..100755 --- a/libglusterfs/src/generator.py +++ b/libglusterfs/src/generator.py @@ -2,6 +2,65 @@ import string +# ops format: 'fop-arg' name type 
stub-field [nosync] +# 'cbk-arg' name type +# 'extra' name type arg-str +# 'journal' fop-type +# 'link' inode iatt +# +# 'role' indicates the significance of this line to the code generator (sort of +# our own type). +# +# For fop-arg, we first need to know the name and the type of the arg so that +# we can generate SHORT_ARGS (for function calls) and LONG_ARGS (for +# declarations). For code that uses stubs, we also need to know the name of +# the stub field, which might be different than the argument itself. Lastly, +# for code that uses syncops, we need to know whether whoever wrote the syncop +# for this fop "forgot" to include this argument. (Editorial: this kind of +# creeping inconsistency is why we should have used code generation for stubs +# and syncops as well as defaults all along.) To address this need, we use the +# optional 'nosync' field for arguments (e.g. mkdir.umask) that we should skip +# in generated syncop code. +# +# 'cbk-arg' is like fop-arg but simpler and used for generating callbacks +# instead of fop functions. +# +# 'extra' is also like fop-arg, but it's another hack for syncops. This time +# the problem is that some of what would normally be *callback* arguments are +# instead created in the caller and passed to the syncop. We handle that by +# adding an entry at the appropriate place in the fop-arg list, with the name +# and type to generate a declaration and an argument string to generate the +# actual syncop call. +# +# The mere presence of a 'journal' item is sufficient for most of the journal +# code to recognize that it should do something. However, reconciliation also +# needs to decide how reconciliation builds the arguments it needs to call down +# to the syncop layer, based on what's in the journal. To do that, we divide +# ops into three types and store those types in the ops table. In general, +# these three types work as follows. 
+# +# For an fd-op, the GFID in the journal is used (in the loc.gfid field) to +# look up an inode, then an anonymous fd is found/created for that inode. +# +# For an inode-op, the GFID in the journal is used the same way, but no fd +# is needed. +# +# For an entry-op, the *parent* GFID and name from the journal are used to +# look up an inode (via loc.pargfid and loc.name respectively). +# +# The only places this seems to fall down are link and create. In link, +# which is generally an entry-op, the source is looked up as though it's an +# inode-op. In create, we have an fd argument but it's really a return +# argument so we get a fresh inode instead of looking one up. Those two cases +# need to be handled as special cases in the reconciliation code. +# +# 'link' is (hopefully) the last of the journal/syncop hacks. Much like +# 'extra', some values that are returned as callback arguments in the normal +# case are handled differently for syncops. For syncops that create objects +# (e.g. mkdir) we need to link those objects into our inode table. The 'inode' +# and 'iatt' fields here give us the information we need to construct the +# proper inode_link call(s). 
+ ops = {} ops['fgetxattr'] = ( @@ -13,19 +72,21 @@ ops['fgetxattr'] = ( ) ops['fsetxattr'] = ( - ('fop-arg', 'fd', 'fd_t *'), - ('fop-arg', 'dict', 'dict_t *'), - ('fop-arg', 'flags', 'int32_t'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'fd', 'fd_t *', 'fd'), + ('fop-arg', 'dict', 'dict_t *', 'xattr'), + ('fop-arg', 'flags', 'int32_t', 'flags'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'fd-op'), ) ops['setxattr'] = ( - ('fop-arg', 'loc', 'loc_t *'), - ('fop-arg', 'dict', 'dict_t *'), - ('fop-arg', 'flags', 'int32_t'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'loc', 'loc_t *', 'loc'), + ('fop-arg', 'dict', 'dict_t *', 'xattr'), + ('fop-arg', 'flags', 'int32_t', 'flags'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'inode-op'), ) ops['statfs'] = ( @@ -73,16 +134,17 @@ ops['flush'] = ( ) ops['writev'] = ( - ('fop-arg', 'fd', 'fd_t *'), - ('fop-arg', 'vector', 'struct iovec *'), + ('fop-arg', 'fd', 'fd_t *', 'fd'), + ('fop-arg', 'vector', 'struct iovec *', 'vector'), ('fop-arg', 'count', 'int32_t'), - ('fop-arg', 'off', 'off_t'), - ('fop-arg', 'flags', 'uint32_t'), + ('fop-arg', 'off', 'off_t', 'offset'), + ('fop-arg', 'flags', 'uint32_t', 'flags'), ('fop-arg', 'iobref', 'struct iobref *'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'prebuf', 'struct iatt *'), ('cbk-arg', 'postbuf', 'struct iatt *'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'fd-op'), ) ops['readv'] = ( @@ -108,96 +170,111 @@ ops['open'] = ( ) ops['create'] = ( - ('fop-arg', 'loc', 'loc_t *'), - ('fop-arg', 'flags', 'int32_t'), - ('fop-arg', 'mode', 'mode_t'), - ('fop-arg', 'umask', 'mode_t'), - ('fop-arg', 'fd', 'fd_t *'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'loc', 'loc_t *', 'loc'), + ('fop-arg', 'flags', 'int32_t', 'flags'), + ('fop-arg', 'mode', 'mode_t', 'mode'), + ('fop-arg', 'umask', 'mode_t', 'umask', 
'nosync'), + ('fop-arg', 'fd', 'fd_t *', 'fd'), + ('extra', 'iatt', 'struct iatt', '&iatt'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'fd', 'fd_t *'), ('cbk-arg', 'inode', 'inode_t *'), ('cbk-arg', 'buf', 'struct iatt *'), ('cbk-arg', 'preparent', 'struct iatt *'), ('cbk-arg', 'postparent', 'struct iatt *'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'entry-op'), + ('link', 'loc.inode', '&iatt'), ) ops['link'] = ( - ('fop-arg', 'oldloc', 'loc_t *'), - ('fop-arg', 'newloc', 'loc_t *'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'oldloc', 'loc_t *', 'loc'), + ('fop-arg', 'newloc', 'loc_t *', 'loc2'), + ('extra', 'iatt', 'struct iatt', '&iatt'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'inode', 'inode_t *'), ('cbk-arg', 'buf', 'struct iatt *'), ('cbk-arg', 'preparent', 'struct iatt *'), ('cbk-arg', 'postparent', 'struct iatt *'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'entry-op'), ) ops['rename'] = ( - ('fop-arg', 'oldloc', 'loc_t *'), - ('fop-arg', 'newloc', 'loc_t *'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'oldloc', 'loc_t *', 'loc'), + ('fop-arg', 'newloc', 'loc_t *', 'loc2'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'buf', 'struct iatt *'), ('cbk-arg', 'preoldparent', 'struct iatt *'), ('cbk-arg', 'postoldparent','struct iatt *'), ('cbk-arg', 'prenewparent', 'struct iatt *'), ('cbk-arg', 'postnewparent','struct iatt *'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'entry-op'), ) ops['symlink'] = ( - ('fop-arg', 'linkpath', 'const char *'), - ('fop-arg', 'loc', 'loc_t *'), - ('fop-arg', 'umask', 'mode_t'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'linkpath', 'const char *', 'linkname'), + ('fop-arg', 'loc', 'loc_t *', 'loc'), + ('fop-arg', 'umask', 'mode_t', 'mode', 'nosync'), + ('extra', 'iatt', 'struct iatt', '&iatt'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'inode', 'inode_t *'), ('cbk-arg', 'buf', 'struct iatt *'), ('cbk-arg', 'preparent', 'struct iatt 
*'), ('cbk-arg', 'postparent', 'struct iatt *'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'entry-op'), ) ops['rmdir'] = ( - ('fop-arg', 'loc', 'loc_t *'), - ('fop-arg', 'flags', 'int32_t'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'loc', 'loc_t *', 'loc'), + ('fop-arg', 'flags', 'int32_t', 'flags'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'preparent', 'struct iatt *'), ('cbk-arg', 'postparent', 'struct iatt *'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'entry-op'), ) ops['unlink'] = ( - ('fop-arg', 'loc', 'loc_t *'), - ('fop-arg', 'flags', 'int32_t'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'loc', 'loc_t *', 'loc'), + ('fop-arg', 'flags', 'int32_t', 'flags', 'nosync'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'preparent', 'struct iatt *'), ('cbk-arg', 'postparent', 'struct iatt *'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'entry-op'), ) ops['mkdir'] = ( - ('fop-arg', 'loc', 'loc_t *'), - ('fop-arg', 'mode', 'mode_t'), - ('fop-arg', 'umask', 'mode_t'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'loc', 'loc_t *', 'loc'), + ('fop-arg', 'mode', 'mode_t', 'mode'), + ('fop-arg', 'umask', 'mode_t', 'umask', 'nosync'), + ('extra', 'iatt', 'struct iatt', '&iatt'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'inode', 'inode_t *'), ('cbk-arg', 'buf', 'struct iatt *'), ('cbk-arg', 'preparent', 'struct iatt *'), ('cbk-arg', 'postparent', 'struct iatt *'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'entry-op'), + ('link', 'loc.inode', '&iatt'), ) ops['mknod'] = ( - ('fop-arg', 'loc', 'loc_t *'), - ('fop-arg', 'mode', 'mode_t'), - ('fop-arg', 'rdev', 'dev_t'), - ('fop-arg', 'umask', 'mode_t'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'loc', 'loc_t *', 'loc'), + ('fop-arg', 'mode', 'mode_t', 'mode'), + ('fop-arg', 'rdev', 'dev_t', 'rdev'), + ('fop-arg', 'umask', 'mode_t', 'umask', 'nosync'), + ('extra', 'iatt', 'struct iatt', '&iatt'), + ('fop-arg', 'xdata', 'dict_t *', 
'xdata'), ('cbk-arg', 'inode', 'inode_t *'), ('cbk-arg', 'buf', 'struct iatt *'), ('cbk-arg', 'preparent', 'struct iatt *'), ('cbk-arg', 'postparent', 'struct iatt *'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'entry-op'), ) ops['readlink'] = ( @@ -217,12 +294,13 @@ ops['access'] = ( ) ops['ftruncate'] = ( - ('fop-arg', 'fd', 'fd_t *'), - ('fop-arg', 'offset', 'off_t'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'fd', 'fd_t *', 'fd'), + ('fop-arg', 'offset', 'off_t', 'offset'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'prebuf', 'struct iatt *'), ('cbk-arg', 'postbuf', 'struct iatt *'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'fd-op'), ) ops['getxattr'] = ( @@ -234,35 +312,39 @@ ops['getxattr'] = ( ) ops['xattrop'] = ( - ('fop-arg', 'loc', 'loc_t *'), - ('fop-arg', 'flags', 'gf_xattrop_flags_t'), - ('fop-arg', 'dict', 'dict_t *'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'loc', 'loc_t *', 'loc'), + ('fop-arg', 'flags', 'gf_xattrop_flags_t', 'optype'), + ('fop-arg', 'dict', 'dict_t *', 'xattr'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'dict', 'dict_t *'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'inode-op'), ) ops['fxattrop'] = ( - ('fop-arg', 'fd', 'fd_t *'), - ('fop-arg', 'flags', 'gf_xattrop_flags_t'), - ('fop-arg', 'dict', 'dict_t *'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'fd', 'fd_t *', 'fd'), + ('fop-arg', 'flags', 'gf_xattrop_flags_t', 'optype'), + ('fop-arg', 'dict', 'dict_t *', 'xattr'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'dict', 'dict_t *'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'fd-op'), ) ops['removexattr'] = ( - ('fop-arg', 'loc', 'loc_t *'), - ('fop-arg', 'name', 'const char *'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'loc', 'loc_t *', 'loc'), + ('fop-arg', 'name', 'const char *', 'name'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'inode-op'), ) ops['fremovexattr'] = ( - 
('fop-arg', 'fd', 'fd_t *'), - ('fop-arg', 'name', 'const char *'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'fd', 'fd_t *', 'fd'), + ('fop-arg', 'name', 'const char *', 'name'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'fd-op'), ) ops['lk'] = ( @@ -341,22 +423,26 @@ ops['readdirp'] = ( ) ops['setattr'] = ( - ('fop-arg', 'loc', 'loc_t *'), - ('fop-arg', 'stbuf', 'struct iatt *'), - ('fop-arg', 'valid', 'int32_t'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'loc', 'loc_t *', 'loc'), + ('fop-arg', 'stbuf', 'struct iatt *', 'stat'), + ('fop-arg', 'valid', 'int32_t', 'valid'), + ('extra', 'preop', 'struct iatt', '&preop'), + ('extra', 'postop', 'struct iatt', '&postop'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'statpre', 'struct iatt *'), ('cbk-arg', 'statpost', 'struct iatt *'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'inode-op'), ) ops['truncate'] = ( - ('fop-arg', 'loc', 'loc_t *'), - ('fop-arg', 'offset', 'off_t'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'loc', 'loc_t *', 'loc'), + ('fop-arg', 'offset', 'off_t', 'offset'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'prebuf', 'struct iatt *'), ('cbk-arg', 'postbuf', 'struct iatt *'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'inode-op'), ) ops['stat'] = ( @@ -378,45 +464,51 @@ ops['lookup'] = ( ) ops['fsetattr'] = ( - ('fop-arg', 'fd', 'fd_t *'), - ('fop-arg', 'stbuf', 'struct iatt *'), - ('fop-arg', 'valid', 'int32_t'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'fd', 'fd_t *', 'fd'), + ('fop-arg', 'stbuf', 'struct iatt *', 'stat'), + ('fop-arg', 'valid', 'int32_t', 'valid'), + ('extra', 'preop', 'struct iatt', '&preop'), + ('extra', 'postop', 'struct iatt', '&postop'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'statpre', 'struct iatt *'), ('cbk-arg', 'statpost', 'struct iatt *'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'fd-op'), ) ops['fallocate'] = ( - 
('fop-arg', 'fd', 'fd_t *'), - ('fop-arg', 'keep_size', 'int32_t'), - ('fop-arg', 'offset', 'off_t'), - ('fop-arg', 'len', 'size_t'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'fd', 'fd_t *', 'fd'), + ('fop-arg', 'keep_size', 'int32_t', 'mode'), + ('fop-arg', 'offset', 'off_t', 'offset'), + ('fop-arg', 'len', 'size_t', 'size'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'pre', 'struct iatt *'), ('cbk-arg', 'post', 'struct iatt *'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'fd-op'), ) ops['discard'] = ( - ('fop-arg', 'fd', 'fd_t *'), - ('fop-arg', 'offset', 'off_t'), - ('fop-arg', 'len', 'size_t'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'fd', 'fd_t *', 'fd'), + ('fop-arg', 'offset', 'off_t', 'offset'), + ('fop-arg', 'len', 'size_t', 'size'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'pre', 'struct iatt *'), ('cbk-arg', 'post', 'struct iatt *'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'fd-op'), ) ops['zerofill'] = ( - ('fop-arg', 'fd', 'fd_t *'), - ('fop-arg', 'offset', 'off_t'), + ('fop-arg', 'fd', 'fd_t *', 'fd'), + ('fop-arg', 'offset', 'off_t', 'offset'), # As e.g. fallocate/discard (above) "len" should really be a size_t. - ('fop-arg', 'len', 'off_t'), - ('fop-arg', 'xdata', 'dict_t *'), + ('fop-arg', 'len', 'off_t', 'size'), + ('fop-arg', 'xdata', 'dict_t *', 'xdata'), ('cbk-arg', 'pre', 'struct iatt *'), ('cbk-arg', 'post', 'struct iatt *'), ('cbk-arg', 'xdata', 'dict_t *'), + ('journal', 'fd-op'), ) ops['ipc'] = ( @@ -460,6 +552,11 @@ def get_subs (names, types): def generate (tmpl, name, subs): text = tmpl.replace("@NAME@",name) + if name == "writev": + # More spurious inconsistency. + text = text.replace("@UPNAME@","WRITE") + else: + text = text.replace("@UPNAME@",name.upper()) for old, new in subs[name].iteritems(): text = text.replace(old,new) # TBD: reindent/reformat the result for maximum readability. 
diff --git a/libglusterfs/src/iobuf.c b/libglusterfs/src/iobuf.c index a4d36691cd0..d1eb0acaf5e 100644 --- a/libglusterfs/src/iobuf.c +++ b/libglusterfs/src/iobuf.c @@ -1014,7 +1014,7 @@ int iobref_merge (struct iobref *to, struct iobref *from) { int i = 0; - int ret = -1; + int ret = 0; struct iobuf *iobuf = NULL; GF_VALIDATE_OR_GOTO ("iobuf", to, out); diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c index eb0c1cf983a..d412b4d656d 100644 --- a/libglusterfs/src/syscall.c +++ b/libglusterfs/src/syscall.c @@ -588,7 +588,7 @@ sys_fallocate(int fd, int mode, off_t offset, off_t len) return posix_fallocate(fd, offset, len); #endif -#if defined(F_ALLOCATECONFIG) && defined(GF_DARWIN_HOST_OS) +#if defined(F_ALLOCATECONTIG) && defined(GF_DARWIN_HOST_OS) /* C conversion from C++ implementation for OSX by Mozilla Foundation */ if (mode) { /* keep size not supported */ diff --git a/tests/features/fdl-overflow.t b/tests/features/fdl-overflow.t new file mode 100644 index 00000000000..d7633a7ca7d --- /dev/null +++ b/tests/features/fdl-overflow.t @@ -0,0 +1,74 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. $(dirname $0)/../volume.rc + +log_base=$($CLI --print-logdir) +log_id=${B0}/${V0}-0 +log_id=${log_id:1} # Remove initial slash +log_id=${log_id//\//-} # Replace remaining slashes with dashes + +_check_sizes () { + local n=0 + local sz + local total_sz=0 + + # We don't care about the sizes of the meta files. That would be + # embedding too much of the implementation into the test. + n=$(ls ${log_base}/${log_id}-meta-*.jnl | wc -l) + [ $n = 2 ] || return 1 + + # We *do* care about the sizes of the data files, which should exactly + # reflect the amount of data written via dd. 
+ n=0 + while read sz name; do + G_LOG "found journal ${name} size ${sz}MB" + n=$((n+1)) + total_sz=$((total_sz+sz)) + done < <(du -sm ${log_base}/${log_id}-data-*.jnl) + [ $n = 2 ] || return 1 + # On our CentOS and NetBSD regression-test systems, but not on my Fedora + # development system, each file ends up being slightly larger than its + # data size because of metadata, and 'du' rounds that up to a full extra + # megabyte. We'll allow either result, because what we're really + # looking for is a complete failure to roll over from one file to + # another at the appropriate size. + [ $total_sz = 20 -o $total_sz = $((n+20)) ] || return 1 + + return 0 +} + +check_sizes () { + set -x + _check_sizes + ret=$? + set +x + return ret +} + +if [ x"$OSTYPE" = x"NetBSD" ]; then + CREAT_OFLAG="creat," +else + CREAT_OFLAG="" +fi + +TEST rm -f ${log_base}/${log_id}-*.log +TEST glusterd +TEST pidof glusterd + +# Get a simple volume set up and mounted with FDL active. +TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0 +TEST $CLI volume set $V0 changelog.changelog off +TEST $CLI volume set $V0 features.fdl on +TEST $CLI volume start $V0 +TEST $GFS -s $H0 --volfile-id $V0 $M0 + +# Generate some I/O and unmount/stop so we can see log sizes. +TEST dd if=/dev/zero of=$M0/twentyMB bs=1048576 count=20 \ + oflag=${CREAT_OFLAG}sync +TEST umount $M0 +TEST $CLI volume stop $V0 + +TEST _check_sizes + +cleanup diff --git a/tests/features/fdl.t b/tests/features/fdl.t new file mode 100644 index 00000000000..34d6d78228a --- /dev/null +++ b/tests/features/fdl.t @@ -0,0 +1,52 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. 
$(dirname $0)/../volume.rc + +log_base=$($CLI --print-logdir) +log_id=${B0}/${V0}-0 +log_id=${log_id:1} # Remove initial slash +log_id=${log_id//\//-} # Replace remaining slashes with dashes +FDL_META_FILE=${log_base}/${log_id}-meta-1.jnl +FDL_DATA_FILE=${log_base}/${log_id}-data-1.jnl + +check_logfile() { + [ $(gf_logdump $FDL_META_FILE $FDL_DATA_FILE | grep $1 | wc -l) -ge $2 ] +} + +if [ x"$OSTYPE" = x"NetBSD" ]; then + CREAT_OFLAG="creat," +else + CREAT_OFLAG="" +fi + +TEST rm -f $FDL_META_FILE $FDL_DATA_FILE +TEST glusterd +TEST pidof glusterd + +# Get a simple volume set up and mounted with FDL active. +TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0 +TEST $CLI volume set $V0 changelog.changelog off +TEST $CLI volume set $V0 features.fdl on +TEST $CLI volume start $V0 +TEST $GFS -s $H0 --volfile-id $V0 $M0 + +# Generate some I/O and unmount. +TEST mkdir -p $M0/abc/def +TEST dd if=/dev/zero of=$M0/abc/def/ghi bs=128 count=2 \ + oflag=${CREAT_OFLAG}sync +TEST chmod 314 $M0/abc/def/ghi +TEST rm -rf $M0/abc +TEST umount $M0 + +# Check that gf_logdump works, and shows the ops we just issued. There will be +# more SETATTR ops than the one corresponding to our chmod, because some are +# issued internally. We have to guess a bit about where the log will be. +TEST check_logfile GF_FOP_MKDIR 2 +TEST check_logfile GF_FOP_CREATE 1 +TEST check_logfile GF_FOP_WRITE 2 +TEST check_logfile GF_FOP_SETATTR 1 +TEST check_logfile GF_FOP_UNLINK 1 +TEST check_logfile GF_FOP_RMDIR 2 + +cleanup diff --git a/tests/features/recon.t b/tests/features/recon.t new file mode 100644 index 00000000000..7dda2a680e8 --- /dev/null +++ b/tests/features/recon.t @@ -0,0 +1,62 @@ +#!/bin/bash + +. $(dirname $0)/../include.rc +. 
$(dirname $0)/../volume.rc + +log_base=$($CLI --print-logdir) +log_id=${B0}/${V0}-0 +log_id=${log_id:1} # Remove initial slash +log_id=${log_id//\//-} # Replace remaining slashes with dashes +FDL_META_FILE=${log_base}/${log_id}-meta-1.jnl +FDL_DATA_FILE=${log_base}/${log_id}-data-1.jnl + +tmpdir=$(mktemp -d -t ${0##*/}.XXXXXX) +trap "rm -rf $tmpdir" EXIT + +write_file () { + echo "peekaboo" > $1 +} + +TEST rm -f $FDL_META_FILE $FDL_DATA_FILE +TEST glusterd +TEST pidof glusterd + +# Get a simple volume set up and mounted with FDL active. +TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0 +TEST $CLI volume set $V0 features.fdl on +TEST $CLI volume start $V0 +TEST $GFS -s $H0 --volfile-id $V0 $M0 + +# Generate some I/O and then copy off the journal files for later. +TEST mkdir -p $M0/abc/def +TEST write_file $M0/abc/def/ghi +#EST chmod 314 $M0/abc/def/ghi +cp ${FDL_META_FILE} ${FDL_DATA_FILE} ${tmpdir} + +# Get back to an empty state and unmount. +TEST rm -rf $M0/abc +TEST umount $M0 + +# Make sure we really are in an empty state. Otherwise the tests below could +# pass just because we never cleaned up in the first place. +TEST [ ! -d ${B0}/${V0}-0/abc ] + +# Create a stub volfile. 
+vol_file=${GLUSTERD_WORKDIR}/vols/${V0}/${V0}.${H0}.${log_id}.vol +vol_id_line=$(grep volume-id ${vol_file}) +cat > ${tmpdir}/recon.vol << EOF +volume recon-posix + type storage/posix + option directory ${B0}/${V0}-0 +${vol_id_line} +end-volume +EOF + +TEST gf_recon ${tmpdir}/recon.vol ${tmpdir}/$(basename ${FDL_META_FILE}) \ + ${tmpdir}/$(basename ${FDL_DATA_FILE}) + +TEST [ -d ${B0}/${V0}-0/abc/def ] +EXPECT "peekaboo" cat ${B0}/${V0}-0/abc/def/ghi +# TBD: test permissions, xattrs + +cleanup diff --git a/tests/include.rc b/tests/include.rc index 139bc03ac8c..21a69465797 100644 --- a/tests/include.rc +++ b/tests/include.rc @@ -136,7 +136,7 @@ function G_LOG() return fi local g_log_string; - g_log_string="++++++++++ G_LOG:$0: TEST: $1 $@ ++++++++++" + g_log_string="++++++++++ G_LOG:$0: TEST: $@ ++++++++++" g_log_string="`date -u +["%F %T.%6N"]`:$g_log_string" local g_log_filename for g_log_filename in `find $g_log_logdir/ -type f -name \*.log`; @@ -541,10 +541,10 @@ function cleanup() fi >&2 # tar logs at the start and end of every test - if [ -n $LOGDIR ] + if [ -n "$LOGDIR" -a -z "$STOP_WASTING_SPACE" ] then tarname=$(basename $0 .t) - tar -rvf ${LOGDIR}/${tarname}.tar ${LOGDIR}/* \ + tar -rf ${LOGDIR}/${tarname}.tar ${LOGDIR}/* \ --exclude="*.tar" \ && \ find $LOGDIR/* -maxdepth 0 -name '*.tar' -prune \ diff --git a/xlators/experimental/Makefile.am b/xlators/experimental/Makefile.am index 06f04a193c8..a31512203f6 100644 --- a/xlators/experimental/Makefile.am +++ b/xlators/experimental/Makefile.am @@ -1,3 +1,3 @@ -SUBDIRS = nsr-client nsr-server +SUBDIRS = nsr-client nsr-server fdl CLEANFILES = diff --git a/xlators/experimental/fdl/Makefile.am b/xlators/experimental/fdl/Makefile.am new file mode 100644 index 00000000000..a985f42a877 --- /dev/null +++ b/xlators/experimental/fdl/Makefile.am @@ -0,0 +1,3 @@ +SUBDIRS = src + +CLEANFILES = diff --git a/xlators/experimental/fdl/src/Makefile.am b/xlators/experimental/fdl/src/Makefile.am new file mode 100644 index 
00000000000..a05fc797b0a --- /dev/null +++ b/xlators/experimental/fdl/src/Makefile.am @@ -0,0 +1,42 @@ +xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental +xlator_LTLIBRARIES = fdl.la + +noinst_HEADERS = jnl-types.h + +nodist_fdl_la_SOURCES = fdl.c +fdl_la_LDFLAGS = -module -avoid-version +fdl_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la + +sbin_PROGRAMS = gf_logdump gf_recon +gf_logdump_SOURCES = logdump.c +nodist_gf_logdump_SOURCES = libfdl.c +gf_logdump_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\ + $(top_builddir)/api/src/libgfapi.la + +# Eventually recon(ciliation) code will move elsewhere, but for now it's +# easier to have it next to the similar logdump code. +gf_recon_SOURCES = recon.c +nodist_gf_recon_SOURCES = librecon.c +gf_recon_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\ + $(top_builddir)/api/src/libgfapi.la + +AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \ + -I$(top_srcdir)/api/src -fPIC \ + -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \ + -DDATADIR=\"$(localstatedir)\" + +AM_CFLAGS = -Wall $(GF_CFLAGS) + +noinst_PYTHON = gen_fdl.py gen_dumper.py gen_recon.py +EXTRA_DIST = fdl-tmpl.c dump-tmpl.c recon-tmpl.c + +CLEANFILES = $(nodist_fdl_la_SOURCES) $(nodist_gf_logdump_SOURCES) + +fdl.c: fdl-tmpl.c gen_fdl.py + $(PYTHON) $(srcdir)/gen_fdl.py $(srcdir)/fdl-tmpl.c > $@ + +libfdl.c: dump-tmpl.c gen_dumper.py + $(PYTHON) $(srcdir)/gen_dumper.py $(srcdir)/dump-tmpl.c > $@ + +librecon.c: recon-tmpl.c gen_recon.py + $(PYTHON) $(srcdir)/gen_recon.py $(srcdir)/recon-tmpl.c > $@ diff --git a/xlators/experimental/fdl/src/dump-tmpl.c b/xlators/experimental/fdl/src/dump-tmpl.c new file mode 100644 index 00000000000..cac1071a9c1 --- /dev/null +++ b/xlators/experimental/fdl/src/dump-tmpl.c @@ -0,0 +1,156 @@ +#pragma fragment PROLOG +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include "glfs.h" +#include "iatt.h" +#include "xlator.h" +#include "jnl-types.h" 
+ +#pragma fragment DICT + { + int key_len, data_len; + char *key_ptr; + printf ("@ARGNAME@ = dict {\n"); + for (;;) { + key_len = *((int *)new_meta); + new_meta += sizeof(int); + if (!key_len) { + break; + } + key_ptr = new_meta; + new_meta += key_len; + data_len = *((int *)new_meta); + new_meta += sizeof(int) + data_len; + printf (" %s = <%d bytes>\n", key_ptr, data_len); + } + printf ("}\n"); + } + +#pragma fragment DOUBLE + printf ("@ARGNAME@ = @FORMAT@\n", *((uint64_t *)new_meta), + *((uint64_t *)new_meta)); + new_meta += sizeof(uint64_t); + +#pragma fragment GFID + printf ("@ARGNAME@ = <gfid %s>\n", uuid_utoa(*((uuid_t *)new_meta))); + new_meta += 16; + +#pragma fragment INTEGER + printf ("@ARGNAME@ = @FORMAT@\n", *((uint32_t *)new_meta), + *((uint32_t *)new_meta)); + new_meta += sizeof(uint32_t); + +#pragma fragment LOC + printf ("@ARGNAME@ = loc {\n"); + printf (" gfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); + new_meta += 16; + printf (" pargfid = %s\n", uuid_utoa(*((uuid_t *)new_meta))); + new_meta += 16; + if (*(new_meta++)) { + printf (" name = %s\n", new_meta); + new_meta += (strlen(new_meta) + 1); + } + printf ("}\n"); + +#pragma fragment STRING + if (*(new_meta++)) { + printf ("@ARGNAME@ = %s\n", new_meta); + new_meta += (strlen(new_meta) + 1); + } + +#pragma fragment VECTOR + { + size_t len = *((size_t *)new_meta); + new_meta += sizeof(len); + printf ("@ARGNAME@ = <%zu bytes>\n", len); + new_data += len; + } + +#pragma fragment IATT + { + ia_prot_t *myprot = ((ia_prot_t *)new_meta); + printf ("@ARGNAME@ = iatt {\n"); + printf (" ia_prot = %c%c%c", + myprot->suid ? 'S' : '-', + myprot->sgid ? 'S' : '-', + myprot->sticky ? 'T' : '-'); + printf ("%c%c%c", + myprot->owner.read ? 'r' : '-', + myprot->owner.write ? 'w' : '-', + myprot->owner.exec ? 'x' : '-'); + printf ("%c%c%c", + myprot->group.read ? 'r' : '-', + myprot->group.write ? 'w' : '-', + myprot->group.exec ? 'x' : '-'); + printf ("%c%c%c\n", + myprot->other.read ? 
'r' : '-', + myprot->other.write ? 'w' : '-', + myprot->other.exec ? 'x' : '-'); + new_meta += sizeof(ia_prot_t); + uint32_t *myints = (uint32_t *)new_meta; + printf (" ia_uid = %u\n", myints[0]); + printf (" ia_gid = %u\n", myints[1]); + printf (" ia_atime = %u.%09u\n", myints[2], myints[3]); + printf (" ia_mtime = %u.%09u\n", myints[4], myints[5]); + new_meta += sizeof(*myints) * 6; + } + +#pragma fragment FOP +void +fdl_dump_@NAME@ (char **old_meta, char **old_data) +{ + char *new_meta = *old_meta; + char *new_data = *old_data; + + /* TBD: word size/endianness */ +@FUNCTION_BODY@ + + *old_meta = new_meta; + *old_data = new_data; +} + +#pragma fragment CASE + case GF_FOP_@UPNAME@: + printf ("=== GF_FOP_@UPNAME@\n"); + fdl_dump_@NAME@ (&new_meta, &new_data); + break; + +#pragma fragment EPILOG +int +fdl_dump (char **old_meta, char **old_data) +{ + char *new_meta = *old_meta; + char *new_data = *old_data; + static glfs_t *fs = NULL; + int recognized = 1; + event_header_t *eh; + + /* + * We don't really call anything else in GFAPI, but this is the most + * convenient way to satisfy all of the spurious dependencies on how it + * or glusterfsd initialize (e.g. setting up THIS). + */ + if (!fs) { + fs = glfs_new ("dummy"); + } + + eh = (event_header_t *)new_meta; + new_meta += sizeof (*eh); + + /* TBD: check event_type instead of assuming NEW_REQUEST */ + + switch (eh->fop_type) { +@SWITCH_BODY@ + + default: + printf ("unknown fop %u\n", eh->fop_type); + recognized = 0; + } + + *old_meta = new_meta; + *old_data = new_data; + return recognized; +} diff --git a/xlators/experimental/fdl/src/fdl-tmpl.c b/xlators/experimental/fdl/src/fdl-tmpl.c new file mode 100644 index 00000000000..8fcc6a8d6ff --- /dev/null +++ b/xlators/experimental/fdl/src/fdl-tmpl.c @@ -0,0 +1,506 @@ +/* + Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com> + This file is part of GlusterFS. 
+ + This file is licensed to you under your choice of the GNU Lesser + General Public License, version 3 or any later version (LGPLv3 or + later), or the GNU General Public License, version 2 (GPLv2), in all + cases as published by the Free Software Foundation. +*/ + +#ifndef _CONFIG_H +#define _CONFIG_H +#include "config.h" +#endif + +#include <fcntl.h> +#include <unistd.h> +#include <sys/mman.h> +#include "call-stub.h" +#include "iatt.h" +#include "defaults.h" +#include "syscall.h" +#include "xlator.h" +#include "jnl-types.h" + +/* TBD: make tunable */ +#define META_FILE_SIZE (1 << 20) +#define DATA_FILE_SIZE (1 << 24) + +enum gf_fdl { + gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1, + gf_fdl_mt_end +}; + +typedef struct { + char *type; + off_t size; + char *path; + int fd; + void * ptr; + off_t max_offset; +} log_obj_t; + +typedef struct { + struct list_head reqs; + pthread_mutex_t req_lock; + pthread_cond_t req_cond; + char *log_dir; + pthread_t worker; + gf_boolean_t should_stop; + gf_boolean_t change_term; + log_obj_t meta_log; + log_obj_t data_log; + int term; + int first_term; +} fdl_private_t; + +void +fdl_enqueue (xlator_t *this, call_stub_t *stub) +{ + fdl_private_t *priv = this->private; + + pthread_mutex_lock (&priv->req_lock); + list_add_tail (&stub->list, &priv->reqs); + pthread_mutex_unlock (&priv->req_lock); + + pthread_cond_signal (&priv->req_cond); +} + +#pragma generate + +char * +fdl_open_term_log (xlator_t *this, log_obj_t *obj, int term) +{ + fdl_private_t *priv = this->private; + int ret; + char * ptr = NULL; + + /* + * Use .jnl instead of .log so that we don't get test info (mistakenly) + * appended to our journal files. 
+ */ + if (this->ctx->cmd_args.log_ident) { + ret = gf_asprintf (&obj->path, "%s/%s-%s-%d.jnl", + priv->log_dir, this->ctx->cmd_args.log_ident, + obj->type, term); + } + else { + ret = gf_asprintf (&obj->path, "%s/fubar-%s-%d.jnl", + priv->log_dir, obj->type, term); + } + if ((ret <= 0) || !obj->path) { + gf_log (this->name, GF_LOG_ERROR, + "failed to construct log-file path"); + goto err; + } + + gf_log (this->name, GF_LOG_INFO, "opening %s (size %ld)", + obj->path, obj->size); + + obj->fd = open (obj->path, O_RDWR|O_CREAT|O_TRUNC, 0666); + if (obj->fd < 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to open log file (%s)", strerror(errno)); + goto err; + } + +#if !defined(GF_BSD_HOST_OS) + /* + * NetBSD can just go die in a fire. Even though it claims to support + * fallocate/posix_fallocate they don't actually *do* anything so the + * file size remains zero. Then mmap succeeds anyway, but any access + * to the mmap'ed region will segfault. It would be acceptable for + * fallocate to do what it says, for mmap to fail, or for access to + * extend the file. NetBSD managed to hit the trifecta of Getting + * Everything Wrong, and debugging in that environment to get this far + * has already been painful enough (systems I worked on in 1990 were + * better that way). We'll fall through to the lseek/write method, and + * performance will be worse, and TOO BAD. + */ + if (sys_fallocate(obj->fd,0,0,obj->size) < 0) +#endif + { + gf_log (this->name, GF_LOG_WARNING, + "failed to fallocate space for log file"); + /* Have to do this the ugly page-faulty way. 
*/ + (void) sys_lseek (obj->fd, obj->size-1, SEEK_SET); + (void) sys_write (obj->fd, "", 1); + } + + ptr = mmap (NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0); + if (ptr == MAP_FAILED) { + gf_log (this->name, GF_LOG_ERROR, "failed to mmap log (%s)", + strerror(errno)); + goto err; + } + + obj->ptr = ptr; + obj->max_offset = 0; + return ptr; + +err: + if (obj->fd >= 0) { + sys_close (obj->fd); + obj->fd = (-1); + } + if (obj->path) { + GF_FREE (obj->path); + obj->path = NULL; + } + return NULL; /* ptr may be MAP_FAILED (non-NULL) here; callers test for NULL */ +} + +void +fdl_close_term_log (xlator_t *this, log_obj_t *obj) +{ + fdl_private_t *priv = this->private; + + if (obj->ptr) { + (void) munmap (obj->ptr, obj->size); + obj->ptr = NULL; + } + + if (obj->fd >= 0) { + gf_log (this->name, GF_LOG_INFO, + "truncating term %d %s journal to %ld", + priv->term, obj->type, obj->max_offset); + if (sys_ftruncate(obj->fd,obj->max_offset) < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to truncate journal (%s)", + strerror(errno)); + } + sys_close (obj->fd); + obj->fd = (-1); + } + + if (obj->path) { + GF_FREE (obj->path); + obj->path = NULL; + } +} + +gf_boolean_t +fdl_change_term (xlator_t *this, char **meta_ptr, char **data_ptr) +{ + fdl_private_t *priv = this->private; + + fdl_close_term_log (this, &priv->meta_log); + fdl_close_term_log (this, &priv->data_log); + + ++(priv->term); + + *meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term); + if (!*meta_ptr) { + return _gf_false; + } + + *data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term); + if (!*data_ptr) { + return _gf_false; + } + + return _gf_true; +} + +void * +fdl_worker (void *arg) +{ + xlator_t *this = arg; + fdl_private_t *priv = this->private; + call_stub_t *stub; + char * meta_ptr = NULL; + off_t *meta_offset = &priv->meta_log.max_offset; + char * data_ptr = NULL; + off_t *data_offset = &priv->data_log.max_offset; + unsigned long base_as_ul; + void * msync_ptr; + size_t msync_len; + gf_boolean_t recycle; + void *err_label = 
&&err_unlocked; + + priv->meta_log.type = "meta"; + priv->meta_log.size = META_FILE_SIZE; + priv->meta_log.path = NULL; + priv->meta_log.fd = (-1); + priv->meta_log.ptr = NULL; + + priv->data_log.type = "data"; + priv->data_log.size = DATA_FILE_SIZE; + priv->data_log.path = NULL; + priv->data_log.fd = (-1); + priv->data_log.ptr = NULL; + + /* TBD: initial term should come from persistent storage (e.g. etcd) */ + priv->first_term = ++(priv->term); + meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term); + if (!meta_ptr) { + goto *err_label; + } + data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term); + if (!data_ptr) { + fdl_close_term_log (this, &priv->meta_log); + goto *err_label; + } + + for (;;) { + pthread_mutex_lock (&priv->req_lock); + err_label = &&err_locked; + while (list_empty(&priv->reqs)) { + pthread_cond_wait (&priv->req_cond, &priv->req_lock); + if (priv->should_stop) { + goto *err_label; + } + if (priv->change_term) { + if (!fdl_change_term(this, &meta_ptr, + &data_ptr)) { + goto *err_label; + } + priv->change_term = _gf_false; + continue; + } + } + stub = list_entry (priv->reqs.next, call_stub_t, list); + list_del_init (&stub->list); + pthread_mutex_unlock (&priv->req_lock); + err_label = &&err_unlocked; + /* + * TBD: batch requests + * + * What we should do here is gather up *all* of the requests + * that have accumulated since we were last at this point, + * blast them all out in one big writev, and then dispatch them + * all before coming back for more. That maximizes throughput, + * at some cost to latency (due to queuing effects at the log + * stage). Note that we're likely to be above io-threads, so + * the dispatch itself will be parallelized (at further cost to + * latency). For now, we just do the simplest thing and handle + * one request all the way through before fetching the next. + * + * So, why mmap/msync instead of writev/fdatasync? Because it's + * faster. Much faster. 
So much faster that I half-suspect + * cheating, but it's more convenient for now than having to + * ensure that everything's page-aligned for O_DIRECT (the only + * alternative that still might avoid ridiculous levels of + * local-FS overhead). + * + * TBD: check that msync really does get our data to disk. + */ + gf_log (this->name, GF_LOG_DEBUG, + "logging %u+%u bytes for op %d", + stub->jnl_meta_len, stub->jnl_data_len, stub->fop); + recycle = _gf_false; + if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) { + recycle = _gf_true; + } + if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) { + recycle = _gf_true; + } + if (recycle && !fdl_change_term(this,&meta_ptr,&data_ptr)) { + goto *err_label; + } + meta_ptr = priv->meta_log.ptr; + data_ptr = priv->data_log.ptr; + gf_log (this->name, GF_LOG_DEBUG, "serializing to %p/%p", + meta_ptr + *meta_offset, data_ptr + *data_offset); + stub->serialize (stub, meta_ptr + *meta_offset, + data_ptr + *data_offset); + if (stub->jnl_meta_len > 0) { + base_as_ul = (unsigned long) (meta_ptr + *meta_offset); + msync_ptr = (void *) (base_as_ul & ~0x0fff); + msync_len = (size_t) (base_as_ul & 0x0fff); + if (msync (msync_ptr, msync_len+stub->jnl_meta_len, + MS_SYNC) < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to log request meta (%s)", + strerror(errno)); + } + *meta_offset += stub->jnl_meta_len; + } + if (stub->jnl_data_len > 0) { + base_as_ul = (unsigned long) (data_ptr + *data_offset); + msync_ptr = (void *) (base_as_ul & ~0x0fff); + msync_len = (size_t) (base_as_ul & 0x0fff); + if (msync (msync_ptr, msync_len+stub->jnl_data_len, + MS_SYNC) < 0) { + gf_log (this->name, GF_LOG_WARNING, + "failed to log request data (%s)", + strerror(errno)); + } + *data_offset += stub->jnl_data_len; + } + call_resume (stub); + } + +err_locked: + pthread_mutex_unlock (&priv->req_lock); +err_unlocked: + fdl_close_term_log (this, &priv->meta_log); + fdl_close_term_log (this, &priv->data_log); + return NULL; +} + 
+int32_t +fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata) +{ + fdl_private_t *priv = this->private; + dict_t *tdict; + int32_t gt_err = EIO; + + switch (op) { + + case FDL_IPC_CHANGE_TERM: + gf_log (this->name, GF_LOG_INFO, "got CHANGE_TERM op"); + priv->change_term = _gf_true; + pthread_cond_signal (&priv->req_cond); + STACK_UNWIND_STRICT (ipc, frame, 0, 0, NULL); + break; + + case FDL_IPC_GET_TERMS: + gf_log (this->name, GF_LOG_INFO, "got GET_TERMS op"); + tdict = dict_new (); + if (!tdict) { + gt_err = ENOMEM; + goto gt_done; + } + if (dict_set_int32(tdict,"first",priv->first_term) != 0) { + goto gt_done; + } + if (dict_set_int32(tdict,"last",priv->term) != 0) { + goto gt_done; + } + gt_err = 0; + gt_done: + if (gt_err) { + STACK_UNWIND_STRICT (ipc, frame, -1, gt_err, NULL); + } else { + STACK_UNWIND_STRICT (ipc, frame, 0, 0, tdict); + } + if (tdict) { + dict_unref (tdict); + } + break; + + default: + STACK_WIND_TAIL (frame, + FIRST_CHILD(this), + FIRST_CHILD(this)->fops->ipc, + op, xdata); + } + + return 0; +} + +int +fdl_init (xlator_t *this) +{ + fdl_private_t *priv = NULL; + + priv = GF_CALLOC (1, sizeof (*priv), gf_fdl_mt_fdl_private_t); + if (!priv) { + gf_log (this->name, GF_LOG_ERROR, + "failed to allocate fdl_private"); + goto err; + } + + INIT_LIST_HEAD (&priv->reqs); + if (pthread_mutex_init (&priv->req_lock, NULL) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to initialize req_lock"); + goto err; + } + if (pthread_cond_init (&priv->req_cond, NULL) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to initialize req_cond"); + goto err; + } + + GF_OPTION_INIT ("log-path", priv->log_dir, path, err); + + /* + * Publish priv before starting the worker: fdl_worker reads + * this->private as the first thing it does. + */ + this->private = priv; + + if (pthread_create(&priv->worker,NULL,fdl_worker,this) != 0) { + gf_log (this->name, GF_LOG_ERROR, + "failed to start fdl_worker"); + /* priv is freed below; don't leave it published. */ + this->private = NULL; + goto err; + } + + /* + * The rest of the fop table is automatically generated, so this is a + * bit cleaner than messing with the generation to add a hand-written + * exception. 
+ */ + this->fops->ipc = fdl_ipc; + + return 0; + +err: + if (priv) { + GF_FREE(priv); + } + return -1; +} + +void +fdl_fini (xlator_t *this) +{ + fdl_private_t *priv = this->private; + + if (priv) { + priv->should_stop = _gf_true; + pthread_cond_signal (&priv->req_cond); + pthread_join (priv->worker, NULL); + GF_FREE(priv); + } +} + +int +fdl_reconfigure (xlator_t *this, dict_t *options) +{ + fdl_private_t *priv = this->private; + + /* Key must match the "log-path" entry in options[] below. */ + GF_OPTION_RECONF ("log-path", priv->log_dir, options, path, out); + /* TBD: react if it changed */ + +out: + return 0; +} + +int32_t +mem_acct_init (xlator_t *this) +{ + int ret = -1; + + GF_VALIDATE_OR_GOTO ("fdl", this, out); + + ret = xlator_mem_acct_init (this, gf_fdl_mt_end + 1); + + if (ret != 0) { + gf_log (this->name, GF_LOG_ERROR, "Memory accounting init " + "failed"); + return ret; + } +out: + return ret; +} + +class_methods_t class_methods = { + .init = fdl_init, + .fini = fdl_fini, + .reconfigure = fdl_reconfigure, + .notify = default_notify, +}; + +struct volume_options options[] = { + { .key = {"log-path"}, + .type = GF_OPTION_TYPE_PATH, + .default_value = DEFAULT_LOG_FILE_DIRECTORY, + .description = "Directory for FDL files." + }, + { .key = {NULL} }, +}; + +struct xlator_cbks cbks = { + .release = default_release, + .releasedir = default_releasedir, + .forget = default_forget, +}; diff --git a/xlators/experimental/fdl/src/gen_dumper.py b/xlators/experimental/fdl/src/gen_dumper.py new file mode 100755 index 00000000000..42db55d2cb3 --- /dev/null +++ b/xlators/experimental/fdl/src/gen_dumper.py @@ -0,0 +1,116 @@ +#!/usr/bin/python + +import os +import re +import sys + +curdir = os.path.dirname (sys.argv[0]) +gendir = os.path.join (curdir, '../../../../libglusterfs/src') +sys.path.append (gendir) +from generator import ops, fop_subs, cbk_subs, generate + +# See the big header comment at the start of gen_fdl.py to see how the stages +# fit together. 
The big difference here is that *all* of the C code is in the +# template file as labelled fragments, instead of as Python strings. That +# makes it much easier to edit in one place, with proper syntax highlighting +# and indentation. +# +# Stage 1 uses type-specific fragments to generate FUNCTION_BODY, instead of +# LEN_*_TEMPLATE and SERLZ_*_TEMPLATE to generate LEN_CODE and SER_CODE. +# +# Stage 2 uses the FOP and CASE fragments instead of RECON_TEMPLATE and +# FOP_TEMPLATE. The expanded FOP code (including FUNCTION_BODY substitution +# in the middle of each function) is emitted immediately; the expanded CASE +# code is saved for the next stage. +# +# Stage 3 uses the PROLOG and EPILOG fragments, with the expanded CASE code +# in the middle of EPILOG, to generate the whole output file. +# +# Another way of looking at it is to consider how the fragments appear in +# the final output: +# +# PROLOG +# FOP (expanded for CREATE) +# FOP before FUNCTION_BODY +# LOC, INTEGER, GFID, etc. (one per arg, by type) +# FOP after FUNCTION_BODY +# FOP (expanded for WRITEV) +# FOP before FUNCTION_BODY +# GFID, VECTOR, etc. 
(one per arg, by type) +# FOP after FUNCTION_BODY +# (more FOPs) +# EPILOG +# EPILOG before CASE +# CASE statements (one per fop) +# EPILOG after CASE + +typemap = { + 'dict_t *': ( "DICT", ""), + 'fd_t *': ( "GFID", ""), + 'dev_t': ( "DOUBLE", "%ld (0x%lx)"), + 'gf_xattrop_flags_t': ( "INTEGER", "%d (0x%x)"), + 'int32_t': ( "INTEGER", "%d (0x%x)"), + 'mode_t': ( "INTEGER", "%d (0x%x)"), + 'off_t': ( "DOUBLE", "%ld (0x%lx)"), + 'size_t': ( "DOUBLE", "%ld (0x%lx)"), + 'uint32_t': ( "INTEGER", "%d (0x%x)"), + 'loc_t *': ( "LOC", ""), + 'const char *': ( "STRING", ""), + 'struct iovec *': ( "VECTOR", ""), + 'struct iatt *': ( "IATT", ""), +} + +def get_special_subs (args): + code = "" + for arg in args: + if (arg[0] != 'fop-arg') or (len(arg) < 4): + continue + recon_type, recon_fmt = typemap[arg[2]] + code += fragments[recon_type].replace("@ARGNAME@",arg[3]) \ + .replace("@FORMAT@",recon_fmt) + return code + +def gen_functions (): + code = "" + for name, value in ops.iteritems(): + if "journal" not in [ x[0] for x in value ]: + continue + fop_subs[name]["@FUNCTION_BODY@"] = get_special_subs(value) + # Print the FOP fragment with @FUNCTION_BODY@ in the middle. + code += generate(fragments["FOP"],name,fop_subs) + return code + +def gen_cases (): + code = "" + for name, value in ops.iteritems(): + if "journal" not in [ x[0] for x in value ]: + continue + # Add the CASE fragment for this fop. 
+ code += generate(fragments["CASE"],name,fop_subs) + return code + +def load_fragments (path="recon-tmpl.c"): + pragma_re = re.compile('pragma fragment (.*)') + cur_symbol = None + cur_value = "" + result = {} + for line in open(path,"r").readlines(): + m = pragma_re.search(line) + if m: + if cur_symbol: + result[cur_symbol] = cur_value + cur_symbol = m.group(1) + cur_value = "" + else: + cur_value += line + if cur_symbol: + result[cur_symbol] = cur_value + return result + +if __name__ == "__main__": + fragments = load_fragments(sys.argv[1]) + print "/* BEGIN GENERATED CODE - DO NOT MODIFY */" + print fragments["PROLOG"] + print gen_functions() + print fragments["EPILOG"].replace("@SWITCH_BODY@",gen_cases()) + print "/* END GENERATED CODE */" diff --git a/xlators/experimental/fdl/src/gen_fdl.py b/xlators/experimental/fdl/src/gen_fdl.py new file mode 100755 index 00000000000..7f6b1aaaeaa --- /dev/null +++ b/xlators/experimental/fdl/src/gen_fdl.py @@ -0,0 +1,328 @@ +#!/usr/bin/python + +import os +import sys + +curdir = os.path.dirname (sys.argv[0]) +gendir = os.path.join (curdir, '../../../../libglusterfs/src') +sys.path.append (gendir) +from generator import ops, fop_subs, cbk_subs, generate + +# Generation occurs in three stages. In this case, it actually makes more +# sense to discuss them in the *opposite* order of that in which they +# actually happen. +# +# Stage 3 is to insert all of the generated code into a file, replacing the +# "#pragma generate" that's already there. The file can thus contain all +# sorts of stuff that's not specific to one fop, either before or after the +# generated code as appropriate. +# +# Stage 2 is to generate all of the code *for a particular fop*, using a +# string-valued template plus a table of substitution values. Most of these +# are built in to the generator itself. However, we also add a couple that +# are specific to this particular translator - LEN_CODE and SER_CODE. 
These +# are per-fop functions to get the length or the contents (respectively) of +# what we'll put in the log. As with stage 3 allowing per-file boilerplate +# before and after generated code, this allows per-fop boilerplate before and +# after generated code. +# +# Stage 1, therefore, is to create the LEN_CODE and SER_CODE substitutions for +# each fop, and put them in the same table where e.g. NAME and SHORT_ARGS +# already are. We do this by looking at the fop-description table in the +# generator module, then doing our own template substitution to plug each +# specific argument name into another string-valued template. +# +# So, what does this leave us with in terms of variables and files? +# +# For stage 1, we have a series of LEN_*_TEMPLATE and SERLZ_*_TEMPLATE +# strings, which are used to generate the length and serialization code for +# each argument type. +# +# For stage 2, we have a bunch of *_TEMPLATE strings (no LEN_ or SERLZ_ +# prefix), which are used (along with the output from stage 1) to generate +# whole functions. +# +# For stage 3, we have a whole separate file (fdl-tmpl.c) into which we insert +# the collection of all functions defined in stage 2. + + +LEN_TEMPLATE = """ +void +fdl_len_@NAME@ (call_stub_t *stub) +{ + uint32_t meta_len = sizeof (event_header_t); + uint32_t data_len = 0; + + /* TBD: global stuff, e.g. 
uid/gid */ +@LEN_CODE@ + + /* TBD: pad extension length */ + stub->jnl_meta_len = meta_len; + stub->jnl_data_len = data_len; +} +""" + +SER_TEMPLATE = """ +void +fdl_serialize_@NAME@ (call_stub_t *stub, char *meta_buf, char *data_buf) +{ + event_header_t *eh; + unsigned long offset = 0; + + /* TBD: word size/endianness */ + eh = (event_header_t *)meta_buf; + eh->event_type = NEW_REQUEST; + eh->fop_type = GF_FOP_@UPNAME@; + eh->request_id = 0; // TBD + meta_buf += sizeof (*eh); +@SER_CODE@ + /* TBD: pad extension length */ + eh->ext_length = offset; +} +""" + +CBK_TEMPLATE = """ +int32_t +fdl_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this, + int32_t op_ret, int32_t op_errno, + @LONG_ARGS@) +{ + STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, + @SHORT_ARGS@); + return 0; +} +""" + +CONTINUE_TEMPLATE = """ +int32_t +fdl_@NAME@_continue (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + STACK_WIND (frame, fdl_@NAME@_cbk, + FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@, + @SHORT_ARGS@); + return 0; +} + +""" + +FOP_TEMPLATE = """ +int32_t +fdl_@NAME@ (call_frame_t *frame, xlator_t *this, + @LONG_ARGS@) +{ + call_stub_t *stub; + + stub = fop_@NAME@_stub (frame, default_@NAME@, + @SHORT_ARGS@); + fdl_len_@NAME@ (stub); + stub->serialize = fdl_serialize_@NAME@; + fdl_enqueue (this, stub); + + return 0; +} +""" + +LEN_DICT_TEMPLATE = """ + if (@SRC@) { + data_pair_t *memb; + for (memb = @SRC@->members_list; memb; memb = memb->next) { + meta_len += sizeof(int); + meta_len += strlen(memb->key) + 1; + meta_len += sizeof(int); + meta_len += memb->value->len; + } + } + meta_len += sizeof(int); +""" + +LEN_GFID_TEMPLATE = """ + meta_len += 16; +""" + +LEN_INTEGER_TEMPLATE = """ + meta_len += sizeof (@SRC@); +""" + +# 16 for gfid, 16 for pargfid, 1 for flag, 0/1 for terminating NUL +LEN_LOC_TEMPLATE = """ + if (@SRC@.name) { + meta_len += (strlen (@SRC@.name) + 34); + } else { + meta_len += 33; + } +""" + +LEN_STRING_TEMPLATE = """ + if (@SRC@) { + 
meta_len += (strlen (@SRC@) + 1); + } else { + meta_len += 1; + } +""" + +LEN_VECTOR_TEMPLATE = """ + meta_len += sizeof(size_t); + data_len += iov_length (@VEC@, @CNT@); +""" + +LEN_IATT_TEMPLATE = """ + meta_len += sizeof(@SRC@.ia_prot); + meta_len += sizeof(@SRC@.ia_uid); + meta_len += sizeof(@SRC@.ia_gid); + meta_len += sizeof(@SRC@.ia_atime); + meta_len += sizeof(@SRC@.ia_atime_nsec); + meta_len += sizeof(@SRC@.ia_mtime); + meta_len += sizeof(@SRC@.ia_mtime_nsec); +""" + +SERLZ_DICT_TEMPLATE = """ + if (@SRC@) { + data_pair_t *memb; + for (memb = @SRC@->members_list; memb; memb = memb->next) { + *((int *)(meta_buf+offset)) = strlen(memb->key) + 1; + offset += sizeof(int); + strcpy (meta_buf+offset, memb->key); + offset += strlen(memb->key) + 1; + *((int *)(meta_buf+offset)) = memb->value->len; + offset += sizeof(int); + memcpy (meta_buf+offset, memb->value->data, memb->value->len); + offset += memb->value->len; + } + } + *((int *)(meta_buf+offset)) = 0; + offset += sizeof(int); +""" + +SERLZ_GFID_TEMPLATE = """ + memcpy (meta_buf+offset, @SRC@->inode->gfid, 16); + offset += 16; +""" + +SERLZ_INTEGER_TEMPLATE = """ + memcpy (meta_buf+offset, &@SRC@, sizeof(@SRC@)); + offset += sizeof(@SRC@); +""" + +SERLZ_LOC_TEMPLATE = """ + memcpy (meta_buf+offset, @SRC@.gfid, 16); + offset += 16; + memcpy (meta_buf+offset, @SRC@.pargfid, 16); + offset += 16; + if (@SRC@.name) { + *(meta_buf+offset) = 1; + ++offset; + strcpy (meta_buf+offset, @SRC@.name); + offset += (strlen (@SRC@.name) + 1); + } else { + *(meta_buf+offset) = 0; + ++offset; + } +""" + +SERLZ_STRING_TEMPLATE = """ + if (@SRC@) { + *(meta_buf+offset) = 1; + ++offset; + strcpy (meta_buf+offset, @SRC@); + offset += strlen(@SRC@); + } else { + *(meta_buf+offset) = 0; + ++offset; + } +""" + +SERLZ_VECTOR_TEMPLATE = """ + *((size_t *)(meta_buf+offset)) = iov_length (@VEC@, @CNT@); + offset += sizeof(size_t); + int32_t i; + for (i = 0; i < @CNT@; ++i) { + memcpy (data_buf, @VEC@[i].iov_base, @VEC@[i].iov_len); + 
data_buf += @VEC@[i].iov_len; + } +""" + +# We don't need to save all of the fields - only those affected by chown, +# chgrp, chmod, and utime. +SERLZ_IATT_TEMPLATE = """ + *((ia_prot_t *)(meta_buf+offset)) = @SRC@.ia_prot; + offset += sizeof(@SRC@.ia_prot); + *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_uid; + offset += sizeof(@SRC@.ia_uid); + *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_gid; + offset += sizeof(@SRC@.ia_gid); + *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_atime; + offset += sizeof(@SRC@.ia_atime); + *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_atime_nsec; + offset += sizeof(@SRC@.ia_atime_nsec); + *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_mtime; + offset += sizeof(@SRC@.ia_mtime); + *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_mtime_nsec; + offset += sizeof(@SRC@.ia_mtime_nsec); +""" + +typemap = { + 'dict_t *': ( LEN_DICT_TEMPLATE, SERLZ_DICT_TEMPLATE), + 'fd_t *': ( LEN_GFID_TEMPLATE, SERLZ_GFID_TEMPLATE), + 'dev_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), + 'gf_xattrop_flags_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), + 'int32_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), + 'mode_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), + 'off_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), + 'size_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), + 'uint32_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE), + 'loc_t *': ( LEN_LOC_TEMPLATE, SERLZ_LOC_TEMPLATE), + 'const char *': ( LEN_STRING_TEMPLATE, SERLZ_STRING_TEMPLATE), + 'struct iatt *': ( LEN_IATT_TEMPLATE, SERLZ_IATT_TEMPLATE), +} + +def get_special_subs (args): + len_code = "" + ser_code = "" + for arg in args: + if (arg[0] != 'fop-arg') or (len(arg) < 4): + continue + # Let this throw an exception if we get an unknown field name. The + # broken build will remind whoever messed with the stub code that a + # corresponding update is needed here. + if arg[3] == "vector": + # Make it as obvious as possible that this is a special case. 
+ len_code += LEN_VECTOR_TEMPLATE \ + .replace("@VEC@","stub->args.vector") \ + .replace("@CNT@","stub->args.count") + ser_code += SERLZ_VECTOR_TEMPLATE \ + .replace("@VEC@","stub->args.vector") \ + .replace("@CNT@","stub->args.count") + else: + len_tmpl, ser_tmpl = typemap[arg[2]] + src = "stub->args.%s" % arg[3] + len_code += len_tmpl.replace("@SRC@",src) + ser_code += ser_tmpl.replace("@SRC@",src) + return len_code, ser_code + +def gen_fdl (): + entrypoints = [] + for name, value in ops.iteritems(): + if "journal" not in [ x[0] for x in value ]: + continue + len_code, ser_code = get_special_subs(value) + fop_subs[name]["@LEN_CODE@"] = len_code[:-1] + fop_subs[name]["@SER_CODE@"] = ser_code[:-1] + print generate(LEN_TEMPLATE,name,fop_subs) + print generate(SER_TEMPLATE,name,fop_subs) + print generate(CBK_TEMPLATE,name,cbk_subs) + print generate(CONTINUE_TEMPLATE,name,fop_subs) + print generate(FOP_TEMPLATE,name,fop_subs) + entrypoints.append(name) + print "struct xlator_fops fops = {" + for ep in entrypoints: + print "\t.%s = fdl_%s," % (ep, ep) + print "};" + +for l in open(sys.argv[1],'r').readlines(): + if l.find('#pragma generate') != -1: + print "/* BEGIN GENERATED CODE - DO NOT MODIFY */" + gen_fdl() + print "/* END GENERATED CODE */" + else: + print l[:-1] diff --git a/xlators/experimental/fdl/src/gen_recon.py b/xlators/experimental/fdl/src/gen_recon.py new file mode 100755 index 00000000000..26318f92d88 --- /dev/null +++ b/xlators/experimental/fdl/src/gen_recon.py @@ -0,0 +1,191 @@ +#!/usr/bin/python + +import os +import re +import string +import sys + +curdir = os.path.dirname (sys.argv[0]) +gendir = os.path.join (curdir, '../../../../libglusterfs/src') +sys.path.append (gendir) +from generator import ops, fop_subs, cbk_subs, generate + +# See the big header comment at the start of gen_fdl.py to see how the stages +# fit together. 
The big difference here is that *all* of the C code is in the +# template file as labelled fragments, instead of as Python strings. That +# makes it much easier to edit in one place, with proper syntax highlighting +# and indentation. +# +# Stage 1 uses type-specific fragments to generate FUNCTION_BODY, instead of +# LEN_*_TEMPLATE and SERLZ_*_TEMPLATE to generate LEN_CODE and SER_CODE. +# +# Stage 2 uses the FOP and CASE fragments instead of RECON_TEMPLATE and +# FOP_TEMPLATE. The expanded FOP code (including FUNCTION_BODY substitution +# in the middle of each function) is emitted immediately; the expanded CASE +# code is saved for the next stage. +# +# Stage 3 uses the PROLOG and EPILOG fragments, with the expanded CASE code +# in the middle of EPILOG, to generate the whole output file. +# +# Another way of looking at it is to consider how the fragments appear in +# the final output: +# +# PROLOG +# FOP (expanded for CREATE) +# FOP before FUNCTION_BODY +# LOC, INTEGER, GFID, etc. (one per arg, by type) +# FOP after FUNCTION_BODY +# FOP (expanded for WRITEV) +# FOP before FUNCTION_BODY +# GFID, VECTOR, etc. 
(one per arg, by type) +# FOP after FUNCTION_BODY +# (more FOPs) +# EPILOG +# EPILOG before CASE +# CASE statements (one per fop) +# EPILOG after CASE + +typemap = { + 'dict_t *': "DICT", + 'fd_t *': "FD", + 'dev_t': "DOUBLE", + 'gf_xattrop_flags_t': "INTEGER", + 'int32_t': "INTEGER", + 'mode_t': "INTEGER", + 'off_t': "DOUBLE", + 'size_t': "DOUBLE", + 'uint32_t': "INTEGER", + 'loc_t *': "LOC", + 'const char *': "STRING", + 'struct iovec *': "VECTOR", + 'struct iatt *': "IATT", + 'struct iobref *': "IOBREF", +} + +def get_special_subs (name, args, fop_type): + code = "" + cleanups = "" + links = "" + s_args = [] + for arg in args: + if arg[0] == 'extra': + code += "\t%s %s;\n\n" % (arg[2], arg[1]) + s_args.append(arg[3]) + continue + if arg[0] == 'link': + links += fragments["LINK"].replace("@INODE_ARG@",arg[1]) \ + .replace("@IATT_ARG@",arg[2]) + continue + if arg[0] != 'fop-arg': + continue + if (name, arg[1]) == ('writev', 'count'): + # Special case: just skip this. We can't mark it as 'nosync' + # because of the way the translator and dumper generators look for + # that after 'stub-name' which we don't define. Instead of adding a + # bunch of generic infrastructure for this one case, just pound it + # here. + continue + recon_type = typemap[arg[2]] + # print "/* %s.%s => %s (%s)*/" % (name, arg[1], recon_type, fop_type) + if (name == "create") and (arg[1] == "fd"): + # Special case: fd for create is new, not looked up. + # print "/* change to NEW_FD */" + recon_type = "NEW_FD" + elif (recon_type == "LOC") and (fop_type == "entry-op"): + # Need to treat this differently for inode vs. entry ops. + # Special case: link source is treated like inode-op. 
+ if (name != "link") or (arg[1] != "oldloc"): + # print "/* change to PARENT_LOC */" + recon_type = "PARENT_LOC" + code += fragments[recon_type].replace("@ARGNAME@",arg[1]) \ + .replace("@ARGTYPE@",arg[2]) + cleanup_key = recon_type + "_CLEANUP" + if fragments.has_key(cleanup_key): + cleanups += fragments[cleanup_key].replace("@ARGNAME@",arg[1]) + if 'nosync' in arg[4:]: + code += "\t(void)%s;\n" % arg[1]; + continue + if arg[2] in ("loc_t *", "struct iatt *"): + # These are passed as pointers to the syncop, but they're actual + # structures in the generated code. + s_args.append("&"+arg[1]); + else: + s_args.append(arg[1]) + # We have to handle a couple of special cases here, because some n00b + # defined the syncops with a different argument order than the fops they're + # based on. + if name == 'writev': + # Swap 'flags' and 'iobref'. Also, we need to add the iov count, which + # is not stored in or read from the journal. There are other ways to + # do that, but this is the only place we need anything similar and we + # already have to treat it as a special case so this is simplest. + s_args_str = 'fd, &vector, 1, off, iobref, flags, xdata' + elif name == 'symlink': + # Swap 'linkpath' and 'loc'. + s_args_str = '&loc, linkpath, &iatt, xdata' + else: + s_args_str = string.join (s_args, ", ") + return code, links, s_args_str, cleanups + +# TBD: probably need to generate type-specific cleanup code as well - e.g. +# fd_unref for an fd_t, loc_wipe for a loc_t, and so on. All of these +# generated CLEANUP fragments will go at the end of the function, with goto +# labels. Meanwhile, the error-checking part of each type-specific fragment +# (e.g. LOC or FD) will need to update the indirect label that we jump to when +# an error is detected. This will probably get messy. 
+def gen_functions ():
+    """Expand the FOP fragment for every fop that has a 'journal' entry.
+
+    Fills in the per-fop substitutions computed by get_special_subs and
+    returns the concatenated output of generate().
+    """
+    code = ""
+    for name, value in ops.iteritems():
+        fop_type = [ x[1] for x in value if x[0] == "journal" ]
+        if not fop_type:
+            continue
+        body, links, syncop_args, cleanups = get_special_subs (name, value,
+                                                               fop_type[0])
+        fop_subs[name]["@FUNCTION_BODY@"] = body
+        fop_subs[name]["@LINKS@"] = links
+        fop_subs[name]["@SYNCOP_ARGS@"] = syncop_args
+        fop_subs[name]["@CLEANUPS@"] = cleanups
+        if name == "writev":
+            # Take advantage of the fact that, *during reconciliation*, the
+            # vector is always a single element. In normal I/O it's not.
+            fop_subs[name]["@SUCCESS_VALUE@"] = "vector.iov_len"
+        else:
+            fop_subs[name]["@SUCCESS_VALUE@"] = "GFAPI_SUCCESS"
+        # Print the FOP fragment with @FUNCTION_BODY@ in the middle.
+        code += generate(fragments["FOP"],name,fop_subs)
+    return code
+
+def gen_cases ():
+    """Expand the CASE fragment for every fop that has a 'journal' entry."""
+    code = ""
+    for name, value in ops.iteritems():
+        if "journal" not in [ x[0] for x in value ]:
+            continue
+        # Add the CASE fragment for this fop.
+        code += generate(fragments["CASE"],name,fop_subs)
+    return code
+
+def load_fragments (path="recon-tmpl.c"):
+    """Split the template file at path into named fragments.
+
+    A line matching 'pragma fragment NAME' starts fragment NAME; the lines
+    that follow, up to the next such line, form its text. Lines before the
+    first marker are discarded. Returns a dict mapping NAME to text.
+    """
+    pragma_re = re.compile('pragma fragment (.*)')
+    cur_symbol = None
+    cur_value = ""
+    result = {}
+    # Use a context manager so the template file is always closed.
+    with open(path,"r") as template:
+        for line in template:
+            m = pragma_re.search(line)
+            if m:
+                if cur_symbol:
+                    result[cur_symbol] = cur_value
+                cur_symbol = m.group(1)
+                cur_value = ""
+            else:
+                cur_value += line
+    if cur_symbol:
+        result[cur_symbol] = cur_value
+    return result
+
+if __name__ == "__main__":
+    # The template path is taken from the first command-line argument.
+    fragments = load_fragments(sys.argv[1])
+    print "/* BEGIN GENERATED CODE - DO NOT MODIFY */"
+    print fragments["PROLOG"]
+    print gen_functions()
+    print fragments["EPILOG"].replace("@SWITCH_BODY@",gen_cases())
+    print "/* END GENERATED CODE */"
diff --git a/xlators/experimental/fdl/src/jnl-types.h b/xlators/experimental/fdl/src/jnl-types.h
new file mode 100644
index 00000000000..8cb39d01a25
--- /dev/null
+++ b/xlators/experimental/fdl/src/jnl-types.h
@@ -0,0 +1,14 @@
+#define NEW_REQUEST ((uint8_t)'N')
+
+typedef struct {
+        uint8_t         event_type;     /* e.g. NEW_REQUEST */
+        uint8_t         fop_type;       /* e.g. GF_FOP_SETATTR */
+        uint16_t        request_id;
+        uint32_t        ext_length;
+} event_header_t;
+
+enum {
+        FDL_IPC_BASE = 0xfeedbee5,      /* ... and they make honey */
+        FDL_IPC_CHANGE_TERM,
+        FDL_IPC_GET_TERMS,
+};
diff --git a/xlators/experimental/fdl/src/logdump.c b/xlators/experimental/fdl/src/logdump.c
new file mode 100644
index 00000000000..7c979c32a04
--- /dev/null
+++ b/xlators/experimental/fdl/src/logdump.c
@@ -0,0 +1,60 @@
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+extern int fdl_dump (char **, char **);
+
+/*
+ * Map the journal metadata file (argv[1]) and data file (argv[2]) read-only
+ * and call fdl_dump on them until it returns zero.
+ */
+int
+main (int argc, char **argv)
+{
+        int     meta_fd         = (-1);
+        char    *meta_buf       = NULL;
+        int     data_fd         = (-1);
+        char    *data_buf       = NULL;
+
+        if (argc < 3) {
+                fprintf (stderr, "missing arguments: expected "
+                         "<meta-file> <data-file>\n");
+                return EXIT_FAILURE;
+        }
+
+        meta_fd = open (argv[1], O_RDONLY);
+        if (meta_fd < 0) {
+                perror ("open");
+                return EXIT_FAILURE;
+        }
+
+        /* TBD: get proper length */
+        meta_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0);
+        if (meta_buf == MAP_FAILED) {
+                perror ("mmap");
+                return EXIT_FAILURE;
+        }
+
+        data_fd = open (argv[2], O_RDONLY);
+        if (data_fd < 0) {
+                perror ("open");
+                return EXIT_FAILURE;
+        }
+
+        /* TBD: get proper length */
+        data_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0);
+        if (data_buf == MAP_FAILED) {
+                perror ("mmap");
+                return EXIT_FAILURE;
+        }
+
+        for (;;) {
+                if (!fdl_dump(&meta_buf,&data_buf)) {
+                        break;
+                }
+        }
+
+        return EXIT_SUCCESS;
+}
diff --git a/xlators/experimental/fdl/src/recon-tmpl.c b/xlators/experimental/fdl/src/recon-tmpl.c
new file mode 100644
index 00000000000..523bda39418
--- /dev/null
+++ b/xlators/experimental/fdl/src/recon-tmpl.c
@@ -0,0 +1,319 @@
+#pragma fragment PROLOG
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "fd.h"
+#include "iatt.h"
+#include "syncop.h"
+#include "xlator.h"
+#include "glfs-internal.h"
+
+#include "jnl-types.h"
+
+#define GFAPI_SUCCESS 0
+
+/*
+ * Return a referenced inode for gfid.  An inode already present in the
+ * active subvolume's inode table is used as is; otherwise a new inode is
+ * looked up with syncop_lookup and linked into the table.  Returns NULL if
+ * allocation, lookup or linking fails.
+ */
+inode_t *
+recon_get_inode (glfs_t *fs, uuid_t gfid)
+{
+        inode_t         *inode;
+        loc_t           loc             = {NULL,};
+        struct iatt     iatt;
+        int             ret;
+        inode_t         *newinode;
+
+        inode = inode_find (fs->active_subvol->itable, gfid);
+        if (inode) {
+                printf ("=== FOUND %s IN TABLE\n", uuid_utoa(gfid));
+                return inode;
+        }
+
+        loc.inode = inode_new (fs->active_subvol->itable);
+        if (!loc.inode) {
+                return NULL;
+        }
+        gf_uuid_copy (loc.inode->gfid, gfid);
+        gf_uuid_copy (loc.gfid, gfid);
+
+        printf ("=== DOING LOOKUP FOR %s\n", uuid_utoa(gfid));
+
+        ret = syncop_lookup (fs->active_subvol, &loc, &iatt,
+                             NULL, NULL, NULL);
+        if (ret != GFAPI_SUCCESS) {
+                fprintf (stderr, "syncop_lookup failed (%d)\n", ret);
+                loc_wipe (&loc);
+                return NULL;
+        }
+
+        newinode = inode_link (loc.inode, NULL, NULL, &iatt);
+        if (newinode) {
+                inode_lookup (newinode);
+        }
+
+        /* Drop the inode_new reference; inode_link took its own. */
+        loc_wipe (&loc);
+        return newinode;
+}
+
+#pragma fragment DICT
+        dict_t  *@ARGNAME@;
+
+        @ARGNAME@ = dict_new();
+        if (!@ARGNAME@) {
+                goto *err_label;
+        }
+        err_label = &&cleanup_@ARGNAME@;
+
+        {
+                int     key_len, data_len;
+                char    *key_ptr;
+                int     garbage;
+                for (;;) {
+                        key_len = *((int *)new_meta);
+                        new_meta += sizeof(int);
+                        if (!key_len) {
+                                break;
+                        }
+                        key_ptr = new_meta;
+                        new_meta += key_len;
+                        data_len = *((int *)new_meta);
+                        new_meta += sizeof(int);
+                        garbage = dict_set_static_bin (@ARGNAME@, key_ptr,
+                                                       new_meta, data_len);
+                        /* TBD: check error from dict_set_static_bin */
+                        (void)garbage;
+                        new_meta += data_len;
+                }
+        }
+
+#pragma fragment DICT_CLEANUP
+cleanup_@ARGNAME@:
+        dict_unref (@ARGNAME@);
+
+#pragma fragment DOUBLE
+        @ARGTYPE@       @ARGNAME@       = *((@ARGTYPE@ *)new_meta);
+        new_meta += sizeof(uint64_t);
+
+#pragma fragment FD
+        inode_t *@ARGNAME@_ino;
+        fd_t    *@ARGNAME@;
+
+        @ARGNAME@_ino = recon_get_inode (fs, *((uuid_t *)new_meta));
+        new_meta += 16;
+        if (!@ARGNAME@_ino) {
+                goto *err_label;
+        }
+        err_label = &&cleanup_@ARGNAME@_ino;
+
+        @ARGNAME@ = fd_anonymous (@ARGNAME@_ino);
+        if (!@ARGNAME@) {
+                goto *err_label;
+        }
+        err_label = &&cleanup_@ARGNAME@;
+
+#pragma fragment FD_CLEANUP
+cleanup_@ARGNAME@:
+        fd_unref (@ARGNAME@);
+cleanup_@ARGNAME@_ino:
+        inode_unref (@ARGNAME@_ino);
+
+#pragma fragment NEW_FD
+        /*
+         * This pseudo-type is only used for create, and in that case we know
+         * we'll be using loc.inode, so it's not worth generalizing to take an
+         * extra argument.
+         */
+        fd_t    *@ARGNAME@      = fd_anonymous (loc.inode);
+
+        if (!@ARGNAME@) {
+                goto *err_label;
+        }
+        err_label = &&cleanup_@ARGNAME@;
+        new_meta += 16;
+
+#pragma fragment NEW_FD_CLEANUP
+cleanup_@ARGNAME@:
+        fd_unref (@ARGNAME@);
+
+#pragma fragment INTEGER
+        @ARGTYPE@       @ARGNAME@       = *((@ARGTYPE@ *)new_meta);
+
+        new_meta += sizeof(@ARGTYPE@);
+
+#pragma fragment LOC
+        loc_t           @ARGNAME@       = { NULL, };
+
+        @ARGNAME@.inode = recon_get_inode (fs, *((uuid_t *)new_meta));
+        if (!@ARGNAME@.inode) {
+                goto *err_label;
+        }
+        err_label = &&cleanup_@ARGNAME@;
+        gf_uuid_copy (@ARGNAME@.gfid, @ARGNAME@.inode->gfid);
+        new_meta += 16;
+        new_meta += 16; /* skip over pargfid */
+        if (*(new_meta++)) {
+                @ARGNAME@.name = new_meta;
+                new_meta += strlen(new_meta) + 1;
+        }
+
+#pragma fragment LOC_CLEANUP
+cleanup_@ARGNAME@:
+        loc_wipe (&@ARGNAME@);
+
+#pragma fragment PARENT_LOC
+        loc_t           @ARGNAME@       = { NULL, };
+
+        new_meta += 16; /* skip over gfid */
+        @ARGNAME@.parent = recon_get_inode (fs, *((uuid_t *)new_meta));
+        if (!@ARGNAME@.parent) {
+                goto *err_label;
+        }
+        err_label = &&cleanup_@ARGNAME@;
+        gf_uuid_copy (@ARGNAME@.pargfid, @ARGNAME@.parent->gfid);
+        new_meta += 16;
+        if (!*(new_meta++)) {
+                goto *err_label;
+        }
+        @ARGNAME@.name = new_meta;
+        new_meta += strlen(new_meta) + 1;
+
+        @ARGNAME@.inode = inode_new (fs->active_subvol->itable);
+        if (!@ARGNAME@.inode) {
+                goto *err_label;
+        }
+
+#pragma fragment PARENT_LOC_CLEANUP
+cleanup_@ARGNAME@:
+        loc_wipe (&@ARGNAME@);
+
+#pragma fragment STRING
+        char    *@ARGNAME@;
+        if (*(new_meta++)) {
+                @ARGNAME@ = new_meta;
+                new_meta += (strlen(new_meta) + 1);
+        }
+        else {
+                goto *err_label;
+        }
+
+#pragma fragment VECTOR
+        struct iovec    @ARGNAME@;
+
+        @ARGNAME@.iov_len = *((size_t *)new_meta);
+        new_meta += sizeof(@ARGNAME@.iov_len);
+        @ARGNAME@.iov_base = new_data;
+        new_data += @ARGNAME@.iov_len;
+
+#pragma fragment IATT
+        struct iatt     @ARGNAME@;
+        {
+                @ARGNAME@.ia_prot = *((ia_prot_t *)new_meta);
+                new_meta += sizeof(ia_prot_t);
+                uint32_t *myints = (uint32_t *)new_meta;
+                @ARGNAME@.ia_uid = myints[0];
+                @ARGNAME@.ia_gid = myints[1];
+                @ARGNAME@.ia_atime = myints[2];
+                @ARGNAME@.ia_atime_nsec = myints[3];
+                @ARGNAME@.ia_mtime = myints[4];
+                @ARGNAME@.ia_mtime_nsec = myints[5];
+                new_meta += sizeof(*myints) * 6;
+        }
+
+#pragma fragment IOBREF
+        struct iobref   *@ARGNAME@;
+
+        @ARGNAME@ = iobref_new();
+        if (!@ARGNAME@) {
+                goto *err_label;
+        }
+        err_label = &&cleanup_@ARGNAME@;
+
+#pragma fragment IOBREF_CLEANUP
+cleanup_@ARGNAME@:
+        iobref_unref (@ARGNAME@);
+
+#pragma fragment LINK
+        /* TBD: check error */
+        inode_t *new_inode = inode_link (@INODE_ARG@, NULL, NULL, @IATT_ARG@);
+        if (new_inode) {
+                inode_lookup (new_inode);
+        }
+
+#pragma fragment FOP
+int
+fdl_replay_@NAME@ (glfs_t *fs, char **old_meta, char **old_data)
+{
+        char    *new_meta       = *old_meta;
+        char    *new_data       = *old_data;
+        int     ret;
+        int     status          = 0xbad;
+        /*
+         * err_label holds the address of the cleanup label to jump to on
+         * failure (GCC labels-as-values); the per-argument fragments update
+         * it as they acquire resources.
+         */
+        void    *err_label      = &&done;
+
+@FUNCTION_BODY@
+
+        ret = syncop_@NAME@ (fs->active_subvol, @SYNCOP_ARGS@, NULL);
+        if (ret != @SUCCESS_VALUE@) {
+                fprintf (stderr, "syncop_@NAME@ returned %d\n", ret);
+                goto *err_label;
+        }
+
+@LINKS@
+
+        status = 0;
+
+@CLEANUPS@
+
+done:
+        *old_meta = new_meta;
+        *old_data = new_data;
+        return status;
+}
+
+#pragma fragment CASE
+        case GF_FOP_@UPNAME@:
+                printf ("=== GF_FOP_@UPNAME@\n");
+                if (fdl_replay_@NAME@ (fs, &new_meta, &new_data) != 0) {
+                        goto done;
+                }
+                recognized = 1;
+                break;
+
+#pragma fragment EPILOG
+int
+recon_execute (glfs_t *fs, char **old_meta, char **old_data)
+{
+        char            *new_meta       = *old_meta;
+        char            *new_data       = *old_data;
+        int             recognized      = 0;
+        event_header_t  *eh;
+
+        eh = (event_header_t *)new_meta;
+        new_meta += sizeof (*eh);
+
+        /* TBD: check event_type instead of assuming NEW_REQUEST */
+
+        switch (eh->fop_type) {
+@SWITCH_BODY@
+
+        default:
+                printf ("unknown fop %u\n", eh->fop_type);
+        }
+
+done:
+        *old_meta = new_meta;
+        *old_data = new_data;
+        return recognized;
+}
diff --git a/xlators/experimental/fdl/src/recon.c b/xlators/experimental/fdl/src/recon.c
new file mode 100644
index 00000000000..14168a011e0
--- /dev/null
+++ b/xlators/experimental/fdl/src/recon.c
@@ -0,0 +1,101 @@
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include "glusterfs.h"
+#include "fd.h"
+#include "syncop.h"
+#include "glfs-internal.h"
+
+#define GFAPI_SUCCESS 0
+
+extern int recon_execute (glfs_t *, char **, char **);
+
+/*
+ * Replay a journal against a volume: argv[1] is the volfile, argv[2] the
+ * journal metadata file and argv[3] the journal data file.  Entries are fed
+ * to recon_execute until it returns zero.
+ */
+int
+main (int argc, char **argv)
+{
+        glfs_t  *fs;
+        int     ret;
+        int     meta_fd         = (-1);
+        char    *meta_buf       = NULL;
+        int     data_fd         = (-1);
+        char    *data_buf       = NULL;
+
+        if (argc < 4) {
+                fprintf (stderr, "missing arguments: expected "
+                         "<volfile> <meta-file> <data-file>\n");
+                return EXIT_FAILURE;
+        }
+
+        fs = glfs_new ("whocares");
+        if (!fs) {
+                fprintf (stderr, "glfs_new failed\n");
+                return EXIT_FAILURE;
+        }
+
+        if (getenv("RECON_DEBUG")) {
+                ret = glfs_set_logging (fs, "/dev/stderr", 7);
+        }
+        else {
+                ret = glfs_set_logging (fs, "/dev/null", 0);
+        }
+
+        if (ret != GFAPI_SUCCESS) {
+                fprintf (stderr, "glfs_set_logging failed (%d)\n", errno);
+                return EXIT_FAILURE;
+        }
+
+        ret = glfs_set_volfile (fs, argv[1]);
+        if (ret != GFAPI_SUCCESS) {
+                fprintf (stderr, "glfs_set_volfile failed (%d)\n", errno);
+                return EXIT_FAILURE;
+        }
+
+        ret = glfs_init (fs);
+        if (ret != GFAPI_SUCCESS) {
+                fprintf (stderr, "glfs_init failed (%d)\n", errno);
+                return EXIT_FAILURE;
+        }
+
+        meta_fd = open (argv[2], O_RDONLY);
+        if (meta_fd < 0) {
+                perror ("open");
+                return EXIT_FAILURE;
+        }
+
+        /* TBD: get proper length */
+        meta_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0);
+        if (meta_buf == MAP_FAILED) {
+                perror ("mmap");
+                return EXIT_FAILURE;
+        }
+
+        data_fd = open (argv[3], O_RDONLY);
+        if (data_fd < 0) {
+                perror ("open");
+                return EXIT_FAILURE;
+        }
+
+        /* TBD: get proper length */
+        data_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0);
+        if (data_buf == MAP_FAILED) {
+                perror ("mmap");
+                return EXIT_FAILURE;
+        }
+
+        for (;;) {
+                if (!recon_execute(fs,&meta_buf,&data_buf)) {
+                        break;
+                }
+        }
+
+        return EXIT_SUCCESS;
+}
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am
index 7e5783f4f30..649d9d8e9fa 100644
--- a/xlators/features/Makefile.am
+++ b/xlators/features/Makefile.am
@@ -1,5 +1,6 @@
-SUBDIRS = locks quota read-only mac-compat quiesce marker index barrier arbiter\
-        protect compress changelog changetimerecorder ganesha gfid-access $(GLUPY_SUBDIR) qemu-block \
-        upcall snapview-client snapview-server trash shard bit-rot #path-converter # filter
+SUBDIRS = locks quota read-only mac-compat quiesce marker index barrier \
+        arbiter protect compress changelog changetimerecorder ganesha \
+        gfid-access $(GLUPY_SUBDIR) qemu-block upcall snapview-client \
+        snapview-server trash shard bit-rot
 
 CLEANFILES =
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 2c52cf72a3f..3df4b3556cf 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -1783,6 +1783,30 @@ out:
         return ret;
 }
 
+/* Add this before (above) io-threads because it's not thread-safe yet. */
+static int
+brick_graph_add_fdl (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                     dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+
+        xlator_t        *xl = NULL;
+        int             ret = -1;
+
+        if (!graph || !volinfo || !set_dict)
+                goto out;
+
+        if (dict_get_str_boolean (set_dict, "features.fdl", 0)) {
+                xl = volgen_graph_add (graph, "experimental/fdl",
+                                       volinfo->volname);
+                if (!xl)
+                        goto out;
+        }
+        ret = 0;
+
+out:
+        return ret;
+}
+
 static int
 brick_graph_add_iot (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
                       dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
@@ -2359,6 +2383,7 @@ static volgen_brick_xlator_t server_graph_table[] = {
         {brick_graph_add_index, "index"},
         {brick_graph_add_barrier, NULL},
         {brick_graph_add_marker, "marker"},
+        {brick_graph_add_fdl, "fdl"},
         {brick_graph_add_iot, "io-threads"},
         {brick_graph_add_upcall, "upcall"},
         {brick_graph_add_pump, NULL},
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 1463ef72c71..c0059d83cfe 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -2711,6 +2711,15 @@ struct volopt_map_entry glusterd_volopt_map[] = {
           .op_version  = GD_OP_VERSION_4_0_0,
           .description = "percent of rep_count-1 bricks that must be up"
         },
+        /* Full Data Logging */
+        {
+          .key         = "features.fdl",
+          .voltype     = "features/fdl",
+          .option      = "!fdl",
+          .op_version  = GD_OP_VERSION_4_0_0,
+          .flags       = OPT_FLAG_XLATOR_OPT,
+          .type        = NO_DOC,
+        },
         { .key = NULL }
 };
|