summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJeff Darcy <jdarcy@redhat.com>2016-02-08 13:30:49 -0500
committerJeff Darcy <jdarcy@redhat.com>2016-02-13 05:13:07 -0800
commitc458433041aafb48ae6d6e5fcf3e1e737dc3fda3 (patch)
tree33a03ca0c1f5faf58419de2c4ff4532752ddfb07
parentda33097c3d6492e3b468b4347e47c70828fb4320 (diff)
experimental: add fdl (Full Data Logging) translator
NSR needs logging that is different than our existing changelog in several ways: * Full data, not just metadata * Pre-op, not post-op * High performance * Supports the concept of time-bounded "terms" Others (for example EC) might need the same thing. This patch adds such a translator. It also adds code to dump the resulting journals, and to replay them using syncops, plus (very rudimentary) tests for all of the above. Change-Id: I29680a1b4e0a9e7d5a8497fef302c46434b86636 Signed-off-by: Jeff Darcy <jdarcy@redhat.com> Reviewed-on: http://review.gluster.org/12450 Smoke: Gluster Build System <jenkins@build.gluster.com> CentOS-regression: Gluster Build System <jenkins@build.gluster.com> NetBSD-regression: NetBSD Build System <jenkins@build.gluster.org>
-rw-r--r--api/src/gfapi.aliases1
-rw-r--r--api/src/gfapi.map5
-rw-r--r--api/src/glfs.c12
-rw-r--r--configure.ac2
-rw-r--r--glusterfs.spec.in3
-rw-r--r--glusterfsd/src/glusterfsd.c41
-rw-r--r--libglusterfs/src/Makefile.am2
-rw-r--r--libglusterfs/src/call-stub.h5
-rwxr-xr-x[-rw-r--r--]libglusterfs/src/generator.py267
-rw-r--r--libglusterfs/src/iobuf.c2
-rw-r--r--libglusterfs/src/syscall.c2
-rw-r--r--tests/features/fdl-overflow.t74
-rw-r--r--tests/features/fdl.t52
-rw-r--r--tests/features/recon.t62
-rw-r--r--tests/include.rc6
-rw-r--r--xlators/experimental/Makefile.am2
-rw-r--r--xlators/experimental/fdl/Makefile.am3
-rw-r--r--xlators/experimental/fdl/src/Makefile.am42
-rw-r--r--xlators/experimental/fdl/src/dump-tmpl.c156
-rw-r--r--xlators/experimental/fdl/src/fdl-tmpl.c506
-rwxr-xr-xxlators/experimental/fdl/src/gen_dumper.py116
-rwxr-xr-xxlators/experimental/fdl/src/gen_fdl.py328
-rwxr-xr-xxlators/experimental/fdl/src/gen_recon.py191
-rw-r--r--xlators/experimental/fdl/src/jnl-types.h14
-rw-r--r--xlators/experimental/fdl/src/logdump.c50
-rw-r--r--xlators/experimental/fdl/src/recon-tmpl.c305
-rw-r--r--xlators/experimental/fdl/src/recon.c89
-rw-r--r--xlators/features/Makefile.am7
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c25
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c9
30 files changed, 2269 insertions, 110 deletions
diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases
index 40b6ed21192..7181dd2f6e8 100644
--- a/api/src/gfapi.aliases
+++ b/api/src/gfapi.aliases
@@ -140,3 +140,4 @@ _priv_glfs_resolve _glfs_resolve$GFAPI_PRIVATE_3.7.0
_priv_glfs_process_upcall_event _glfs_process_upcall_event$GFAPI_PRIVATE_3.7.0
_pub_glfs_h_lookupat _glfs_h_lookupat$GFAPI_3.7.4
+_pub_glfs_ipc_xd _glfs_ipc_xd$GFAPI_4.0.0
diff --git a/api/src/gfapi.map b/api/src/gfapi.map
index d42ae2b97af..b35984a088c 100644
--- a/api/src/gfapi.map
+++ b/api/src/gfapi.map
@@ -167,3 +167,8 @@ GFAPI_3.7.4 {
global:
glfs_h_lookupat;
} GFAPI_PRIVATE_3.7.0;
+
+GFAPI_4.0.0 {
+ global:
+ glfs_ipc_xd;
+} GFAPI_3.7.4;
diff --git a/api/src/glfs.c b/api/src/glfs.c
index b151936a6e8..037b579225f 100644
--- a/api/src/glfs.c
+++ b/api/src/glfs.c
@@ -1233,7 +1233,7 @@ invalid_fs:
GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_get_volfile, 3.6.0);
int
-pub_glfs_ipc (struct glfs *fs, int opcode)
+pub_glfs_ipc_xd (struct glfs *fs, int opcode, dict_t *xd_in, dict_t **xd_out)
{
xlator_t *subvol = NULL;
int ret = -1;
@@ -1248,7 +1248,7 @@ pub_glfs_ipc (struct glfs *fs, int opcode)
goto out;
}
- ret = syncop_ipc (subvol, opcode, NULL, NULL);
+ ret = syncop_ipc (subvol, opcode, xd_in, xd_out);
DECODE_SYNCOP_ERR (ret);
out:
@@ -1259,4 +1259,12 @@ invalid_fs:
return ret;
}
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ipc_xd, 4.0.0);
+
+/*
+ * Two-argument form of the IPC call: forwards fs and opcode to
+ * pub_glfs_ipc_xd, passing NULL for both dictionary arguments, and returns
+ * whatever that call returns.
+ */
+int
+pub_glfs_ipc (struct glfs *fs, int opcode)
+{
+ return pub_glfs_ipc_xd (fs, opcode, NULL, NULL);
+}
+
GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ipc, 3.7.0);
diff --git a/configure.ac b/configure.ac
index 5d2fe342b74..29e36648aac 100644
--- a/configure.ac
+++ b/configure.ac
@@ -117,6 +117,8 @@ AC_CONFIG_FILES([Makefile
xlators/features/Makefile
xlators/features/arbiter/Makefile
xlators/features/arbiter/src/Makefile
+ xlators/experimental/fdl/Makefile
+ xlators/experimental/fdl/src/Makefile
xlators/features/changelog/Makefile
xlators/features/changelog/src/Makefile
xlators/features/changelog/lib/Makefile
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index 53a65ae4fed..9f04bc37e10 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -952,6 +952,7 @@ fi
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/barrier.so
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/cdc.so
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/changelog.so
+%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/experimental/fdl.so
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/gfid-access.so
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/read-only.so
%{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/shard.so
@@ -1217,6 +1218,8 @@ fi
/usr/lib/firewalld/services/glusterfs.xml
%endif
+%{_sbindir}/gf_logdump
+%{_sbindir}/gf_recon
%changelog
* Sat Jan 16 2016 Niels de Vos <ndevos@redhat.com>
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index bbaca1e7277..c980e7bc640 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -1199,6 +1199,26 @@ parse_opts (int key, char *arg, struct argp_state *state)
return 0;
}
+/*
+ * Tell the exit path whether it should run this xlator's fini method.
+ *
+ * Returns _gf_false when the xlator has no fini method at all.  Otherwise
+ * returns _gf_true for every xlator when the process mode is
+ * GF_GLUSTERD_PROCESS, and in any other process mode only for an xlator
+ * whose type string is exactly "experimental/fdl".
+ */
+gf_boolean_t
+should_call_fini (glusterfs_ctx_t *ctx, xlator_t *trav)
+{
+        /* Without a fini method the remaining checks are moot. */
+        if (trav->fini == NULL) {
+                return _gf_false;
+        }
+
+        /* glusterd ran fini on its xlators before this check existed. */
+        if (ctx->process_mode == GF_GLUSTERD_PROCESS) {
+                return _gf_true;
+        }
+
+        /* In glusterfsd, fdl is the only xlator known to be safe here. */
+        return (strcmp (trav->type, "experimental/fdl") == 0)
+                ? _gf_true : _gf_false;
+}
void
cleanup_and_exit (int signum)
@@ -1271,20 +1291,17 @@ cleanup_and_exit (int signum)
/*call fini for glusterd xlator */
/* TODO : Invoke fini for rest of the xlators */
- if (ctx->process_mode == GF_GLUSTERD_PROCESS) {
-
- trav = NULL;
- if (ctx->active)
- trav = ctx->active->top;
- while (trav) {
- if (trav->fini) {
- THIS = trav;
- trav->fini (trav);
- }
- trav = trav->next;
+ trav = NULL;
+ if (ctx->active)
+ trav = ctx->active->top;
+ while (trav) {
+ if (should_call_fini(ctx,trav)) {
+ THIS = trav;
+ trav->fini (trav);
}
-
+ trav = trav->next;
}
+
exit(0);
}
diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am
index 46e2e021134..c6d93c925ac 100644
--- a/libglusterfs/src/Makefile.am
+++ b/libglusterfs/src/Makefile.am
@@ -83,7 +83,7 @@ y.tab.h: graph.y
defaults.c: defaults-tmpl.c generator.py gen-defaults.py
$(PYTHON) $(srcdir)/gen-defaults.py $(srcdir)/defaults-tmpl.c > $@
-CLEANFILES = graph.lex.c y.tab.c y.tab.h defaults.c
+CLEANFILES = $(nodist_libglusterfs_la_SOURCES)
if UNITTEST
CLEANFILES += *.gcda *.gcno *_xunit.xml
diff --git a/libglusterfs/src/call-stub.h b/libglusterfs/src/call-stub.h
index 01621368ee9..82a49c1d7b9 100644
--- a/libglusterfs/src/call-stub.h
+++ b/libglusterfs/src/call-stub.h
@@ -17,12 +17,15 @@
#include "stack.h"
#include "list.h"
-typedef struct {
+typedef struct _call_stub {
struct list_head list;
char wind;
call_frame_t *frame;
glusterfs_fop_t fop;
struct mem_pool *stub_mem_pool; /* pointer to stub mempool in ctx_t */
+ uint32_t jnl_meta_len;
+ uint32_t jnl_data_len;
+ void (*serialize) (struct _call_stub *, char *, char *);
union {
fop_lookup_t lookup;
diff --git a/libglusterfs/src/generator.py b/libglusterfs/src/generator.py
index 5e8f6c29cd4..8be68337baa 100644..100755
--- a/libglusterfs/src/generator.py
+++ b/libglusterfs/src/generator.py
@@ -2,6 +2,65 @@
import string
+# ops format: 'fop-arg' name type stub-field [nosync]
+# 'cbk-arg' name type
+# 'extra' name type arg-str
+# 'journal' fop-type
+# 'link' inode iatt
+#
+# 'role' indicates the significance of this line to the code generator (sort of
+# our own type).
+#
+# For fop-arg, we first need to know the name and the type of the arg so that
+# we can generate SHORT_ARGS (for function calls) and LONG_ARGS (for
+# declarations). For code that uses stubs, we also need to know the name of
+# the stub field, which might be different than the argument itself. Lastly,
+# for code that uses syncops, we need to know whether whoever wrote the syncop
+# for this fop "forgot" to include this argument. (Editorial: this kind of
+# creeping inconsistency is why we should have used code generation for stubs
+# and syncops as well as defaults all along.) To address this need, we use the
+# optional 'nosync' field for arguments (e.g. mkdir.umask) that we should skip
+# in generated syncop code.
+#
+# 'cbk-arg' is like fop-arg but simpler and used for generating callbacks
+# instead of fop functions.
+#
+# 'extra' is also like fop-arg, but it's another hack for syncops. This time
+# the problem is that some of what would normally be *callback* arguments are
+# instead created in the caller and passed to the syncop. We handle that by
+# adding an entry at the appropriate place in the fop-arg list, with the name
+# and type to generate a declaration and an argument string to generate the
+# actual syncop call.
+#
+# The mere presence of a 'journal' item is sufficient for most of the journal
+# code to recognize that it should do something. However, reconciliation also
+# needs to decide how reconciliation builds the arguments it needs to call down
+# to the syncop layer, based on what's in the journal. To do that, we divide
+# ops into three types and store those types in the ops table. In general,
+# these three types work as follows.
+#
+# For an fd-op, the GFID in the journal is used (in the loc.gfid field) to
+# look up an inode, then an anonymous fd is found/created for that inode.
+#
+# For an inode-op, the GFID in the journal is used the same way, but no fd
+# is needed.
+#
+# For an entry-op, the *parent* GFID and name from the journal are used to
+# look up an inode (via loc.pargfid and loc.name respectively).
+#
+# The only places this seems to fall down are link and create. In link,
+# which is generally an entry-op, the source is looked up as though it's an
+# inode-op. In create, we have an fd argument but it's really a return
+# argument so we get a fresh inode instead of looking one up. Those two cases
+# need to be handled as special cases in the reconciliation code.
+#
+# 'link' is (hopefully) the last of the journal/syncop hacks. Much like
+# 'extra', some values that are returned as callback arguments in the normal
+# case are handled differently for syncops. For syncops that create objects
+# (e.g. mkdir) we need to link those objects into our inode table. The 'inode'
+# and 'iatt' fields here give us the information we need to construct the
+# proper inode_link call(s).
+
ops = {}
ops['fgetxattr'] = (
@@ -13,19 +72,21 @@ ops['fgetxattr'] = (
)
ops['fsetxattr'] = (
- ('fop-arg', 'fd', 'fd_t *'),
- ('fop-arg', 'dict', 'dict_t *'),
- ('fop-arg', 'flags', 'int32_t'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'dict', 'dict_t *', 'xattr'),
+ ('fop-arg', 'flags', 'int32_t', 'flags'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
)
ops['setxattr'] = (
- ('fop-arg', 'loc', 'loc_t *'),
- ('fop-arg', 'dict', 'dict_t *'),
- ('fop-arg', 'flags', 'int32_t'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'dict', 'dict_t *', 'xattr'),
+ ('fop-arg', 'flags', 'int32_t', 'flags'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'inode-op'),
)
ops['statfs'] = (
@@ -73,16 +134,17 @@ ops['flush'] = (
)
ops['writev'] = (
- ('fop-arg', 'fd', 'fd_t *'),
- ('fop-arg', 'vector', 'struct iovec *'),
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'vector', 'struct iovec *', 'vector'),
('fop-arg', 'count', 'int32_t'),
- ('fop-arg', 'off', 'off_t'),
- ('fop-arg', 'flags', 'uint32_t'),
+ ('fop-arg', 'off', 'off_t', 'offset'),
+ ('fop-arg', 'flags', 'uint32_t', 'flags'),
('fop-arg', 'iobref', 'struct iobref *'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'prebuf', 'struct iatt *'),
('cbk-arg', 'postbuf', 'struct iatt *'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
)
ops['readv'] = (
@@ -108,96 +170,111 @@ ops['open'] = (
)
ops['create'] = (
- ('fop-arg', 'loc', 'loc_t *'),
- ('fop-arg', 'flags', 'int32_t'),
- ('fop-arg', 'mode', 'mode_t'),
- ('fop-arg', 'umask', 'mode_t'),
- ('fop-arg', 'fd', 'fd_t *'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'flags', 'int32_t', 'flags'),
+ ('fop-arg', 'mode', 'mode_t', 'mode'),
+ ('fop-arg', 'umask', 'mode_t', 'umask', 'nosync'),
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('extra', 'iatt', 'struct iatt', '&iatt'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'fd', 'fd_t *'),
('cbk-arg', 'inode', 'inode_t *'),
('cbk-arg', 'buf', 'struct iatt *'),
('cbk-arg', 'preparent', 'struct iatt *'),
('cbk-arg', 'postparent', 'struct iatt *'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
+ ('link', 'loc.inode', '&iatt'),
)
ops['link'] = (
- ('fop-arg', 'oldloc', 'loc_t *'),
- ('fop-arg', 'newloc', 'loc_t *'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'oldloc', 'loc_t *', 'loc'),
+ ('fop-arg', 'newloc', 'loc_t *', 'loc2'),
+ ('extra', 'iatt', 'struct iatt', '&iatt'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'inode', 'inode_t *'),
('cbk-arg', 'buf', 'struct iatt *'),
('cbk-arg', 'preparent', 'struct iatt *'),
('cbk-arg', 'postparent', 'struct iatt *'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
)
ops['rename'] = (
- ('fop-arg', 'oldloc', 'loc_t *'),
- ('fop-arg', 'newloc', 'loc_t *'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'oldloc', 'loc_t *', 'loc'),
+ ('fop-arg', 'newloc', 'loc_t *', 'loc2'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'buf', 'struct iatt *'),
('cbk-arg', 'preoldparent', 'struct iatt *'),
('cbk-arg', 'postoldparent','struct iatt *'),
('cbk-arg', 'prenewparent', 'struct iatt *'),
('cbk-arg', 'postnewparent','struct iatt *'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
)
ops['symlink'] = (
- ('fop-arg', 'linkpath', 'const char *'),
- ('fop-arg', 'loc', 'loc_t *'),
- ('fop-arg', 'umask', 'mode_t'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'linkpath', 'const char *', 'linkname'),
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'umask', 'mode_t', 'mode', 'nosync'),
+ ('extra', 'iatt', 'struct iatt', '&iatt'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'inode', 'inode_t *'),
('cbk-arg', 'buf', 'struct iatt *'),
('cbk-arg', 'preparent', 'struct iatt *'),
('cbk-arg', 'postparent', 'struct iatt *'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
)
ops['rmdir'] = (
- ('fop-arg', 'loc', 'loc_t *'),
- ('fop-arg', 'flags', 'int32_t'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'flags', 'int32_t', 'flags'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'preparent', 'struct iatt *'),
('cbk-arg', 'postparent', 'struct iatt *'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
)
ops['unlink'] = (
- ('fop-arg', 'loc', 'loc_t *'),
- ('fop-arg', 'flags', 'int32_t'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'flags', 'int32_t', 'flags', 'nosync'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'preparent', 'struct iatt *'),
('cbk-arg', 'postparent', 'struct iatt *'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
)
ops['mkdir'] = (
- ('fop-arg', 'loc', 'loc_t *'),
- ('fop-arg', 'mode', 'mode_t'),
- ('fop-arg', 'umask', 'mode_t'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'mode', 'mode_t', 'mode'),
+ ('fop-arg', 'umask', 'mode_t', 'umask', 'nosync'),
+ ('extra', 'iatt', 'struct iatt', '&iatt'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'inode', 'inode_t *'),
('cbk-arg', 'buf', 'struct iatt *'),
('cbk-arg', 'preparent', 'struct iatt *'),
('cbk-arg', 'postparent', 'struct iatt *'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
+ ('link', 'loc.inode', '&iatt'),
)
ops['mknod'] = (
- ('fop-arg', 'loc', 'loc_t *'),
- ('fop-arg', 'mode', 'mode_t'),
- ('fop-arg', 'rdev', 'dev_t'),
- ('fop-arg', 'umask', 'mode_t'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'mode', 'mode_t', 'mode'),
+ ('fop-arg', 'rdev', 'dev_t', 'rdev'),
+ ('fop-arg', 'umask', 'mode_t', 'umask', 'nosync'),
+ ('extra', 'iatt', 'struct iatt', '&iatt'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'inode', 'inode_t *'),
('cbk-arg', 'buf', 'struct iatt *'),
('cbk-arg', 'preparent', 'struct iatt *'),
('cbk-arg', 'postparent', 'struct iatt *'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
)
ops['readlink'] = (
@@ -217,12 +294,13 @@ ops['access'] = (
)
ops['ftruncate'] = (
- ('fop-arg', 'fd', 'fd_t *'),
- ('fop-arg', 'offset', 'off_t'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'offset', 'off_t', 'offset'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'prebuf', 'struct iatt *'),
('cbk-arg', 'postbuf', 'struct iatt *'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
)
ops['getxattr'] = (
@@ -234,35 +312,39 @@ ops['getxattr'] = (
)
ops['xattrop'] = (
- ('fop-arg', 'loc', 'loc_t *'),
- ('fop-arg', 'flags', 'gf_xattrop_flags_t'),
- ('fop-arg', 'dict', 'dict_t *'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'flags', 'gf_xattrop_flags_t', 'optype'),
+ ('fop-arg', 'dict', 'dict_t *', 'xattr'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'dict', 'dict_t *'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'inode-op'),
)
ops['fxattrop'] = (
- ('fop-arg', 'fd', 'fd_t *'),
- ('fop-arg', 'flags', 'gf_xattrop_flags_t'),
- ('fop-arg', 'dict', 'dict_t *'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'flags', 'gf_xattrop_flags_t', 'optype'),
+ ('fop-arg', 'dict', 'dict_t *', 'xattr'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'dict', 'dict_t *'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
)
ops['removexattr'] = (
- ('fop-arg', 'loc', 'loc_t *'),
- ('fop-arg', 'name', 'const char *'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'name', 'const char *', 'name'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'inode-op'),
)
ops['fremovexattr'] = (
- ('fop-arg', 'fd', 'fd_t *'),
- ('fop-arg', 'name', 'const char *'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'name', 'const char *', 'name'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
)
ops['lk'] = (
@@ -341,22 +423,26 @@ ops['readdirp'] = (
)
ops['setattr'] = (
- ('fop-arg', 'loc', 'loc_t *'),
- ('fop-arg', 'stbuf', 'struct iatt *'),
- ('fop-arg', 'valid', 'int32_t'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'stbuf', 'struct iatt *', 'stat'),
+ ('fop-arg', 'valid', 'int32_t', 'valid'),
+ ('extra', 'preop', 'struct iatt', '&preop'),
+ ('extra', 'postop', 'struct iatt', '&postop'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'statpre', 'struct iatt *'),
('cbk-arg', 'statpost', 'struct iatt *'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'inode-op'),
)
ops['truncate'] = (
- ('fop-arg', 'loc', 'loc_t *'),
- ('fop-arg', 'offset', 'off_t'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'offset', 'off_t', 'offset'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'prebuf', 'struct iatt *'),
('cbk-arg', 'postbuf', 'struct iatt *'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'inode-op'),
)
ops['stat'] = (
@@ -378,45 +464,51 @@ ops['lookup'] = (
)
ops['fsetattr'] = (
- ('fop-arg', 'fd', 'fd_t *'),
- ('fop-arg', 'stbuf', 'struct iatt *'),
- ('fop-arg', 'valid', 'int32_t'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'stbuf', 'struct iatt *', 'stat'),
+ ('fop-arg', 'valid', 'int32_t', 'valid'),
+ ('extra', 'preop', 'struct iatt', '&preop'),
+ ('extra', 'postop', 'struct iatt', '&postop'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'statpre', 'struct iatt *'),
('cbk-arg', 'statpost', 'struct iatt *'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
)
ops['fallocate'] = (
- ('fop-arg', 'fd', 'fd_t *'),
- ('fop-arg', 'keep_size', 'int32_t'),
- ('fop-arg', 'offset', 'off_t'),
- ('fop-arg', 'len', 'size_t'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'keep_size', 'int32_t', 'mode'),
+ ('fop-arg', 'offset', 'off_t', 'offset'),
+ ('fop-arg', 'len', 'size_t', 'size'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'pre', 'struct iatt *'),
('cbk-arg', 'post', 'struct iatt *'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
)
ops['discard'] = (
- ('fop-arg', 'fd', 'fd_t *'),
- ('fop-arg', 'offset', 'off_t'),
- ('fop-arg', 'len', 'size_t'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'offset', 'off_t', 'offset'),
+ ('fop-arg', 'len', 'size_t', 'size'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'pre', 'struct iatt *'),
('cbk-arg', 'post', 'struct iatt *'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
)
ops['zerofill'] = (
- ('fop-arg', 'fd', 'fd_t *'),
- ('fop-arg', 'offset', 'off_t'),
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'offset', 'off_t', 'offset'),
# As e.g. fallocate/discard (above) "len" should really be a size_t.
- ('fop-arg', 'len', 'off_t'),
- ('fop-arg', 'xdata', 'dict_t *'),
+ ('fop-arg', 'len', 'off_t', 'size'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
('cbk-arg', 'pre', 'struct iatt *'),
('cbk-arg', 'post', 'struct iatt *'),
('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
)
ops['ipc'] = (
@@ -460,6 +552,11 @@ def get_subs (names, types):
def generate (tmpl, name, subs):
text = tmpl.replace("@NAME@",name)
+ if name == "writev":
+ # More spurious inconsistency.
+ text = text.replace("@UPNAME@","WRITE")
+ else:
+ text = text.replace("@UPNAME@",name.upper())
for old, new in subs[name].iteritems():
text = text.replace(old,new)
# TBD: reindent/reformat the result for maximum readability.
diff --git a/libglusterfs/src/iobuf.c b/libglusterfs/src/iobuf.c
index a4d36691cd0..d1eb0acaf5e 100644
--- a/libglusterfs/src/iobuf.c
+++ b/libglusterfs/src/iobuf.c
@@ -1014,7 +1014,7 @@ int
iobref_merge (struct iobref *to, struct iobref *from)
{
int i = 0;
- int ret = -1;
+ int ret = 0;
struct iobuf *iobuf = NULL;
GF_VALIDATE_OR_GOTO ("iobuf", to, out);
diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c
index eb0c1cf983a..d412b4d656d 100644
--- a/libglusterfs/src/syscall.c
+++ b/libglusterfs/src/syscall.c
@@ -588,7 +588,7 @@ sys_fallocate(int fd, int mode, off_t offset, off_t len)
return posix_fallocate(fd, offset, len);
#endif
-#if defined(F_ALLOCATECONFIG) && defined(GF_DARWIN_HOST_OS)
+#if defined(F_ALLOCATECONTIG) && defined(GF_DARWIN_HOST_OS)
/* C conversion from C++ implementation for OSX by Mozilla Foundation */
if (mode) {
/* keep size not supported */
diff --git a/tests/features/fdl-overflow.t b/tests/features/fdl-overflow.t
new file mode 100644
index 00000000000..d7633a7ca7d
--- /dev/null
+++ b/tests/features/fdl-overflow.t
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+log_base=$($CLI --print-logdir)
+log_id=${B0}/${V0}-0
+log_id=${log_id:1} # Remove initial slash
+log_id=${log_id//\//-} # Replace remaining slashes with dashes
+
+# Verify that journal rollover happened as expected.  Returns 0 only when
+# exactly two meta journals and exactly two data journals exist under
+# ${log_base}/${log_id}-* and the data journals' combined 'du -sm' size is
+# either 20MB or 20MB plus one extra megabyte per file; returns 1 otherwise.
+_check_sizes () {
+ local n=0
+ local sz
+ local total_sz=0
+
+ # We don't care about the sizes of the meta files. That would be
+ # embedding too much of the implementation into the test.
+ n=$(ls ${log_base}/${log_id}-meta-*.jnl | wc -l)
+ [ $n = 2 ] || return 1
+
+ # We *do* care about the sizes of the data files, which should exactly
+ # reflect the amount of data written via dd.
+ n=0
+ while read sz name; do
+ G_LOG "found journal ${name} size ${sz}MB"
+ n=$((n+1))
+ total_sz=$((total_sz+sz))
+ done < <(du -sm ${log_base}/${log_id}-data-*.jnl)
+ [ $n = 2 ] || return 1
+ # On our CentOS and NetBSD regression-test systems, but not on my Fedora
+ # development system, each file ends up being slightly larger than its
+ # data size because of metadata, and 'du' rounds that up to a full extra
+ # megabyte. We'll allow either result, because what we're really
+ # looking for is a complete failure to roll over from one file to
+ # another at the appropriate size.
+ [ $total_sz = 20 -o $total_sz = $((n+20)) ] || return 1
+
+ return 0
+}
+
+# Run _check_sizes with shell tracing (set -x) enabled for the duration of
+# the call, then return _check_sizes' exit status to the caller.
+check_sizes () {
+	set -x
+	_check_sizes
+	# Capture the status now; the following 'set +x' overwrites $?.
+	ret=$?
+	set +x
+	# Must expand the variable: 'return ret' hands the literal word "ret"
+	# to the builtin instead of the saved status.
+	return $ret
+}
+
+if [ x"$OSTYPE" = x"NetBSD" ]; then
+ CREAT_OFLAG="creat,"
+else
+ CREAT_OFLAG=""
+fi
+
+TEST rm -f ${log_base}/${log_id}-*.log
+TEST glusterd
+TEST pidof glusterd
+
+# Get a simple volume set up and mounted with FDL active.
+TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0
+TEST $CLI volume set $V0 changelog.changelog off
+TEST $CLI volume set $V0 features.fdl on
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+
+# Generate some I/O and unmount/stop so we can see log sizes.
+TEST dd if=/dev/zero of=$M0/twentyMB bs=1048576 count=20 \
+ oflag=${CREAT_OFLAG}sync
+TEST umount $M0
+TEST $CLI volume stop $V0
+
+TEST _check_sizes
+
+cleanup
diff --git a/tests/features/fdl.t b/tests/features/fdl.t
new file mode 100644
index 00000000000..34d6d78228a
--- /dev/null
+++ b/tests/features/fdl.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+log_base=$($CLI --print-logdir)
+log_id=${B0}/${V0}-0
+log_id=${log_id:1} # Remove initial slash
+log_id=${log_id//\//-} # Replace remaining slashes with dashes
+FDL_META_FILE=${log_base}/${log_id}-meta-1.jnl
+FDL_DATA_FILE=${log_base}/${log_id}-data-1.jnl
+
+# Succeed when the gf_logdump output for $FDL_META_FILE/$FDL_DATA_FILE
+# contains at least $2 lines matching the grep pattern $1.
+check_logfile() {
+ [ $(gf_logdump $FDL_META_FILE $FDL_DATA_FILE | grep $1 | wc -l) -ge $2 ]
+}
+
+if [ x"$OSTYPE" = x"NetBSD" ]; then
+ CREAT_OFLAG="creat,"
+else
+ CREAT_OFLAG=""
+fi
+
+TEST rm -f $FDL_META_FILE $FDL_DATA_FILE
+TEST glusterd
+TEST pidof glusterd
+
+# Get a simple volume set up and mounted with FDL active.
+TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0
+TEST $CLI volume set $V0 changelog.changelog off
+TEST $CLI volume set $V0 features.fdl on
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+
+# Generate some I/O and unmount.
+TEST mkdir -p $M0/abc/def
+TEST dd if=/dev/zero of=$M0/abc/def/ghi bs=128 count=2 \
+ oflag=${CREAT_OFLAG}sync
+TEST chmod 314 $M0/abc/def/ghi
+TEST rm -rf $M0/abc
+TEST umount $M0
+
+# Check that gf_logdump works, and shows the ops we just issued. There will be
+# more SETATTR ops than the one corresponding to our chmod, because some are
+# issued internally. We have to guess a bit about where the log will be.
+TEST check_logfile GF_FOP_MKDIR 2
+TEST check_logfile GF_FOP_CREATE 1
+TEST check_logfile GF_FOP_WRITE 2
+TEST check_logfile GF_FOP_SETATTR 1
+TEST check_logfile GF_FOP_UNLINK 1
+TEST check_logfile GF_FOP_RMDIR 2
+
+cleanup
diff --git a/tests/features/recon.t b/tests/features/recon.t
new file mode 100644
index 00000000000..7dda2a680e8
--- /dev/null
+++ b/tests/features/recon.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+log_base=$($CLI --print-logdir)
+log_id=${B0}/${V0}-0
+log_id=${log_id:1} # Remove initial slash
+log_id=${log_id//\//-} # Replace remaining slashes with dashes
+FDL_META_FILE=${log_base}/${log_id}-meta-1.jnl
+FDL_DATA_FILE=${log_base}/${log_id}-data-1.jnl
+
+tmpdir=$(mktemp -d -t ${0##*/}.XXXXXX)
+trap "rm -rf $tmpdir" EXIT
+
+# Write the fixed line "peekaboo" to the file named by $1, replacing any
+# previous contents.
+write_file () {
+ echo "peekaboo" > $1
+}
+
+TEST rm -f $FDL_META_FILE $FDL_DATA_FILE
+TEST glusterd
+TEST pidof glusterd
+
+# Get a simple volume set up and mounted with FDL active.
+TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0
+TEST $CLI volume set $V0 features.fdl on
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+
+# Generate some I/O and then copy off the journal files for later.
+TEST mkdir -p $M0/abc/def
+TEST write_file $M0/abc/def/ghi
+#EST chmod 314 $M0/abc/def/ghi
+cp ${FDL_META_FILE} ${FDL_DATA_FILE} ${tmpdir}
+
+# Get back to an empty state and unmount.
+TEST rm -rf $M0/abc
+TEST umount $M0
+
+# Make sure we really are in an empty state. Otherwise the tests below could
+# pass just because we never cleaned up in the first place.
+TEST [ ! -d ${B0}/${V0}-0/abc ]
+
+# Create a stub volfile.
+vol_file=${GLUSTERD_WORKDIR}/vols/${V0}/${V0}.${H0}.${log_id}.vol
+vol_id_line=$(grep volume-id ${vol_file})
+cat > ${tmpdir}/recon.vol << EOF
+volume recon-posix
+ type storage/posix
+ option directory ${B0}/${V0}-0
+${vol_id_line}
+end-volume
+EOF
+
+TEST gf_recon ${tmpdir}/recon.vol ${tmpdir}/$(basename ${FDL_META_FILE}) \
+ ${tmpdir}/$(basename ${FDL_DATA_FILE})
+
+TEST [ -d ${B0}/${V0}-0/abc/def ]
+EXPECT "peekaboo" cat ${B0}/${V0}-0/abc/def/ghi
+# TBD: test permissions, xattrs
+
+cleanup
diff --git a/tests/include.rc b/tests/include.rc
index 139bc03ac8c..21a69465797 100644
--- a/tests/include.rc
+++ b/tests/include.rc
@@ -136,7 +136,7 @@ function G_LOG()
return
fi
local g_log_string;
- g_log_string="++++++++++ G_LOG:$0: TEST: $1 $@ ++++++++++"
+ g_log_string="++++++++++ G_LOG:$0: TEST: $@ ++++++++++"
g_log_string="`date -u +["%F %T.%6N"]`:$g_log_string"
local g_log_filename
for g_log_filename in `find $g_log_logdir/ -type f -name \*.log`;
@@ -541,10 +541,10 @@ function cleanup()
fi >&2
# tar logs at the start and end of every test
- if [ -n $LOGDIR ]
+ if [ -n "$LOGDIR" -a -z "$STOP_WASTING_SPACE" ]
then
tarname=$(basename $0 .t)
- tar -rvf ${LOGDIR}/${tarname}.tar ${LOGDIR}/* \
+ tar -rf ${LOGDIR}/${tarname}.tar ${LOGDIR}/* \
--exclude="*.tar" \
&& \
find $LOGDIR/* -maxdepth 0 -name '*.tar' -prune \
diff --git a/xlators/experimental/Makefile.am b/xlators/experimental/Makefile.am
index 06f04a193c8..a31512203f6 100644
--- a/xlators/experimental/Makefile.am
+++ b/xlators/experimental/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = nsr-client nsr-server
+SUBDIRS = nsr-client nsr-server fdl
CLEANFILES =
diff --git a/xlators/experimental/fdl/Makefile.am b/xlators/experimental/fdl/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/experimental/fdl/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/experimental/fdl/src/Makefile.am b/xlators/experimental/fdl/src/Makefile.am
new file mode 100644
index 00000000000..a05fc797b0a
--- /dev/null
+++ b/xlators/experimental/fdl/src/Makefile.am
@@ -0,0 +1,42 @@
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/experimental
+xlator_LTLIBRARIES = fdl.la
+
+noinst_HEADERS = jnl-types.h
+
+# fdl.c is generated from fdl-tmpl.c (see the rule at the bottom), so it is
+# built but never distributed.
+nodist_fdl_la_SOURCES = fdl.c
+fdl_la_LDFLAGS = -module -avoid-version
+fdl_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+sbin_PROGRAMS = gf_logdump gf_recon
+gf_logdump_SOURCES = logdump.c
+nodist_gf_logdump_SOURCES = libfdl.c
+gf_logdump_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\
+	$(top_builddir)/api/src/libgfapi.la
+
+# Eventually recon(ciliation) code will move elsewhere, but for now it's
+# easier to have it next to the similar logdump code.
+gf_recon_SOURCES = recon.c
+nodist_gf_recon_SOURCES = librecon.c
+gf_recon_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la\
+	$(top_builddir)/api/src/libgfapi.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+	-I$(top_srcdir)/api/src -fPIC \
+	-D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \
+	-DDATADIR=\"$(localstatedir)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+noinst_PYTHON = gen_fdl.py gen_dumper.py gen_recon.py
+EXTRA_DIST = fdl-tmpl.c dump-tmpl.c recon-tmpl.c
+
+# Every generated source has to be listed here, including librecon.c;
+# otherwise "make clean" leaves it behind.
+CLEANFILES = $(nodist_fdl_la_SOURCES) $(nodist_gf_logdump_SOURCES) \
+	$(nodist_gf_recon_SOURCES)
+
+fdl.c: fdl-tmpl.c gen_fdl.py
+	$(PYTHON) $(srcdir)/gen_fdl.py $(srcdir)/fdl-tmpl.c > $@
+
+libfdl.c: dump-tmpl.c gen_dumper.py
+	$(PYTHON) $(srcdir)/gen_dumper.py $(srcdir)/dump-tmpl.c > $@
+
+librecon.c: recon-tmpl.c gen_recon.py
+	$(PYTHON) $(srcdir)/gen_recon.py $(srcdir)/recon-tmpl.c > $@
diff --git a/xlators/experimental/fdl/src/dump-tmpl.c b/xlators/experimental/fdl/src/dump-tmpl.c
new file mode 100644
index 00000000000..cac1071a9c1
--- /dev/null
+++ b/xlators/experimental/fdl/src/dump-tmpl.c
@@ -0,0 +1,156 @@
+#pragma fragment PROLOG
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glfs.h"
+#include "iatt.h"
+#include "xlator.h"
+#include "jnl-types.h"
+
+#pragma fragment DICT
+ { /* Print a serialized dict.  Each entry is an int key length, the key
+    * bytes, an int data length and the data bytes; a zero key length
+    * ends the list.  Only key names and data sizes are shown. */
+ int key_len, data_len;
+ char *key_ptr;
+ printf ("@ARGNAME@ = dict {\n");
+ for (;;) {
+ key_len = *((int *)new_meta);
+ new_meta += sizeof(int);
+ if (!key_len) {
+ break;
+ }
+ key_ptr = new_meta;
+ new_meta += key_len;
+ data_len = *((int *)new_meta);
+ new_meta += sizeof(int) + data_len; /* skip the length word and the value itself */
+ printf (" %s = <%d bytes>\n", key_ptr, data_len);
+ }
+ printf ("}\n");
+ }
+
+#pragma fragment DOUBLE
+ printf ("@ARGNAME@ = @FORMAT@\n", *((uint64_t *)new_meta),
+ *((uint64_t *)new_meta)); /* value passed twice: the substituted format has two conversions */
+ new_meta += sizeof(uint64_t);
+
+#pragma fragment GFID
+ printf ("@ARGNAME@ = <gfid %s>\n", uuid_utoa(*((uuid_t *)new_meta)));
+ new_meta += 16; /* step over the 16 gfid bytes just printed */
+
+#pragma fragment INTEGER
+ printf ("@ARGNAME@ = @FORMAT@\n", *((uint32_t *)new_meta),
+ *((uint32_t *)new_meta)); /* value passed twice: the substituted format has two conversions */
+ new_meta += sizeof(uint32_t);
+
+#pragma fragment LOC
+ printf ("@ARGNAME@ = loc {\n");
+ printf (" gfid = %s\n", uuid_utoa(*((uuid_t *)new_meta)));
+ new_meta += 16;
+ printf (" pargfid = %s\n", uuid_utoa(*((uuid_t *)new_meta)));
+ new_meta += 16;
+ if (*(new_meta++)) { /* flag byte: non-zero means a NUL-terminated name follows */
+ printf (" name = %s\n", new_meta);
+ new_meta += (strlen(new_meta) + 1);
+ }
+ printf ("}\n");
+
+#pragma fragment STRING
+ if (*(new_meta++)) { /* flag byte: non-zero means a NUL-terminated string follows */
+ printf ("@ARGNAME@ = %s\n", new_meta);
+ new_meta += (strlen(new_meta) + 1); /* the reader consumes the terminating NUL as well */
+ }
+
+#pragma fragment VECTOR
+ { /* Only the payload length is read from the meta stream; the payload
+    * itself is skipped over in the data stream without being printed. */
+ size_t len = *((size_t *)new_meta);
+ new_meta += sizeof(len);
+ printf ("@ARGNAME@ = <%zu bytes>\n", len);
+ new_data += len;
+ }
+
+#pragma fragment IATT
+ { /* Print the journaled iatt subset: an ia_prot_t followed by six
+    * 32-bit words (uid, gid, atime sec/nsec, mtime sec/nsec). */
+ ia_prot_t *myprot = ((ia_prot_t *)new_meta);
+ printf ("@ARGNAME@ = iatt {\n");
+ printf (" ia_prot = %c%c%c",
+ myprot->suid ? 'S' : '-',
+ myprot->sgid ? 'S' : '-',
+ myprot->sticky ? 'T' : '-');
+ printf ("%c%c%c",
+ myprot->owner.read ? 'r' : '-',
+ myprot->owner.write ? 'w' : '-',
+ myprot->owner.exec ? 'x' : '-');
+ printf ("%c%c%c",
+ myprot->group.read ? 'r' : '-',
+ myprot->group.write ? 'w' : '-',
+ myprot->group.exec ? 'x' : '-');
+ printf ("%c%c%c\n",
+ myprot->other.read ? 'r' : '-',
+ myprot->other.write ? 'w' : '-',
+ myprot->other.exec ? 'x' : '-');
+ new_meta += sizeof(ia_prot_t);
+ uint32_t *myints = (uint32_t *)new_meta;
+ printf (" ia_uid = %u\n", myints[0]);
+ printf (" ia_gid = %u\n", myints[1]);
+ printf (" ia_atime = %u.%09u\n", myints[2], myints[3]);
+ printf (" ia_mtime = %u.%09u\n", myints[4], myints[5]);
+ new_meta += sizeof(*myints) * 6; /* step over all six words read above */
+ }
+
+#pragma fragment FOP
+void /* Per-fop dumper: prints one record's arguments, then stores the
+      * advanced meta/data cursors back through the two out-parameters. */
+fdl_dump_@NAME@ (char **old_meta, char **old_data)
+{
+ char *new_meta = *old_meta;
+ char *new_data = *old_data;
+
+ /* TBD: word size/endianness */
+@FUNCTION_BODY@
+
+ *old_meta = new_meta;
+ *old_data = new_data;
+}
+
+#pragma fragment CASE
+ case GF_FOP_@UPNAME@: /* one such case is generated for every journaled fop */
+ printf ("=== GF_FOP_@UPNAME@\n");
+ fdl_dump_@NAME@ (&new_meta, &new_data);
+ break;
+
+#pragma fragment EPILOG
+int /* Dump one journal event.  Returns 1 when the event's fop type was
+     * recognized and 0 otherwise; the cursors are stored back either way. */
+fdl_dump (char **old_meta, char **old_data)
+{
+ char *new_meta = *old_meta;
+ char *new_data = *old_data;
+ static glfs_t *fs = NULL;
+ int recognized = 1;
+ event_header_t *eh;
+
+ /*
+ * We don't really call anything else in GFAPI, but this is the most
+ * convenient way to satisfy all of the spurious dependencies on how it
+ * or glusterfsd initialize (e.g. setting up THIS).
+ */
+ if (!fs) {
+ fs = glfs_new ("dummy");
+ }
+
+ eh = (event_header_t *)new_meta;
+ new_meta += sizeof (*eh); /* the fop arguments start right after the header */
+
+ /* TBD: check event_type instead of assuming NEW_REQUEST */
+
+ switch (eh->fop_type) {
+@SWITCH_BODY@
+
+ default:
+ printf ("unknown fop %u\n", eh->fop_type);
+ recognized = 0;
+ }
+
+ *old_meta = new_meta;
+ *old_data = new_data;
+ return recognized;
+}
diff --git a/xlators/experimental/fdl/src/fdl-tmpl.c b/xlators/experimental/fdl/src/fdl-tmpl.c
new file mode 100644
index 00000000000..8fcc6a8d6ff
--- /dev/null
+++ b/xlators/experimental/fdl/src/fdl-tmpl.c
@@ -0,0 +1,506 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include "call-stub.h"
+#include "iatt.h"
+#include "defaults.h"
+#include "syscall.h"
+#include "xlator.h"
+#include "jnl-types.h"
+
+/* TBD: make tunable */
+#define META_FILE_SIZE (1 << 20)
+#define DATA_FILE_SIZE (1 << 24)
+
+enum gf_fdl {
+ gf_fdl_mt_fdl_private_t = gf_common_mt_end + 1, /* allocation tag used for fdl_private_t */
+ gf_fdl_mt_end /* upper bound handed (plus one) to xlator_mem_acct_init */
+};
+
+typedef struct {
+ char *type; /* "meta" or "data"; becomes part of the journal file name */
+ off_t size; /* length the file is extended to and mapped at */
+ char *path; /* built with gf_asprintf on open, freed on close */
+ int fd; /* -1 while no file is open */
+ void * ptr; /* base of the mapping, NULL while unmapped */
+ off_t max_offset; /* bytes logged so far; the file is truncated to this on close */
+} log_obj_t;
+
+typedef struct {
+ struct list_head reqs; /* stubs waiting to be journaled, oldest first */
+ pthread_mutex_t req_lock; /* held while reqs is modified */
+ pthread_cond_t req_cond; /* signalled on enqueue, term change and stop */
+ char *log_dir; /* value of the "log-path" option */
+ pthread_t worker; /* thread running fdl_worker */
+ gf_boolean_t should_stop; /* set by fdl_fini to end the worker */
+ gf_boolean_t change_term; /* set by the CHANGE_TERM ipc, cleared by the worker */
+ log_obj_t meta_log;
+ log_obj_t data_log;
+ int term; /* current term number */
+ int first_term; /* term in effect when the worker started */
+} fdl_private_t;
+
+/*
+ * Hand a fop stub to the journaling worker.
+ *
+ * The stub is appended to the tail of the request list under req_lock, and
+ * the worker is woken once the lock has been dropped.
+ */
+void
+fdl_enqueue (xlator_t *this, call_stub_t *stub)
+{
+        fdl_private_t   *state = this->private;
+        pthread_mutex_t *guard = &state->req_lock;
+
+        pthread_mutex_lock (guard);
+        list_add_tail (&stub->list, &state->reqs);
+        pthread_mutex_unlock (guard);
+
+        pthread_cond_signal (&state->req_cond);
+}
+
+#pragma generate
+
+/*
+ * Create, size and map the journal file for one log object and term.
+ *
+ * The file is named <log_dir>/<ident>-<type>-<term>.jnl, where <ident> is the
+ * process's log_ident when one is set and "fubar" otherwise.  It is created
+ * (or truncated), extended to obj->size and mapped shared and writable.
+ *
+ * On success obj->path, obj->fd and obj->ptr are filled in, obj->max_offset
+ * is reset to zero and the mapping address is returned.  On any failure the
+ * descriptor and path are released and NULL is returned.  The result is never
+ * MAP_FAILED, because callers only compare it against NULL.
+ */
+char *
+fdl_open_term_log (xlator_t *this, log_obj_t *obj, int term)
+{
+        fdl_private_t   *priv = this->private;
+        int              ret;
+        char            *ptr  = NULL;
+
+        /*
+         * Use .jnl instead of .log so that we don't get test info (mistakenly)
+         * appended to our journal files.
+         */
+        if (this->ctx->cmd_args.log_ident) {
+                ret = gf_asprintf (&obj->path, "%s/%s-%s-%d.jnl",
+                                   priv->log_dir, this->ctx->cmd_args.log_ident,
+                                   obj->type, term);
+        }
+        else {
+                ret = gf_asprintf (&obj->path, "%s/fubar-%s-%d.jnl",
+                                   priv->log_dir, obj->type, term);
+        }
+        if ((ret <= 0) || !obj->path) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to construct log-file path");
+                goto err;
+        }
+
+        /* off_t is not guaranteed to be long, so convert for %ld. */
+        gf_log (this->name, GF_LOG_INFO, "opening %s (size %ld)",
+                obj->path, (long) obj->size);
+
+        obj->fd = open (obj->path, O_RDWR|O_CREAT|O_TRUNC, 0666);
+        if (obj->fd < 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to open log file (%s)", strerror(errno));
+                goto err;
+        }
+
+#if !defined(GF_BSD_HOST_OS)
+        /*
+         * Per the original author's notes, NetBSD's fallocate and
+         * posix_fallocate report success without extending the file, mmap
+         * then succeeds anyway, and the first access to the mapping faults.
+         * That platform therefore always takes the slower lseek/write path
+         * below instead of calling fallocate at all.
+         */
+        if (sys_fallocate(obj->fd,0,0,obj->size) < 0)
+#endif
+        {
+                gf_log (this->name, GF_LOG_WARNING,
+                        "failed to fallocate space for log file");
+                /*
+                 * Have to do this the ugly page-faulty way.  If even this
+                 * fails the file stays empty and touching the mapping would
+                 * fault, so treat it as an open failure.
+                 */
+                if ((sys_lseek (obj->fd, obj->size-1, SEEK_SET) < 0) ||
+                    (sys_write (obj->fd, "", 1) != 1)) {
+                        gf_log (this->name, GF_LOG_ERROR,
+                                "failed to extend log file (%s)",
+                                strerror(errno));
+                        goto err;
+                }
+        }
+
+        ptr = mmap (NULL, obj->size, PROT_WRITE, MAP_SHARED, obj->fd, 0);
+        if (ptr == MAP_FAILED) {
+                gf_log (this->name, GF_LOG_ERROR, "failed to mmap log (%s)",
+                        strerror(errno));
+                goto err;
+        }
+
+        obj->ptr = ptr;
+        obj->max_offset = 0;
+        return ptr;
+
+err:
+        if (obj->fd >= 0) {
+                sys_close (obj->fd);
+                obj->fd = (-1);
+        }
+        if (obj->path) {
+                GF_FREE (obj->path);
+                obj->path = NULL;
+        }
+        /* ptr may hold MAP_FAILED here, which is not NULL. */
+        return NULL;
+}
+
+/*
+ * Release everything fdl_open_term_log set up for one log object: unmap the
+ * file, truncate it to the number of bytes actually logged, close it and free
+ * the path.  Each step is skipped when its resource is not held, so calling
+ * this on an already-closed object does nothing.
+ */
+void
+fdl_close_term_log (xlator_t *this, log_obj_t *obj)
+{
+        fdl_private_t   *priv = this->private;
+        int              rc;
+
+        if (obj->ptr != NULL) {
+                (void) munmap (obj->ptr, obj->size);
+                obj->ptr = NULL;
+        }
+
+        if (obj->fd >= 0) {
+                gf_log (this->name, GF_LOG_INFO,
+                        "truncating term %d %s journal to %ld",
+                        priv->term, obj->type, obj->max_offset);
+                rc = sys_ftruncate (obj->fd, obj->max_offset);
+                if (rc < 0) {
+                        gf_log (this->name, GF_LOG_WARNING,
+                                "failed to truncate journal (%s)",
+                                strerror(errno));
+                }
+                sys_close (obj->fd);
+                obj->fd = (-1);
+        }
+
+        if (obj->path != NULL) {
+                GF_FREE (obj->path);
+                obj->path = NULL;
+        }
+}
+
+/*
+ * Switch to the next term: close both journals of the current term, bump the
+ * term number and open a fresh meta/data pair.  The new mapping addresses are
+ * stored through meta_ptr and data_ptr.  Returns _gf_false as soon as either
+ * open fails (the data journal is not attempted if the meta journal failed).
+ */
+gf_boolean_t
+fdl_change_term (xlator_t *this, char **meta_ptr, char **data_ptr)
+{
+        fdl_private_t   *priv     = this->private;
+        log_obj_t       *meta_log = &priv->meta_log;
+        log_obj_t       *data_log = &priv->data_log;
+
+        fdl_close_term_log (this, meta_log);
+        fdl_close_term_log (this, data_log);
+
+        priv->term += 1;
+
+        *meta_ptr = fdl_open_term_log (this, meta_log, priv->term);
+        if (*meta_ptr == NULL) {
+                return _gf_false;
+        }
+
+        *data_ptr = fdl_open_term_log (this, data_log, priv->term);
+        return (*data_ptr != NULL) ? _gf_true : _gf_false;
+}
+
+/*
+ * Journaling thread.  Opens the first term's meta and data journals, then
+ * loops forever: take the oldest queued stub, serialize it into the mapped
+ * journals, msync the touched range and resume the stub.
+ *
+ * The stop and change-term flags are examined every time the request lock is
+ * taken, both before sleeping and after every wakeup.  A flag that was set
+ * while the thread was busy with a request is therefore still noticed even
+ * though the accompanying signal found nobody waiting.
+ *
+ * Always returns NULL.  The thread ends when asked to stop or when a journal
+ * cannot be opened; both journals are closed on the way out.
+ */
+void *
+fdl_worker (void *arg)
+{
+        xlator_t        *this           = arg;
+        fdl_private_t   *priv           = this->private;
+        call_stub_t     *stub;
+        char            *meta_ptr       = NULL;
+        off_t           *meta_offset    = &priv->meta_log.max_offset;
+        char            *data_ptr       = NULL;
+        off_t           *data_offset    = &priv->data_log.max_offset;
+        unsigned long    base_as_ul;
+        void            *msync_ptr;
+        size_t           msync_len;
+        gf_boolean_t     recycle;
+
+        priv->meta_log.type = "meta";
+        priv->meta_log.size = META_FILE_SIZE;
+        priv->meta_log.path = NULL;
+        priv->meta_log.fd = (-1);
+        priv->meta_log.ptr = NULL;
+
+        priv->data_log.type = "data";
+        priv->data_log.size = DATA_FILE_SIZE;
+        priv->data_log.path = NULL;
+        priv->data_log.fd = (-1);
+        priv->data_log.ptr = NULL;
+
+        /* TBD: initial term should come from persistent storage (e.g. etcd) */
+        priv->first_term = ++(priv->term);
+        meta_ptr = fdl_open_term_log (this, &priv->meta_log, priv->term);
+        if (!meta_ptr) {
+                goto err_unlocked;
+        }
+        data_ptr = fdl_open_term_log (this, &priv->data_log, priv->term);
+        if (!data_ptr) {
+                fdl_close_term_log (this, &priv->meta_log);
+                goto err_unlocked;
+        }
+
+        for (;;) {
+                pthread_mutex_lock (&priv->req_lock);
+                for (;;) {
+                        /* Flags first, so a missed signal cannot hide them. */
+                        if (priv->should_stop) {
+                                goto err_locked;
+                        }
+                        if (priv->change_term) {
+                                if (!fdl_change_term(this, &meta_ptr,
+                                                     &data_ptr)) {
+                                        goto err_locked;
+                                }
+                                priv->change_term = _gf_false;
+                        }
+                        if (!list_empty(&priv->reqs)) {
+                                break;
+                        }
+                        pthread_cond_wait (&priv->req_cond, &priv->req_lock);
+                }
+                stub = list_entry (priv->reqs.next, call_stub_t, list);
+                list_del_init (&stub->list);
+                pthread_mutex_unlock (&priv->req_lock);
+                /*
+                 * TBD: batch requests
+                 *
+                 * What we should do here is gather up *all* of the requests
+                 * that have accumulated since we were last at this point,
+                 * blast them all out in one big writev, and then dispatch them
+                 * all before coming back for more.  That maximizes throughput,
+                 * at some cost to latency (due to queuing effects at the log
+                 * stage).  Note that we're likely to be above io-threads, so
+                 * the dispatch itself will be parallelized (at further cost to
+                 * latency).  For now, we just do the simplest thing and handle
+                 * one request all the way through before fetching the next.
+                 *
+                 * So, why mmap/msync instead of writev/fdatasync?  Because it's
+                 * faster.  Much faster.  So much faster that I half-suspect
+                 * cheating, but it's more convenient for now than having to
+                 * ensure that everything's page-aligned for O_DIRECT (the only
+                 * alternative that still might avoid ridiculous levels of
+                 * local-FS overhead).
+                 *
+                 * TBD: check that msync really does get our data to disk.
+                 */
+                gf_log (this->name, GF_LOG_DEBUG,
+                        "logging %u+%u bytes for op %d",
+                        stub->jnl_meta_len, stub->jnl_data_len, stub->fop);
+                /*
+                 * Start a new term when this record would not fit in what is
+                 * left of either journal.
+                 * NOTE(review): a record larger than a whole journal file is
+                 * not detected here and would run past the mapping.
+                 */
+                recycle = _gf_false;
+                if ((*meta_offset + stub->jnl_meta_len) > priv->meta_log.size) {
+                        recycle = _gf_true;
+                }
+                if ((*data_offset + stub->jnl_data_len) > priv->data_log.size) {
+                        recycle = _gf_true;
+                }
+                if (recycle && !fdl_change_term(this,&meta_ptr,&data_ptr)) {
+                        goto err_unlocked;
+                }
+                meta_ptr = priv->meta_log.ptr;
+                data_ptr = priv->data_log.ptr;
+                gf_log (this->name, GF_LOG_DEBUG, "serializing to %p/%p",
+                        meta_ptr + *meta_offset, data_ptr + *data_offset);
+                stub->serialize (stub, meta_ptr + *meta_offset,
+                                 data_ptr + *data_offset);
+                if (stub->jnl_meta_len > 0) {
+                        /*
+                         * Round the start address down to a 4KiB boundary and
+                         * lengthen the range by the same amount, so that
+                         * msync is given an aligned address.
+                         */
+                        base_as_ul = (unsigned long) (meta_ptr + *meta_offset);
+                        msync_ptr = (void *) (base_as_ul & ~0x0fff);
+                        msync_len = (size_t) (base_as_ul & 0x0fff);
+                        if (msync (msync_ptr, msync_len+stub->jnl_meta_len,
+                                   MS_SYNC) < 0) {
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "failed to log request meta (%s)",
+                                        strerror(errno));
+                        }
+                        *meta_offset += stub->jnl_meta_len;
+                }
+                if (stub->jnl_data_len > 0) {
+                        base_as_ul = (unsigned long) (data_ptr + *data_offset);
+                        msync_ptr = (void *) (base_as_ul & ~0x0fff);
+                        msync_len = (size_t) (base_as_ul & 0x0fff);
+                        if (msync (msync_ptr, msync_len+stub->jnl_data_len,
+                                   MS_SYNC) < 0) {
+                                gf_log (this->name, GF_LOG_WARNING,
+                                        "failed to log request data (%s)",
+                                        strerror(errno));
+                        }
+                        *data_offset += stub->jnl_data_len;
+                }
+                call_resume (stub);
+        }
+
+err_locked:
+        pthread_mutex_unlock (&priv->req_lock);
+err_unlocked:
+        fdl_close_term_log (this, &priv->meta_log);
+        fdl_close_term_log (this, &priv->data_log);
+        return NULL;
+}
+
+/*
+ * ipc fop handler.
+ *
+ * FDL_IPC_CHANGE_TERM asks the worker to start a new term and unwinds with
+ * success straight away.  FDL_IPC_GET_TERMS unwinds with a dict holding the
+ * "first" and "last" term numbers, or with -1/ENOMEM if the dict cannot be
+ * created and -1/EIO if a value cannot be stored.  Any other op is passed
+ * down to the first child.
+ */
+int32_t
+fdl_ipc (call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
+{
+        fdl_private_t   *priv   = this->private;
+        dict_t          *tdict  = NULL;
+        int32_t          gt_err = EIO;
+
+        switch (op) {
+
+        case FDL_IPC_CHANGE_TERM:
+                gf_log (this->name, GF_LOG_INFO, "got CHANGE_TERM op");
+                /*
+                 * The worker reads this flag while holding req_lock, so set
+                 * it under the same lock before waking the worker.
+                 */
+                pthread_mutex_lock (&priv->req_lock);
+                priv->change_term = _gf_true;
+                pthread_mutex_unlock (&priv->req_lock);
+                pthread_cond_signal (&priv->req_cond);
+                STACK_UNWIND_STRICT (ipc, frame, 0, 0, NULL);
+                break;
+
+        case FDL_IPC_GET_TERMS:
+                gf_log (this->name, GF_LOG_INFO, "got GET_TERMS op");
+                tdict = dict_new ();
+                if (!tdict) {
+                        gt_err = ENOMEM;
+                        goto gt_done;
+                }
+                if (dict_set_int32(tdict,"first",priv->first_term) != 0) {
+                        goto gt_done;
+                }
+                if (dict_set_int32(tdict,"last",priv->term) != 0) {
+                        goto gt_done;
+                }
+                gt_err = 0;
+        gt_done:
+                if (gt_err) {
+                        STACK_UNWIND_STRICT (ipc, frame, -1, gt_err, NULL);
+                } else {
+                        STACK_UNWIND_STRICT (ipc, frame, 0, 0, tdict);
+                }
+                if (tdict) {
+                        dict_unref (tdict);
+                }
+                break;
+
+        default:
+                STACK_WIND_TAIL (frame,
+                                 FIRST_CHILD(this),
+                                 FIRST_CHILD(this)->fops->ipc,
+                                 op, xdata);
+        }
+
+        return 0;
+}
+
+/*
+ * Translator init: allocate the private state, set up the request queue with
+ * its lock and condition variable, read the "log-path" option and start the
+ * worker thread.
+ *
+ * this->private is assigned before the worker is created because the worker
+ * reads it as its very first action.
+ *
+ * Returns 0 on success.  On failure returns -1 after destroying whatever was
+ * initialized and leaving this->private NULL.
+ */
+int
+fdl_init (xlator_t *this)
+{
+        fdl_private_t   *priv       = NULL;
+        gf_boolean_t     lock_ready = _gf_false;
+        gf_boolean_t     cond_ready = _gf_false;
+
+        priv = GF_CALLOC (1, sizeof (*priv), gf_fdl_mt_fdl_private_t);
+        if (!priv) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to allocate fdl_private");
+                goto err;
+        }
+
+        INIT_LIST_HEAD (&priv->reqs);
+        if (pthread_mutex_init (&priv->req_lock, NULL) != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to initialize req_lock");
+                goto err;
+        }
+        lock_ready = _gf_true;
+        if (pthread_cond_init (&priv->req_cond, NULL) != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to initialize req_cond");
+                goto err;
+        }
+        cond_ready = _gf_true;
+
+        GF_OPTION_INIT ("log-path", priv->log_dir, path, err);
+
+        /* Publish the state first: fdl_worker dereferences it at once. */
+        this->private = priv;
+        if (pthread_create(&priv->worker,NULL,fdl_worker,this) != 0) {
+                gf_log (this->name, GF_LOG_ERROR,
+                        "failed to start fdl_worker");
+                this->private = NULL;
+                goto err;
+        }
+
+        /*
+         * The rest of the fop table is automatically generated, so this is a
+         * bit cleaner than messing with the generation to add a hand-written
+         * exception.
+         */
+        this->fops->ipc = fdl_ipc;
+
+        return 0;
+
+err:
+        if (cond_ready) {
+                pthread_cond_destroy (&priv->req_cond);
+        }
+        if (lock_ready) {
+                pthread_mutex_destroy (&priv->req_lock);
+        }
+        if (priv) {
+                GF_FREE(priv);
+        }
+        return -1;
+}
+
+/*
+ * Translator teardown: ask the worker to stop, wait for it to exit, then
+ * destroy the queue's lock and condition variable and free the private
+ * state.  Does nothing when init never completed.
+ */
+void
+fdl_fini (xlator_t *this)
+{
+        fdl_private_t   *priv = this->private;
+
+        if (!priv) {
+                return;
+        }
+
+        /* The worker reads should_stop under req_lock; set it the same way. */
+        pthread_mutex_lock (&priv->req_lock);
+        priv->should_stop = _gf_true;
+        pthread_mutex_unlock (&priv->req_lock);
+        pthread_cond_signal (&priv->req_cond);
+        pthread_join (priv->worker, NULL);
+
+        /* Nobody else uses these once the worker has been joined. */
+        pthread_cond_destroy (&priv->req_cond);
+        pthread_mutex_destroy (&priv->req_lock);
+
+        this->private = NULL;
+        GF_FREE(priv);
+}
+
+/*
+ * Re-read the journal directory option after a volume reconfigure.  The key
+ * must be "log-path", the name declared in this translator's options table
+ * and read at init time.  Always returns 0.
+ */
+int
+fdl_reconfigure (xlator_t *this, dict_t *options)
+{
+        fdl_private_t   *priv = this->private;
+
+        GF_OPTION_RECONF ("log-path", priv->log_dir, options, path, out);
+        /* TBD: react if it changed */
+
+out:
+        return 0;
+}
+
+/*
+ * Register this translator's allocation types with memory accounting.
+ * Returns the result of xlator_mem_acct_init, or -1 when "this" fails
+ * validation.
+ */
+int32_t
+mem_acct_init (xlator_t *this)
+{
+        int     ret = -1;
+
+        GF_VALIDATE_OR_GOTO ("fdl", this, out);
+
+        ret = xlator_mem_acct_init (this, gf_fdl_mt_end + 1);
+        if (ret != 0) {
+                /* One literal: the split form lost the space before "failed". */
+                gf_log (this->name, GF_LOG_ERROR,
+                        "Memory accounting init failed");
+        }
+out:
+        return ret;
+}
+
+class_methods_t class_methods = { /* lifecycle entry points of this translator */
+ .init = fdl_init,
+ .fini = fdl_fini,
+ .reconfigure = fdl_reconfigure,
+ .notify = default_notify, /* no translator-specific notify handling */
+};
+
+struct volume_options options[] = {
+ { .key = {"log-path"}, /* read by fdl_init into the private log_dir field */
+ .type = GF_OPTION_TYPE_PATH,
+ .default_value = DEFAULT_LOG_FILE_DIRECTORY,
+ .description = "Directory for FDL files."
+ },
+ { .key = {NULL} }, /* end-of-table marker */
+};
+
+struct xlator_cbks cbks = { /* every callback uses the default implementation */
+ .release = default_release,
+ .releasedir = default_releasedir,
+ .forget = default_forget,
+};
diff --git a/xlators/experimental/fdl/src/gen_dumper.py b/xlators/experimental/fdl/src/gen_dumper.py
new file mode 100755
index 00000000000..42db55d2cb3
--- /dev/null
+++ b/xlators/experimental/fdl/src/gen_dumper.py
@@ -0,0 +1,116 @@
+#!/usr/bin/python
+
+import os
+import re
+import sys
+
+curdir = os.path.dirname (sys.argv[0])
+gendir = os.path.join (curdir, '../../../../libglusterfs/src')
+sys.path.append (gendir)
+from generator import ops, fop_subs, cbk_subs, generate
+
+# See the big header comment at the start of gen_fdl.py to see how the stages
+# fit together. The big difference here is that *all* of the C code is in the
+# template file as labelled fragments, instead of as Python strings. That
+# makes it much easier to edit in one place, with proper syntax highlighting
+# and indentation.
+#
+# Stage 1 uses type-specific fragments to generate FUNCTION_BODY, instead of
+# LEN_*_TEMPLATE and SERLZ_*_TEMPLATE to generate LEN_CODE and SER_CODE.
+#
+# Stage 2 uses the FOP and CASE fragments instead of RECON_TEMPLATE and
+# FOP_TEMPLATE. The expanded FOP code (including FUNCTION_BODY substitution
+# in the middle of each function) is emitted immediately; the expanded CASE
+# code is saved for the next stage.
+#
+# Stage 3 uses the PROLOG and EPILOG fragments, with the expanded CASE code
+# in the middle of EPILOG, to generate the whole output file.
+#
+# Another way of looking at it is to consider how the fragments appear in
+# the final output:
+#
+# PROLOG
+# FOP (expanded for CREATE)
+# FOP before FUNCTION_BODY
+# LOC, INTEGER, GFID, etc. (one per arg, by type)
+# FOP after FUNCTION_BODY
+# FOP (expanded for WRITEV)
+# FOP before FUNCTION_BODY
+# GFID, VECTOR, etc. (one per arg, by type)
+# FOP after FUNCTION_BODY
+# (more FOPs)
+# EPILOG
+# EPILOG before CASE
+# CASE statements (one per fop)
+# EPILOG after CASE
+
+typemap = {  # C argument type -> (fragment name, printf format substituted for the format placeholder)
+ 'dict_t *': ( "DICT", ""),
+ 'fd_t *': ( "GFID", ""),
+ 'dev_t': ( "DOUBLE", "%ld (0x%lx)"),
+ 'gf_xattrop_flags_t': ( "INTEGER", "%d (0x%x)"),
+ 'int32_t': ( "INTEGER", "%d (0x%x)"),
+ 'mode_t': ( "INTEGER", "%d (0x%x)"),
+ 'off_t': ( "DOUBLE", "%ld (0x%lx)"),
+ 'size_t': ( "DOUBLE", "%ld (0x%lx)"),
+ 'uint32_t': ( "INTEGER", "%d (0x%x)"),
+ 'loc_t *': ( "LOC", ""),
+ 'const char *': ( "STRING", ""),
+ 'struct iovec *': ( "VECTOR", ""),
+ 'struct iatt *': ( "IATT", ""),
+}
+
+def get_special_subs (args):
+ code = ""
+ for arg in args:
+ if (arg[0] != 'fop-arg') or (len(arg) < 4):
+ continue
+ recon_type, recon_fmt = typemap[arg[2]]
+ code += fragments[recon_type].replace("@ARGNAME@",arg[3]) \
+ .replace("@FORMAT@",recon_fmt)
+ return code
+
+def gen_functions ():
+ code = ""
+ for name, value in ops.iteritems():
+ if "journal" not in [ x[0] for x in value ]:
+ continue
+ fop_subs[name]["@FUNCTION_BODY@"] = get_special_subs(value)
+ # Print the FOP fragment with @FUNCTION_BODY@ in the middle.
+ code += generate(fragments["FOP"],name,fop_subs)
+ return code
+
+def gen_cases ():
+ code = ""
+ for name, value in ops.iteritems():
+ if "journal" not in [ x[0] for x in value ]:
+ continue
+ # Add the CASE fragment for this fop.
+ code += generate(fragments["CASE"],name,fop_subs)
+ return code
+
+def load_fragments (path="recon-tmpl.c"):
+ pragma_re = re.compile('pragma fragment (.*)')
+ cur_symbol = None
+ cur_value = ""
+ result = {}
+ for line in open(path,"r").readlines():
+ m = pragma_re.search(line)
+ if m:
+ if cur_symbol:
+ result[cur_symbol] = cur_value
+ cur_symbol = m.group(1)
+ cur_value = ""
+ else:
+ cur_value += line
+ if cur_symbol:
+ result[cur_symbol] = cur_value
+ return result
+
+if __name__ == "__main__":
+ fragments = load_fragments(sys.argv[1])
+ print "/* BEGIN GENERATED CODE - DO NOT MODIFY */"
+ print fragments["PROLOG"]
+ print gen_functions()
+ print fragments["EPILOG"].replace("@SWITCH_BODY@",gen_cases())
+ print "/* END GENERATED CODE */"
diff --git a/xlators/experimental/fdl/src/gen_fdl.py b/xlators/experimental/fdl/src/gen_fdl.py
new file mode 100755
index 00000000000..7f6b1aaaeaa
--- /dev/null
+++ b/xlators/experimental/fdl/src/gen_fdl.py
@@ -0,0 +1,328 @@
+#!/usr/bin/python
+
+import os
+import sys
+
+curdir = os.path.dirname (sys.argv[0])
+gendir = os.path.join (curdir, '../../../../libglusterfs/src')
+sys.path.append (gendir)
+from generator import ops, fop_subs, cbk_subs, generate
+
+# Generation occurs in three stages. In this case, it actually makes more
+# sense to discuss them in the *opposite* order of that in which they
+# actually happen.
+#
+# Stage 3 is to insert all of the generated code into a file, replacing the
+# "#pragma generate" that's already there. The file can thus contain all
+# sorts of stuff that's not specific to one fop, either before or after the
+# generated code as appropriate.
+#
+# Stage 2 is to generate all of the code *for a particular fop*, using a
+# string-valued template plus a table of substitution values. Most of these
+# are built in to the generator itself. However, we also add a couple that
+# are specific to this particular translator - LEN_CODE and SER_CODE. These
+# are per-fop functions to get the length or the contents (respectively) of
+# what we'll put in the log. As with stage 3 allowing per-file boilerplate
+# before and after generated code, this allows per-fop boilerplate before and
+# after generated code.
+#
+# Stage 1, therefore, is to create the LEN_CODE and SER_CODE substitutions for
+# each fop, and put them in the same table where e.g. NAME and SHORT_ARGS
+# already are. We do this by looking at the fop-description table in the
+# generator module, then doing our own template substitution to plug each
+# specific argument name into another string-valued template.
+#
+# So, what does this leave us with in terms of variables and files?
+#
+# For stage 1, we have a series of LEN_*_TEMPLATE and SERLZ_*_TEMPLATE
+# strings, which are used to generate the length and serialization code for
+# each argument type.
+#
+# For stage 2, we have a bunch of *_TEMPLATE strings (no LEN_ or SERLZ_
+# prefix), which are used (along with the output from stage 1) to generate
+# whole functions.
+#
+# For stage 3, we have a whole separate file (fdl-tmpl.c) into which we insert
+# the collection of all functions defined in stage 2.
+
+
+LEN_TEMPLATE = """
+void
+fdl_len_@NAME@ (call_stub_t *stub)
+{
+ uint32_t meta_len = sizeof (event_header_t);
+ uint32_t data_len = 0;
+
+ /* TBD: global stuff, e.g. uid/gid */
+@LEN_CODE@
+
+ /* TBD: pad extension length */
+ stub->jnl_meta_len = meta_len;
+ stub->jnl_data_len = data_len;
+}
+"""  # per-fop function that stores the journal record's meta and data sizes in the stub
+
+SER_TEMPLATE = """
+void
+fdl_serialize_@NAME@ (call_stub_t *stub, char *meta_buf, char *data_buf)
+{
+ event_header_t *eh;
+ unsigned long offset = 0;
+
+ /* TBD: word size/endianness */
+ eh = (event_header_t *)meta_buf;
+ eh->event_type = NEW_REQUEST;
+ eh->fop_type = GF_FOP_@UPNAME@;
+ eh->request_id = 0; // TBD
+ meta_buf += sizeof (*eh);
+@SER_CODE@
+ /* TBD: pad extension length */
+ eh->ext_length = offset;
+}
+"""  # per-fop function that writes the event header, then the arguments, into the journal buffers
+
+CBK_TEMPLATE = """
+int32_t
+fdl_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ @LONG_ARGS@)
+{
+ STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""  # per-fop callback that only unwinds the result to the caller
+
+CONTINUE_TEMPLATE = """
+int32_t
+fdl_@NAME@_continue (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ STACK_WIND (frame, fdl_@NAME@_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+
+"""  # per-fop function that winds the call to the first child with the generated callback
+
+FOP_TEMPLATE = """
+int32_t
+fdl_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ call_stub_t *stub;
+
+ stub = fop_@NAME@_stub (frame, default_@NAME@,
+ @SHORT_ARGS@);
+ fdl_len_@NAME@ (stub);
+ stub->serialize = fdl_serialize_@NAME@;
+ fdl_enqueue (this, stub);
+
+ return 0;
+}
+"""  # fop entry point: build a stub, size its record, queue it. NOTE(review): stub is used without a NULL check and resumes into the default fop, not the generated _continue function - confirm both are intended
+
+LEN_DICT_TEMPLATE = """
+ if (@SRC@) {
+ data_pair_t *memb;
+ for (memb = @SRC@->members_list; memb; memb = memb->next) {
+ meta_len += sizeof(int);
+ meta_len += strlen(memb->key) + 1;
+ meta_len += sizeof(int);
+ meta_len += memb->value->len;
+ }
+ }
+ meta_len += sizeof(int);
+"""  # per pair: int key length, key with NUL, int value length, value; plus one int for the terminator
+
+LEN_GFID_TEMPLATE = """
+ meta_len += 16;
+"""  # used for fd_t arguments, which are journaled as a 16-byte gfid
+
+LEN_INTEGER_TEMPLATE = """
+ meta_len += sizeof (@SRC@);
+"""  # scalar arguments take exactly their own size
+
+# 16 for gfid, 16 for pargfid, 1 for flag, 0/1 for terminating NUL
+LEN_LOC_TEMPLATE = """
+ if (@SRC@.name) {
+ meta_len += (strlen (@SRC@.name) + 34);
+ } else {
+ meta_len += 33;
+ }
+"""  # has to agree byte for byte with what SERLZ_LOC_TEMPLATE writes
+
+LEN_STRING_TEMPLATE = """
+ if (@SRC@) {
+ meta_len += (strlen (@SRC@) + 1);
+ } else {
+ meta_len += 1;
+ }
+"""
+
+LEN_VECTOR_TEMPLATE = """
+ meta_len += sizeof(size_t);
+ data_len += iov_length (@VEC@, @CNT@);
+"""
+
+LEN_IATT_TEMPLATE = """
+ meta_len += sizeof(@SRC@.ia_prot);
+ meta_len += sizeof(@SRC@.ia_uid);
+ meta_len += sizeof(@SRC@.ia_gid);
+ meta_len += sizeof(@SRC@.ia_atime);
+ meta_len += sizeof(@SRC@.ia_atime_nsec);
+ meta_len += sizeof(@SRC@.ia_mtime);
+ meta_len += sizeof(@SRC@.ia_mtime_nsec);
+"""
+
+SERLZ_DICT_TEMPLATE = """
+ if (@SRC@) {
+ data_pair_t *memb;
+ for (memb = @SRC@->members_list; memb; memb = memb->next) {
+ *((int *)(meta_buf+offset)) = strlen(memb->key) + 1;
+ offset += sizeof(int);
+ strcpy (meta_buf+offset, memb->key);
+ offset += strlen(memb->key) + 1;
+ *((int *)(meta_buf+offset)) = memb->value->len;
+ offset += sizeof(int);
+ memcpy (meta_buf+offset, memb->value->data, memb->value->len);
+ offset += memb->value->len;
+ }
+ }
+ *((int *)(meta_buf+offset)) = 0;
+ offset += sizeof(int);
+"""
+
+SERLZ_GFID_TEMPLATE = """
+ memcpy (meta_buf+offset, @SRC@->inode->gfid, 16);
+ offset += 16;
+"""
+
+SERLZ_INTEGER_TEMPLATE = """
+ memcpy (meta_buf+offset, &@SRC@, sizeof(@SRC@));
+ offset += sizeof(@SRC@);
+"""
+
+SERLZ_LOC_TEMPLATE = """
+ memcpy (meta_buf+offset, @SRC@.gfid, 16);
+ offset += 16;
+ memcpy (meta_buf+offset, @SRC@.pargfid, 16);
+ offset += 16;
+ if (@SRC@.name) {
+ *(meta_buf+offset) = 1;
+ ++offset;
+ strcpy (meta_buf+offset, @SRC@.name);
+ offset += (strlen (@SRC@.name) + 1);
+ } else {
+ *(meta_buf+offset) = 0;
+ ++offset;
+ }
+"""
+
+SERLZ_STRING_TEMPLATE = """
+ if (@SRC@) {
+ *(meta_buf+offset) = 1;
+ ++offset;
+ strcpy (meta_buf+offset, @SRC@);
+ offset += strlen(@SRC@);
+ } else {
+ *(meta_buf+offset) = 0;
+ ++offset;
+ }
+"""
+
+SERLZ_VECTOR_TEMPLATE = """
+ *((size_t *)(meta_buf+offset)) = iov_length (@VEC@, @CNT@);
+ offset += sizeof(size_t);
+ int32_t i;
+ for (i = 0; i < @CNT@; ++i) {
+ memcpy (data_buf, @VEC@[i].iov_base, @VEC@[i].iov_len);
+ data_buf += @VEC@[i].iov_len;
+ }
+"""  # total length goes to the meta buffer, the iovec contents are copied back to back into the data buffer
+
+# We don't need to save all of the fields - only those affected by chown,
+# chgrp, chmod, and utime.
+SERLZ_IATT_TEMPLATE = """
+ *((ia_prot_t *)(meta_buf+offset)) = @SRC@.ia_prot;
+ offset += sizeof(@SRC@.ia_prot);
+ *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_uid;
+ offset += sizeof(@SRC@.ia_uid);
+ *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_gid;
+ offset += sizeof(@SRC@.ia_gid);
+ *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_atime;
+ offset += sizeof(@SRC@.ia_atime);
+ *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_atime_nsec;
+ offset += sizeof(@SRC@.ia_atime_nsec);
+ *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_mtime;
+ offset += sizeof(@SRC@.ia_mtime);
+ *((uint32_t *)(meta_buf+offset)) = @SRC@.ia_mtime_nsec;
+ offset += sizeof(@SRC@.ia_mtime_nsec);
+"""  # NOTE(review): stores go through uint32_t but offset advances by each field's sizeof - assumes those fields are 32 bits wide, confirm
+
+typemap = {  # C argument type -> (length template, serialization template); the "vector" argument is special-cased in get_special_subs
+ 'dict_t *': ( LEN_DICT_TEMPLATE, SERLZ_DICT_TEMPLATE),
+ 'fd_t *': ( LEN_GFID_TEMPLATE, SERLZ_GFID_TEMPLATE),
+ 'dev_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
+ 'gf_xattrop_flags_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
+ 'int32_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
+ 'mode_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
+ 'off_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
+ 'size_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
+ 'uint32_t': ( LEN_INTEGER_TEMPLATE, SERLZ_INTEGER_TEMPLATE),
+ 'loc_t *': ( LEN_LOC_TEMPLATE, SERLZ_LOC_TEMPLATE),
+ 'const char *': ( LEN_STRING_TEMPLATE, SERLZ_STRING_TEMPLATE),
+ 'struct iatt *': ( LEN_IATT_TEMPLATE, SERLZ_IATT_TEMPLATE),
+}
+
+def get_special_subs (args):
+ len_code = ""
+ ser_code = ""
+ for arg in args:
+ if (arg[0] != 'fop-arg') or (len(arg) < 4):
+ continue
+ # Let this throw an exception if we get an unknown field name. The
+ # broken build will remind whoever messed with the stub code that a
+ # corresponding update is needed here.
+ if arg[3] == "vector":
+ # Make it as obvious as possible that this is a special case.
+ len_code += LEN_VECTOR_TEMPLATE \
+ .replace("@VEC@","stub->args.vector") \
+ .replace("@CNT@","stub->args.count")
+ ser_code += SERLZ_VECTOR_TEMPLATE \
+ .replace("@VEC@","stub->args.vector") \
+ .replace("@CNT@","stub->args.count")
+ else:
+ len_tmpl, ser_tmpl = typemap[arg[2]]
+ src = "stub->args.%s" % arg[3]
+ len_code += len_tmpl.replace("@SRC@",src)
+ ser_code += ser_tmpl.replace("@SRC@",src)
+ return len_code, ser_code
+
+def gen_fdl ():
+ entrypoints = []
+ for name, value in ops.iteritems():
+ if "journal" not in [ x[0] for x in value ]:
+ continue
+ len_code, ser_code = get_special_subs(value)
+ fop_subs[name]["@LEN_CODE@"] = len_code[:-1]
+ fop_subs[name]["@SER_CODE@"] = ser_code[:-1]
+ print generate(LEN_TEMPLATE,name,fop_subs)
+ print generate(SER_TEMPLATE,name,fop_subs)
+ print generate(CBK_TEMPLATE,name,cbk_subs)
+ print generate(CONTINUE_TEMPLATE,name,fop_subs)
+ print generate(FOP_TEMPLATE,name,fop_subs)
+ entrypoints.append(name)
+ print "struct xlator_fops fops = {"
+ for ep in entrypoints:
+ print "\t.%s = fdl_%s," % (ep, ep)
+ print "};"
+
+for l in open(sys.argv[1],'r').readlines():
+ if l.find('#pragma generate') != -1:
+ print "/* BEGIN GENERATED CODE - DO NOT MODIFY */"
+ gen_fdl()
+ print "/* END GENERATED CODE */"
+ else:
+ print l[:-1]
diff --git a/xlators/experimental/fdl/src/gen_recon.py b/xlators/experimental/fdl/src/gen_recon.py
new file mode 100755
index 00000000000..26318f92d88
--- /dev/null
+++ b/xlators/experimental/fdl/src/gen_recon.py
@@ -0,0 +1,191 @@
+#!/usr/bin/python
+
+import os
+import re
+import string
+import sys
+
+curdir = os.path.dirname (sys.argv[0])
+gendir = os.path.join (curdir, '../../../../libglusterfs/src')
+sys.path.append (gendir)
+from generator import ops, fop_subs, cbk_subs, generate
+
+# See the big header comment at the start of gen_fdl.py to see how the stages
+# fit together. The big difference here is that *all* of the C code is in the
+# template file as labelled fragments, instead of as Python strings. That
+# makes it much easier to edit in one place, with proper syntax highlighting
+# and indentation.
+#
+# Stage 1 uses type-specific fragments to generate FUNCTION_BODY, instead of
+# LEN_*_TEMPLATE and SERLZ_*_TEMPLATE to generate LEN_CODE and SER_CODE.
+#
+# Stage 2 uses the FOP and CASE fragments instead of RECON_TEMPLATE and
+# FOP_TEMPLATE. The expanded FOP code (including FUNCTION_BODY substitution
+# in the middle of each function) is emitted immediately; the expanded CASE
+# code is saved for the next stage.
+#
+# Stage 3 uses the PROLOG and EPILOG fragments, with the expanded CASE code
+# in the middle of EPILOG, to generate the whole output file.
+#
+# Another way of looking at it is to consider how the fragments appear in
+# the final output:
+#
+# PROLOG
+# FOP (expanded for CREATE)
+# FOP before FUNCTION_BODY
+# LOC, INTEGER, GFID, etc. (one per arg, by type)
+# FOP after FUNCTION_BODY
+# FOP (expanded for WRITEV)
+# FOP before FUNCTION_BODY
+# GFID, VECTOR, etc. (one per arg, by type)
+# FOP after FUNCTION_BODY
+# (more FOPs)
+# EPILOG
+# EPILOG before CASE
+# CASE statements (one per fop)
+# EPILOG after CASE
+
+# Maps the C type of a fop argument to the name of the template fragment
+# that parses a value of that type back out of the journal.
+typemap = {
+    # Pointer and structure types, each with a dedicated fragment.
+    'const char *':         "STRING",
+    'dict_t *':             "DICT",
+    'fd_t *':               "FD",
+    'loc_t *':              "LOC",
+    'struct iatt *':        "IATT",
+    'struct iobref *':      "IOBREF",
+    'struct iovec *':       "VECTOR",
+    # Scalars handled by the DOUBLE fragment.
+    'dev_t':                "DOUBLE",
+    'off_t':                "DOUBLE",
+    'size_t':               "DOUBLE",
+    # Scalars handled by the INTEGER fragment.
+    'gf_xattrop_flags_t':   "INTEGER",
+    'int32_t':              "INTEGER",
+    'mode_t':               "INTEGER",
+    'uint32_t':             "INTEGER",
+}
+
+def get_special_subs (name, args, fop_type):
+    """Build the per-fop pieces substituted into the FOP fragment.
+
+    name     -- fop name (e.g. "writev"); drives the special cases below
+    args     -- the fop's list of entries from the generator's ops table
+    fop_type -- value of the fop's 'journal' entry; "entry-op" makes loc_t
+                arguments use the PARENT_LOC fragment
+
+    Returns (code, links, s_args_str, cleanups):
+      code       -- argument declarations plus journal-parsing code
+      links      -- expanded LINK fragments, one per 'link' entry
+      s_args_str -- comma-separated argument list for the syncop call
+      cleanups   -- labelled cleanup fragments, in reverse order of
+                    acquisition
+
+    An argument type missing from typemap raises KeyError.
+    """
+    code = ""
+    cleanups = ""
+    links = ""
+    s_args = []
+    for arg in args:
+        if arg[0] == 'extra':
+            # Extra syncop argument: declare it and pass it as given.
+            code += "\t%s %s;\n\n" % (arg[2], arg[1])
+            s_args.append(arg[3])
+            continue
+        if arg[0] == 'link':
+            links += fragments["LINK"].replace("@INODE_ARG@",arg[1]) \
+                                      .replace("@IATT_ARG@",arg[2])
+            continue
+        if arg[0] != 'fop-arg':
+            continue
+        if (name, arg[1]) == ('writev', 'count'):
+            # Special case: just skip this.  We can't mark it as 'nosync'
+            # because of the way the translator and dumper generators look
+            # for that after 'stub-name' which we don't define.  Instead of
+            # adding a bunch of generic infrastructure for this one case,
+            # just handle it here.
+            continue
+        recon_type = typemap[arg[2]]
+        if (name == "create") and (arg[1] == "fd"):
+            # Special case: fd for create is new, not looked up.
+            recon_type = "NEW_FD"
+        elif (recon_type == "LOC") and (fop_type == "entry-op"):
+            # Need to treat this differently for inode vs. entry ops.
+            # Special case: link source is treated like inode-op.
+            if (name != "link") or (arg[1] != "oldloc"):
+                recon_type = "PARENT_LOC"
+        code += fragments[recon_type].replace("@ARGNAME@",arg[1]) \
+                                     .replace("@ARGTYPE@",arg[2])
+        cleanup_key = recon_type + "_CLEANUP"
+        if cleanup_key in fragments:
+            new_frag = fragments[cleanup_key].replace("@ARGNAME@",arg[1])
+            # Cleanups must be emitted in *reverse* order of acquisition.
+            # Each parsing fragment points err_label at its own cleanup
+            # label, and a jump to that label falls through everything that
+            # follows it.  In forward order a failure on a later argument
+            # would fall through into cleanup code for arguments that were
+            # never initialized, and would skip the cleanup of earlier ones.
+            cleanups = new_frag + cleanups
+        if 'nosync' in arg[4:]:
+            # Parsed only to advance the journal pointer; the cast silences
+            # the unused-variable warning.
+            code += "\t(void)%s;\n" % arg[1]
+            continue
+        if arg[2] in ("loc_t *", "struct iatt *"):
+            # These are passed as pointers to the syncop, but they're actual
+            # structures in the generated code.
+            s_args.append("&"+arg[1])
+        else:
+            s_args.append(arg[1])
+    # A couple of syncops take their arguments in a different order than the
+    # fops they are based on, so their argument lists are spelled out by hand.
+    if name == 'writev':
+        # Swap 'flags' and 'iobref'.  Also, we need to add the iov count,
+        # which is not stored in or read from the journal.  There are other
+        # ways to do that, but this is the only place we need anything
+        # similar and we already have to treat it as a special case so this
+        # is simplest.
+        s_args_str = 'fd, &vector, 1, off, iobref, flags, xdata'
+    elif name == 'symlink':
+        # Swap 'linkpath' and 'loc'.
+        s_args_str = '&loc, linkpath, &iatt, xdata'
+    else:
+        s_args_str = ", ".join(s_args)
+    return code, links, s_args_str, cleanups
+
+# TBD: probably need to generate type-specific cleanup code as well - e.g.
+# fd_unref for an fd_t, loc_wipe for a loc_t, and so on. All of these
+# generated CLEANUP fragments will go at the end of the function, with goto
+# labels. Meanwhile, the error-checking part of each type-specific fragment
+# (e.g. LOC or FD) will need to update the indirect label that we jump to when
+# an error is detected. This will probably get messy.
+def gen_functions ():
+    """Return the expanded FOP fragment for every journaled fop.
+
+    For each fop with a 'journal' entry, the substitutions computed by
+    get_special_subs are stored in fop_subs[name] and the FOP fragment is
+    expanded with them.
+    """
+    pieces = []
+    for name, value in ops.iteritems():
+        journal_types = [ x[1] for x in value if x[0] == "journal" ]
+        if not journal_types:
+            continue
+        body, links, syncop_args, cleanups = get_special_subs (
+            name, value, journal_types[0])
+        subs = fop_subs[name]
+        subs["@FUNCTION_BODY@"] = body
+        subs["@LINKS@"] = links
+        subs["@SYNCOP_ARGS@"] = syncop_args
+        subs["@CLEANUPS@"] = cleanups
+        # Take advantage of the fact that, *during reconciliation*, the
+        # writev vector is always a single element (in normal I/O it's not),
+        # so the expected return value is that element's length.
+        if name == "writev":
+            subs["@SUCCESS_VALUE@"] = "vector.iov_len"
+        else:
+            subs["@SUCCESS_VALUE@"] = "GFAPI_SUCCESS"
+        # Expand the FOP fragment with @FUNCTION_BODY@ in the middle.
+        pieces.append(generate(fragments["FOP"],name,fop_subs))
+    return "".join(pieces)
+
+def gen_cases ():
+    """Return the expanded CASE fragment for every journaled fop."""
+    cases = []
+    for name, value in ops.iteritems():
+        if "journal" in [ x[0] for x in value ]:
+            # One switch case per fop that has a 'journal' entry.
+            cases.append(generate(fragments["CASE"],name,fop_subs))
+    return "".join(cases)
+
+def load_fragments (path="recon-tmpl.c"):
+    """Split a template file into named fragments.
+
+    A line containing 'pragma fragment NAME' starts fragment NAME; every
+    following line, up to the next such line or end of file, is that
+    fragment's text.  Lines before the first pragma are discarded.  Returns
+    a dict mapping fragment name to text.
+    """
+    pragma_re = re.compile('pragma fragment (.*)')
+    cur_symbol = None
+    cur_value = ""
+    result = {}
+    # Close the file deterministically instead of relying on the garbage
+    # collector.
+    with open(path,"r") as tmpl_file:
+        for line in tmpl_file:
+            m = pragma_re.search(line)
+            if m:
+                if cur_symbol:
+                    result[cur_symbol] = cur_value
+                # Strip stray whitespace (trailing blanks, or the CR of a
+                # CRLF line ending) so later lookups by name succeed.
+                cur_symbol = m.group(1).strip()
+                cur_value = ""
+            else:
+                cur_value += line
+    if cur_symbol:
+        result[cur_symbol] = cur_value
+    return result
+
+if __name__ == "__main__":
+    # The fragment table is a module-level global: the generator functions
+    # above read it directly.  argv[1] names the template file.
+    fragments = load_fragments(sys.argv[1])
+    print "/* BEGIN GENERATED CODE - DO NOT MODIFY */"
+    print fragments["PROLOG"]
+    functions_code = gen_functions()
+    print functions_code
+    # The per-fop switch cases go in the middle of the epilog.
+    epilog = fragments["EPILOG"]
+    print epilog.replace("@SWITCH_BODY@",gen_cases())
+    print "/* END GENERATED CODE */"
diff --git a/xlators/experimental/fdl/src/jnl-types.h b/xlators/experimental/fdl/src/jnl-types.h
new file mode 100644
index 00000000000..8cb39d01a25
--- /dev/null
+++ b/xlators/experimental/fdl/src/jnl-types.h
@@ -0,0 +1,14 @@
+#ifndef JNL_TYPES_H
+#define JNL_TYPES_H
+
+/* The fixed-width types below come from here; don't rely on the includer. */
+#include <stdint.h>
+
+/* Value of event_header_t.event_type for a newly journaled request. */
+#define NEW_REQUEST ((uint8_t)'N')
+
+/* Header that precedes each event in the journal metadata. */
+typedef struct {
+        uint8_t         event_type;     /* e.g. NEW_REQUEST */
+        uint8_t         fop_type;       /* e.g. GF_FOP_SETATTR */
+        uint16_t        request_id;
+        uint32_t        ext_length;
+} event_header_t;
+
+/*
+ * Operation codes for the fdl IPC fop.
+ * NOTE(review): FDL_IPC_BASE does not fit in a signed int, so this relies on
+ * the compiler accepting out-of-range enumerators (GCC and Clang do when not
+ * in pedantic mode) - confirm for other toolchains.
+ */
+enum {
+        FDL_IPC_BASE = 0xfeedbee5, /* ... and they make honey */
+        FDL_IPC_CHANGE_TERM,
+        FDL_IPC_GET_TERMS,
+};
+
+#endif /* JNL_TYPES_H */
diff --git a/xlators/experimental/fdl/src/logdump.c b/xlators/experimental/fdl/src/logdump.c
new file mode 100644
index 00000000000..7c979c32a04
--- /dev/null
+++ b/xlators/experimental/fdl/src/logdump.c
@@ -0,0 +1,50 @@
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+extern int fdl_dump (char **, char **);
+
+/*
+ * Dump a journal: argv[1] is the metadata file and argv[2] is the data file.
+ * Both are mapped read-only and handed to fdl_dump until it returns zero.
+ */
+int
+main (int argc, char **argv)
+{
+        int     meta_fd = (-1);
+        char    *meta_buf = NULL;
+        int     data_fd = (-1);
+        char    *data_buf = NULL;
+
+        /* Without this check a missing argument passes NULL to open(). */
+        if (argc < 3) {
+                fprintf (stderr, "Usage: %s <meta-file> <data-file>\n",
+                         (argc > 0) ? argv[0] : "logdump");
+                return EXIT_FAILURE;
+        }
+
+        meta_fd = open (argv[1], O_RDONLY);
+        if (meta_fd < 0) {
+                perror ("open");
+                return EXIT_FAILURE;
+        }
+
+        /* TBD: get proper length */
+        meta_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0);
+        if (meta_buf == MAP_FAILED) {
+                perror ("mmap");
+                return EXIT_FAILURE;
+        }
+
+        data_fd = open (argv[2], O_RDONLY);
+        if (data_fd < 0) {
+                perror ("open");
+                return EXIT_FAILURE;
+        }
+
+        /* TBD: get proper length */
+        data_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0);
+        if (data_buf == MAP_FAILED) {
+                perror ("mmap");
+                return EXIT_FAILURE;
+        }
+
+        /* fdl_dump advances both pointers; it returns zero when done. */
+        for (;;) {
+                if (!fdl_dump(&meta_buf,&data_buf)) {
+                        break;
+                }
+        }
+
+        return EXIT_SUCCESS;
+}
diff --git a/xlators/experimental/fdl/src/recon-tmpl.c b/xlators/experimental/fdl/src/recon-tmpl.c
new file mode 100644
index 00000000000..523bda39418
--- /dev/null
+++ b/xlators/experimental/fdl/src/recon-tmpl.c
@@ -0,0 +1,305 @@
+#pragma fragment PROLOG
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs.h"
+#include "fd.h"
+#include "iatt.h"
+#include "syncop.h"
+#include "xlator.h"
+#include "glfs-internal.h"
+
+#include "jnl-types.h"
+
+#define GFAPI_SUCCESS 0
+
+/*
+ * Return the inode for the given GFID on the active subvolume.  The inode
+ * table is consulted first; if the GFID is not there, a new inode is looked
+ * up by GFID and linked into the table.  Returns NULL on failure.
+ *
+ * NOTE(review): the cleanup below assumes inode_new and inode_link each hand
+ * back their own reference, as elsewhere in libglusterfs - confirm.
+ */
+inode_t *
+recon_get_inode (glfs_t *fs, uuid_t gfid)
+{
+        inode_t         *inode;
+        loc_t           loc = {NULL,};
+        struct iatt     iatt;
+        int             ret;
+        inode_t         *newinode = NULL;
+
+        inode = inode_find (fs->active_subvol->itable, gfid);
+        if (inode) {
+                printf ("=== FOUND %s IN TABLE\n", uuid_utoa(gfid));
+                return inode;
+        }
+
+        loc.inode = inode_new (fs->active_subvol->itable);
+        if (!loc.inode) {
+                return NULL;
+        }
+        gf_uuid_copy (loc.inode->gfid, gfid);
+        gf_uuid_copy (loc.gfid, gfid);
+
+        printf ("=== DOING LOOKUP FOR %s\n", uuid_utoa(gfid));
+
+        ret = syncop_lookup (fs->active_subvol, &loc, &iatt,
+                             NULL, NULL, NULL);
+        if (ret != GFAPI_SUCCESS) {
+                fprintf (stderr, "syncop_lookup failed (%d)\n", ret);
+                goto out;
+        }
+
+        newinode = inode_link (loc.inode, NULL, NULL, &iatt);
+        if (newinode) {
+                inode_lookup (newinode);
+        }
+
+out:
+        /*
+         * Drop the reference held through loc.inode on every path; it was
+         * previously leaked both on lookup failure and after linking.
+         */
+        loc_wipe (&loc);
+        return newinode;
+}
+
+#pragma fragment DICT
+ /*
+ * Rebuild a dict_t argument from the journal metadata.  The encoding read
+ * here is a sequence of records (int key_len, key bytes, int data_len,
+ * data bytes), ended by a zero key_len.  Keys and values are not copied;
+ * they are handed to dict_set_static_bin as pointers into the journal.
+ * NOTE(review): presumably key_len includes the key's terminating NUL,
+ * since key_ptr is passed on as a string - confirm against the writer.
+ */
+ dict_t *@ARGNAME@;
+
+ @ARGNAME@ = dict_new();
+ if (!@ARGNAME@) {
+ goto *err_label;
+ }
+ err_label = &&cleanup_@ARGNAME@;
+
+ {
+ int key_len, data_len;
+ char *key_ptr;
+ int garbage;
+ for (;;) {
+ key_len = *((int *)new_meta);
+ new_meta += sizeof(int);
+ if (!key_len) {
+ break;
+ }
+ key_ptr = new_meta;
+ new_meta += key_len;
+ data_len = *((int *)new_meta);
+ new_meta += sizeof(int);
+ garbage = dict_set_static_bin (@ARGNAME@, key_ptr,
+ new_meta, data_len);
+ /* TBD: check error from dict_set_static_bin */
+ (void)garbage;
+ new_meta += data_len;
+ }
+ }
+
+#pragma fragment DICT_CLEANUP
+ /* Release the dict created by the DICT fragment. */
+cleanup_@ARGNAME@:
+ dict_unref (@ARGNAME@);
+
+#pragma fragment DOUBLE
+ /*
+ * Read a scalar argument that occupies eight bytes in the journal.
+ * NOTE(review): the value is read through the argument's own type but the
+ * pointer always advances by sizeof(uint64_t); this assumes the type is
+ * eight bytes wide on the replaying host - confirm for 32-bit builds.
+ */
+ @ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta);
+ new_meta += sizeof(uint64_t);
+
+#pragma fragment FD
+ /*
+ * Rebuild an fd_t argument: the journal holds a 16-byte GFID, which is
+ * resolved to an inode and then wrapped in an anonymous fd.  err_label is
+ * advanced after each step so a later failure unwinds only what has been
+ * acquired so far.
+ */
+ inode_t *@ARGNAME@_ino;
+ fd_t *@ARGNAME@;
+
+ @ARGNAME@_ino = recon_get_inode (fs, *((uuid_t *)new_meta));
+ new_meta += 16;
+ if (!@ARGNAME@_ino) {
+ goto *err_label;
+ }
+ err_label = &&cleanup_@ARGNAME@_ino;
+
+ @ARGNAME@ = fd_anonymous (@ARGNAME@_ino);
+ if (!@ARGNAME@) {
+ goto *err_label;
+ }
+ err_label = &&cleanup_@ARGNAME@;
+
+#pragma fragment FD_CLEANUP
+ /*
+ * Two entry points, in reverse order of acquisition: the first label
+ * releases the fd and falls through to release the inode; the second
+ * releases only the inode.
+ */
+cleanup_@ARGNAME@:
+ fd_unref (@ARGNAME@);
+cleanup_@ARGNAME@_ino:
+ inode_unref (@ARGNAME@_ino);
+
+#pragma fragment NEW_FD
+        /*
+         * This pseudo-type is only used for create, and in that case we know
+         * we'll be using loc.inode, so it's not worth generalizing to take an
+         * extra argument.  The 16-byte GFID stored in the journal for this
+         * argument is skipped rather than looked up.
+         */
+        fd_t    *@ARGNAME@ = fd_anonymous (loc.inode);
+
+        /* Test the variable just declared, not a hard-coded "fd". */
+        if (!@ARGNAME@) {
+                goto *err_label;
+        }
+        err_label = &&cleanup_@ARGNAME@;
+        new_meta += 16;
+
+#pragma fragment NEW_FD_CLEANUP
+        /* Release the anonymous fd created by the NEW_FD fragment. */
+cleanup_@ARGNAME@:
+        fd_unref (@ARGNAME@);
+
+#pragma fragment INTEGER
+ /* Read a scalar argument stored in the journal at its own size. */
+ @ARGTYPE@ @ARGNAME@ = *((@ARGTYPE@ *)new_meta);
+
+ new_meta += sizeof(@ARGTYPE@);
+
+#pragma fragment LOC
+ /*
+ * Rebuild a loc_t for an operation on an existing inode.  The layout read
+ * here is: 16-byte GFID, 16-byte parent GFID (skipped), one flag byte,
+ * and - only if the flag is non-zero - a NUL-terminated name.  The name is
+ * not copied; it points into the journal.
+ */
+ loc_t @ARGNAME@ = { NULL, };
+
+ @ARGNAME@.inode = recon_get_inode (fs, *((uuid_t *)new_meta));
+ if (!@ARGNAME@.inode) {
+ goto *err_label;
+ }
+ err_label = &&cleanup_@ARGNAME@;
+ gf_uuid_copy (@ARGNAME@.gfid, @ARGNAME@.inode->gfid);
+ new_meta += 16;
+ new_meta += 16; /* skip over pargfid */
+ if (*(new_meta++)) {
+ @ARGNAME@.name = new_meta;
+ new_meta += strlen(new_meta) + 1;
+ }
+
+#pragma fragment LOC_CLEANUP
+ /* Release whatever the LOC fragment attached to the loc_t. */
+cleanup_@ARGNAME@:
+ loc_wipe (&@ARGNAME@);
+
+#pragma fragment PARENT_LOC
+ /*
+ * Rebuild a loc_t for an entry operation.  The entry's own GFID is
+ * skipped; the parent is resolved from the parent GFID, the name is
+ * mandatory (a zero flag byte is an error), and a fresh inode is
+ * allocated for the entry itself.
+ */
+ loc_t @ARGNAME@ = { NULL, };
+
+ new_meta += 16; /* skip over gfid */
+ @ARGNAME@.parent = recon_get_inode (fs, *((uuid_t *)new_meta));
+ if (!@ARGNAME@.parent) {
+ goto *err_label;
+ }
+ err_label = &&cleanup_@ARGNAME@;
+ gf_uuid_copy (@ARGNAME@.pargfid, @ARGNAME@.parent->gfid);
+ new_meta += 16;
+ if (!*(new_meta++)) {
+ goto *err_label;
+ }
+ @ARGNAME@.name = new_meta;
+ new_meta += strlen(new_meta) + 1;
+
+ @ARGNAME@.inode = inode_new (fs->active_subvol->itable);
+ if (!@ARGNAME@.inode) {
+ goto *err_label;
+ }
+
+#pragma fragment PARENT_LOC_CLEANUP
+ /* Release whatever the PARENT_LOC fragment attached to the loc_t. */
+cleanup_@ARGNAME@:
+ loc_wipe (&@ARGNAME@);
+
+#pragma fragment STRING
+ /*
+ * Read a string argument: one flag byte followed, when the flag is
+ * non-zero, by a NUL-terminated string.  A zero flag is treated as an
+ * error.  The string is not copied; it points into the journal.
+ */
+ char *@ARGNAME@;
+ if (*(new_meta++)) {
+ @ARGNAME@ = new_meta;
+ new_meta += (strlen(new_meta) + 1);
+ }
+ else {
+ goto *err_label;
+ }
+
+#pragma fragment VECTOR
+ /*
+ * Rebuild a single-element I/O vector: the length comes from the metadata
+ * stream and the payload is the next iov_len bytes of the data stream.
+ */
+ struct iovec @ARGNAME@;
+
+ @ARGNAME@.iov_len = *((size_t *)new_meta);
+ new_meta += sizeof(@ARGNAME@.iov_len);
+ @ARGNAME@.iov_base = new_data;
+ new_data += @ARGNAME@.iov_len;
+
+#pragma fragment IATT
+        /*
+         * Rebuild a struct iatt argument.  The journal holds only ia_prot
+         * followed by six 32-bit values (uid, gid, atime, atime_nsec, mtime,
+         * mtime_nsec); every other field is zeroed instead of being left
+         * uninitialized.
+         */
+        struct iatt     @ARGNAME@ = {0,};
+        {
+                uint32_t        *myints;
+
+                @ARGNAME@.ia_prot = *((ia_prot_t *)new_meta);
+                new_meta += sizeof(ia_prot_t);
+                myints = (uint32_t *)new_meta;
+                @ARGNAME@.ia_uid = myints[0];
+                @ARGNAME@.ia_gid = myints[1];
+                @ARGNAME@.ia_atime = myints[2];
+                @ARGNAME@.ia_atime_nsec = myints[3];
+                @ARGNAME@.ia_mtime = myints[4];
+                @ARGNAME@.ia_mtime_nsec = myints[5];
+                new_meta += sizeof(*myints) * 6;
+        }
+
+#pragma fragment IOBREF
+ /*
+ * Nothing is read from the journal for an iobref argument; a new one is
+ * simply allocated.
+ */
+ struct iobref *@ARGNAME@;
+
+ @ARGNAME@ = iobref_new();
+ if (!@ARGNAME@) {
+ goto *err_label;
+ }
+ err_label = &&cleanup_@ARGNAME@;
+
+#pragma fragment IOBREF_CLEANUP
+ /* Release the iobref allocated by the IOBREF fragment. */
+cleanup_@ARGNAME@:
+ iobref_unref (@ARGNAME@);
+
+#pragma fragment LINK
+ /*
+ * Emitted after a successful syncop: link the inode into the inode table
+ * using the returned attributes, then count a lookup on the result.
+ * NOTE(review): new_inode is declared here, so this fragment can appear
+ * only once per generated function, and the inode it returns is never
+ * released in this fragment - confirm both are intended.
+ */
+ /* TBD: check error */
+ inode_t *new_inode = inode_link (@INODE_ARG@, NULL, NULL, @IATT_ARG@);
+ if (new_inode) {
+ inode_lookup (new_inode);
+ }
+
+#pragma fragment FOP
+/*
+ * Replay one journaled request of this type.  The generated body parses the
+ * arguments from the metadata/data cursors, issues the syncop and then runs
+ * the labelled cleanups.  err_label always points at the cleanup label for
+ * the most recently acquired resource, so "goto *err_label" unwinds exactly
+ * what has been set up so far.
+ *
+ * Returns 0 on success and 0xbad on any failure.  The advanced cursors are
+ * written back through old_meta and old_data in both cases.
+ */
+int
+fdl_replay_@NAME@ (glfs_t *fs, char **old_meta, char **old_data)
+{
+        char    *new_meta = *old_meta;
+        char    *new_data = *old_data;
+        int     ret;
+        int     status = 0xbad;
+        void    *err_label = &&done;
+
+@FUNCTION_BODY@
+
+        ret = syncop_@NAME@ (fs->active_subvol, @SYNCOP_ARGS@, NULL);
+        if (ret != @SUCCESS_VALUE@) {
+                /* Terminate the line so consecutive errors don't run together. */
+                fprintf (stderr, "syncop_@NAME@ returned %d\n", ret);
+                goto *err_label;
+        }
+
+@LINKS@
+
+        status = 0;
+
+@CLEANUPS@
+
+done:
+        *old_meta = new_meta;
+        *old_data = new_data;
+        return status;
+}
+
+#pragma fragment CASE
+ /*
+ * Dispatch one fop type to its replay function.  A non-zero return jumps
+ * to done with recognized still zero.
+ */
+ case GF_FOP_@UPNAME@:
+ printf ("=== GF_FOP_@UPNAME@\n");
+ if (fdl_replay_@NAME@ (fs, &new_meta, &new_data) != 0) {
+ goto done;
+ }
+ recognized = 1;
+ break;
+
+#pragma fragment EPILOG
+/*
+ * Replay the next event in the journal.  The event header is read from the
+ * metadata cursor and its fop_type selects the replay function through the
+ * generated switch.
+ *
+ * Returns 1 if the event was recognized and replayed successfully, 0 if the
+ * fop type is unknown or the replay failed.  The advanced cursors are
+ * written back through old_meta and old_data in both cases.
+ */
+int
+recon_execute (glfs_t *fs, char **old_meta, char **old_data)
+{
+ char *new_meta = *old_meta;
+ char *new_data = *old_data;
+ int recognized = 0;
+ event_header_t *eh;
+
+ eh = (event_header_t *)new_meta;
+ new_meta += sizeof (*eh);
+
+ /* TBD: check event_type instead of assuming NEW_REQUEST */
+
+ switch (eh->fop_type) {
+@SWITCH_BODY@
+
+ default:
+ printf ("unknown fop %u\n", eh->fop_type);
+ }
+
+done:
+ *old_meta = new_meta;
+ *old_data = new_data;
+ return recognized;
+}
diff --git a/xlators/experimental/fdl/src/recon.c b/xlators/experimental/fdl/src/recon.c
new file mode 100644
index 00000000000..14168a011e0
--- /dev/null
+++ b/xlators/experimental/fdl/src/recon.c
@@ -0,0 +1,89 @@
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include "glusterfs.h"
+#include "fd.h"
+#include "syncop.h"
+#include "glfs-internal.h"
+
+#define GFAPI_SUCCESS 0
+
+extern int recon_execute (glfs_t *, char **, char **);
+
+/*
+ * Replay a journal against a volume: argv[1] is the volfile, argv[2] the
+ * journal metadata file and argv[3] the journal data file.  Setting
+ * RECON_DEBUG in the environment sends gfapi logging to stderr at level 7;
+ * otherwise logging goes to /dev/null.
+ */
+int
+main (int argc, char **argv)
+{
+        glfs_t  *fs;
+        int     ret;
+        int     meta_fd = (-1);
+        char    *meta_buf = NULL;
+        int     data_fd = (-1);
+        char    *data_buf = NULL;
+
+        /* Without this check a missing argument is passed on as NULL. */
+        if (argc < 4) {
+                fprintf (stderr,
+                         "Usage: %s <volfile> <meta-file> <data-file>\n",
+                         (argc > 0) ? argv[0] : "recon");
+                return EXIT_FAILURE;
+        }
+
+        fs = glfs_new ("whocares");
+        if (!fs) {
+                fprintf (stderr, "glfs_new failed\n");
+                return EXIT_FAILURE;
+        }
+
+        if (getenv("RECON_DEBUG")) {
+                ret = glfs_set_logging (fs, "/dev/stderr", 7);
+        }
+        else {
+                ret = glfs_set_logging (fs, "/dev/null", 0);
+        }
+
+        if (ret != GFAPI_SUCCESS) {
+                fprintf (stderr, "glfs_set_logging failed (%d)\n", errno);
+                return EXIT_FAILURE;
+        }
+
+        ret = glfs_set_volfile (fs, argv[1]);
+        if (ret != GFAPI_SUCCESS) {
+                fprintf (stderr, "glfs_set_volfile failed (%d)\n", errno);
+                return EXIT_FAILURE;
+        }
+
+        ret = glfs_init (fs);
+        if (ret != GFAPI_SUCCESS) {
+                fprintf (stderr, "glfs_init failed (%d)\n", errno);
+                return EXIT_FAILURE;
+        }
+
+        meta_fd = open (argv[2], O_RDONLY);
+        if (meta_fd < 0) {
+                perror ("open");
+                return EXIT_FAILURE;
+        }
+
+        /* TBD: get proper length */
+        meta_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, meta_fd, 0);
+        if (meta_buf == MAP_FAILED) {
+                perror ("mmap");
+                return EXIT_FAILURE;
+        }
+
+        data_fd = open (argv[3], O_RDONLY);
+        if (data_fd < 0) {
+                perror ("open");
+                return EXIT_FAILURE;
+        }
+
+        /* TBD: get proper length */
+        data_buf = mmap (NULL, 1048576, PROT_READ, MAP_PRIVATE, data_fd, 0);
+        if (data_buf == MAP_FAILED) {
+                perror ("mmap");
+                return EXIT_FAILURE;
+        }
+
+        /* recon_execute advances both pointers; it returns zero to stop. */
+        for (;;) {
+                if (!recon_execute(fs,&meta_buf,&data_buf)) {
+                        break;
+                }
+        }
+
+        return EXIT_SUCCESS;
+}
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am
index 7e5783f4f30..649d9d8e9fa 100644
--- a/xlators/features/Makefile.am
+++ b/xlators/features/Makefile.am
@@ -1,5 +1,6 @@
-SUBDIRS = locks quota read-only mac-compat quiesce marker index barrier arbiter\
- protect compress changelog changetimerecorder ganesha gfid-access $(GLUPY_SUBDIR) qemu-block \
- upcall snapview-client snapview-server trash shard bit-rot #path-converter # filter
+SUBDIRS = locks quota read-only mac-compat quiesce marker index barrier \
+ arbiter protect compress changelog changetimerecorder ganesha \
+ gfid-access $(GLUPY_SUBDIR) qemu-block upcall snapview-client \
+ snapview-server trash shard bit-rot
CLEANFILES =
diff --git a/xlators/mgmt/glusterd/src/glusterd-volgen.c b/xlators/mgmt/glusterd/src/glusterd-volgen.c
index 2c52cf72a3f..3df4b3556cf 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volgen.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volgen.c
@@ -1783,6 +1783,30 @@ out:
return ret;
}
+/*
+ * Add the fdl translator to a brick graph when "features.fdl" is enabled in
+ * set_dict.  Returns 0 when the translator was added or is not enabled, and
+ * -1 on a missing argument or if the translator could not be added.
+ *
+ * Add this before (above) io-threads because it's not thread-safe yet.
+ */
+static int
+brick_graph_add_fdl (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
+                     dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
+{
+        xlator_t        *fdl_xl = NULL;
+
+        if (!graph || !volinfo || !set_dict)
+                return -1;
+
+        /* Option off (the default): nothing to add, and that's not an error. */
+        if (!dict_get_str_boolean (set_dict, "features.fdl", 0))
+                return 0;
+
+        fdl_xl = volgen_graph_add (graph, "experimental/fdl",
+                                   volinfo->volname);
+
+        return fdl_xl ? 0 : -1;
+}
+
static int
brick_graph_add_iot (volgen_graph_t *graph, glusterd_volinfo_t *volinfo,
dict_t *set_dict, glusterd_brickinfo_t *brickinfo)
@@ -2359,6 +2383,7 @@ static volgen_brick_xlator_t server_graph_table[] = {
{brick_graph_add_index, "index"},
{brick_graph_add_barrier, NULL},
{brick_graph_add_marker, "marker"},
+ {brick_graph_add_fdl, "fdl"},
{brick_graph_add_iot, "io-threads"},
{brick_graph_add_upcall, "upcall"},
{brick_graph_add_pump, NULL},
diff --git a/xlators/mgmt/glusterd/src/glusterd-volume-set.c b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
index 1463ef72c71..c0059d83cfe 100644
--- a/xlators/mgmt/glusterd/src/glusterd-volume-set.c
+++ b/xlators/mgmt/glusterd/src/glusterd-volume-set.c
@@ -2711,6 +2711,15 @@ struct volopt_map_entry glusterd_volopt_map[] = {
.op_version = GD_OP_VERSION_4_0_0,
.description = "percent of rep_count-1 bricks that must be up"
},
+ /* Full Data Logging */
+ {
+ .key = "features.fdl",
+ .voltype = "features/fdl",
+ .option = "!fdl",
+ .op_version = GD_OP_VERSION_4_0_0,
+ .flags = OPT_FLAG_XLATOR_OPT,
+ .type = NO_DOC,
+ },
{ .key = NULL
}
};